#!/usr/bin/gawk -f #Min and Max are only used when a histogram or graph is going to be generated. They are only for explanation purposes and have no other use. #Given dist (and its flag) is meaningless for internal (non-leaf) nodes. They must be allowed to be generated from their child node dists. BEGIN { FS = OFS = ","; Seed = 1; TotalBins = 20; TotalRuns = 100; Policy = 1; Factor = 1; } /^$/ || /\#.*/ { next; } { LayerIndex = 1; GraphIndex = 2; NameIndex = 1+2; OperationIndex = 2+2; GoalPriorityIndex = 3+2; MinIndex = 4+2; MaxIndex = 5+2; GivenDistFlagIndex = 6+2; IsGivenDistIndex = 7+2; IsGoalDistIndex = 8+2; ChildrenCountIndex = 9+2; SanityCheckUniqueNode[$NameIndex]++; TotalNodes++; Layer[TotalNodes] = $LayerIndex; Graph[TotalNodes] = $GraphIndex; Index[$NameIndex] = TotalNodes; Name[TotalNodes] = $NameIndex; Operation[TotalNodes] = $OperationIndex; GoalPriority[TotalNodes] = $GoalPriorityIndex; Min[TotalNodes] = $MinIndex; Max[TotalNodes] = $MaxIndex; GivenDistFlag[TotalNodes] = $GivenDistFlagIndex; IsGivenDist[TotalNodes] = $IsGivenDistIndex; IsGoalDist[TotalNodes] = $IsGoalDistIndex; ChildrenCount[TotalNodes] = $ChildrenCountIndex; for (i = 1; i <= ChildrenCount[TotalNodes]; i++) Children[TotalNodes,i] = $(i+ChildrenCountIndex); GivenDistStartIndex = ChildrenCountIndex + ChildrenCount[TotalNodes]; if (IsGivenDist[TotalNodes] == 1) { for (i = 1; i <= TotalBins; i++) GivenDist[TotalNodes,i] = $(GivenDistStartIndex+i); GoalDistStartIndex = GivenDistStartIndex + TotalBins; } else GoalDistStartIndex = GivenDistStartIndex; #keep the goal dist normalized. This is not needed for given dist since it is normalized as needed. if (IsGoalDist[TotalNodes] == 1) { for (i = 1; i <= TotalBins; i++) tempDistArray[i] = $(GoalDistStartIndex+i); normalizeDist(tempDistArray); for (i = 1; i <= TotalBins; i++) GoalDist[TotalNodes,i] = tempDistArray[i]; } } END { srand(Seed); #this block is used to make certain ones (roots) with a given priority have a random goal distribution for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (GoalPriority[nodeCounter] != 0) { for (binCounter = 1; binCounter <= TotalBins; binCounter++) tempArray[binCounter] = int(rand() * 20); normalizeDist(tempArray); for (binCounter = 1; binCounter <= TotalBins; binCounter++) GoalDist[nodeCounter,binCounter] = tempArray[binCounter]; IsGoalDist[nodeCounter] = 1; } } #this block makes constraints (given and goal distributions) randomly created count = 0; countLimit = int(TotalNodes*Factor/100); while (count < countLimit) { #randomly generate goal and given distributions for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { # if (rand() < 0.5 && IsGoalDist[nodeCounter] == 0 && count < countLimit && ChildrenCount[nodeCounter] > 0) # { # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # tempArray[binCounter] = int(rand() * 20); # # normalizeDist(tempArray); # # for (binCounter = 1; binCounter <= TotalBins; binCounter++) # GoalDist[nodeCounter,binCounter] = tempArray[binCounter]; # # IsGoalDist[nodeCounter] = 1; # count++; # GoalPriority[nodeCounter] = int(rand() * 100); # } if (rand() < 0.5 && IsGivenDist[nodeCounter] == 0 && count < countLimit && ChildrenCount[nodeCounter] == 0) { for (binCounter = 1; binCounter <= TotalBins; binCounter++) tempArray[binCounter] = int(rand() * 20); normalizeDist(tempArray); for (binCounter = 1; binCounter <= TotalBins; binCounter++) GivenDist[nodeCounter,binCounter] = tempArray[binCounter]; IsGivenDist[nodeCounter] = 1; count++; if (rand() < 0.5) GivenDistFlag[nodeCounter] = "opt"; else GivenDistFlag[nodeCounter] = "fixed"; } } } #this is where the code starts normally if (sanityCheck() == -1) { print "Error! One or more of the child nodes are missing OR a node is defined recursively OR a node is defined more than once."; exit; } # reportStart(); AllRoots = 0; findSubgraphs(); for (rootCounter = 1; rootCounter <= AllRoots; rootCounter++) { # print "processing root of " Layer[SubGraph[rootCounter,0]],Graph[SubGraph[rootCounter,0]],Name[SubGraph[rootCounter,0]]; TotalSubGraphNodes = 0; for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) if (SubGraph[rootCounter,nodeCounter] == 1) TotalSubGraphNodes++; # print "with " TotalSubGraphNodes+1 " nodes"; HighestPriorityNode = findMaxPriorityNode(rootCounter); if (HighestPriorityNode == -1 || GoalPriority[rootCounter] == GoalPriority[HighestPriorityNode]) HighestPriorityNode = rootCounter; # print "with "Name[HighestPriorityNode]" as the one with highest priority of " GoalPriority[HighestPriorityNode]; for (binCounter = 1; binCounter <= TotalBins; binCounter++) { for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { MinValue[binCounter] = 10^20; BestBin[nodeCounter,binCounter] = 0; } } for (run = 1; run <= TotalRuns; run++) { #initialize (possibly old values) for a new run for (tempCounter = 0; tempCounter <= TotalNodes; tempCounter++) Processed[tempCounter] = 0; #process the leaves first for (leafCounter = 1; leafCounter <= TotalNodes; leafCounter++) { if (ChildrenCount[leafCounter] == 0 && SubGraph[rootCounter,leafCounter] == 1) { processLeafNode(leafCounter); Processed[leafCounter] = 1; Processed[0]++; } } #process the rest (all internal nodes) while (Processed[0] < TotalSubGraphNodes) { for (internalCounter = 1; internalCounter <= TotalNodes; internalCounter++) { if (Processed[internalCounter] == 0 && SubGraph[rootCounter,internalCounter] == 1) { processedChildren = 0; for (childCounter = 1; childCounter <= ChildrenCount[internalCounter]; childCounter++) { if (Processed[Index[Children[internalCounter,childCounter]]] == 1) processedChildren++; } #if all of its children are processed, process it then if (processedChildren == ChildrenCount[internalCounter]) { processInternalNode(internalCounter); Processed[internalCounter] = 1; Processed[0]++; } } } } score(rootCounter); } findError(rootCounter); # reportEnd(rootCounter); } for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (IsGivenDist[nodeCounter] == 1) { if (GivenDistFlag[nodeCounter] == "opt") optional++; else fixed++; given++; } if (IsGoalDist[nodeCounter] == 1) goal++; } # print "layer1,layer2,layer3,total-nodes,total-given,optional,fixed,total-goal,percentage-given"; #report final errors per layer for (layerCounter = 1; layerCounter <= 3; layerCounter++) if (GoalCount[layerCounter] > 0) printf ("%f,",SumError[layerCounter]/(GoalCount[layerCounter]+0.00001)); print TotalNodes,given+0,optional+0,fixed+0,goal+0,Factor; } function findMaxPriorityNode(root,tempMax,tempNode) { tempMax = 0; tempNode = -1; for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (GoalPriority[nodeCounter] >= tempMax && SubGraph[root,nodeCounter] == 1) { tempMax = GoalPriority[nodeCounter]; tempNode = nodeCounter; } } return tempNode; } function findSubgraphs(tempArray) { #find all the roots as the ones that are not a child of any other node for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) for (tempCounter = 1; tempCounter <= ChildrenCount[nodeCounter]; tempCounter++) tempArray[Index[Children[nodeCounter,tempCounter]]]++; for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (tempArray[nodeCounter] == 0) { #a root has been found AllRoots++; SubGraph[AllRoots,0] = nodeCounter; } } for (rootCounter = 1; rootCounter <= AllRoots; rootCounter++) { # print "processing root of " Layer[SubGraph[rootCounter,0]],Graph[SubGraph[rootCounter,0]],Name[SubGraph[rootCounter,0]]; SubGraph[rootCounter,SubGraph[rootCounter,0]] = 1; findAllChildren(SubGraph[rootCounter,0],rootCounter); # for (temp = 1; temp <= TotalNodes; temp++) # if (SubGraph[rootCounter,temp] == 1) # print Layer[temp],Graph[temp],Name[temp]; } } function findAllChildren(node,root,childCounter) { for (childCounter = 1; childCounter <= ChildrenCount[node]; childCounter++) { SubGraph[root,Index[Children[node,childCounter]]] = 1; findAllChildren(Index[Children[node,childCounter]],root); } } function sanityCheck() { for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) for (tempCounter = 1; tempCounter <= ChildrenCount[nodeCounter]; tempCounter++) if (Index[Children[nodeCounter,tempCounter]] == 0 || Index[Children[nodeCounter,tempCounter]] == Index[Name[nodeCounter]]) return -1; for (element in SanityCheckUniqueNode) if (SanityCheckUniqueNode[element] > 1) return -1; return 1; } function score(root,tempCounter,tempBinCounter,tempValue,tempScore) { for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempScore[tempCounter] = 0; for (tempCounter = 1; tempCounter <= TotalNodes; tempCounter++) { if (IsGoalDist[tempCounter] == 1 && SubGraph[root,tempCounter] == 1 && Layer[tempCounter] == Layer[SubGraph[root,0]]) { for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { tempValue = GoalDist[tempCounter,tempBinCounter] - FinalDistArray[Name[tempCounter],tempBinCounter]; if (tempValue < 0) tempValue *= -1; tempScore[tempBinCounter] = tempScore[tempBinCounter] + tempValue * GoalPriority[tempCounter]; } } } for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { if (tempScore[tempBinCounter] < MinValue[tempBinCounter]) { MinValue[tempBinCounter] = tempScore[tempBinCounter]; for (tempCounter = 1; tempCounter <= TotalNodes; tempCounter++) if (SubGraph[root,tempCounter] == 1) BestBin[tempCounter,tempBinCounter] = FinalDistArray[Name[tempCounter],tempBinCounter]; } } } function processInternalNode(nodeIndex,tempMinCount,tempMaxCount,tempCounter,tempBinCounter,tempOutArray) { #Having a flag for internal nodes is meaningless since we have to be able to use distributions from child nodes. for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { tempMaxCount[tempBinCounter] = -10^20; tempMinCount[tempBinCounter] = 10^20; } for (tempCounter = 1; tempCounter <= ChildrenCount[nodeIndex]; tempCounter++) { childName = Children[nodeIndex,tempCounter]; for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { if (tempMaxCount[tempBinCounter] < FinalDistArray[childName,tempBinCounter]) tempMaxCount[tempBinCounter] = FinalDistArray[childName,tempBinCounter]; if (tempMinCount[tempBinCounter] > FinalDistArray[childName,tempBinCounter]) tempMinCount[tempBinCounter] = FinalDistArray[childName,tempBinCounter]; } } if (Operation[nodeIndex] == "null") { childName = Children[nodeIndex,1]; for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = FinalDistArray[childName,tempCounter]; } else if (Operation[nodeIndex] == "not") { childName = Children[nodeIndex,1]; for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = 1 - FinalDistArray[childName,tempCounter]; } else if (Operation[nodeIndex] == "and") { for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = tempMinCount[tempCounter]; } else if (Operation[nodeIndex] == "or") { for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) tempOutArray[tempCounter] = tempMaxCount[tempCounter]; } normalizeDist(tempOutArray); for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) FinalDistArray[Name[nodeIndex],tempBinCounter] = tempOutArray[tempBinCounter]; } function processLeafNode(nodeIndex,tempBinCounter,tempOutArray) { if (IsGivenDist[nodeIndex] == 1 && (GivenDistFlag[nodeIndex] == "fixed" || run == 1)) getDist(nodeIndex,tempOutArray); else generateDist(nodeIndex,tempOutArray); normalizeDist(tempOutArray); for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) FinalDistArray[Name[nodeIndex],tempBinCounter] = tempOutArray[tempBinCounter]; } function getDist(nodeIndex,out,tempBinCounter) { for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) out[tempBinCounter] = GivenDist[nodeIndex,tempBinCounter]; } function generateDist(nodeIndex,out,tempBinCounter) { for (tempBinCounter = 1; tempBinCounter <= TotalBins; tempBinCounter++) { if (rand() > 0.5 && run > 1) out[tempBinCounter] = BestBin[nodeIndex,tempBinCounter]; else { bound = GoalDist[HighestPriorityNode,tempBinCounter] * 2; out[tempBinCounter] = rand() * bound; } } } function normalizeDist(inputArray,tempCounter,sum) { sum = 0; for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) sum += inputArray[tempCounter]; if (sum != 0) for (tempCounter = 1; tempCounter <= TotalBins; tempCounter++) inputArray[tempCounter] = inputArray[tempCounter] / sum; } function reportStart() { for (j = 1; j <= TotalNodes; j++) { printf("%s has %d children: (", Name[j],ChildrenCount[j]); for (i = 1; i <= ChildrenCount[j]; i++) printf("%s,", Children[j,i]); print ") with min: " Min[j] " and max: " Max[j] " and priority: " GoalPriority[j] " and flag '" GivenDistFlag[j] "' with given dist " IsGivenDist[j] " and goal dist " IsGoalDist[j]; if (IsGivenDist[j] == 1) { printf("Given Dist: "); for (i = 1; i <= TotalBins; i++) printf ("%.2f,",GivenDist[j,i]); print ""; } if (IsGoalDist[j] == 1) { printf("Goal Dist: "); for (i = 1; i <= TotalBins; i++) printf ("%.2f,",GoalDist[j,i]); print ""; } } } function findError(root) { for (nodeCounter = 1; nodeCounter <= TotalNodes; nodeCounter++) { if (SubGraph[root,nodeCounter] == 1) { sum = 0; if (IsGoalDist[nodeCounter] == 1) { for (binCounter = 1; binCounter <= TotalBins; binCounter++) { tempValue = BestBin[nodeCounter,binCounter] - GoalDist[nodeCounter,binCounter]; if (tempValue < 0) tempValue *= -1; sum += tempValue; } SumError[Layer[nodeCounter]] += sum; GoalCount[Layer[nodeCounter]]++; } } } } function reportEnd(root) { print "Final results:"; for (j = 1; j <= TotalNodes; j++) { if (SubGraph[root,j] == 1) { for (i = 1; i <= TotalBins; i++) printf("%.2f,",BestBin[j,i]); printf (" %s",Name[j]); if (ChildrenCount[j] > 0 ) { printf(" --> %s (",Operation[j]); comma = ""; for (i = 1; i <= ChildrenCount[j]; i++) { printf("%s",comma); printf("%s", Children[j,i]); comma = "," } printf (")"); } print ":"GivenDistFlag[j]; } } print ""; for (j = 1; j <= TotalNodes; j++) { if (SubGraph[root,j] == 1) { sum = 0; if (IsGoalDist[j] == 1) { for (i = 1; i <= TotalBins; i++) { tempValue = BestBin[j,i] - GoalDist[j,i]; if (tempValue < 0) tempValue *= -1; sum += tempValue; } for (i = 1; i <= TotalBins; i++) printf("%.2f,",BestBin[j,i]); print " --> PREDICTED " Name[j] " ( Priority: " GoalPriority[j] " and Error: " sum " )"; for (i = 1; i <= TotalBins; i++) printf("%.2f,",GoalDist[j,i]); print " --> GOAL " Name[j]; } } } }