#!/usr/bin/gawk -f #requirez (trees/bst2.awk) #/* vim: set filteype=awk : */ -*- awk -*- #Initial design for RBST implementation #DJ Boland 09/11/06 #------------------------------------------------------------------------------ BEGIN{ FS=" *, *"; # fields seperated by a comma and its preceeding # and trailing spaces. SUBSEP="_" val="val"; left="left"; right="right"; here="here"; all="all"; srand(); BuildTree=0; CurrentRecord=0; #Record currently being accessed Attr=0; #Number of Attributes seen } #Skip blank and comment lines {sub(/\%.*/,"")} #substitute comments with blanks /^[ \t]*$/ {next} #when blank line observed, go to the next line #Process attribute lines /@attribute/{ Attr++; split($1,a," "); Names[Attr]=a[2]; # print "saw attribute " Names[Attr]; next } /@data/{ BuildTree=1; CurrentRecord=1; myroot = Names[i] SUBSEP "1" for(i=1;i<=Attr; i++) { CurrentRoot[i] = "?" Rbst[myroot,here] = 0; Rbst[myroot,val] = 0; Rbst[myroot,all] = 0; Rbst[myroot,right] ="?"; Rbst[myroot,left] ="?"; } next } #for all data entries BuildTree && NF >1 { for (i=1; i<=Attr; i++) { field=$i; #val set to the value of the field currently being processed # print val if(field ~ /?/) continue; else CurrentRoot[i] = rbstinsert(CurrentRoot[i], Rbst, field, Names[i], CurrentRecord); }#end of for } CurrentRecord++; END{ for (i=1; i<=Attr; i++) { printtree(CurrentRoot[i], Rbst,"","") print "" } } ## ------------------------------------------insert from bst2.awk ----------- #bst2.awk #DJ's Binary Search Tree Functions #Insert a new root node function insertR(RootInstance, Tree, Value, Attr, Rec) { if (RootInstance == "?") #defaultvalue) { return newtree(Attr SUBSEP Rec,Tree, Value) } Tree[RootInstance,all]++ if (Value == Tree[RootInstance,val]) { Tree[RootInstance,here]++ return RootInstance } #print Tree[RootInstance,val] " vs " Value if (Value < Tree[RootInstance,val]) { # print "inserting left" Tree[RootInstance,left] = insertR(Tree[RootInstance,left], Tree, Value, Attr, Rec) RootInstance = rotateR(RootInstance,Tree) } else { Tree[RootInstance,right] = insertR(Tree[RootInstance,right], Tree, Value, Attr, Rec) RootInstance = rotateL(RootInstance,Tree) } return RootInstance } #Insert a new node (could be at root, but otherwise wherever it fits) function insertNR(RootInstance, Tree, Value, Attr, Rec) { if (RootInstance == "?" )#defaultvalue { return newtree() } Tree[RootInstance,all]++ if (Value == Tree[RootInstance,val]) { Tree[RootInstance,here]++ return RootInstance } else { if (Value < Tree[RootInstance,val]) { Tree[RootInstance,left] = insertNR(Tree[RootInstance,left], Tree, Value) } else { Tree[RootInstance,right] = insertNR(Tree[RootInstance,right], Tree, Value) } return RootInstance } } function rotateR(RootInstance, Tree, temp) { temp = Tree[RootInstance,left] Tree[RootInstance,left] = Tree[temp,right] Tree[temp,right] = RootInstance Tree[temp,all] = Tree[RootInstance,all] if((Tree[RootInstance,left] != "?") && (Tree[RootInstance,right] != "?")) { Tree[RootInstance,all]=Tree[Tree[RootInstance,left],all]+Tree[Tree[RootInstance,right],all]+Tree[RootInstance,here] } else { if((Tree[RootInstance,left] != "?")) { Tree[RootInstance,all] = Tree[RootInstance,here]+Tree[Tree[RootInstance,left],all] } else { if((Tree[RootInstance,right] != "?")) { Tree[RootInstance,all] = Tree[RootInstance,here]+Tree[Tree[RootInstance,right],all] } else { Tree[RootInstance,all] = Tree[RootInstance,here] } } } return temp } function rotateL(RootInstance, Tree, temp) { temp = Tree[RootInstance,right] Tree[RootInstance,right] = Tree[temp,left] Tree[temp,left] = RootInstance Tree[temp,all] = Tree[RootInstance,all] if((Tree[RootInstance,left] != "?") && (Tree[RootInstance,right] != "?")) { Tree[RootInstance,all]=Tree[Tree[RootInstance,left],all]+Tree[Tree[RootInstance,right],all]+Tree[RootInstance,here] } else { if((Tree[RootInstance,left] != "?")) { Tree[RootInstance,all] = Tree[RootInstance,here]+Tree[Tree[RootInstance,left],all] } else { if((Tree[RootInstance,right] != "?")) { Tree[RootInstance,all] = Tree[RootInstance,here]+Tree[Tree[RootInstance,right],all] } else { Tree[RootInstance,all] = Tree[RootInstance,here] } } } return temp } function newtree(RootInstance, Tree, Value) { Tree[RootInstance,left]="?" Tree[RootInstance,right]="?" Tree[RootInstance,val]=Value #print Tree[RootInstance,val] Tree[RootInstance,all]= 1.0 Tree[RootInstance,here]= 1.0 return RootInstance } function printtree(RootInstance, Tree, b4, indent,x) { x--; if (x==0) return; #Current code obtained and slightly modified from Timm's code's src website print indent b4 RootInstance " = " Tree[RootInstance, val] " [" Tree[RootInstance,here] "]" if(Tree[RootInstance,left] != "?") printtree(Tree[RootInstance,left], Tree, "< ", indent "| ",x) if(Tree[RootInstance,right] != "?") printtree(Tree[RootInstance,right], Tree, "> ", indent "| ",x) } function rbstinsert(RootInstance, Tree, Value, Attr, Rec) { if (RootInstance == "?")#defaultvalue { return newtree(Attr SUBSEP Rec, Tree,Value) } if (Value == Tree[RootInstance,val]) { Tree[RootInstance,here] = Tree[RootInstance,here]+ 1.0 Tree[RootInstance,all]= Tree[RootInstance,all]+ 1.0 return RootInstance } else { if ((rand()*Tree[RootInstance,all]) < 1.0) { R[Attr]++; # print "insert @ root" # print "inserting root: " Value " " RootInstance " " Tree[RootInstance,left] " " Tree[RootInstance,val] return insertR(RootInstance,Tree,Value, Attr, Rec) } L[Attr]++ if (Value < Tree[RootInstance,val]) { #print "trying again, on the left" Tree[RootInstance,left] = rbstinsert(Tree[RootInstance,left], Tree, Value, Attr,Rec ) } else { #print "trying again, on the right" Tree[RootInstance,right] = rbstinsert(Tree[RootInstance,right], Tree, Value, Attr, Rec) } Tree[RootInstance,all]++ return RootInstance } } END {for (i in R) print "rrr " i " " R[i] } END {for (i in L) print "lll " i " " L[i] }