##################################################################
# command-line options (defaults)
#
# NOTE(review): these are assigned in BEGIN, so they would overwrite
# any `-v name=value` setting; presumably they are overridden by
# `name=value` operands, which awk processes after BEGIN -- confirm
# against the launcher script.
BEGIN {
    Samples        = 20
    K1             = 5
    K2             = 15
    Seed           = 1
    Tests          = 0.33   # chance that a data row is placed in Test
    AutoStop       = 1
    MinOverlap     = 0.75
    RankedOverride = 0
    Verbose        = 1
    Note           = ""
    Class          = ""     # if set, only @class names it matches are kept
    SkipRelevancy  = ""
    KNN            = ""     # if set, train/test use the knn path
}

# internal options
BEGIN {
    OFS        = ","
    IGNORECASE = 1          # gawk extension: case-insensitive matching
    Inf        = 10^32      # stand-in for "infinity" when seeding min/max
    _          = SUBSEP     # shorthand for building multi-part array keys
    CONVFMT    = "%.8g"
    OFMT       = "%.8g"
}

##################################################################
# main program

# Entry point: runs one train/test cycle using the global options.
function main() {
    worker(Samples, K1, K2)
}

# Prints the run settings (when Verbose), obtains the ranked results
# either from injectRanked() (when RankedOverride is set) or from
# train(), then hands them to test().
#   samples, k1, k2 : passed through to train() and test()
#   rankeds, ranked : locals
function worker(samples, k1, k2,    rankeds, ranked) {
    if (Verbose) {
        print "samples : " samples
        print "k1 : " k1
        print "k2 : " k2
        print "%test : " Tests*100
        print "Contrast Method : " (Nomograms ? "Nomograms" : "BSquared")
        print "Case Relevancy : " (MinOverlap ? MinOverlap*100"% Overlap" : "Stoicastic Samples")
        print "Logging : " (Log ? "On ("Log")" : "Off")
        print ""
        # The original concatenated `RankedOVerride` (capital V), an
        # unset variable, so the override value was never shown.
        print (RankedOverride \
               ? "Overriding training recomendations using "RankedOverride \
               : "Training results on " Train[0] " historical examples (what looks useful):")
    }

    if (RankedOverride)
        rankeds = injectRanked(RankedOverride, ranked)
    else
        rankeds = train(samples, k1, k2, ranked)

    if (Verbose)
        printf("Test results on " Test[0] " new projects (applying the training results to new data):\n")

    test(samples, k1, k2, rankeds, ranked)
}

# Selects k1+k2 Train rows (via getRelevant(), or via the knn path when
# KNN is set), splits them into best/rest with bestRest(), and returns
# the value of rank() over that split. `ranked` is filled in by rank().
#
# The local formerly named `neighbors` is now `nearby`: POSIX awk does
# not allow a parameter to share a name with a function, and this body
# calls neighbors().
#
# NOTE(review): `relevant` is not in the parameter list, so it is a
# global array -- confirm whether that is intended.
function train(samples, k1, k2, ranked, \
               projects, nearby, memos, best, rest, knearest, rankeds) {
    if (!KNN) {
        getRelevant(Train, k1+k2, relevant)
        bestRest(relevant, k1, best, rest)                         # divide knearest into best/worst
    } else {
        getProjects(MinOverlap, Train, samples, projects)          # get example1 projects
        neighbors(samples, projects, Train[0], Train, nearby, memos) # distances from example1 to Train cases
        knn(k1+k2, samples, nearby, memos, knearest)               # knearest Train instance row numbers to example1 p
        bestRest(knearest, k1, best, rest)                         # divide knearest into best/worst
    }

    # com = "sort -n -k 2 -r"
    # print "BEST("best[0]"):"
    # for (key in best) {
    #     split(key,tmp,SUBSEP)
    #     if (tmp[2] != "")
    #         print Eman[tmp[1]]"="tmp[2]": "best[key] | com
    # }
    # close(com)
    # print "\nREST("rest[0]"):"
    # for (key in rest) {
    #     split(key,tmp,SUBSEP)
    #     if (tmp[2] != "")
    #         print Eman[tmp[1]]"="tmp[2]": "rest[key] | com
    # }
    # close(com)

    rankeds = rank(k1, k2, best, rest, ranked)                     # contrast set between best/worst
    return rankeds
}

# Selects k1+k2 Test rows (same two paths as train()), copies the
# selected rows into a densely indexed `data` array (data[0] holds the
# row count) and passes it to apply() together with `ranked`.
# `rankeds` is accepted but not used here.
function test(samples, k1, k2, rankeds, ranked, \
              projects, nearby, memos, knearest, row, col, data) {
    if (!KNN) {
        getRelevant(Test, k1+k2, knearest)
    } else {
        getProjects(MinOverlap, Test, samples, projects)           # get example2 projects
        neighbors(samples, projects, Test[0], Test, nearby, memos) # distances example2 to Test set
        knn(k1+k2, samples, nearby, memos, knearest)               # knearest Test instances row numbers to example2 projects
    }

    # Re-align indexes for knn data: keep only the selected rows,
    # renumbered 1..data[0] in their original order.
    for (row = 1; row <= Test[0]; row++) {
        if (row in knearest) {
            data[0]++
            for (col = 1; col <= Cols; col++)
                data[data[0], col] = Test[row, col]                # convert row numbers to their data rows
        }
    }

    apply(ranked, data)
}

##################################################################
# read in data

# Strip everything from the first '%' to the end of the line.
{ gsub(/%.*/, "") }

# Skip empty and whitespace-only lines. The original /^[ \t]$/ matched
# only a line holding exactly one blank or tab, so truly empty lines
# (including lines emptied by the '%' strip above) were loaded as data
# rows whenever In was set.
/^[ \t]*$/    { next }

/^@project/   { In = 0 }

# While In is set, each line is a data row; it goes to Test with
# probability Tests, otherwise to Train.
In {
    if (rand() <= Tests)
        cells(Test, Cols)
    else
        cells(Train, Cols)
}

/^@relation/  { Relation = $2 }
/^@attribute/ { def($2) }
/^@class/     { defclass($2) }
/^@data/      { In = 1; inits(Cols) }
/^@/          { next }

# Seeds the random number generator and resets the per-column min/max
# trackers of Train and Test so the first value seen replaces them.
function inits(cols,    i) {
    srand(Seed ? Seed : 1)
    for (i = 1; i <= cols; i++) {
        Train["max", i] = -1*Inf
        Train["min", i] = Inf
        Test["max", i]  = -1*Inf
        Test["min", i]  = Inf
    }
}

# Declares (or re-declares) the attribute `name` from an @attribute
# line. A '?' in the name is removed and marks the column in Goal.
# Fields 3..NF of the current record become the column's range values,
# stored in both Train and Test under "range",column,1..n with the
# count at index 0.
function def(name,    a, i, goalp) {
    # `\?` is the portable spelling; a bare leading `?` is only treated
    # as a literal by some awks.
    goalp = sub(/\?/, "", name)

    if (name in Name) {
        a = Name[name]
    } else {
        a = Name[name] = ++Cols
        Eman[Cols] = name            # reverse map: column number -> name
    }

    # Re-declaration: drop the previously stored range first.
    if (Train["range", a, 0]) {
        clearStack(Train, "range" _ a)
        clearStack(Test,  "range" _ a)
    }

    for (i = 3; i <= NF; i++) {
        Train["range", a, ++Train["range", a, 0]] = $i
        Test[ "range", a, ++Test[ "range", a, 0]] = $i
    }

    if (goalp)
        Goal[a] = 1
}

# Declares the class column `name` from an @class line: same column
# and range bookkeeping as def(), then records the column in Klasses
# (and counts it in NumKlasses) when Class is empty or matches `name`
# used as a regular expression.
#
# `a` and `i` are now locals; the original left them out of the
# parameter list, so they were written as globals.
function defclass(name,    a, i) {
    if (name in Name) {
        a = Name[name]
    } else {
        a = Name[name] = ++Cols
        Eman[Cols] = name
    }

    if (Train["range", a, 0]) {
        clearStack(Train, "range" _ a)
        clearStack(Test,  "range" _ a)
    }

    for (i = 3; i <= NF; i++) {
        Train["range", a, ++Train["range", a, 0]] = $i
        Test[ "range", a, ++Test[ "range", a, 0]] = $i
    }

    if (!Class || Class ~ name) {
        Klasses[name] = Name[name]
        NumKlasses++
    }
}

# Empties the stack stored in `a` under key prefix `key`: deletes
# elements key,1 .. key,n (n is read from key,0) and resets key,0 to 0.
#
# The local formerly named `max` is now `top` so that it does not share
# a name with the max() function used by cells().
function clearStack(a, key,    i, top) {
    top = a[key _ 0]
    if (top)
        for (i = 1; i <= top; i++)
            delete a[key _ i]
    a[key _ 0] = 0
}

# Appends the current input record to `data` as a new row (data[0] is
# the row count) and updates the running per-column max and min.
function cells(data, cols,    col) {
    data[0]++
    for (col = 1; col <= cols; col++) {
        data[data[0], col] = $col
        data["max", col]   = max(data["max", col], $col)
        data["min", col]   = min(data["min", col], $col)
    }
}