#########################################################################
# select and report subset of relevant rows that satisfy constraints 1..n
#
# Globals read here:    AutoStop, Report, OutFile, Klasses, Baseline, Eman,
#                       Relation, ProjName, Samples, K1, K2, Tests,
#                       Nomograms, Count, _ (field separator for split)
# Globals written here: RR, rowKlasses, Chose, Previous
# Helpers defined elsewhere: copya, score, findMedian, findSpread,
#                       find25, find75

# selects: apply the ranked constraints one at a time, reporting the rows
# selected after each step.
#   k1      - passed through to report() unchanged
#   data    - row table; data[0] is the row count, data[row,col] a cell
#   rankeds - number of entries in ranked
#   ranked  - ranked[1..rankeds], one constraint string per entry
# Remaining names are locals. The loop ends when the constraints run out, or
# as soon as a step returns true while AutoStop is set.
function selects(k1,data,rankeds,ranked,   constraints,n,selected,previous,isOptimal) {
    RR = rankeds    ##BAD: global, read by anyBetter() to detect the last constraint
    for (n = 1; n <= rankeds && (!isOptimal || !AutoStop); n++) {
        # presumably snapshots the current selection into previous -- confirm
        # against copya()'s definition
        copya(selected, previous)
        isOptimal = addNextConstraint(k1, n, data, ranked[n], constraints, selected, previous)
    }
}

# addNextConstraint: fold one more constraint into constraints, recompute the
# selected rows from scratch, and report them.
# Returns the stop flag computed by anyBetter(). The report is skipped only
# when that flag is set and AutoStop is on.
function addNextConstraint(k1,n,data,constraint,constraints,selected,previous,   isOptimal) {
    extendConstraint(constraint, constraints)
    split("", selected, "")                 # empty the array
    selectRows(data, constraints, selected)
    isOptimal = anyBetter(data, selected, previous, n)
    if (!isOptimal || !AutoStop)
        report(k1, n, data, constraint, constraints, selected)
    return isOptimal
}

# _klassScore: collect the class-column values of one row into the global
# rowKlasses (indices 1..number of Klasses) and return score(rowKlasses).
# k and r are locals, so numbering restarts at 1 for every row.
function _klassScore(data,row,   k,r) {
    split("", rowKlasses, "")
    for (k in Klasses)
        rowKlasses[++r] = data[row, Klasses[k]]
    return score(rowKlasses)
}

# _pctReduction: percentage by which now is below base; 0 when base is 0
# (avoids a fatal division by zero).
function _pctReduction(base,now) {
    return (base == 0) ? 0 : 100 * (base - now) / base
}

# anyBetter: score the rows in selected and in previous and decide whether
# the search should stop. Returns 1 to stop, otherwise an unset (false) value.
# Stop conditions:
#   - fewer than 3 rows are currently selected;
#   - the previous median is > 0 and neither median nor spread went down;
#   - n equals the global RR (no constraints left).
# When stopping and Report is set, a summary of Baseline versus the previous
# selection is appended to OutFile.
function anyBetter(data,selected,previous,n,   key,curScore,prevScore,curCount,stop,m,i) {
    for (key in selected) {
        curScore[key] = _klassScore(data, key)
        curCount++
    }
    for (key in previous)
        prevScore[key] = _klassScore(data, key)

    ##if median or spread improves and we have >2 items
    if (curCount < 3)
        stop = 1
    if (findMedian(prevScore) > 0) {
        if (findMedian(curScore) >= findMedian(prevScore) &&
            findSpread(curScore) >= findSpread(prevScore))
            stop = 1
    }
    if (n == RR)    #hack to stop when we run out of constraints. Much better ways to do this.
        stop = 1

    if (stop && Report) {
        print "\n\n\n" Relation " " ProjName " (" Samples " Samples)" >> OutFile
        print " 25%\t50%\t75%" >> OutFile
        m = asort(Baseline)
        printf("%s", "Baseline: " find25(Baseline) "\t" findMedian(Baseline) "\t" find75(Baseline) "\t" m "*{") >> OutFile
        for (i = 1; i <= m; i++)
            printf("%s", " " Baseline[i]) >> OutFile
        # Sort before printing so the count on the "Final:" line is that of
        # prevScore rather than Baseline, mirroring the Baseline line above.
        m = asort(prevScore)
        printf("%s", " }\n Final: " find25(prevScore) "\t" findMedian(prevScore) "\t" find75(prevScore) "\t" m "*{") >> OutFile
        for (i = 1; i <= m; i++)
            printf("%s", " " prevScore[i]) >> OutFile
        print " }\nMedian Reduction: " (findMedian(Baseline) - findMedian(prevScore)) >> OutFile
        print "Spread Reduction: " (findSpread(Baseline) - findSpread(prevScore)) >> OutFile
        if (findMedian(prevScore) != 0) {
            # This summary line goes to standard output; the redirect was
            # commented out in the original.
            print "@@@ " Relation "." ProjName "," Samples "," K1 "," K2 "," Tests "," \
                  (Nomograms>0 ? "Nomograms" : "BSquared") "," \
                  (Count>0 ? "MultiCount" : "OneCount") "," \
                  _pctReduction(findMedian(Baseline), findMedian(prevScore)) "," \
                  _pctReduction(findSpread(Baseline), findSpread(prevScore)) "," Chose #>> OutFile
        }
        print "------------------------------------\n" >> OutFile
    }
    return stop
}

# extendConstraint: split constraint into attribute and range (separator is
# the global _) and merge the range into constraints[attr].
# A first range is stored as "(range)"; each later one is prepended, giving
# "(new|old...)".
function extendConstraint(constraint,constraints,   attrange,attr,range) {
    split(constraint, attrange, _)
    attr = attrange[1]
    range = attrange[2]
    if (attr in constraints)
        # substr(...,2) drops the old leading "(" and keeps the closing ")"
        constraints[attr] = "(" range "|" substr(constraints[attr], 2)
    else
        constraints[attr] = "(" range ")"
    # if (SelectsDebug)
    #     saya(constraints,"constrants")
}

# selectRows: set selected[row]=1 for every row 1..data[0] that passes
# selectRow().
function selectRows(data,constraints,selected,   row) {
    for (row = 1; row <= data[0]; row++)
        if (selectRow(data, row, constraints))
            selected[row] = 1
}

# selectRow: return 1 when the row satisfies every constraint, else 0.
# NOTE(review): the test uses the data cell as a dynamic regex matched
# against the constraint string "(r1|r2...)", i.e. roughly "cell text occurs
# inside the constraint". Cells with regex metacharacters, or that are
# substrings of a longer range, may match unexpectedly -- left as in the
# original (which flagged this line).
function selectRow(data,row,constraints,   col) {
    # saya(data,"data")
    # saya(constraints,"cons")
    for (col in constraints)
        if (constraints[col] !~ data[row, col])    #############
            return 0
    return 1
}

# report: print one line to standard output for constraint step n: the
# attribute name (via Eman), its accumulated range pattern, the 25%/50%/75%
# points of the integer-truncated row scores, a Better/Worse tag, the row
# count, and the sorted scores themselves when there are at most 30 rows.
# Side effects: appends the attribute name to the global Chose and stores the
# sorted scores in the global Previous (Previous[0] holds the count).
function report(k1,n,data,constraint, constraints, selected, \
                all,attr,attrange,row,tmp,i,str,sep,j,max,sorted,k) {
    split(constraint, attrange, _)
    attr = Eman[attrange[1]]
    ##BAD
    Chose = Chose attr " "
    ##/BAD
    for (row in selected) {
        all++
        tmp[++i] = int(_klassScore(data, row))
    }
    max = asort(tmp, sorted)
    for (j = 1; j <= max; j++) {
        str = str sep sorted[j]
        sep = " "
    }
    printf("%s", n == 1 ? " " : "and ")
    printf("%s", "n=" n ": " attr "= " constraints[attrange[1]] " :\t\t ")
    printf("%s", find25(sorted) "\t" findMedian(sorted) "\t" find75(sorted) "\t")
    ##begin ugly code##
    #store for later
    if (max > 0) {
        Previous[0] = max
        # NOTE(review): the text between "k" and "> 0)" was lost from this
        # file (it read `for (k=1; k 0) printf "Better"`). The copy loop and
        # the comparison below are a reconstruction; the Better/Worse test in
        # particular is an assumption (median lower than Baseline's).
        # TODO: confirm against an intact copy of the source.
        for (k = 1; k <= max; k++)
            Previous[k] = sorted[k]
        if (findMedian(Baseline) - findMedian(sorted) > 0)
            printf "Better"
        else
            printf "Worse"
        printf "\t"
    }
    ##end ugly code##
    printf("%s", max)
    if (all <= 30)
        print "*{" str "}"
    else
        print "*{..}"
}