#########################################################################
# select and report subset of relevant rows that satisfy constraints 1..n
#
# Table layout used throughout this file: a table is an awk array whose
# element [0] holds the row count and whose cells live at [row SUBSEP col]
# (equivalently [row,col]) for row = 1..table[0] and col = 1..Cols.
#
# Globals read here but defined outside this section: Cols, Klasses
# (class name -> column number), Eman (indexed by column), Verbose, Log,
# Note, Relation, ProjName, NumKlasses, Inf, and the helper functions
# findMedian(), findSpread() and arr2str().

# applyPercentReduction(before, after)
#   Percentage by which `after` is lower than `before`.
#   Returns 0 when `after` is zero (the convention already used by apply())
#   and also when `before` is zero, which previously caused a fatal
#   division by zero.
function applyPercentReduction(before, after) {
    if (after == 0 || before + 0 == 0)
        return 0
    return 100 * (before - after) / before
}

# apply(treatments, data)
#   Adds treatments[1], treatments[2], ... to the constraint set one at a
#   time, filters `data` with the accumulated constraints, scores the
#   surviving rows, and stops when a stopping rule fires.  The last
#   accepted subset is kept in finalEstimateData/finalScores and compared
#   against the unfiltered baseline.
#
#   treatments : array indexed from 1; each value is "attr SUBSEP value"
#                (see addConstraint).  A false/empty entry ends the search.
#   data       : table to be filtered (layout described above).
#   The remaining declared parameters are working storage.
#
#   Output: prints the accumulated global `recommendation` string, a "."
#   when not Verbose, verbose descriptions when Verbose is set, and
#   appends ASIS/TOBE lines to the file named by Log when Log is set.
#
#   NOTE(review): addConstraint() runs before the stopping rules are
#   evaluated, so the treatment that triggers a stop is still present in
#   `constraints` when the recommendation string is built -- confirm that
#   this is intended.
function apply(treatments,data,   baseEstimateData,finalEstimateData,constraints,t,optimal,filteredData,r,c,col,val,key,logTag) {
    #Save the original estimate data
    for (r=1; r<=data[0]; r++) {
        for (c=1; c<=Cols; c++) {
            baseEstimateData[r,c] = data[r,c]
        }
    }
    baseEstimateData[0] = data[0]

    # Class ranges come from the full data set and are reused for every
    # filtered subset, so all scores share one normalisation.
    getClassRanges(baseEstimateData, classRanges)
    scoreData(baseEstimateData, classRanges, baseRowToScore)
    baseMedian = findMedian(baseRowToScore)
    baseSpread = findSpread(baseRowToScore)

    if (Verbose) {print "\n\n---------------Queries---------------"}
    if (Verbose) {describeData("Query #0", baseEstimateData, baseRowToScore)}

    optimal=0
    t=1
    previousMedian = baseMedian
    previousSpread = baseSpread
    while(!optimal) {
        #             -----input------  ---output---
        addConstraint(treatments[t],    constraints)
        filter(data, constraints, filteredData)
        split("",scores,"")
        scoreData(filteredData, classRanges, scores)
        filteredMedian = findMedian(scores)
        filteredSpread = findSpread(scores)

        #Stopping Rules
        if (!treatments[t]) {
            optimal=1
            printf (Verbose ? "STOPPING. No more treatments left:\n" : "")
        }
        if (filteredData[0] < 3) {
            optimal=1
            printf (Verbose ? "STOPPING. Next query too small:\n" : "" )
        }
        if (filteredMedian >= previousMedian && filteredSpread >= previousSpread) {
            optimal=1
            printf (Verbose ? "STOPPING. Next query shows no improvement:\n" : "" )
        }
        if (t == 1) #first treatment always works
            optimal=0

        previousMedian = filteredMedian
        previousSpread = filteredSpread

        if (!optimal) {
            # This query is accepted: it becomes the current "final" subset.
            split("",finalEstimateData,"")
            split("",finalScores,"")
            # Record the size outside the row loop so it is set even when
            # the accepted subset has no rows.
            finalEstimateData[0] = filteredData[0]
            for (r=1; r<=filteredData[0]; r++) {
                for (c=1; c<=Cols; c++) {
                    finalEstimateData[r SUBSEP c] = filteredData[r SUBSEP c]
                }
                finalScores[r] = scores[r]
            }
        }
        if (Verbose) {describeData("Query #"t, filteredData, scores) }
        t++
    }

    if (Verbose) {print "---------------Results---------------"}
    if (Verbose) {describeData("Baseline", baseEstimateData, baseRowToScore) }

    # Render every constrained attribute as "name=value " pairs.
    for (col in constraints) {
        split(constraints[col],tmp,SUBSEP)
        for (val in tmp)
            recommendation = recommendation Eman[col]"="tmp[val]" "
    }

    if (Verbose) {describeData("Final", finalEstimateData, finalScores)}

    baseScoreMedian  = findMedian(baseRowToScore)
    finalScoreMedian = findMedian(finalScores)
    baseScoreSpread  = findSpread(baseRowToScore)
    finalScoreSpread = findSpread(finalScores)

    scoreMedianReduction = applyPercentReduction(baseScoreMedian, finalScoreMedian)
    scoreSpreadReduction = applyPercentReduction(baseScoreSpread, finalScoreSpread)

    outStr = "   score median: "sprintf("%4.0f",scoreMedianReduction)"%"
    outStr = outStr"\n   score spread: "sprintf("%4.0f",scoreSpreadReduction)"%"

    RankedOverride=""
    # Suffix shared by every line written to Log.
    logTag = (Note ? Note : Relation)"."ProjName""(RankedOverride ? "."RankedOverride : "")

    # All Log output goes through an explicit "%s" format so that a '%'
    # inside the data is never interpreted as a conversion specifier.
    if (Log) {
        if (NumKlasses > 1) {
            printf "%s", ("score.ASIS." logTag ",") >> Log
            printf "%s", arr2str(baseRowToScore) >> Log
            printf "%s", ("\nscore.TOBE." logTag ",") >> Log
            printf "%s", arr2str(finalScores) >> Log
            printf "\n" >> Log
        }
    }

    # Per-class comparison of the raw class column, baseline vs. final.
    for(key in Klasses) {
        split("",tmpBase,"")
        split("",tmpFinal,"")
        for(r=1; r<=baseEstimateData[0]; r++) {
            tmpBase[r] = baseEstimateData[r,Klasses[key]]
        }
        for(r=1; r<=finalEstimateData[0]; r++) {
            tmpFinal[r] = finalEstimateData[r,Klasses[key]]
        }
        baseMedian  = findMedian(tmpBase)
        finalMedian = findMedian(tmpFinal)
        baseSpread  = findSpread(tmpBase)
        finalSpread = findSpread(tmpFinal)

        medianReduction = applyPercentReduction(baseMedian, finalMedian)
        spreadReduction = applyPercentReduction(baseSpread, finalSpread)

        outStr = outStr"\n"sprintf("%8s",key)" median: "sprintf("%4.0f",medianReduction)"%"
        outStr = outStr"\n"sprintf("%8s",key)" spread: "sprintf("%4.0f",spreadReduction)"%"

        if (Log) {
            printf "%s", (key ".ASIS." logTag ",") >> Log
            printf "%s", arr2str(tmpBase) >> Log
            printf "%s", ("\n" key ".TOBE." logTag ",") >> Log
            printf "%s", arr2str(tmpFinal) >> Log
            printf "\n" >> Log
        }
    }

    if (Verbose) {
        print ""
        print "-----------Reduction Summary-----------"
        print outStr
    }
    print recommendation
    if (!Verbose) {printf "."}
}

# sortData(data, scores)
#   Reorders the rows of `data` in place by ascending scores[row] and sets
#   data[0] to the number of scored rows.  A random jitter below 0.000001
#   is added to each score so that ties are broken randomly.
#
#   All scratch arrays are locals, so nothing leaks between calls.  Rows
#   are looked up through a full-precision ("%.17g") string key with a
#   per-key queue; the default number-to-subscript conversion (CONVFMT)
#   would round the jitter away and make tied rows overwrite one another.
function sortData(data, scores,    row,c,n,i,key,cut,keys,bucket,sorted) {
    split("",keys,"")       # force array type even when scores is empty
    split("",bucket,"")
    for (row in scores) {
        keys[row] = 0.000001 * rand() + scores[row]
        key = sprintf("%.17g", keys[row])
        if (key in bucket)
            bucket[key] = bucket[key] SUBSEP row
        else
            bucket[key] = row
    }
    n = asort(keys)
    data[0] = n
    for (i=1; i<=n; i++) {
        key = sprintf("%.17g", keys[i])
        # Pop the next row queued under this key.
        cut = index(bucket[key], SUBSEP)
        if (cut) {
            row = substr(bucket[key], 1, cut - 1)
            bucket[key] = substr(bucket[key], cut + 1)
        } else {
            row = bucket[key]
        }
        for (c=1; c<=Cols; c++)
            sorted[i SUBSEP c] = data[row SUBSEP c]
    }
    for (i=1; i<=n; i++) {
        for (c=1; c<=Cols; c++)
            data[i SUBSEP c] = sorted[i SUBSEP c]
    }
}

# getClassRanges(data, ranges)
#   For every class column listed in Klasses, stores the smallest and
#   largest value found in `data` at ranges[col SUBSEP "min"] and
#   ranges[col SUBSEP "max"].
#   NOTE(review): relies on a global `Inf` that is not defined in this
#   section -- confirm it is initialised elsewhere.
function getClassRanges(data, ranges,    class,min,max,colnum,row) {
    for (class in Klasses) {
        min = Inf
        max = -1*Inf
        colnum = Klasses[class]
        for (row=1; row<=data[0]; row++) {
            if (data[row SUBSEP colnum] > max)
                max = data[row SUBSEP colnum]
            if (data[row SUBSEP colnum] < min)
                min = data[row SUBSEP colnum]
        }
        ranges[colnum SUBSEP "min"]=min
        ranges[colnum SUBSEP "max"]=max
    }
}

# scoreData(data, ranges, scores)
#   Sets scores[r], for r = 1..data[0], to the mean over all classes of
#   (value - min) / (max - min), using the ranges produced by
#   getClassRanges().  A class whose range is zero contributes 0.
#   When data has no rows nothing is written to `scores`.
function scoreData(data, ranges, scores,    numClasses,key,r,score,class,colnum,min,max) {
    if (!data[0]) {
        # The original statement here was a bare string and had no effect;
        # report the condition on stderr in verbose runs instead.
        if (Verbose)
            print "Can't score data with no defined size" > "/dev/stderr"
        return
    }
    numClasses=0
    for(key in Klasses)
        numClasses++
    for (r=1; r<=data[0]; r++) {
        score=0
        for (class in Klasses) {
            colnum = Klasses[class]
            min = ranges[colnum SUBSEP "min"]
            max = ranges[colnum SUBSEP "max"]
            if (max - min == 0)
                score += 0
            else
                score += (data[r SUBSEP colnum] - min) / (max - min)
        }
        # Normalize score to 1.0; with no classes defined the score is 0
        # rather than a fatal division by zero.
        scores[r] = (numClasses ? score/numClasses : 0)
    }
}

# describeData(name, data, scores)
#   Prints a report for one table: its size, the median/spread of its
#   scores, the sorted scores, and for each class in Klasses the sorted
#   values of that class column with their median and spread.
#   Returns the same statistics as one comma-separated string:
#   scoreMedian,scoreSpread[,classMedian,classSpread]...
function describeData(name, data, scores,    strdesc,scoresCopy,r,class,tmp,t) {
    strdesc = ""
    print name" (size: "data[0]")"
    asort(scores, scoresCopy)
    print "\tScore-Median: "findMedian(scoresCopy)
    print "\tScore-Spread: "findSpread(scoresCopy)
    strdesc = findMedian(scoresCopy)","findSpread(scoresCopy)
    printf "\tScores: "
    for (r=1; r<=data[0]; r++)
        printf "%.3f ",scoresCopy[r]
    print ""
    for (class in Klasses) {
        split("",tmp,"")
        printf "%s", ("\t" class ": ")
        t=0
        # Use this class's own column; the original read a hard-coded
        # column 23 for every class.
        for (r=1; r<= data[0]; r++)
            tmp[++t] = data[r SUBSEP Klasses[class]]
        asort(tmp)
        for (r=1; r<= data[0]; r++)
            printf "%s", (tmp[r] " ")
        print ""
        print "\t"class"-median: "findMedian(tmp)
        print "\t"class"-spread: "findSpread(tmp)
        strdesc = strdesc","findMedian(tmp)","findSpread(tmp)
    }
    print ""
    return strdesc
}

#Add a treatment to the list of constraints
#   treatment   : "attr SUBSEP value"
#   constraints : array indexed by attr; each element is a SUBSEP-joined
#                 list of the values allowed for that attribute.
#   Each attr/value pair is added at most once; the global Seen array
#   records the pairs already handled.
function addConstraint(treatment, constraints,    tmp,attr,val) {
    split(treatment,tmp,SUBSEP)
    attr = tmp[1]
    val  = tmp[2]
    if(!Seen[attr SUBSEP val]) {
        #Have we already constrained this attribute?
        #(tested by membership and non-emptiness so that an existing
        # value of 0 is extended rather than overwritten)
        if((attr in constraints) && constraints[attr] != "")
            constraints[attr] = constraints[attr] SUBSEP val #if so, extend its range
        else
            constraints[attr] = val
    }
    Seen[attr SUBSEP val]=1
}

# filter(data, constraints, filteredData)
#   Clears filteredData and fills it with the rows of `data` that satisfy
#   every constraint.  filteredData[0] counts the rows copied, which are
#   renumbered consecutively from 1 in their original order.
function filter(data, constraints, filteredData,    tmp,passesNeeded,key,row,col,c,passes,success,possibleValues,value) {
    split("",filteredData,"")
    passesNeeded=0
    for (key in constraints)
        passesNeeded++
    for(row=1; row<=data[0]; row++) {
        #Row passes if all attributes match constraint values
        #Disjunctions form if multiple constraint values for a single attribute exist
        passes=0
        for(col in constraints) {
            success=0
            split(constraints[col], possibleValues, SUBSEP)
            for (value in possibleValues) {
                if (possibleValues[value] == data[row,col]) {
                    success=1
                }
            }
            if(success) { #Guarantees we can't somehow match multiple times in a range for a single attr
                passes++
            }
        }
        if(passes == passesNeeded) { #All constraints matched
            filteredData[0]++ #Ensure monotonic increasing order in filtered set indexes
            for(c = 1; c <= Cols; c++) {
                filteredData[filteredData[0],c] = data[row,c] #Add to filtered set
            }
        }
    }
}