##################################################################
# Divide the k-th nearest historical projects into best (lowest
# estimated) and the rest. Collect frequency counts for best and rest.
# Rank attribute ranges by how common they are in best and how
# rare they are in rest.

# bestRest: split `rows` into "best" and "rest" and tally both groups.
#   rows   : array keyed by training-row id; rows[row] is the weight
#            handed to count() for that row
#   border : 1-based position, in the ascending list of row scores, of
#            the highest score still treated as "best"
#   best   : (out) frequency table for rows scoring <= that threshold
#   rest   : (out) frequency table for every other row
# Everything after `rest` is a local (k and r are unused).
# Relies on getRowClassVals(), score() and the globals Train and Klasses,
# all of which are defined outside this section.
function bestRest(rows,border,best,rest,    enough,n,scores,row,k,r,rowKlasses) {
    #    printf "            "
    #    for (c=1; c<=Cols; c++)
    #        printf "%5s ",Eman[c]

    # NOTE: nasa93com is a global, not a local. In this section it is only
    # referenced by the disabled debug output here and in count().
    nasa93com = "sort -n -k 24"

    # Pass 1: score every row.
    for (row in rows) {
        split("", rowKlasses, "")
        getRowClassVals(row, Train, Klasses, rowKlasses)
        scores[++n] = score(rowKlasses)
    }
    if (!n)
        return                      # no rows: nothing to classify

    asort(scores)                   # ascending, re-indexed 1..n
    #    saya(scores,"SCORES")
    #    print "BORDER:"border

    # scores[] only has the integer subscripts 1..n. A fractional or
    # out-of-range border would read a missing element, and the empty
    # threshold would then compare as 0; clamp instead.
    border = int(border)
    if (border > n) border = n
    if (border < 1) border = 1
    enough = scores[border]
    #    print "ENOUGH: "enough

    # Pass 2: rows at or below the threshold are "best", others "rest".
    # Rows tied with the threshold all land in "best".
    for (row in rows) {
        split("", rowKlasses, "")
        getRowClassVals(row, Train, Klasses, rowKlasses)
        if (score(rowKlasses) <= enough) {
            #    printf "**BEST**" | nasa93com
            count(row, best, rows[row])
        } else {
            #    printf "__REST__" | nasa93com
            count(row, rest, rows[row])
        }
    }
    #    close(nasa93com)
}

# count: add one training row to the frequency table `f`.
#   row : training-row id (first subscript into the global Train)
#   f   : (in/out) f[0] is incremented once per call; for every col in
#         the global Goal, f[col, Train[row,col]] grows by n
#   n   : weight to add for this row
# col is a local; c is only used by the disabled debug output.
function count(row,f,n,   col,c) {
    #    printf "ROW(%02d):",row | nasa93com
    #    for (c = 1; c <= Cols-2; c++)
    #        printf "%-5s ",Train[row,c] | nasa93com
    #    print "" | nasa93com
    f[0]++
    for (col in Goal)
        f[col, Train[row, col]] += n
}

# rank: score every range found in `best` and list them highest first.
#   k1,k2  : not used by this function
#   best   : frequency table built by count() for the "best" rows
#   rest   : frequency table built by count() for the "rest" rows
#   ranked : (out) ranked[1..max] = range keys, highest score first
# Returns max, the number of ranked ranges.
# For each key i other than 0: b = best[i]/best[0], r = rest[i]/rest[0]
# and the score is as100(b^2/(b+r)); as100() is defined elsewhere.
# The local formerly called `score` is now `val`, because a parameter may
# not share its name with the function score() used by bestRest().
function rank(k1,k2,best,rest,ranked, \
              range,bests,rests,i,b,r,val,scores,sorted,memo,max,tied,used) {
    bests = best[0]
    rests = rest[0]
    #    saya(best,"B")
    #    saya(rest,"R")
    #    saya(Eman,"EMAN")
    for (i in best) {
        #    print "i:"i"::"best[i]
        if (i != 0) {
            # An empty group has a zero total; treat its relative
            # frequency as 0 rather than dividing by zero.
            b = (bests != 0) ? best[i] / bests : 0
            r = (rests != 0 && (i in rest)) ? rest[i] / rests : 0
            #    if(Nomograms) {
            #        if(best[i] < (bests*Nomograms)) continue
            #        score = log(b/(r+0.000001)) + rand()/10000 # as100(b^2/(b+r))
            #    } else {
            val = (b + r != 0) ? as100((b^2) / (b + r)) : as100(0)
            scores[i] = val
            # Several ranges can share one score. memo[val] alone keeps
            # only the last of them, so each one also gets its own slot.
            memo[val] = i
            memo[val, ++tied[val]] = i
            #    if (RankDebug)
            #        print no_(i) "\t" " b=" b " r= " r " score=" score "::bests"best[i]"::"i
        }
    }
    max = asort(scores, sorted)
    #    saya(sorted,"Sorted")
    showRanks(memo, sorted, max)
    for (i = max; i >= 1; i--) {    # highest score must be first
        val = sorted[i]
        ranked[max - i + 1] = memo[val, ++used[val]]
    }
    #    saya(ranked,"R")
    return max
}

# showRanks: when the global Verbose is set, print a score/range table.
#   memo   : maps a score to a range key; if memo[score, k] entries are
#            present (as rank() supplies them), the k-th occurrence of a
#            score shows its own range instead of repeating memo[score]
#   sorted : scores in ascending order, indexed 1..max
#   max    : number of entries in sorted
# Rows are piped through "sort -r -n | cat -n", which orders and numbers
# them. A range key is split on the global `_` into an attribute id
# (looked up in the global Eman) and a value.
function showRanks(memo,sorted,max,    i,range,tmp,com,seen,val) {
    if (!Verbose)
        return
    com = "sort -r -n | cat -n"
    print "#n\tscore\trange"
    print "------\t-----\t--------------"
    for (i = max; i >= 1; i--) {    # highest score must be first
        val = sorted[i]
        seen[val]++
        if ((val, seen[val]) in memo)
            range = memo[val, seen[val]]
        else
            range = memo[val]
        split(range, tmp, _)
        print sprintf("%5.2f", val) "\t" Eman[tmp[1]] " = " tmp[2] | com
    }
    close(com)
    print ""
}