##################################################################
# Divide the k-th nearest historical projects into best (lowest
# estimated) and the rest. Collect frequency counts for best and rest.
# Rank attribute ranges by how common they are in best and how
# rare they are in rest.

# bestRest(rows, border, best, rest)
#   rows    array keyed by Train row id; rows[row] is handed to count()
#           as that row's weight.
#   border  1-based position in the ascending list of row scores; the
#           score found there is the cut-off ("enough") for "best".
#   best    output: frequency table (see count()) of rows scoring <= cut-off.
#   rest    output: frequency table of all other rows.
# Everything after `rest` is a local variable, not an argument.
# Relies on globals Klasses and Train and on score(), none defined here.
function bestRest(rows,border,best,rest,    \
                  enough,n,scores,row,k,r,rowKlasses,rowScore) {
    for (row in rows) {
        split("",rowKlasses,"")      # empty the scratch array
        r = 0                        # restart indices so every row fills 1..K
        for (k in Klasses)
            rowKlasses[++r] = Train[row,Klasses[k]]
        # Score each row once and remember it for the partition pass below.
        # NOTE(review): assumes score() is deterministic and side-effect free.
        rowScore[row] = score(rowKlasses)
        scores[++n]   = rowScore[row]
    }
    asort(scores)
    enough = scores[border]
    # print "ENOUGH: "enough
    for (row in rows) {
        if (rowScore[row] <= enough) {
            # printf "**BEST**"
            count(row,best,rows[row])
        } else {
            # printf "__REST__"
            count(row,rest,rows[row])
        }
    }
}

# count(row, f, n)
#   Adds one Train row to frequency table f.
#   f[0] is incremented by one per call (number of rows seen).
#   For every key `col` of the global Goal array, f[col,value] grows by
#   n when the global Count equals 1, otherwise by 1.
#   col, c and inc are locals (c is only used by the debug lines).
function count(row,f,n,    col,c,inc) {
    # printf "ROW("row"):"
    # for (c = 1; c <= Cols; c++)
    #     printf Train[row,c]" "
    # print ""
    f[0]++
    inc = (Count == 1) ? n : 1
    for (col in Goal)
        f[col,Train[row,col]] += inc
}

# rank(k1, k2, best, rest, ranked)
#   Scores every range found in `best` by how frequent it is in best
#   (b) versus rest (r), prints the ranking via showRanks(), and fills
#   ranked[1..max] with range keys, highest score first.
#   Returns max, the number of scored ranges.
#   k1 and k2 are accepted but not used by this function.
#   Nomograms > 0 : ranges with best[i] < best[0]*Nomograms are skipped;
#                   score = log(b/(r+0.000001)) plus a tiny random jitter.
#   otherwise     : score = as100(b^2/(b+r)).
#   Everything after `ranked` is a local variable.
function rank(k1,k2,best,rest,ranked,    \
              range,bests,rests,i,b,r,val,scores,sorted,memo,max) {
    bests = best[0]
    rests = rest[0]
    for (i in best) {
        if (i == 0)
            continue                 # slot 0 holds the row count, not a range
        # Guard the denominators: an empty rest (or best) table would
        # otherwise abort the program with a division by zero.
        b = bests ? best[i] / bests : 0
        # Test membership first so that reading does not create empty
        # entries in the caller's rest table.
        r = (rests && (i in rest)) ? rest[i] / rests : 0
        if (Nomograms > 0) {
            if (best[i] < (bests*Nomograms))
                continue
            val = log(b/(r+0.000001)) + rand()/10000   # jitter separates ties
        } else {
            # The original `b^2(b+r)` concatenated two numbers as strings;
            # the intended measure is b^2/(b+r).
            val = as100((b+r) > 0 ? b^2/(b+r) : 0)
        }
        scores[i] = val
        # NOTE(review): memo is keyed by score, so two ranges with exactly
        # the same score collide and only the last one visited survives.
        memo[val] = i
        # if (RankDebug)
        #     print no_(i) "\t" " b=" b " r= " r " score=" val "::bests"best[i]"::"i
    }
    max = asort(scores,sorted)
    showRanks(memo,sorted,max)
    for (i = max; i >= 1; i--)       # highest score must be first
        ranked[max-i+1] = memo[sorted[i]]
    return max
}

# showRanks(memo, sorted, max)
#   Prints a two-line header, then one "score<TAB>name = value" line per
#   entry of sorted[1..max] through the shell pipeline
#   "sort -r -n | cat -n", then a blank line.
#   memo maps a score to its range key; the key is split on `_`.
#   NOTE(review): `_` is a global not set in this function; presumably it
#   holds SUBSEP -- verify against the rest of the program.
#   Eman is a global looked up by the first field of the range key.
function showRanks(memo,sorted,max,    i,range,tmp,com) {
    com = "sort -r -n | cat -n"
    print "#n\tscore\trange"
    print "------\t-----\t--------------"
    for (i = max; i >= 1; i--) {     # highest score must be first
        range = memo[sorted[i]]
        split(range,tmp,_)
        print sprintf("%5.2f",sorted[i]) "\t" Eman[tmp[1]] " = " tmp[2] | com
    }
    close(com)                       # wait for the pipeline to finish
    print ""
}