##################################################################
# Divide the k-th nearest historical projects into best (lowest
# estimated) and the rest. Collect frequency counts for best and rest.
# Rank attribute ranges by how common they are in best and how
# rare they are in rest.
#
# Convention: parameters after the run of extra spaces are locals.

# bestRest: score every row in `rows`, then tally each row into `best`
# or `rest` via count().
#   rows    array keyed by row id; rows[row] is the amount count() adds
#   border  1-based position, in the ascending list of row scores, of the
#           cut-off score; rows scoring <= that value are tallied in `best`
#   best    (out) tallies for rows at or below the cut-off
#   rest    (out) tallies for every other row
# Reads the globals Train and Klasses (passed through to getRowClassVals).
function bestRest(rows,border,best,rest,    enough,n,scores,row,k,r,rowKlasses,rowScore) {
  for (row in rows) {
    split("", rowKlasses, "")            # empty the scratch array
    getRowClassVals(row, Train, Klasses, rowKlasses)
    # Remember each row's score so it is computed once, not twice.
    # NOTE(review): assumes score() and getRowClassVals() give the same
    # answer when called again for the same row -- confirm.
    rowScore[row] = score(rowKlasses)
    scores[++n] = rowScore[row]
  }
  if (!n) return                         # no rows: nothing to tally
  # A border past the last score would read a missing element (empty
  # string) and push every row into `rest`; treat it as "all rows are best".
  if (border > n) border = n
  asort(scores)                          # ascending, in place
  enough = scores[border]
  for (row in rows) {
    if (rowScore[row] <= enough) count(row, best, rows[row])
    else                         count(row, rest, rows[row])
  }
}

# count: add one row to the tally array `f`.
#   f[0]            number of rows tallied so far
#   f[col, value]   running sum of `n` for each column in the global Goal,
#                   keyed by the value Train[row,col] holds for this row
function count(row,f,n,    col,c) {
  f[0]++
  for (col in Goal)
    f[col, Train[row,col]] += n
}

# rank: score every key of `best` (other than the row counter at index 0)
# by as100(b^2/(b+r)), where b and r are the key's tallies in `best` and
# `rest` divided by the respective row counters, and write the keys into
# ranked[1..max] from highest score to lowest.
#   k1,k2   unused here; kept so existing callers need no change
#   ranked  (out) ranked[1] holds the highest-scoring key
# Returns the number of keys ranked.
function rank(k1,k2,best,rest,ranked,    \
              range,bests,rests,i,b,r,sc,scores,sorted,memo,max,nTied,tiedRange) {
  bests = best[0]
  rests = rest[0]
  for (i in best) {
    if (i != 0) {
      # Guard the divisions: an empty `rest` (every row landed in best)
      # used to be a fatal division by zero. The `in` test also avoids
      # creating empty elements in the caller's `rest` array.
      b  = bests ? best[i] / bests : 0
      r  = (rests && (i in rest)) ? rest[i] / rests : 0
      # The local is called `sc`, not `score`: a parameter may not share
      # its name with the function score().
      sc = as100((b + r) ? (b^2) / (b + r) : 0)
      scores[i] = sc
      memo[sc]  = i                      # last key seen for this score
      # Keys with equal scores are kept side by side; memo alone would
      # keep only one of them and repeat it for every tie.
      tiedRange[sc, ++nTied[sc]] = i
    }
  }
  max = asort(scores, sorted)
  for (i = max; i >= 1; i--) {           # highest score must be first
    sc = sorted[i]
    ranked[max-i+1] = tiedRange[sc, nTied[sc]--]
  }
  showRanks(memo, sorted, max, ranked)
  return max
}

# showRanks: when the global Verbose is set, print a score/range table,
# piping the rows through "sort -r -n | cat -n".
#   memo    score -> key, used when `ranked` has no entry for a position
#   sorted  scores in ascending order, indexed 1..max
#   ranked  (optional) keys in descending score order as built by rank();
#           lets tied scores show their own key instead of a repeated one
# Each key is split on the global `_` into a column id (looked up in Eman)
# and a value.
# NOTE(review): `_` is presumably the same separator as SUBSEP, since the
# keys are built with two subscripts in count() -- confirm.
function showRanks(memo,sorted,max,ranked,    i,range,tmp,com,pos) {
  if (Verbose) {
    com = "sort -r -n | cat -n"
    print "#n\tscore\trange"
    print "------\t-----\t--------------"
    for (i = max; i >= 1; i--) {         # highest score must be first
      pos   = max - i + 1
      range = (pos in ranked) ? ranked[pos] : memo[sorted[i]]
      split(range, tmp, _)
      print sprintf("%5.2f", sorted[i]) "\t" Eman[tmp[1]] " = " tmp[2] | com
    }
    close(com)
    print ""
  }
}