##################################################################
# Divide the k-th nearest historical projects into best (lowest
# estimated) and the rest. Collect frequency counts for best and
# rest. Rank attribute ranges by how common they are in best and
# how rare they are in rest.
#
# Globals read here but defined elsewhere: Train, Klasses, Goal,
# Eman, Verbose, _ and the functions scoreRow() and as100().

# bestWorst(rows, border, best, rest)
#   Scores every row in `rows` with scoreRow(), sorts the scores and
#   takes two cut-offs: the border-th lowest score and the score at
#   position n-border. Rows scoring <= the low cut-off are tallied
#   into `best`; rows scoring > the high cut-off are tallied into
#   `rest`. rows[row] is handed to count() as the increment.
#   Everything after `rest` in the parameter list is a local.
#   Each row is scored once and the result reused; this assumes
#   scoreRow() returns the same value for the same row -- confirm.
function bestWorst(rows,border,best,rest, \
                   cutoffBest,cutoffWorst,n,scores,row,s,rowScore) {
  for(row in rows) {
    s = scoreRow(row, Train, Klasses)
    rowScore[row] = s      # remembered per row; asort() renumbers scores[]
    scores[++n]   = s
  }
  n = asort(scores)
  cutoffBest  = scores[border]
  cutoffWorst = scores[n-border]
  for(row in rows) {
    if ( rowScore[row] <= cutoffBest ) {
      count(row,best,rows[row])
      print("BEST")        # NOTE(review): looks like debug output -- confirm it is wanted
    }
    if ( rowScore[row] > cutoffWorst ) {
      count(row,rest,rows[row])
      print("WORST")       # NOTE(review): looks like debug output -- confirm it is wanted
    }
  }
}

# bestRest(rows, border, best, rest)
#   Like bestWorst() but with a single cut-off: rows scoring <= the
#   border-th lowest score are tallied into `best`, every other row
#   into `rest`. Everything after `rest` is a local.
function bestRest(rows,border,best,rest, \
                  enough,n,scores,row,s,rowScore) {
  for(row in rows) {
    s = scoreRow(row, Train, Klasses)
    rowScore[row] = s
    scores[++n]   = s
  }
  asort(scores)
  enough = scores[border]
  for(row in rows) {
    if ( rowScore[row] <= enough ) {
      count(row,best,rows[row])
    } else {
      count(row,rest,rows[row])
    }
  }
}

# count(row, f, n)
#   Tallies one row into the frequency table `f`: f[0] is bumped by
#   one, and for every column index found in Goal the cell
#   f[col, Train[row,col]] is increased by n.
function count(row,f,n,   col) {
  f[0]++
  for(col in Goal) {
    f[col,Train[row,col]] += n
  }
}

# tiedKey(sorted, i, max)
#   Returns the memo subscript for position i of the ascending array
#   sorted[1..max]. The first entry of a run of equal scores (seen
#   when walking from max down to 1) maps to the bare score; the
#   k-th following entry maps to (score SUBSEP k). Scores are
#   compared in their string form because that is how awk turns
#   them into array subscripts.
function tiedKey(sorted,i,max,   j,ties,key) {
  key = sorted[i] ""
  for(j = i + 1; j <= max && (sorted[j] "") == key; j++)
    ties++
  return (ties > 0) ? (key SUBSEP ties) : key
}

# rank(k1, k2, best, rest, ranked)
#   For every range (every subscript of `best` other than 0) computes
#     b = best[range]/best[0],  r = rest[range]/rest[0],
#     score = as100(b^2/(b+r))
#   and fills ranked[1..max] with the ranges ordered from highest to
#   lowest score. Returns max, the number of ranges ranked.
#   k1 and k2 are accepted but not used here. Everything after
#   `ranked` is a local.
#
#   Ranges with equal scores each keep their own slot in `ranked`
#   (their relative order is unspecified). An empty `rest` table or
#   a range with b+r == 0 yields as100(0) rather than a fatal
#   division by zero.
function rank(k1,k2,best,rest,ranked, \
              bests,rests,i,b,r,score,scores,sorted,memo,tied,max) {
  bests = best[0]
  rests = rest[0]
  for(i in best) {
    if (i != 0) {
      b = (bests > 0) ? best[i] / bests : 0
      # `i in rest` avoids creating empty cells in the caller's table
      r = (rests > 0 && (i in rest)) ? rest[i] / rests : 0
      score = as100((b + r > 0) ? (b^2)/(b + r) : 0)
      scores[i] = score
      if (score in memo)
        memo[score, ++tied[score]] = i   # tie: keep it under its own key
      else
        memo[score] = i
    }
  }
  max = asort(scores,sorted)
  showRanks(memo,sorted,max)
  for(i=max; i>=1; i--)                  # highest score must be first
    ranked[max-i+1] = memo[tiedKey(sorted,i,max)]
  return max
}

# showRanks(memo, sorted, max)
#   When the global Verbose is set, prints a header and then one
#   line per entry of sorted[1..max] (score, Eman[column] = value),
#   piped through "sort -r -n | cat -n". The range is split on the
#   global `_` (presumably the same separator as SUBSEP -- confirm).
#   Tied scores are looked up through tiedKey(); if memo has no
#   per-tie entry the plain memo[score] entry is used, so a memo
#   built without tie keys still works.
function showRanks(memo,sorted,max,   i,range,tmp,com,key) {
  if (Verbose) {
    com="sort -r -n | cat -n"
    print "#n\tscore\trange"
    print "------\t-----\t--------------"
    for(i=max; i>=1; i--) {              # highest score must be first
      key   = tiedKey(sorted,i,max)
      range = (key in memo) ? memo[key] : memo[sorted[i]]
      split(range,tmp,_)
      print sprintf("%5.2f",sorted[i]) "\t" Eman[tmp[1]] " = " tmp[2] | com
    }
    close(com)
    print ""
  }
}