##################################################################
# Find the k-th nearest historical projects near the generated projects.
#
# Globals read:  Klasses (array whose VALUES are column numbers to skip),
#                Cols (number of columns), KnnDebug (debug flag).
# Helpers defined elsewhere in the project: as100(), push2().

# euclidean(row1,row2,data1,data2)
#   Distance between row `row1` of `data1` and row `row2` of `data2`.
#   Every column in 1..Cols that is not a value of Klasses is normalized
#   with that table's own "min"/"max" entries and the squared differences
#   are summed.  Returns sqrt(sum)/sqrt(columns compared).
#   Returns 0 when no column was compared (Cols is 0 or every column is
#   listed in Klasses); the original divided by zero in that case, which
#   is a fatal error in awk.
function euclidean(row1,row2,data1,data2,    n,col,d,d1,d2,key,ignorep) {
    split("",ignorep,"")                 # start from an empty skip-set
    for (key in Klasses)
        ignorep[Klasses[key]] = 1
    for (col = 1; col <= Cols; col++) {
        if (col in ignorep)
            continue
        d1 = normalize(data1,col,data1[row1,col])
        d2 = normalize(data2,col,data2[row2,col])
        d += (d1 - d2)^2                 # squaring makes an abs() call redundant
        n++
    }
    if (!n)
        return 0                         # nothing comparable: avoid sqrt(d)/sqrt(0)
    return sqrt(d)/sqrt(n)
}

# distance(row1,row2,data1,data2,memo)
#   Returns as100(euclidean(...)) and records in `memo` which rows produced
#   that value: memo[-d] holds row1 and memo[d] holds row2.
#   NOTE(review): memo is keyed by the distance value itself, so two row
#   pairs with the same distance overwrite each other, and for d == 0 both
#   keys may coincide -- confirm callers can tolerate this.
function distance(row1,row2,data1,data2,memo,   d) {
    d = as100(euclidean(row1,row2,data1,data2))
    memo[-1 * d] = row1                  # d started at row1
    memo[d] = row2                       # d ended at row2
    return d
}

# normalize(data,col,n)
#   Rescales `n` using data["min",col] and data["max",col]:
#   (n - min)/(max - min), or 1 when min and max are equal.
function normalize(data,col,n,  min,max) {
    min = data["min",col]
    max = data["max",col]
    if (min == max)
        return 1                         # degenerate range: avoid dividing by zero
    return (n - min) / (max - min)
}

# neighbors(news,new,olds,old,neighbor,memo)
#   For every row 1..news of `new` and every row 1..olds of `old`, computes
#   distance() and hands the result to push2(distance, neighbor, n).
#   knn() below reads neighbor[n,0] as the number of entries stored for n and
#   neighbor[n,1..] as the entries -- presumably what push2 maintains;
#   verify against its definition.
function neighbors(news,new,olds,old,neighbor,memo,   o,n) {
    for (n = 1; n <= news; n++)
        for (o = 1; o <= olds; o++)
            push2(distance(n,o,new,old,memo),neighbor,n)
}

# knn(k,news,neighbor,memo,ks)
#   Flattens all distances stored in `neighbor` for rows 1..news, sorts them
#   ascending (gawk asort) and walks them, looking each one up in `memo`.
#   ks[row] counts how often each looked-up row was seen; every first
#   sighting decrements k.
#   Returns the position in the sorted list at which k reached 0; if the
#   list is exhausted first, returns the remaining value of k.
#   NOTE(review): the two return cases are not distinguishable by value.
function knn(k,news,neighbor,memo,ks,   dist,n,most,i,d,sorted) {
    for (n = 1; n <= news; n++) {
        most = neighbor[n,0]
        for (i = 1; i <= most; i++)
            dist[++d] = neighbor[n,i]
    }
    knnDebug(dist,memo)
    asort(dist,sorted)
    for (i = 1; i <= d; i++) {
        n = memo[sorted[i]]
        if (++ks[n] == 1)                # first time this row shows up
            k--
        if (k == 0)
            return i
    }
    return k
}

# knnDebug(dist,memo)
#   Debug hook guarded by the global KnnDebug.  The dump itself is currently
#   commented out, so the function has no effect.
function knnDebug(dist,memo,   com,i) {
    if (KnnDebug) {
        # com = "sort -n -k 2 | " KnnDebug
        # for(i in dist)
        #     print memo[dist[i]] " " dist[i]
        # close(com)
    }
}