# returns the best k
#
# Input handling (as implemented by the rules below):
#   - everything from a '%' to the end of the line is stripped
#   - blank lines are skipped
#   - lines starting with '@' are skipped; a line starting with '@data'
#     switches on row reading for all following lines
#   - the first `Tests` data rows are stored in Test, the rest in Train
# NOTE(review): the '%' / '@data' layout looks like Weka ARFF -- confirm.
# NOTE(review): FS is not set here; fields are split with whatever FS the
# caller supplies (e.g. -F,).
#
# Globals read but not set in this part of the file: Cols (number of columns)
# and Klass (index of the class column, excluded from distances).
# Helpers called but not defined in this part of the file: as100(), push2().

BEGIN { Tests = 20 }               # number of leading data rows held out in Test

{ gsub(/%.*/, "") }                # strip trailing comments; $0 is re-split
/^[ \t]*$/ { next }                # ignore blank (or comment-only) lines
In {
  # Exactly `Tests` rows go to Test (the previous `> -1` test let one extra
  # row through); every later row goes to Train.
  if (Tests-- > 0)
    readRow(Test)
  else
    readRow(Train)
}
/^@data/ { In = 1 }                # placed after the `In` rule so the @data line itself is not read as a row
/^@/     { next }
END      { findBestK() }

# Append the current input record to `data`.
#   data["rows"]   : running row count (incremented here)
#   data[row, col] : value of field `col` of the new row
function readRow(data,    row, col) {
  row = ++data["rows"]
  for (col = 1; col <= NF; col++)
    data[row, col] = $col
}

# TODO: not implemented yet; END calls this after all rows have been read.
function findBestK() {
}

# Normalised Euclidean distance between row `row1` of `data1` and row `row2`
# of `data2`: sqrt(sum of squared differences) / sqrt(number of columns used).
# Column `Klass` is skipped. Returns 0 when no column was compared (for
# example when Cols is unset) instead of dividing by zero.
function euclidean(row1, row2, data1, data2,    n, col, d, delta) {
  for (col = 1; col <= Cols; col++)
    if (col != Klass) {
      delta = data1[row1, col] - data2[row2, col]
      d += delta * delta           # squaring makes an abs() call unnecessary
      n++
    }
  if (!n)
    return 0
  return sqrt(d) / sqrt(n)
}

# Distance between two rows, passed through as100(), with both endpoints
# recorded in `memo`, keyed by the distance itself:
#   memo[-d] = row1, memo[d] = row2
# Two pairs that yield the same d overwrite each other in `memo`.
# Returns d.
function distance(row1, row2, data1, data2, memo,    d) {
  d = as100(euclidean(row1, row2, data1, data2))
  memo[-1 * d] = row1   # d started at row1
  memo[d]      = row2   # d ended at row2
  return d
}

# For every row i of `data1`, compute the distance to each row j > i of
# `data2` and hand it to push2(dist, near, i).
# NOTE(review): push2 presumably appends to near[i, 1..near[i,0]], which is
# the layout mySortedNeighbors reads -- confirm against its definition.
# NOTE(review): starting j at i+1 only pairs each row with later rows;
# confirm this is intended when data1 and data2 are different tables.
function neighbors(data1, data2, near, memos,    data1s, data2s, i, j) {
  data1s = data1["rows"]
  data2s = data2["rows"]
  for (i = 1; i <= data1s; i++)
    for (j = i + 1; j <= data2s; j++)
      push2(distance(i, j, data1, data2, memos), near, i)
}

# Collect the distances stored for row `i` in `near` (count in near[i,0],
# values in near[i,1..count]), sort them ascending with gawk's asort(), and
# fill out[1..d] with memos[distance] for each sorted distance.
# Returns d, the number of entries written to `out`.
function mySortedNeighbors(i, near, memos, out,    most, j, k, dist, d, sorted) {
  d = 0
  most = near[i, 0]
  for (j = 1; j <= most; j++)
    dist[++d] = near[i, j]
  asort(dist, sorted)
  for (k = 1; k <= d; k++)
    out[k] = memos[sorted[k]]
  return d
}