# Reads an ARFF-style table into global arrays keyed by table id 0, then
# (in END) picks two mutually distant rows via main().
#
# Globals written by the rules below:
#   Cols[0]        number of @attribute declarations seen
#   Name[0,i]      name of column i
#   Nump[0,i]      1 if column i was declared numeric/integer/real/continuous
#   Rows[0]        number of data rows read
#   Data[0,r,i]    cell of row r, column i (whitespace removed)
#   Min[0,i], Max[0,i]   observed range of each numeric column
# Optional input: Seed (e.g. -v Seed=42) seeds the random generator;
# 1 is used when Seed is unset or zero.

# Normalise every record: drop "%" comments, carriage returns and
# leading/trailing blanks. Inner whitespace is kept for now so that
# "@attribute name type" still splits into $1, $2, $3.
{
    sub(/%.*/, "")
    gsub(/\r/, "")
    sub(/^[ \t]+/, "")
    sub(/[ \t]+$/, "")
}

# Nothing left after stripping: skip the record.
/^$/ { next }

# Column declaration: record its name and whether it is numeric.
tolower($1) == "@attribute" {
    Cols[0]++
    Name[0, Cols[0]] = $2
    Nump[0, Cols[0]] = (tolower($3) ~ /(numeric|integer|real|continuous)/)
    next
}

# Start of the data section: rows are comma separated from here on.
# Seed the generator and give every numeric column sentinel bounds.
tolower($1) == "@data" {
    FS = ","
    srand(Seed ? Seed : 1)
    for (I = 1; I <= Cols[0]; I++)
        if (Nump[0, I]) {
            Max[0, I] = -1 * 10^32
            Min[0, I] = 10^32
        }
    next
}

# Any other "@..." header line is ignored. Anchored at the start of the
# line so that data values containing "@" are not discarded.
/^@/ { next }

# Data row: store each cell and widen the range of numeric columns.
{
    gsub(/[ \t]/, "")          # modifying $0 re-splits the fields on FS
    Rows[0]++                  # one increment per record, not per cell
    for (I = 1; I <= NF; I++) {
        Data[0, Rows[0], I] = $I
        # "?" marks a missing value and must not influence Min/Max.
        if (Nump[0, I] && $I != "?") {
            if ($I + 0 > Max[0, I]) Max[0, I] = $I + 0
            if ($I + 0 < Min[0, I]) Min[0, I] = $I + 0
        }
    }
}

END { main(0) }

# main(c): for table c, pick a random row, find the row furthest from it
# (left), then the row furthest from left (right), and record
# Parent[c+2] = c+1. Does nothing when table c has no rows.
# NOTE(review): left/right are computed but not used further in the
# visible code; this looks like an unfinished split step -- confirm.
function main(c,    left, right, c1, c2) {
    if (Rows[c] < 1)
        return
    left  = furthest(c, any(Rows[c]))
    right = furthest(c, left)
    c1 = c + 1
    c2 = c1 + 1
    Parent[c2] = c1
}

# furthest(c, start): index of the row in table c with the largest
# distance from row `start`. Returns `start` itself when no row is
# strictly further away than distance 0.
function furthest(c, start,    r2, tmp, max, out) {
    max = 0
    out = start
    for (r2 = 1; r2 <= Rows[c]; r2++) {
        tmp = distance(c, start, r2)
        if (tmp > max) {
            max = tmp
            out = r2
        }
    }
    return out
}

# distance(c, r1, r2): square root of the mean per-column squared
# distance between rows r1 and r2 of table c. The column count is
# taken from Cols[0]. Returns 0 when there are no columns.
function distance(c, r1, r2,    col, n, d) {
    n = Cols[0]
    if (n < 1)
        return 0
    d = 0
    for (col = 1; col <= n; col++)
        d += distance1(c, r1, r2, col)
    return sqrt(d / n)
}

# distance1(c, r1, r2, col): squared distance between two cells of one
# column. Missing values ("?") contribute 0. Numeric columns are scaled
# to 0..1 with Min/Max first; symbolic columns give 0 when the values
# are equal and 1 otherwise.
function distance1(c, r1, r2, col,    one, two, span) {
    one = Data[c, r1, col]
    two = Data[c, r2, col]
    if (one == "?" || two == "?")
        return 0
    # Column types are recorded under table id 0 by the @attribute rule.
    if (Nump[0, col]) {
        span = Max[c, col] - Min[c, col]
        if (span == 0)          # constant column: avoid division by zero
            return 0
        one = (one - Min[c, col]) / span
        two = (two - Min[c, col]) / span
        return (one - two)^2
    }
    return (one != two)
}

# any(n): random integer in 1..n (rand() is in [0,1), so n*rand() < n).
function any(n) {
    return round(n * rand())
}

# round(n): int(1 + n). Despite the name this does not round to nearest;
# any() relies on it to map [0,n) onto 1..n.
function round(n) {
    return int(1 + n)
}