# Equal-width discretizer for ARFF-style input.
#
# Reads "@relation" / "@attribute" declarations followed by an "@data"
# section of comma-separated rows, then (in END) prints the relation, one
# "@attribute" line per column, and every data row with each numeric column
# replaced by a "low..high" bin label.
#
# Globals:
#   Bins     number of equal-width bins for numeric columns (default 5)
#   Klass    if non-zero, the last column is binned into Klass bins;
#            if zero, the last column is passed through untouched
#   Logging  comma-separated attribute names whose values are log()-ed
#            before binning (labels are printed back in the original scale)
#   Inf      stand-in for infinity used to seed Min/Max
#
# Bins, Inf and Klass are assigned in BEGIN, so to override them use
# var=value operands placed before the input file on the command line
# (those are processed after BEGIN), not -v.

BEGIN { Bins = 5; Inf = 10^32; Klass = 0 }

## --------------------------------------------------
# Data entry. Pretty routine stuff.

# Skip whole-line comments first, so that commented-out declarations such as
# "% @attribute foo numeric" are not mistaken for real ones by the rules below.
/^[ \t]*%/    { next }

/@relation /  { Relation = $2 }

# Name[] is a two-way map: Name[i] = attribute name, Name[name] = column
# index, and Name[0] = number of attributes seen so far.
# An attribute is numeric when its type field ($3) does not open a "{...}" set.
/@attribute/  { Name[++Name[0]] = $2
                Name[$2] = Name[0]
                Num[$2] = $3 !~ /{/
                if (Num[$2]) {
                    Max[$2] = -1 * Inf
                    Min[$2] = Inf
                }
              }

              { gsub(/[ \t]*/, "") }    # no blanks
              { gsub(/%.*/, "") }       # no comments
/^$/          { next }                  # no blank lines

# From here on records are comma-separated; Logging is unpacked into Log[].
/@data/       { In = 1; OFS = FS = ","; s2a(Logging, Log, ",") }
/@/           { next }                  # any other "@..." line is ignored
In            { Rows++; train(Rows, Data) }

END {
    print "@relation " Relation "\n"
    attributes(Name)
    print "\n@data\n"
    cells(Rows, Data, Name[0])
}

# train(row, d): store the current record's fields into d[row, attrName] and
# update Min[]/Max[] for numeric attributes.
#   row  row number used as the first subscript of d
#   d    array receiving the cell values
# Missing values ("?") are stored as-is and do not affect Min/Max.
# For attributes listed in Log[], the stored value is log(value), with the
# value floored at 0.0001 so log() is never handed zero or a negative number.
function train(row, d,    what, i, v) {
    for (i = 1; i <= NF; i++) {
        what = Name[i]
        d[row, what] = $i
        if ($i == "?") continue
        if (!Num[what]) continue
        v = $i
        if (Log[what]) {
            v = log(v < 0.0001 ? 0.0001 : v)
            d[row, what] = v
        }
        if (v > Max[what]) Max[what] = v
        if (v < Min[what]) Min[what] = v
    }
}

# cell1(n, col [, bins]): label of the equal-width bin that value n of column
# col falls into.
#   n     value, in the same space as Min[col]/Max[col] (log space for
#         columns in Log[])
#   col   attribute name
#   bins  number of bins; defaults to Bins
# Returns the same string range() produces for that bin, so data cells always
# match the values declared by attributes().
function cell1(n, col, bins,    width, i) {
    bins  = bins ? bins : Bins
    width = (Max[col] - Min[col]) / bins
    # A constant column has zero width; dividing by it is a fatal error in
    # awk, so everything goes into bin 0 instead.
    i = width ? int((n - Min[col]) / width) : 0
    # n == Max[col] lands exactly on the upper edge (i == bins); clamp both
    # ends so out-of-range or rounding cases still map to a declared bin.
    if (i >= bins) i = bins - 1
    if (i < 0)     i = 0
    return range(i, col, bins)
}

# range(i, col [, bins]): "low..high" label of the i-th bin of column col,
# where i is zero-based (bin 0 starts at Min[col]).
#   bins  number of bins; defaults to Bins
# For columns in Log[] the bounds are mapped back with exp(), the inverse of
# the log() applied in train(). Numbers are formatted with OFMT.
function range(i, col, bins,    width, low, high) {
    bins  = bins ? bins : Bins
    width = (Max[col] - Min[col]) / bins
    low   = Min[col] + i * width
    high  = low + width
    if (Log[col]) {
        low  = exp(low)
        high = exp(high)
    }
    return sprintf(OFMT ".." OFMT, low, high)
}

# attributes(name [, bins]): print one "@attribute" line per column.
#   name  two-way attribute map (see Name[] above); name[0] is the count
#   bins  number of bins for numeric columns; defaults to Bins
# Numeric columns are declared as the set of bin labels 0 .. bins-1, i.e.
# exactly the labels cell1() can emit. Other columns are printed with the
# literal type " discrete ".
function attributes(name, bins,    what, i, line, j, n) {
    bins = bins ? bins : Bins
    for (i = 1; i <= name[0]; i++) {
        what = name[i]
        line = "@attribute " name[i]
        if (Num[what]) {
            # The class (last) column is binned with Klass bins by classCell().
            # NOTE(review): when Klass is 0 a numeric class column is emitted
            # raw by classCell(), yet it is still declared here as a set of
            # bins (as before) -- confirm what the consumer expects.
            n = (i == name[0] && Klass) ? Klass : bins
            line = line " { " range(0, what, n)
            for (j = 1; j < n; j++)
                line = line " , " range(j, what, n)
            line = line " }"
        } else {
            line = line " discrete "
        }
        print line
    }
}

# cells(rows, data, cols): print every stored row, OFS-separated, with each
# cell passed through cell().
#   rows  number of rows
#   data  array filled by train(), indexed [row, attrName]
#   cols  number of columns; the last one is treated as the class
function cells(rows, data, cols,    row, col, line, what) {
    for (row = 1; row <= rows; row++) {
        what = Name[1]
        line = cell(what, data[row, what], 1, cols)
        for (col = 2; col <= cols; col++) {
            what = Name[col]
            line = line OFS cell(what, data[row, what], col, cols)
        }
        print line
    }
}

# cell(what, c, col, cols): output form of value c from column `what`.
#   col, cols  position of the column and total column count; the last
#              column is delegated to classCell()
# Missing values ("?") are passed through unchanged rather than being
# coerced to 0 and binned. Numeric columns are binned; others are returned
# as-is.
function cell(what, c, col, cols) {
    if (c == "?") return c
    if (cols == col) return classCell(c, what)
    return Num[what] ? cell1(c, what) : c
}

# classCell(c, what): output form of a class value: binned into Klass bins
# when Klass is non-zero, otherwise returned untouched.
function classCell(c, what) {
    return Klass ? cell1(c, what, Klass) : c
}

# o(a, s [, what]): debugging dump of array a, one "s [ key ]= value" line
# per element, piped through "sort -t, " followed by the options in `what`.
function o(a, s, what,    i, com) {
    print ""
    com = what ? "sort -t, " what : "sort -t, "
    for (i in a)
        print s " [ " i " ]= " a[i] | com
    close(com)
}

# s2a(s, a [, sep]): split string s on sep (default ",") and add each piece
# to array a as a[piece] = piece, so a[] can be used as a set.
function s2a(s, a, sep,    tmp, i) {
    split(s, tmp, sep ? sep : ",")
    for (i in tmp)
        a[tmp[i]] = tmp[i]
}