# Discretizes the declared values of each @attribute in an ARFF-like file.
#
# Input lines handled (anything from '%' to end of line is a comment):
#   @relation ...                 echoed unchanged
#   @attribute name v1 v2 v3 ...  values are binned by their position in the list
#   @class name v1 v2 ...         echoed unchanged; column is never binned
#   @data                         echoed; following rows are rewritten bin-by-bin
#   @project ...                  echoed; ends the data section, and later
#                                 @attribute lines are re-printed using the
#                                 bins that were built before
#
# Variables (must be given with -v so that BEGIN can see them):
#   Discretize  number of bins per attribute (default 2)
#   BinNames    non-zero: label bins "B1", "B2", ... (or vl/lo/md/hi/vh when
#               there are exactly 5 bins); zero: label each bin with one of
#               the attribute's original values
#   NoProj      defaulted here, but not read anywhere else in this file
#
# Example: awk -v Discretize=3 -v BinNames=1 -f <this file> input.arff

BEGIN {
    NumBins  = (Discretize ? Discretize : 2)
    BinNames = (BinNames ? BinNames : 0)
    NoProj   = (NoProj ? NoProj : 0)
}

# Strip comments first so that no later rule ever sees a '%'.
{ gsub(/%.*/, "") }

/^[ \t]*$/ { next }

# The relation name is passed through unchanged.
/@relation/ { print $0 }

/@attribute/ {
    if (Proj)
        printProj()
    else
        initCol()
}

/@class/ { initClass() }

/@project/ { print $0; Proj = 1; In = 0 }

# This rule must stay before the @data rule so the "@data" line itself is
# not treated as a data row.
In { printData() }

/^@data/ { print $0; In = 1 }

/^@/ { next }

# Registers the attribute on the current line as the next column, builds its
# value -> bin map, and prints the rewritten declaration containing each
# distinct bin label once, in first-seen order.
# Line layout: @attribute name 1stval 2ndval 3rdval ...
function initCol(    i, seen, binned, name, numValues) {
    Columns[++Cols] = $2

    # Index the column by its name with the first '?' removed.
    name = $2
    sub(/\?/, "", name)
    ColName2Indx[name] = Cols

    numValues = NF - 2
    for (i = 1; i <= numValues; i++)
        ColValues[Cols, i] = $(i + 2)

    mapBins(ColValues, Cols, numValues, BinMap)

    printf "%s %s", $1, $2
    for (i = 1; i <= numValues; i++) {
        binned = BinMap[Cols, $(i + 2)]
        if (!seen[binned]) {
            seen[binned] = 1
            printf " %s", binned
        }
    }
    print ""
}

# Registers the current line's attribute as a class column (its values are
# never binned) and echoes the declaration with single-space separators.
function initClass(    i) {
    Columns[++Cols] = $2
    Class[Cols] = 1

    printf "%s %s", $1, $2
    for (i = 3; i <= NF; i++)
        printf " %s", $i
    print ""
}

# Fills BinMap[columnNumber, value] = bin label for one column.
#   ColValues  ColValues[colIndx, i] is the i-th declared value (1-based)
#   colIndx    column number
#   numValues  how many values the column declares
#   BinMap     output array
# Values are assigned to the global NumBins bins purely by their position in
# the declaration: position i falls into bin int((i-1) / (numValues/NumBins)) + 1.
function mapBins(ColValues, colIndx, numValues, BinMap,    i, val, bin, binWidth) {
    if (numValues < 1)
        return

    binWidth = numValues / NumBins
    for (i = 1; i <= numValues; i++) {
        val = ColValues[colIndx, i]
        bin = int((i - 1) / binWidth) + 1

        if (BinNames) {
            # This must be a comparison: assigning here would silently force
            # every later computation to use 5 bins.
            BinMap[colIndx, val] = (NumBins == 5) ? rangeName(bin) : "B" bin
        } else {
            # Label the bin with one of the column's own declared values.
            BinMap[colIndx, val] = ColValues[colIndx, int(numValues * ((bin - 1) / NumBins)) + 1]
        }
    }
}

# Returns the symbolic label for a bin in the 5-bin scheme
# (1..5 -> vl, lo, md, hi, vh), or "" for any other bin number.
function rangeName(bin,    names) {
    split("vl lo md hi vh", names, " ")
    return (bin in names) ? names[bin] : ""
}

# Prints the current data row: class columns are copied as they are, every
# other column is replaced by its bin label. Each field is followed by a
# single space.
function printData(    c) {
    for (c = 1; c <= Cols; c++) {
        if (Class[c])
            printf "%s ", $c
        else
            printf "%s ", BinMap[c, $c]
    }
    print ""
}

# Re-prints an @attribute line that appears after @project, replacing its
# values with the bin labels recorded for the column of the same name and
# printing each distinct label once.
function printProj(    i, seen, name, col, binned) {
    name = $2
    sub(/\?/, "", name)
    col = ColName2Indx[name]

    printf "%s %s", $1, $2
    for (i = 3; i <= NF; i++) {
        binned = BinMap[col, $i]
        if (!seen[binned]) {
            seen[binned] = 1
            printf " %s", binned
        }
    }
    print ""
}