# Recode qqdefects.arff into qqdefectsdiscrete.arff:
#   * the first 27 @attribute declarations are rewritten as "numeric";
#   * in the data section, the first 27 comma-separated columns are mapped
#     from the ordinal labels VL/L/M/H/VH to 1..5 ("?" is kept as "?");
#   * a column holding int(log(last field)/2)*2 is appended to each data row.
# NOTE: comments inside the awk program must not contain apostrophes,
# because the program is passed as a single-quoted shell string.
gawk '
    BEGIN {
        FS = " "

        # Number of leading ordinal attributes (and data columns) to recode.
        NORD = 27

        # Ordinal label -> numeric level. "?" (missing value) passes through.
        L["VL"] = 1
        L["L"]  = 2
        L["M"]  = 3
        L["H"]  = 4
        L["VH"] = 5
        L["?"]  = "?"
    }

    # Count every attribute declaration seen so far.
    /@attribute/ { A++ }

    # Redeclare the first NORD attributes as numeric; later ones fall
    # through to the generic print rule and are emitted unchanged.
    /@attribute/ && (A <= NORD) {
        split($0, tmp, / /)
        print tmp[1] " " tmp[2] " numeric "
        next
    }

    # From the @data marker onwards records are comma separated. The new FS
    # only applies to the records read after this one.
    /@data/ {
        OFS = FS = ","
        IN = 1
    }

    # Reset the per-row suffix so short or blank rows do not inherit one.
    IN { suffix = "" }

    # Full data rows: recode the ordinal columns and build the extra column.
    # Labels missing from L become empty strings.
    # NOTE(review): no @attribute line is emitted for the appended column;
    # confirm downstream consumers expect that.
    # NOTE(review): log() of a zero or non-numeric last field is presumably
    # -inf; confirm the last column is always positive.
    IN && NF > NORD {
        for (i = 1; i <= NORD; i++)
            $i = L[$i]
        suffix = "," (int(log($NF) / 2) * 2)
    }

    # Emit every line that was not already handled by a rule ending in next.
    { print $0 suffix }

    # Trailing blank line at the end of the output file.
    END { print "" }
' qqdefects.arff > qqdefectsdiscrete.arff