#!/usr/bin/gawk -f
#
# Summarise an ARFF-style data file. (The "@attribute" header lines and "%"
# comment lines look like Weka ARFF -- confirm against the real inputs.)
#
# Output, in order:
#   1. each data row, echoed as its first claNum comma-separated values
#      separated by spaces, followed by a blank line;
#   2. "name | type" for every declared attribute;
#   3. "value<TAB><TAB><TAB>count" for every distinct value seen in any column
#      (values are pooled across columns, not counted per attribute);
#   4. "miss: name<TAB><TAB><TAB>count": the number of "?" values per attribute;
#   5. NR, the total number of input lines (header/comment lines included).
#
# Known limitations kept from the original:
#   - header lines are split on whitespace, so attribute names or nominal
#     value lists that contain spaces are truncated at the first space;
#   - data values are split on "," only; quoting and spaces after commas
#     are not interpreted.
# NOTE(review): the original author left a remark about unexplained
# "weird behavior when n=11"; the cause is not visible in this file.

BEGIN {
    claNum = 0          # number of @attribute declarations seen so far
    # Mark count/miss as (empty) arrays without seeding a bogus "" key,
    # which would otherwise show up as an extra row in the END reports.
    split("", count)
    split("", miss)
}

# Normalise CRLF input: without this the trailing "\r" sticks to the last
# field, so a "?" in the last column is never recognised as missing and
# last-column values are counted under a different key ("x\r" vs "x").
{ sub(/\r$/, "") }

# Attribute declaration: "@attribute <name> <type-or-{nominal,values}>".
# Matched case-insensitively because ARFF keywords are not case-sensitive.
tolower($0) ~ /^@attribute/ {
    attrName = $2
    attrType = $3

    # Strip the surrounding quotes from a quoted attribute name.
    if (index(attrName, "'") > 0)
        attrName = substr(attrName, 2, length(attrName) - 2)

    # Strip the surrounding braces from a nominal value list; other types
    # (e.g. numeric) are stored unchanged.
    if (attrType ~ /^\{.*\}$/)
        attrType = substr(attrType, 2, length(attrType) - 2)

    claNum++
    cn[claNum] = attrName       # column index -> attribute name
    aname[attrName] = attrType  # attribute name -> type / value list
    miss[attrName] = 0          # ensure every attribute gets a "miss:" row
}

# Data row: any non-empty line that is neither a header (@...) nor a
# comment (%...).
/^[^@%]/ {
    split($0, ins, ",")
    # Walk one value per declared attribute; a row shorter than claNum
    # yields empty strings for the missing trailing columns.
    for (i = 1; i <= claNum; i++) {
        count[ins[i]]++
        if (ins[i] == "?")
            miss[cn[i]]++
        printf("%s ", ins[i])
    }
    # print appends ORS, so this ends the row and emits one blank line
    # (same output as the original).
    print "\n"
}

END {
    # Iteration order of "for (key in array)" is unspecified in awk.
    for (i in aname)
        printf("%s | %s\n", i, aname[i])
    for (i in count)
        printf("%s\t\t\t%s\n", i, count[i])
    for (i in miss)
        printf("miss: %s\t\t\t%s\n", i, miss[i])
    print NR
}