# columnEntropy.awk -- print the Shannon entropy of one column of a
# comma-separated file, computed with the natural logarithm (nats).
#
# Usage:
#   gawk -f columnEntropy.awk Columns="-3-" data/subsets/coc81_all.csv.converted
#   gawk -f columnEntropy.awk I=3 data/subsets/coc81_all.csv.converted
#   gawk -v I=3 -f columnEntropy.awk data/subsets/coc81_all.csv.converted
#
# Variables:
#   I        Field number to analyse. Defaults to 1 when not supplied.
#   Columns  Optional selector string. For every field number x present in
#            the first record, if Columns contains "-x-" then I is set to x;
#            when several field numbers match, the highest one wins.
#
# Every input record, including the first, is counted as data.

# Return -sum(p * log(p)) over the counts stored in freq, where
# p = freq[key] / total.  Returns 0 when total is not positive.
# key, p and h are function-local scratch variables (awk's extra-parameter idiom).
function column_entropy(freq, total,    key, p, h) {
    h = 0
    if (total <= 0)
        return h
    for (key in freq) {
        p = freq[key] / total
        h -= p * log(p)
    }
    return h
}

BEGIN {
    FS = OFS = ","
    # Only apply the default when the caller did not provide a value, so that
    # "-v I=3" (assigned before BEGIN runs) is honoured rather than clobbered.
    # Columns is deliberately not reset here for the same reason; an unset
    # awk variable already behaves as the empty string.
    if (I == "")
        I = 1
}

# Resolve the Columns selector against the field count of the first record.
NR == 1 {
    for (x = 1; x <= NF; x++)
        if (index(Columns, "-" x "-") != 0)
            I = x
}

# Tally how often each distinct value occurs in the selected field.
{
    Frequency[$I]++
}

END {
    print column_entropy(Frequency, NR)
}