#!/usr/bin/gawk -f
#
# Reads comma-separated records describing "treatments" and repeatedly
# eliminates any treatment that loses a pairwise comparison, printing one
# line per eliminated treatment and one line per survivor.
#
# Globals read but never set in this file (presumably supplied with -v or by
# a companion -f file -- confirm against the caller): Prefix, Seed.

BEGIN {
    # Each RuleN switch enables one comparison rule inside xWorse().
    Rule1 = Rule2 = Rule3 = Rule4 = Rule5 = 1;
    OFS = FS = ",";
    Need = 12;              # only records with exactly this many fields are used
    Limits[99] = 2.575;     # z thresholds, indexed by confidence level
    Limits[95] = 1.96;
    Trust = 95;             # which entry of Limits diff() compares against
    Kills = 0;              # not referenced elsewhere in this file
}

# Store one treatment per well-formed record.
NF == Need {
    # The key is built from the first six fields *before* $5 is defaulted.
    Key = $1 ":" $2 ":" $3 ":" $4 ":" $5 ":" $6;
    $5 = $5 ? $5 : 17;      # empty/zero $5 becomes 17 (presumably the full attribute count -- confirm)
    Attr[Key] = $5;
    Pred[Key] = $7;
    N[Key]    = $8;
    Mean[Key] = $9;
    Sd[Key]   = $10;
    Corr[Key] = $12;
    # The arrays above keep the raw values; the fields are rounded only for
    # the copy of the record that is printed later.
    $9  = int($9);
    $10 = int($10);
    $11 = int(100 * $11);
    $12 = int($12 * 100) / 100;
    Line[Key] = $0;
}

END {
    R = 0;                  # running output line counter
    Seed ? srand(Seed) : srand(1);
    reject();
}

# reject(): mark every stored treatment as a candidate, eliminate losers via
# prune(), then print the remaining treatments tagged "survivor".
function reject(    i) {
    for (i in N)
        Treatments[i] = 1;
    prune();
    for (i in Treatments)
        print ++R,Prefix, Line[i],"survivor",".";
}

# prune(): compare every ordered pair of remaining treatments; as soon as a
# comparison eliminates one of them, restart the scan from scratch (the array
# was modified, so the running for-in loops are abandoned).  Stops once a
# full pass eliminates nothing.  Written as a loop rather than one recursive
# call per kill.
function prune(    x, y, killed) {
    do {
        killed = 0;
        for (x in Treatments) {
            for (y in Treatments)
                if (x != y && xWorse(x, y)) {
                    killed = 1;
                    break;
                }
            if (killed)
                break;
        }
    } while (killed);
}

# xWorse(x, y): apply the enabled rules to the pair (x, y).
# Returns 1 if either treatment was eliminated (via d()), 0 if no rule
# separated them.
#
# If the means are statistically different, the treatment with the larger
# mean is eliminated (Rule1).  Otherwise the tie-break rules run in order:
# larger Sd loses (Rule2), smaller Corr loses (Rule3), smaller Pred loses
# (Rule4), larger Attr loses (Rule5).
#
# In every test `a` is bound to the smaller value and `b` to the larger, and
# d() is always called with (a, b) so the reported percentage is the same
# regardless of which order the pair is visited in.
function xWorse(x, y,    a, b) {
    if (statisticallyDifferent(x, y)) {
        if ( Rule1 && (a = Mean[x]) < (b = Mean[y]) ) return d("mmre /", y, x, a, b);
        if ( Rule1 && (a = Mean[y]) < (b = Mean[x]) ) return d("mmre /", x, y, a, b);
    } else {
        if ( Rule2 && (a = Sd[x]) < (b = Sd[y]) ) return d("sd /", y, x, a, b);
        if ( Rule2 && (a = Sd[y]) < (b = Sd[x]) ) return d("sd /", x, y, a, b);
        # if ( Rule2 && (a= Sd[x]/Mean[x]) < (b= Sd[y]/Mean[y]) ) return d("sd/mmre /", y,x,a,b);
        # if ( Rule2 && (a= Sd[y]/Mean[y]) < (b= Sd[x]/Mean[x]) ) return d("sd/mmre /", x,y,a,b);
        if ( Rule3 && (a = Corr[x]) < (b = Corr[y]) ) return d("correlation *", x, y, a, b);
        if ( Rule3 && (a = Corr[y]) < (b = Corr[x]) ) return d("correlation *", y, x, a, b);
        if ( Rule4 && (a = Pred[x]) < (b = Pred[y]) ) return d("pred *", x, y, a, b);
        if ( Rule4 && (a = Pred[y]) < (b = Pred[x]) ) return d("pred *", y, x, a, b);
        if ( Rule5 && (a = Attr[x]) < (b = Attr[y]) ) return d("attributes /", y, x, a, b);
        if ( Rule5 && (a = Attr[y]) < (b = Attr[x]) ) return d("attributes /", x, y, a, b);
    }
    return 0;
}

# statisticallyDifferent(i, j): true when the stored means of treatments i
# and j differ according to diff().
function statisticallyDifferent(i, j) {
    return diff(Mean[i], Sd[i], N[i], Mean[j], Sd[j], N[j]);
}

# diff(...): true when |z| exceeds the threshold selected by Trust.
function diff(m1, s1, n1, m2, s2, n2) {
    return abs(z(m1, s1, n1, m2, s2, n2)) > Limits[Trust];
}

# z(m1,s1,n1, m2,s2,n2): (m1 - m2) divided by sqrt(s1^2/(n1-1) + s2^2/(n2-1)).
#
# Guards against the two inputs that would otherwise abort gawk with a fatal
# division by zero:
#   * n1 <= 1 or n2 <= 1: the statistic is undefined, so 0 is returned and
#     the pair is treated as not distinguishable.
#   * both spreads are zero: returns 0 for equal means, otherwise a very
#     large value carrying the sign of (m1 - m2), so any difference counts.
function z(m1, s1, n1, m2, s2, n2,    se) {
    if ((n1 + 0) <= 1 || (n2 + 0) <= 1)
        return 0;
    se = sqrt((s1^2 / (n1 - 1)) + (s2^2 / (n2 - 1)));
    if (se == 0) {
        if (m1 == m2)
            return 0;
        return (m1 > m2) ? 1e300 : -1e300;
    }
    return (m1 - m2) / se;
}

# abs(n): absolute value of n.
function abs(n) {
    return n < 0 ? -1*n : n;
}

# d(txt, killed, killer, a, b): report that `killer` eliminated `killed`,
# remove `killed` from Treatments, and return 1.
#   txt    - label of the rule that decided the comparison
#   a, b   - the two compared values; the printed percentage is
#            100 + int(|a-b| / a * 100)
function d(txt, killed, killer, a, b) {
    #this is added to avoid division by zero by substituting zero with a very small number
    if (a == 0) a = 10^(-20);
    print ++R,Prefix,Line[killed], killer "," txt " " (100+int(abs(a-b)/a*100)) "%";
    delete Treatments[killed];
    return 1;
}