#!/usr/bin/gawk -f
# /* vim: set filetype=awk : */ -*- awk -*-
#
# Multi-pass scorer/classifier for comma-separated rows.
#
# The header row (physical line 1 of each input file) names the columns.
# A column whose name contains "<" is a goal where smaller values score
# better; ">" marks a goal where larger values score better.  An optional
# "*w" suffix on the name gives that goal the weight w (default 1).
#
# The rules are driven by the variable Pass, which is never assigned in this
# file and must be supplied from outside.
# NOTE(review): presumably invoked with command-line assignments such as
#   gawk -f thisfile Pass=1 data.csv Pass=2 data.csv Pass=3 data.csv
# -- confirm against the real caller.
#
#   Pass==1  record the min and max of every watched (goal) column
#   Pass==2  store each row's score in Score[FNR]
#   Pass==3  choose a threshold (Best) from the stored scores, then print each
#            row's non-goal columns followed by "_1" (score >= Best) or "_0"
#
# Requires gawk (asort).

BEGIN {
  OFS = FS = ",";
  Cignore  = "?";       # cell marker: value is missing, skip it
  Ctimes   = "*";       # separates a goal name from its weight in the header
  Chate    = "<";       # header marker: smaller is better
  Clove    = ">";       # header marker: larger is better
  Ccomment = "[%#].*";  # a comment runs from "%" or "#" to end of line
  N        = 0.1;       # fraction of rows above the threshold picked by bore()
  Inf      = 10^32;     # stand-in for infinity
}

# Every line: delete all blanks, then strip a trailing comment.
{ gsub(/ /,""); sub(Ccomment,""); }

# Lines that are now empty are dropped.  Because this happens before the
# FNR==1 rule, the header is only recognised when it is the physical first
# line of the file.
/^$/ { next }

# Header row.
FNR==1 {
  if (Pass==1) {
    for(I=1;I<=NF;I++) {
      # NOTE(review): comments were already stripped from $0 above, so no
      # field can still match Ccomment and Ignorep always stays empty.
      # Cignore may have been intended here -- confirm before changing.
      if($I ~ Ccomment) Ignorep[I] = I;
      if($I ~ Chate   ) Hatep[I]   = I;
      if($I ~ Clove   ) Lovep[I]   = I;
    }
    for(I in Hatep) watch(I);
    for(I in Lovep) watch(I);
  }
  if (Pass==3) Best = bore(Score);
  next;
}

Pass==1 { maxMin(); }
Pass==2 { Score[FNR] = score(); }
Pass==3 { classify(Best); }

# watch(i): register header column i as a goal.
#   i   - column number; $i must currently hold that column's header text
#   tmp - local scratch array
# Sets Watchp[i] to the weight found after Ctimes in the header (1 if none)
# and resets Min[i]/Max[i] so the first value seen replaces them.
function watch(i,   tmp) {
  # Read the column named by the parameter, not the caller's loop variable.
  split($i,tmp,Ctimes);
  Watchp[i] = (2 in tmp) ? tmp[2] : 1;
  Min[i]    = Inf;
  Max[i]    = -1 * Inf;
}

# maxMin(): widen Min[]/Max[] of every watched column with the current row.
# Columns listed in Ignorep and cells matching Cignore are skipped.
function maxMin(   i) {
  for(i in Watchp) {
    if (i in Ignorep ) continue;
    if ($i ~ Cignore ) continue;
    if ($i > Max[i]) Max[i] = $i;
    if ($i < Min[i]) Min[i] = $i;
  }
}

# score(): score the current row against the watched columns.
# Each goal value is normalised to 0..1 using Min/Max (inverted for "<"
# goals), multiplied by its weight, and compared with the best possible value
# for that goal (the weight itself).  The result is
#   1 - sqrt(sum of squared shortfalls) / sqrt(sum of squared weights)
# so 1 is the best score.  A cell matching Cignore adds its weight to the
# denominator but no shortfall.
# Returns 1 when there is nothing to measure (no watched columns, or all
# weights are 0) rather than dividing by zero.
function score(   i,strength,max,norm,alpha,good) {
  for(i in Watchp) {
    if (i in Ignorep ) continue;
    strength = Watchp[i];
    max     += strength^2;
    if ($i ~ Cignore) continue;
    # 1/Inf keeps the divisor non-zero when Max[i] == Min[i].
    norm = ($i - Min[i])/(Max[i] - Min[i] + 1/Inf);
    if (i in Hatep) norm = 1 - norm;
    norm   = strength * norm;
    alpha += (norm - strength)^2;   # (x - xmax)^2
  }
  if (max == 0) return 1;           # no goals: zero distance from the ideal
  good = sqrt(alpha)/sqrt(max);     # distance to the ideal, normalised to 0..1
  good = 1 - good;                  # the SMALLER the distance, the BETTER
  return good;
}

# bore(a): choose the score threshold from the array of scores a.
#   a - array of numeric scores; asort() sorts it IN PLACE, so the caller's
#       array is re-indexed 1..n in ascending order
# Picks the element at index n - int(n*N); if that index is below 50 the
# middle element (index int(n/2)) is used instead.  The index is clamped to 1
# so a one-element array returns its only score.  Returns 0 for an empty array.
function bore(a,   i,n) {
  n = asort(a);
  if (n < 1) return 0;
  i = n - int(n*N);
  if (i < 50) i = int(n/2);
  if (i < 1)  i = 1;
  return a[i];
}

# classify(n): print the current row without its watched columns, followed by
# OFS and "_1" when score() >= n, otherwise "_0".
#   n - score threshold
#   sep, str, i - locals
function classify(n,   sep,str,i) {
  for(i=1;i<=NF;i++) {
    if(i in Watchp) continue;
    str = str sep $i;
    sep = OFS;
  }
  print str OFS "_" ( score() >= n );
}

# saya(s,a,q1,q2,eol): dump array a, one element per line, through "sort".
# Each line reads  s[idx1,idx2,...] = value  followed by eol.
#   s   - label printed in front of the subscript
#   a   - array to dump; SUBSEP-joined subscripts are shown comma-separated
#   q1  - if true, wrap each subscript part in double quotes
#   q2  - if true, wrap the value in double quotes
#   eol - text appended to every line
# Not called anywhere in the visible source.
function saya(s,a,q1,q2,eol,   com,i,j,n,tmp,str,sep) {
  com = "sort";
  q1  = q1 ? "\"" : "";
  q2  = q2 ? "\"" : "";
  for(i in a) {
    sep = "";
    str = s "[";
    n   = split(i,tmp,SUBSEP);
    for(j=1;j<=n;j++) {
      str = str sep q1 tmp[j] q1;
      sep = ",";
    }
    print str "] = " q2 a[i] q2 eol | com;
  }
  close(com);   # flush the sorted output and wait for sort to finish
}