#!/sw/bin/gawk -f
#
# Reads a data file whose header uses "@attribute NAME ..." lines followed by
# an "@data" line and comma-separated rows (this looks like Weka ARFF --
# TODO confirm), counts how often each attribute value occurs in rows whose
# class column equals Goal versus all other rows, and at end of input prints
# one line per (attribute, value) pair with its log-odds ratio and related
# scores.
#
# Output columns (separated by OFS):
#   FILENAME Goal ATTRIBUTE VALUE  Lor  Br  support  r

BEGIN {
    Missing    = "?"          # cell value that is skipped when counting
    Goal       = "yes"        # class value counted as class 1; all else is class 0
    Klass      = -1           # class column; negative counts back from the last attribute
    E          = 2.718281828  # base used by points2p()
    Inf        = 10^30        # "very large"; 1/Inf is used as a tiny epsilon
    IGNORECASE = 1
    OFMT       = "%.10f"
    _          = SUBSEP       # shorthand for building/splitting multi-part keys
    OFS        = " "
}

# Header: remember the attribute names in order; Name[0] holds the count.
/@attribute/ { Name[++Name[0]] = $2 }

# Start of the data section: rows are comma separated from the next record
# on, and a negative Klass is resolved relative to the number of attributes
# (-1 becomes the last attribute).
/@data/ {
    FS = ","
    if (Klass < 0)
        Klass = Name[0] + 1 + Klass
}

# Any record containing "@" is never treated as a data row.
/@/ { next }

# Strip "%" comments and all blanks/tabs, then drop lines that became empty.
{ gsub(/%.*/, "") }
{ gsub(/[ \t]/, "") }
/^$/ { next }

# Remaining records are data rows; class is 1 when the class column is Goal.
{ train($Klass == Goal) }

END {
    #printf "\n" FILENAME " :: " Goal "\n"
    lor()
}

# train(class): update the frequency tables from the current record.
#   class - 1 if the row belongs to the Goal class, 0 otherwise
# Side effects:
#   Instances            total rows seen
#   H[class]             rows seen per class
#   N[class,name,value]  rows of that class having that attribute value
#   Seen[name,value]     set of every (attribute, value) pair encountered
# The class column itself and cells equal to Missing are not counted.
function train(class,    i) {
    Instances++
    H[class]++
    for (i = 1; i <= NF; i++) {
        if (i == Klass || $i == Missing)
            continue
        N[class, Name[i], $i]++
        Seen[Name[i], $i]       # a bare reference is enough to create the key
    }
}

# lor(): for every (attribute, value) pair in Seen, compute and print
#   b       = N[1,pair] / H[1]   frequency of the pair inside the Goal class
#   r       = N[0,pair] / H[0]   frequency of the pair in the other rows
#   Lor     = log(b/r)   when b > r, else 0
#   Br      = b^2/(b+r)  when b > r, else 0
#   support = N[1,pair] / H[1]
# Results are also stored in the global arrays Lor[] and Br[].
function lor(    i, b, r, support, eps) {
    eps = 1 / Inf
    for (i in Seen) {
        b = N[1 _ i] / (H[1] + eps)
        r = N[0 _ i] / (H[0] + eps)
        # Guard the denominator: with no Goal-class rows H[1] is 0 and an
        # unguarded division is a fatal error in gawk.
        support = H[1] ? N[1 _ i] / H[1] : 0
        # A value never seen in class 0 gives r == 0; adding the same tiny
        # epsilon used above avoids a fatal division by zero and yields a
        # large finite score. For any r > 0 the epsilon is far below double
        # precision, so existing results are unchanged.
        Lor[i] = (b > r) ? log(b / (r + eps)) : 0
        Br[i]  = (b > r) ? b^2 / (b + r)      : 0
        print munge(FILENAME _ Goal _ i), Lor[i], Br[i], support, r
    }
}

# munge(x): return x with every SUBSEP replaced by a single space.
function munge(x) {
    gsub(_, " ", x)
    return x
}

# sorted(a0, a): describe the ascending order of the values of a0 in a.
#   a[0]  = n, the number of elements
#   a[i]  = the key of a0 that held the i-th smallest value (1 <= i <= n)
#   a[-i] = the i-th smallest value itself
# Returns n.
# NOTE: asort() sorts a0 in place, so on return the caller's a0 is indexed
# 1..n and its original keys are gone. When several keys share one value
# only one of those keys is remembered, so it is reported for each of them.
function sorted(a0, a,    back, k, b, c, i, n) {
    for (k in a0)
        back[a0[k]] = k          # value -> original key
    n = asort(a0)
    a[0] = n
    for (i = 1; i <= n; i++) {
        a[-1 * i] = a0[i]
        a[i]      = back[a0[i]]
    }
    return n
}

# saya(pre, a): print every element of array a as "pre[k1,k2,...] = value",
# piped through sort(1) so the lines come out ordered.
# The "#" plus a random number is seen by the shell as a comment; presumably
# it is there so each call gets its own distinct command string/pipe.
function saya(pre, a,    i, com) {
    com = "sort #" rand()
    for (i in a)
        print pre "[" commas(i) "] = " a[i] | com
    close(com)
}

# points2p(f, p): return 1 / (1 + E^-(log(p/(1-p)) + f)), i.e. shift the
# log-odds of probability p by f and map the result back to a probability.
# NOTE: p must lie strictly between 0 and 1 (p == 1 divides by zero).
function points2p(f, p) {
    return 1 / (1 + E^(-1 * log(p / (1 - p)) - f))
}

# commas(s): return s with every SUBSEP replaced by a comma.
function commas(s) {
    gsub(_, ",", s)
    return s
}

# key1(s): return the first SUBSEP-separated component of s.
function key1(s,    a) {
    split(s, a, _)
    return a[1]
}

# keys(s, a): split s on SUBSEP into the caller's array a.
function keys(s, a) {
    split(s, a, _)
}

# some(): return 1/(rand() * Inf), a very small positive random number.
# NOTE(review): rand() may return exactly 0, which would divide by zero.
function some() {
    return 1 / (rand() * Inf)
}