BEGIN { FS=OFS=","; Klass= -1; DontKnow = "?"; OFMT = "%.20f" } NR==1 { Attr=NF; Klass= Klass < 0 ? NF + 1 + Klass : Klass } { Instances++; for(I=1;I<=NF;I++) if(I!=Klass) record(Instances,I,$I,$Klass) record(Instances,Klass,$Klass,$Klass) } function size() { return 0} function asAttr(i) { return i} function asRange(i) { return -1 * i} function record(n,attr,range,class, i) { Data[n,attr]=range; if (range != DontKnow) { F[class,attr,range]++ if (! ((attr,range) in Ar2i)) { Universe[attr]++; i= ++I2ar[size()] I2ar[asAttr(i )]= attr; I2ar[asRange(i)]= range Ar2i[attr,range]= i } } } function nowTimesPrior(evidence,class,classes,n, \ f,nUs,pUs,out,i,j,prior,attr,range,delta,counts, m,k) { nUs = F[class,Klass,class] ; for(i in evidence) { j = evidence[i]; attr = I2ar[asAttr( j)]; range = I2ar[asRange(j)]; counts[attr] += (class,attr,range) in F ? F[class,attr,range] : 0; } m=2; k=1; m=k=0; prior = (nUs+k)/(n + k*classes) ; out = prior; for(attr in counts) out *= (counts[attr] + m*prior)/(nUs + m ) return out; } function useful(i, attr,range) { if (i <= 0) return 0; attr = I2ar[asAttr( i)]; range = I2ar[asRange(i)]; if (attr == Klass) return 0; if ((1,attr,range) in F) return 1; return 0; } function score(evidence, lbest,lrest,out) { lbest = nowTimesPrior(evidence,1,2,Instances); lrest = nowTimesPrior(evidence,0,2,Instances); out = lbest^2 / (lbest + lrest); return out } function sortScores(scores,sorts, i,n) { delete scores; delete sorts; for(i in I2ar) if(useful(i)) getScores(scores,i) n = asorti(scores,sorts); #showScores(n,scores,sorts); return n; } function getScores(scores,i, e,s) { e[i]=i s = score(e); if (s) scores[s+rand()/10000]=i } function showScores(n,scores, sorts, i,x,tmp,attr,range) { for(x=1;x<=n;x++) { i = scores[sorts[x]]; attr=I2ar[asAttr(i)]; range=I2ar[asRange(i)]; print "attr",attr,"range",range,"sorts[x]",sprintf("%.20f",sorts[x]),\ "scores[sorts[x]]",scores[sorts[x]] } for(x in sorts ) print "sorts: " x " " sorts[x] for(x in scores) print "scores: " x " " scores[x] } function try(n,scores,sorts) { for(i=1;i<=n;i++) { try1(i,n,scores,sorts) } } # have to handle disjunctions function try1(i,n,scores,sorts, attrs,attr,range,str,evidence,j,k,some) { for(j=n;j > n-i; j--) { k=scores[sorts[j]] attr = I2ar[asAttr(k )]; range = I2ar[asRange(k)]; ranges[attr]++ if (ranges[attr] < Universe[attr]) { some = 1; evidence[k] = k; str[j] = str[j] " " attr"="range } } #for(k in evidence) print k, I2ar[asAttr(k)], I2ar[asRange(k)]; if (some) printf("%s\t %10.10f\t %10.10f \t %s\n", i ,score(evidence) , test(evidence) , str2label(str)); } function str2label(strs, str,i,n) { n=asort(strs); str=strs[1]; for(i=2;i<=n;i++) str= str " " strs[i] return str } function test(evidence, max,i,score,n,nfound) { for(i=1;i<=Instances;i++) { max += Data[i,Klass] if(selected(i,evidence)) { nfound++ score += Data[i,Klass] }} return (score/nfound) / (max/Instances) } function selected(i,evidence, x,attr,range,e,unsatisfied) { for(e in evidence) { attr = I2ar[asAttr( e)]; unsatisfied[attr]=1 } for(e in evidence) { attr = I2ar[asAttr( e)]; range = I2ar[asRange(e)]; x = Data[i,attr] if ( (x == range) || (x ~ DontKnow) ) unsatisfied[attr]=0 } for (attr in unsatisfied) if (unsatisfied[attr]) return 0 return 1 } function main( n, scores, sorts) { n = sortScores(scores,sorts); #showScores(n,scores,sorts); try(n,scores, sorts); #showData() } function showData( i,j,sep) { for(j=1;j<=Instances;j++) { sep=""; for(i=1;i<=Attr;i++) { printf("%s%s",sep,Data[j,i]) sep="," } print ""; } } END { main() } #attr,3,range,1,sorts[x],0.03032239999999999935,scores[sorts[x]],12 #attr,1,range,2,sorts[x],0.12124100000000000155,scores[sorts[x]],6 #attr,3,range,2,sorts[x],0.12126099999999999379,scores[sorts[x]],3 #attr,4,range,3,sorts[x],0.16365199999999999192,scores[sorts[x]],8 #attr,2,range,3,sorts[x],0.16365399999999999392,scores[sorts[x]],2 #attr,3,range,3,sorts[x],0.16372100000000000541,scores[sorts[x]],7 #attr,4,range,2,sorts[x],0.20456900000000000084,scores[sorts[x]],4 #attr,2,range,2,sorts[x],0.20460400000000000809,scores[sorts[x]],11 #attr,1,range,1,sorts[x],0.29099000000000002641,scores[sorts[x]],1 #sorts: 4 0.163652 #sorts: 5 0.163654 #sorts: 6 0.163721 #sorts: 7 0.204569 #sorts: 8 0.204604 #sorts: 9 0.29099 #sorts: 1 0.0303224 #sorts: 2 0.121241 #sorts: 3 0.121261 #scores: 0.163721 7 #scores: 0.163654 2 #scores: 0.29099 1 #scores: 0.204604 11 #scores: 0.121241 6 #scores: 0.121261 3 #scores: 0.0303224 12 #scores: 0.204569 4 #scores: 0.163652 8