#!/usr/bin/gawk -f
# Reads a comma-separated table. The first record supplies column names; in
# every later record the last field is the class. Records whose class matches
# Goal are counted in Best, the others in Rest. The functions below score
# "column=value" ranges and randomly combine pairs of scored items.
#
# Variables read but never set here (supply with -v): Seed, Report.
# Functions called but not defined in this part of the file:
#   report(), jiggle(), o().

BEGIN {
  FS       = ","
  SUBSEP   = "="                        # multi-subscript keys read "column=value"
  Goal     = "^no-recurrence-events$"   # regex matched against the last field
  Beam     = 5                          # how many top scores are kept/sampled
  Loops    = 1                          # combination attempts per picks() round
  Patience = 3
  OFMT     = "%.8g"
  srand(Seed ? Seed : 1)                # Seed unset/0 falls back to 1
}
NR == 1 { names(); next }
        { data() }
END     { if (Report) report() }

# Store the header fields of the current record in Name[1..NF].
function names(    i) {
  for (i = 1; i <= NF; i++)
    Name[i] = $i
}

# Classify the current record by its last field and update the matching
# counter (Best or Rest) and count table (B or R).
function data() {
  if ($NF ~ Goal) { Best++; data1(B) }
  else            { Rest++; data1(R) }
}

# NOTE(review): the source text between "for(i=1;i" and " r) {" was lost, so
# the body of data1() and the header of the following function are
# reconstructed. The reconstruction assumes counts are keyed by
# (column, value), which is what combineTwo() needs when it reads B[range] and
# what ranges() needs when it splits a key on SUBSEP into a[] and v[].
# The loop bound (class column skipped) is an assumption: confirm.
function data1(a,    i) {
  for (i = 1; i < NF; i++)
    a[i, $i]++
}

# Score every single range and keep the top Beam of them.
#   out[score]  = score
#   memo[score] = range key ("column=value")
#   a[score]    = column part of the key, v[score] = value part
# Returns the smallest score that was kept.
#
# NOTE(review): the function name, the parameter order and the three lines
# marked "reconstructed" are guesses modelled on combineTwo(); everything from
# "j = jiggle(...)" onward is the surviving original text. Confirm against the
# caller (not visible in this part of the file) before relying on it.
function ranges(out, memo, a, v, \
                range, b, r, j, value, sorted, n, stop, i, score, tmp, min) {
  for (range in B) {                                 # reconstructed
    b = Best / (Best + Rest) * B[range] / Best       # reconstructed
    r = Rest / (Best + Rest) * R[range] / Rest       # reconstructed
    if (b > r) {
      j = jiggle(b^2 / (b + r))
      value[range] = j
      memo[j] = range
    }
  }
  n = asort(value, sorted)
  stop = n < Beam ? 1 : n - Beam + 1
  for (i = n; i >= stop; i--)
    if (sorted[i] > 0) {
      range = memo[sorted[i]]
      score = sorted[i]
      split(range, tmp, SUBSEP)
      a[score] = tmp[1]
      v[score] = tmp[2]
      min = out[score] = score
    }
  return min
}

# One round of search: take the top Beam scores from stack, build a sampling
# table from them, try Loops random pairings, then recurse with one less
# unit of patience (patience is not reduced by a round that beat the current
# maximum). Always returns 0.
function picks(patience, stack, memo, a, v, \
               new, sorted, cdf, n, loops, max, min, better) {
  if (!patience) return 0
  n = a2best(stack, sorted, Beam)
  if (n < 1) return 0          # nothing to sample; avoids a divide-by-zero below
  #o(sorted,"sorted")
  #have to reset the stack to the sorted stuff
  sorted2cdf(n, sorted, cdf)
  #n(sorted,"picks:sorted")
  o(cdf, "cdf")
  #o(stack,"picks" patience )
  min = sorted[1]              # asort() is ascending: first is smallest
  max = sorted[n]
  print "patience " patience " max " max " min " min
  loops = Loops
  while (loops--) {
    new = combineTwo(stack, a, v, sorted, n, cdf, memo, min)
    if (new > max) better = 1
  }
  if (better) {
    print "better!!"
    patience++
  }
  return picks(--patience, stack, memo, a, v)
}

# Render a SUBSEP-joined list of keys as ":" memo[key1] ":" memo[key2] ...
function prange(range, memo,    n, tmp, i, out) {
  n = split(range, tmp, SUBSEP)
  for (i = 1; i <= n; i++)
    out = out ":" memo[tmp[i]]
  return out
}

# Draw two different items from the sampling table into things[1..2].
# Returns 2 on success, 0 after running out of attempts (giveup starts at 16).
function differentThings(cdf, n, things,    one, two, giveup) {
  one = pick(cdf, n)
  giveup = 16
  do {
    two = pick(cdf, n)
    if (--giveup <= 0) return 0
  } while (one == two)
  return split(one SUBSEP two, things, SUBSEP)
}

# Pick two different scores, multiply together the per-column frequencies of
# their ranges for the best and rest classes, and score the combination as
# jiggle(b^2/(b+r)). If the score beats min it is recorded in stack and memo.
# Returns the score (unset when no two different items could be drawn).
function combineTwo(stack, a, v, sorted, n, cdf, memo, min, \
                    things, new, range, seen, bs, rs, b, r, b2r, i) {
  if (differentThings(cdf, n, things)) {
    for (i in things) {
      range = memo[things[i]]
      if (++seen[range] == 1) {          # count each range once
        new = new ? new SUBSEP things[i] : things[i]
        print " i " i " tmp[i] " things[i] " >> " range " new " new
        bs[a[things[i]]] += B[range] / Best
        rs[a[things[i]]] += R[range] / Rest
      }
    }
    b = Best / (Best + Rest)
    for (i in bs) b *= bs[i]
    r = Rest / (Best + Rest)
    for (i in rs) r *= rs[i]
    # b and r are both 0 when neither class has counts for a picked range;
    # score that as 0 instead of dividing by zero (fatal in gawk).
    b2r = (b + r) > 0 ? jiggle(b^2 / (b + r)) : 0
    if (b2r > min) {
      print "new " prange(new, memo) " >> b " b " r " r " = " b2r
      o(bs, "bs")
      o(rs, "rs")
      stack[new] = b2r
      memo[b2r] = new
    }
  }
  return b2r
}

# Sort the values of a[] ascending and copy the largest max of them (all of
# them when max is 0 or exceeds the count) into best[1..]. Returns how many
# values were copied.
function a2best(a, best, max,    sorted, n, i, j) {
  n = asort(a, sorted)
  if (max && n >= max) {
    for (i = n - max + 1; i <= n; i++)
      best[++j] = sorted[i]
    n = max
  } else
    for (i in sorted)
      best[i] = sorted[i]
  return n
}

# Build a sampling table from sorted[1..n]:
#   cdf[i]  = running total of sorted[1..i] divided by the grand total
#   cdf[-i] = sorted[i]   (the item that slot i stands for)
# Returns n (0 when there is nothing to tabulate).
function sorted2cdf(n, sorted, cdf,    i, sum) {
  if (n < 1) return 0
  cdf[1] = sorted[1]
  for (i = 2; i <= n; i++)
    cdf[i] = cdf[i-1] + sorted[i]
  sum = cdf[n]
  for (i = 1; i <= n; i++)
    cdf[i] = sum ? cdf[i] / sum : i / n   # zero total: fall back to uniform
  for (i = 1; i <= n; i++)
    cdf[-1 * i] = sorted[i]
  return n
}

# Draw one item (not its slot number) from the sampling table.
function pick(cdf, n,    i) {
  i = pick1(cdf, n)
  return cdf[-1 * i]
}

# Draw a slot number in 1..n with probability proportional to its share of
# the table. bias is a local that is never set here, so u is a plain rand().
function pick1(cdf, n,    bias, u, i) {
  u = bias ? rand()^bias : rand()
  for (i = n - 1; i >= 1; i--)
    if (u > cdf[i]) return i + 1
  return 1                     # u <= cdf[1]: the first slot, not the last
}