1 TOP = 10 2 MaxLIVES = 5 3 LIVES = MaxLIVES 4 SAMPLES = 20 5 6 which2() 7 counts() 8 for class1 in classes do 9 which = round0(class1) 10 which = rounds(class1, which) 11 print "rules for class1 are " which 12 13 counts() 14 for row in rows do 15 NUMBER[class of row]++ 16 NUMBER["all"]++ 17 for col in row 18 FREQUENCY[class of row, col, value of col]++ 19 FREQUENCY["all", col, value of col]++ 20 21 round0(class1) { 22 for (class2,col,value) in FREQUENCY do 23 if (class1 == class2) 24 best0 = FREQUENCY[class2,col,value] 25 bests = NUMBER[class2] 26 rest0 = FREQUENCY["all", col,value] - best0 27 rests = NUMBER["all"] - bests 28 best = best0/bests 29 rest = rest0/rests 30 if (best > rest) 31 push (best^2/(best + rest), col,value) onto SCORE 32 return TOP items of SCORE 33 34 rounds(class1,which0) 35 if no improvement from last round 36 then LIVES-- 37 else LIVES=MaxLIVES 38 if LIVES < 1 39 then return which 40 sample = explode(normalize(which0)) 41 which1 = copy(which0) 42 SAMPLES times repeat ; e.g 20 times repeat 43 one = any item from sample 44 two = any item from sample 45 new = combine one and two 46 push (abcd(new,class1), new) onto which1 47 which2 = TOP items of which1 48 return rounds(class1,which2) 49 50 normalize(list) 51 for key in list do total =+ array[key] 52 for key in list do list[key] = round (100*list[key]/total) 53 return list 54 55 explode(list) { ; e.g. a[apples]=90, a[bananas]=10 56 for key in list 57 for i = 1 to list[key] 58 push key onto out 59 return out ; e.g. contains 90 apples and 10 bananas 60 61 abcd(class1, rule) 62 for row in rows 63 triggered = rule matches row 64 if (class of row == class1) { 65 {if triggered then d++ else b++ } 66 } else { 67 {if triggered then c++ else a++ } 68 } 69 probability of detection = pd = d/(d+b) 70 probability of false alarm = pf =c/(c+a) 71 precision = prec =d/(d+c) 72 accuracy = acc = (a+d)/(a+b+c+d) 73 support = s = (c+d)/(a+b+c+d) 74 fmeasure = 2*pd*prec/(pd+prec) 75 return support * fmeasure ======================================================== ;class,thing=value,thing=value 111111111111111 start with some scored sorted things 32,no,temp=humid 22,no,windy=true 12,no,forecast=raininng 222222222222222 A couple of times, pick two things at random, favoring the higher scored things 1) no,temp=humid no,temp=humid 2) no,temp=humnd no,windy=true 3) no,windy=true no,forecast=raining 333333333333333 combine and score 1) 32,no,temp=humid 2) 20,no,temp=humnd,windy=true 3) 5, no,windy=true,forecast=raining 444444444444444 sort into the stack (ignore if already there) 32,no,temp=humid 22,no,windy=true 20,no,temp=humnd,windy=true 12,no,forecast=raininng 5, no,windy=true,forecast=raining 555555555555 prune all but the top (say) 3 things 32,no,temp=humid 22,no,windy=true 20,no,temp=humid,windy=true 6666666666 goto 1