# Rule learner for ARFF-style input (gawk; relies on asort() and IGNORECASE).
#
# Restored from a gawk profiler dump (created Tue Apr 13 08:47:56 2010):
# the per-statement execution counts were removed and the line structure
# rebuilt so the program is runnable again.
#
# Input format, as consumed by the rules below:
#   @attribute NAME ...   declares the next column; the last one declared
#                         is the numeric column that scores are summed over
#   @data                 switches to comma-separated data rows
#   % ...                 comment, stripped
#   ?                     a cell that is skipped (not stored, not counted)
#
# A candidate rule is the string  CLASS,feature=value,feature=value,...
# Round 0 ranks single feature=value pairs; every later round samples
# pairs of surviving rules, merges them, scores the merges and keeps the
# best few, until the best score stops improving.

BEGIN {
    Max        = 1          # non-zero: larger scores are better; zero: scores are negated
    Seed       = 1          # seed handed to srand() when @data is seen
    More       = 1.02       # factor the best score must grow by to reset Lives
    Lives      = 5          # rounds without enough improvement before stopping
    Dull       = 0.1        # chop() drops scores <= Dull * (best score)
    Beam       = 10         # chop() keeps at most this many candidates
    Samples    = 20         # merged candidates generated per round
    Pinch      = 1 / 1000   # small constant: score seed and random tie-break jitter
    OverFitted = 3          # rules matching this many rows or fewer score 0
    CONVFMT    = "%.8g"     # precision used when scores become array subscripts
    IGNORECASE = 1
    SUBSEP     = "="        # so that data[row, f] and "f=v" keys share one separator
    _          = SUBSEP
    OFS        = ","
    C          = ","
    Verbose    = 1
    # Eval is read by score1() but never assigned here; presumably it is
    # meant to be supplied on the command line (-v Eval=1) -- TODO confirm.
}

# Record each attribute name by position, and its position by name.
/@attribute/ {
    Name[++Name[0]] = $2
    Name[$2] = Name[0]
}

# Strip blanks/tabs, then '%' comments.  The profiler dump showed the
# character class as "[ \\t]"; read literally that would also delete every
# backslash and letter "t", so the /@data/ rule below could never fire.
# It is restored here as space/tab.
{
    gsub(/[ \t]+/, "", $0)
    gsub(/%.*/, "", $0)
}

/^$/ { next }

/@data/ {
    In = 1
    FS = ","
    srand(Seed)
}

/@/ { next }

In {
    Rows++
    train(Rows, Data, Counts)
}

END {
    # score() divides by the row count, so there is nothing to learn from
    # (and no safe way to score) an input without data rows.
    if (Rows > 0)
        learn(Rows, Data, Counts)
    else
        print "no data rows found; nothing to learn" > "/dev/stderr"
}

# chop: copy the best entries of score0 into out.
#   score0  key -> score
#   memo    score -> key (reverse index; equal scores share one slot, so
#           only the key stored last for a tied score can be emitted)
#   out     receives key -> score for the survivors
# Walks the sorted scores from the top and stops at the first score that is
# <= Dull * best, or after Beam entries.
function chop(score0, memo, out,    ranked, n, i) {
    n = asort(score0, ranked)
    for (i = n; i >= 1; i--) {
        if (ranked[i] <= ranked[n] * Dull)
            break
        if (i <= n - Beam)
            break
        out[memo[ranked[i]]] = ranked[i]
    }
}

# combine: merge two rules into one.  The leading class field of each rule
# is dropped, the remaining feature=value terms are sorted and de-duplicated,
# and the result is returned as  class,term,term,...
function combine(class, this, that,    n, i, used, tmp, out) {
    sub(/^[^,]*,/, "", this)
    sub(/^[^,]*,/, "", that)
    split(this "," that, tmp, ",")
    n = asort(tmp)
    out = tmp[1]
    used[tmp[1]] = 1
    for (i = 2; i <= n; i++) {
        if (!used[tmp[i]]) {
            out = out "," tmp[i]
            used[tmp[i]] = 1
        }
    }
    return class "," out
}

# explode: build a sampling pool.  Each key i of a is appended to out a[i]
# times; out[0] holds the pool size.
function explode(a, out,    i, j) {
    for (i in a)
        for (j = 1; j <= a[i]; j++)
            out[++out[0]] = i
}

# fv: build a feature=value key.
function fv(f, v) {
    return f _ v
}

# learn: rank the single-term rules, then run the improvement rounds.
function learn(rows, data, counts,    which0, which) {
    round0(counts, which0)
    rounds(1, 0, Lives, which0, rows, data, which)
}

# matched: does rule `this` hold for row `row` of data?
# Field 1 of the rule (the class) is skipped.  Terms for the same feature
# are alternatives; as soon as the walk moves on to a different feature,
# the previous feature must have matched at least once.  Relies on terms
# for one feature being adjacent (combine() sorts them).
# Returns 0 on failure, otherwise the match count of the last feature.
function matched(row, data, this,    col, n, goals, pair, f0, f, status) {
    n = split(this, goals, ",")
    for (col = 2; col <= n; col++) {
        split(goals[col], pair, _)
        f = pair[1]
        status[f] += (data[row, f] == pair[2])
        if (f0 && f != f0 && !status[f0])
            return 0
        f0 = f
    }
    return status[f]
}

# most: largest value in a (or -1e+09 when a is empty).
function most(a,    i, max) {
    max = -1e+09
    for (i in a)
        if (a[i] > max)
            max = a[i]
    return max
}

# normalize: rescale the values of a, in place, to integer percentages of
# their sum.  A zero sum leaves a untouched rather than dividing by zero.
function normalize(a,    i, sum) {
    for (i in a)
        sum += a[i]
    if (!sum)
        return
    for (i in a)
        a[i] = int(100 * a[i] / sum)
}

# one: draw one entry at random from a pool built by explode().
function one(sample,    any) {
    any = int(rand() * sample[0]) + 1
    return sample[any]
}

# o: print a blank line, then "value,key" for every entry of a, piped
# through sort(1); `what` supplies optional sort flags.  `s` is accepted
# but not used.
function o(a, s, what,    i, com) {
    print ""
    com = what ? ("sort " what) : "sort "
    for (i in a)
        print (a[i] "," i) | com
    close(com)
}

# oo: like o(), but prints each entry as  s[ key ] = value.
function oo(a, s, what,    i, com) {
    print ""
    com = what ? ("sort " what) : "sort "
    for (i in a)
        print (s "[ " i " ] = " a[i]) | com
    close(com)
}

# round0: seed the search.  Each counts[feature=value] becomes the rule
# "Max,feature=value" scored by its count (negated when Max is zero), and
# chop() selects the survivors into which.
function round0(counts, which,    j, i, s, memo, scores) {
    for (i in counts) {
        s = Max ? counts[i] : -1 * counts[i]
        j = Max "," i
        memo[s] = j
        scores[j] = s
    }
    chop(scores, memo, which)
}

# rounds: one improvement round, recursing into the next.
#   round   1-based round number
#   max0    best score of the previous round
#   lives   remaining patience; reset to Lives when the best score beats
#           max0 * More, otherwise decremented
#   which0  current survivors (rule -> score); normalized in place
#   out     receives the final survivors when lives runs out
#   cache   rule -> score, carried across rounds so each rule is scored once
# Returns the best score of the final survivors.
function rounds(round, max0, lives, which0, rows, data, out, cache,
                max, i, sample, which1, s, memo, which2) {
    if (round == 1) {
        max = 0
    } else {
        max = most(which0)
        lives = (max > max0 * More) ? Lives : lives - 1
        if (lives < 0) {
            for (i in which0)
                out[i] = which0[i]
            return max
        }
    }
    print "\n----------------------------------------------------"
    print ("% max: " max " seed: " Seed " round: " round " max: " max " lives: " lives)

    # Sample pairs of survivors in proportion to their share of the total
    # score, and merge each pair into a new candidate.
    normalize(which0)
    explode(which0, sample)
    twos(Max, sample, Samples, which1)
    for (i in which0)
        which1[i] = i
    if (Verbose)
        values(which1, "candidate")

    # Score the new candidates; the random jitter keeps scores distinct so
    # they can serve as keys of memo.
    for (i in which1) {
        s = (i in cache) ? cache[i] : score(i, rows, data) + rand() * Pinch
        memo[s] = i
        cache[i] = s
    }
    chop(cache, memo, which2)
    if (Verbose)
        o(which2, "score", "-t, -n -k 1")
    return rounds(round + 1, max, lives, which2, rows, data, out, cache)
}

# score1: turn the raw tallies for a rule into a score.  Rules matching
# OverFitted rows or fewer score 0; otherwise the score is sum, weighted
# by support when Eval is 1.
function score1(fits, support, sum) {
    if (fits <= OverFitted)
        return 0
    if (Eval == 1)
        return support * sum
    return sum
}

# score: score `rule` over rows 1..rows of data.  For every matching row
# the value in the last declared attribute's column is added to sum.
# The result is negated when Max is zero.
function score(rule, rows, data,    cols, row, s, fits, sum, support) {
    fits = sum = Pinch
    cols = Name[Name[0]]
    for (row = 1; row <= rows; row++) {
        if (matched(row, data, rule)) {
            fits++
            sum += data[row, cols]
        }
    }
    # No rows means no support; avoid a fatal division by zero.
    support = rows ? fits / rows : 0
    s = score1(fits, support, sum)
    return Max ? s : -1 * s
}

# train: store the current record as row `rows` of d, and add the record's
# last field to c[attribute, value] for every cell.  "?" cells are skipped.
function train(rows, d, c,    what, i) {
    for (i = 1; i <= NF; i++) {
        if ($i == "?")
            continue
        what = Name[i]
        d[rows, what] = $i
        c[what, $i] += $NF
    }
}

# twos: generate n merged rules (duplicates collapse) into sampled.
function twos(class, sample, n, sampled,    pair) {
    while (n--) {
        pair = two(class, sample)
        sampled[pair] = pair
    }
}

# two: draw two different rules from the pool and merge them.  After nine
# failed attempts to find a distinct partner, the single rule is returned.
function two(class, sample, tries,    this, that) {
    this = one(sample)
    if (tries == 9)
        return this
    that = one(sample)
    if (this == that)
        return two(class, sample, tries + 1)
    return combine(class, this, that)
}

# values: print a blank line, then "% s: value" for every entry of a,
# piped through sort(1); `what` supplies optional sort flags.
function values(a, s, what,    i, com) {
    print ""
    com = what ? ("sort " what) : "sort"
    for (i in a)
        print ("% " s ": " a[i]) | com
    close(com)
}