;;;; the kokako algorithm ;;;; note: things can become best then rest bu ;;;; once something is rest it remains rest for ever. so ;;;; we can do the rest counts in one pass through the data. ;; unit tests (defvar *test-name* nil) (defmacro deftest (name parameters &body body) "Define a test function. Within a test function we can call other test functions or use 'check' to run individual test cases." `(defun ,name ,parameters (let ((*test-name* (append *test-name* (list ',name)))) ,@body))) (defmacro check (&body forms) "Run each expression in 'forms' as a test case." `(combine-results ,@(loop for f in forms collect `(report-result ,f ',f)))) (defmacro combine-results (&body forms) "Combine the results (as booleans) of evaluating 'forms' in order." (with-gensyms (result) `(let ((,result t)) ,@(loop for f in forms collect `(unless ,f (setf ,result nil))) ,result))) (defun report-result (result form) "Report the results of a single test case. Called by 'check'." (format t "~:[FAIL~;pass~] ... ~a: ~a~%" result *test-name* form) result) (defstruct wme names updates warnings (patience 20) all n best (b 0) ; list of best egs, number of best egs rest (r 0) ; list of rest egs, number of rest egs (enough 30) ; maximum number of bests (too-few 4) ; stop if too few examples (sufficient 1.5) ; number of rests must be sufficient*b (classes (make-hash-table)) ) (defun data (&keys names egs0 updates (class -1)) (let* ((w (make-wme :names names :updates updates)) (egs (mapcar #'(lambda (one) (datum names one updates class w)) egs0))) (setf (wme-egs w) egs) w)) (defmacro ooops (w format-string &rest args) (progn (push `(format nil format-string ,@args) (wme-warnings w)) (defc (wme-patience w)) (if (zero (wme-patience w)) (error "too many errors")) )) (defun datum (names one updates klass w) (labels ((classi () (if (< klass 0) (+ klass (length names) klass))) (unless (= (length names) (length no-next-method)) (oops w "~a wrong size" one)) (if updates (setf one (sublis updates one))) (infc (wme-n w)) (mulitple-value-bind (features class) (allbut l (classi)) (make-eg :class class (defun allbut (l n) "assumes list is indexed 0 1 2 3 4..." (if (zerop n) (values (rest l) (first l)) (cons (first l) (allbut (rest l) (1- n))))) (defstruct eg features class) (defstruct counted (n 0) l) (defun counted+ (new c) (push new (counted-l c)) (incf (counted-n c)) c) ;; selectors for the items in best, rest lists (defun score-columns (score columns) (cons score columns)) (defun score (eg) (first eg)) (defun columns (eg) (rest eg)) (defun inc (eg h &optional before) "increment a hash table count, unless we've already used it" (labels ((old (key value) (and before (member value (gethash key before))))) (let ((i -1)) (dolist (x (columns eg)) (incf i) (unless (old i x) (setf (gethash `(,i ,x) h) (counted+ eg (gethash `(,i ,x) h (make-counted))))))))) (defun jump (w) "move things from best to rest" (decf (wme-b w)) (rest+ (pop (wme-best w)) w)) (defun rest+ (new w) "insert into rest, and update the resth index" (let ((rest (gethash :rest (wme-class w) (make-hash-table))) (incf (wme-r w)) (push new rest) (inc new (wme-resth w))))) (defun best+ (new w) "insert into best, keeping the order in best. don't change the besth index since best could change if better bests arrive in the future" (labels ((insert (cons list) (cond ((null list) (list cons)) ((<= (first cons) (first (first list))) (cons cons list)) (t (cons (first list) (insert cons (rest list))))))) (incf (wme-b w)) (setf (wme-best w) (insert new (wme-best w))))) (defun train (eg &optional (w (make-wme))) "add new input into best or rest" (labels ((betterp () (> (eg-class eg) (eg-class (first (wme-best w))))) (enoughp () (>= (wme-b w) (wme-enough w)))) (push eg (wme-all w)) (if (enoughp) (if (betterp) (progn (jump w) (best+ new w)) (rest+ new w)) (best+ new w) w))) (defun b^2/b+r (w before) "return attribute ranges ranked by their b^2/(b+r) score" (let ((best (wme-classes w)) (rest (wme-resth w)) out) (labels ((rank (b r) (/ (* b b) (+ b r))) (result (k v) (list (float (rank (counted-n v) (counted-n (gethash k rest (make-counted))))) (counted-l v) k))) (sufficient w) (dolist (eg (wme-best w)) (inc eg best before)) (maphash #'(lambda (k v) (push (result k v) out)) best) (sort out #'> :key #'car)))) (defun sufficient (w) "returns a working memory with sufficient items in rest" (when (< (/ (wme-r w) (wme-b w)) (wme-sufficient w)) (jump w) (sufficient w))) (defun run (source &optional (enough 30) (want (make-hash-table)) (runs 0) baseline ) "main function" (let (nums (n 0) (w (make-wme :enough enough))) (dolist (eg (funcall source)) (when (wantedp want eg) ; select the relevant subset of examples (incf n) (let ((num (first (last eg)))) (push num nums) (train (make-eg :features (butlast eg) :class (first (last eg)) w)))) ; train on the relevant ; examples (when (< n (wme-too-few w)) (return-from run n)) ;early termination (unless (zerop n) (let* ((sorted (sort nums #'<)) (pairs (b^2/b+r w want)) (pair (third (first pairs))) ;; "pair" is now the next best thing to do (key (first pair)) (value (second pair)) (median (median sorted n)) (spread (spread median sorted n))) ; (print pairs) (when (zerop runs) (format t "~%~a~%" source) (setf baseline sorted)) ; store the baseline (used in reporting) (format t "found ~a egs with median ~a [~2,0F%] (min=~a spread= ~a max=~a). recommend ~a = ~a~%" n median (if baseline (* 100 (/ (position median baseline) (length baseline))) 50) (first sorted) spread (first (last sorted)) key value) (wanted key value want) ; add "key value" to the wanted lise (run source enough want (1+ runs) baseline))))) ;;; support code for the run ;; some maths (defun median (l n) (nth (round (* 0.5 n)) l)) (defun spread (median l n) (- (nth (round (* 0.75 n)) l) median)) ;;; the query system (defun wantedp (wanted eg) "check if an example 'eg' satisfies all the constraints in 'wanted'" (maphash #'(lambda (key values) (unless (wantedp1 key values eg) (return-from wantedp nil))) wanted) t) (defun wantedp1 (key values eg) "checks that the example contains at least one value from key" (if (null values) ; bad news! nil (let ((value (first values))) (or (eql (nth key eg) value) (wantedp1 key (rest values) eg))))) (defun wanted (key value queries) (let* ((values (gethash key queries))) (if values (or (member value values) (push value values)) (push value values)) (setf (gethash key queries) values) queries)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; test suit (defun tests () (test1) (test2) (test3) (test4)) (defun test1 () (load "/Users/timm/svns/wisp/var/timm/08/keys/weather.lisp") (time (run 'weather-nominal))) (defun test2 () (load "/Users/timm/svns/wisp/var/timm/08/keys/breast-cancer.lisp") (time (run 'breast-cancer))) (defun test3 () (load "/Users/timm/svns/wisp//var/timm/08/keys/cholesterol.lisp") (time (run 'cholesterol))) (defun test4 () (load "/Users/timm/svns/wisp/var/timm/08/keys/mushroom.lisp") (time (run 'mushroom)))