;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; knn functions for walking the idea-k2 trees ;;; (defun knn-k2(tbl row k &optional (? #'nearest) treatment (min-size 6)) (knn1-k2 tbl tbl row k ? treatment min-size)) (defun knn1-k2(tbl above row k ? treatment min-size) (let ((right-size (>= min-size (length (table-rows tbl)))) (clustered (table-north-west tbl))) (cond ((not clustered) (funcall ? row above k treatment)) (right-size (funcall ? row tbl k treatment)) (t (knn2-k2 tbl above row k ? treatment min-size))))) (defun knn2-k2(tbl above row k ? treatment min-size) (with-slots (left right rows east west north-east north-west south-east south-west c xbreak ybreak) tbl (let* ((a (dist tbl row west treatment)) (b (dist tbl row east treatment)) (x (x a b c)) (y (y a x))) (if (<= x xbreak) (if (<= y ybreak) (knn1-k2 south-west tbl row k ? treatment min-size) (knn1-k2 south-east tbl row k ? treatment min-size)) (if (<= y ybreak) (knn1-k2 north-west tbl row k ? treatment min-size) (knn1-k2 north-east tbl row k ? treatment min-size)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; knn functions for walking the single-split idea trees ;;; (defun knn (tbl row k &optional (? #'nearest) treatment (min-size 6)) (knn1 tbl tbl row k ? treatment min-size)) (defun knn1 (tbl above row k ? treatment min-size) (let ((right-size (>= min-size (length (table-rows tbl)))) (clustered (table-west tbl)) ) (cond ((not clustered) (funcall ? row above k treatment)) (right-size (funcall ? row tbl k treatment)) (t (knn2 tbl above row k ? treatment min-size))))) (defun knn2 (tbl above row k ? treatment min-size) (with-slots (left right rows west east c break) tbl (let* ((a (dist tbl row west treatment)) (b (dist tbl row east treatment)) (x (x a b c))) (if (<= x break) (knn1 left tbl row k ? treatment min-size) (knn1 right tbl row k ? treatment min-size))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun classify-leaf (tbl row k) (with-slots (up gray? color id) tbl (format t "~&gray? ~a color ~a~%" gray? color) (cond (color color) (gray? (zeror (kneighbors tbl row k))) (up (classify up row k))))) ;does majority counting in a cluster. Class with best representation within the cluster wins. (defun zeror (tbl) (let ((max -1) (h (make-hash-table :test #'equal)) out) (dov (row (table-rows tbl) out) (let* ((klass (sym-klass1 row tbl)) (n (incf (gethash klass h 0)))) (when (> n max) (setf max n out klass)))))) ;does nearest neighbor within a cluster. (defun old-nearest (instance tbl &optional (k 1)) (let ((minDistance 1000000000000000) out) (dov (row (table-rows tbl) out) (let* ((distance (dist tbl row instance))) (if (< distance minDistance) (setf minDistance distance out (sym-klass1 row tbl))))))) (defun nearest (instance tbl &optional (k 1) treatment) (let ((minDistance 1000000000000000) (rows (table-rows tbl)) out nearestList) (dotimes (i k out) (let (near) (dov (row rows out) (let* ((distance (dist tbl row instance treatment))) (if (< distance minDistance) (setf minDistance distance out (sym-klass1 row tbl))))) (push (list minDistance out) nearestList))) (sort nearestList #'< :key #'first) (second (median nearestList)))) (defun !classify-all (&key (k 2) (tbl 'weather) (enough '6)) (dohash (klass result (classify-all (idea (data2tree tbl enough)) k)) (with-slots (target pf prec pd f g) result (format t "~&target: ~a pf: ~a prec: ~a pd: ~a f: ~a g: ~a ~a~%" target pf prec pd f g result)))) (defun !classify1 (&optional (tbl 'weather) (enough '2)) (let* ((tbl (data2tree tbl enough)) (rows (table-rows tbl)) (row (svref rows 0))) (knn tbl row 2))) (defun !classify-all (&key (tbl 'weather) (k 2) (enough 2)) (classify-all (data2tree tbl enough) :k k)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Self-tests for single-split IDEA trees and IDEA-k2 trees ;;; (defun self-test (&key (tbl "data/weather.lisp") (repeats 1) (k 6)) (let* ((tbl1 (data tbl)) (log (results0 tbl1))) (dov (row (table-rows tbl1)) (let* ((test (copy-table tbl1))) (setf (table-rows test) (vector row)) (classify-all1 tbl1 test log k))) (results! log) (dovalues (values log) (print values)))) (defun self-test-k2 (&key (tbl "data/weather.lisp") (repeats 1) (k 6)) (let* ((tbl1 (data tbl)) (log (results0 tbl1))) (dov (row (table-rows tbl1)) (let* ((test (copy-table tbl1))) (setf (table-rows test) (vector row)) (classify-all1-k2 tbl1 test log k))) (results! log) (dovalues (values log) (print values)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Example classification functions for knn (defun classify-all (&key (tbl "data/weather.lisp") (repeats 10) (k 6)) (let* ((tbl1 (data tbl)) (log (results0 tbl1))) (dotimes (r repeats) (multiple-value-bind (train test) (traintest tbl1) (classify-all1 train test log k))) (results! log) (dovalues (values log) (print values)))) (defun classify-all-effort (&key (tbl "data/albrecht.lisp") (repeats 10) (k 6)) (let* ((tbl1 (data tbl)) logs) (dotimes (r repeats) (let* ((log (make-effort-result))) (multiple-value-bind (train test) (traintest tbl1) (classify-all1-effort train test log k)) (push log logs))) (effort-stats logs))) (defun classify-all1-effort (train test log min-size treatment k idea-n) (setf train (funcall (first idea-n) train min-size)) (setf test (funcall (first idea-n) test min-size)) (dov (row (table-rows test)) (let ((actual (sym-klass1 row test)) (predicted (funcall (second idea-n) train row k #'nearest treatment min-size))) (effort-result+ log predicted actual)))) (defun classify-all1 (train test log k) (setf train (idea train)) (setf test (idea test)) (dov (row (table-rows test)) (let ((actual (sym-klass1 row test)) (predicted (knn train row k))) (result+ log actual predicted)))) (defun classify-all1-k2 (train test log k) (setf train (idea-k2 train 6)) (setf test (idea-k2 test 6)) (dov (row (table-rows test)) (let ((actual (sym-klass1 row test)) (predicted (knn-k2 train row k))) (result+ log actual predicted)))) (defun !classify-alls (&optional (f 'weather)) (let ((tbl (idea (data2tree f 2)))) (dolist (k '(1 2 3 4 5 6 7 )) (print k) (classify-all tbl :k k)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Split a table into a training and testing set. (defun traintest (tbl &optional (bins 10)) (let (trainings testings train test (rows (shufflev (table-rows tbl)))) (dovs (row n rows) (if (zerop (mod n (1- bins))) (push row testings) (push row trainings))) (setf train (copy-table tbl) test (copy-table tbl) (table-rows train) (vector! trainings) (table-rows test) (vector! testings)) (values train test)))