; TODO list:
; need to tune training per learner
; need to make abcd know about numbers
; then, bagging
; then n-way
; need a lint tool
; learning bestK

; Working memory for the k-nearest-neighbour learner.
;   maxs, mins : per-column arrays filled in by TRAININGS
;   summary    : function that reduces a list of neighbour classes to one
;                predicted class (defaults to #'majority)
;   k          : how many neighbours to consult
;   instances  : the training rows
;   classes    : hash table, class value -> abcd results object
; NOTE(review): `defklass` is a project macro; the slots `ncols` and
; `classid` used below are presumably inherited from `data` -- confirm.
(defklass knn-data (data)
  maxs
  mins
  (summary #'majority)
  (k 1)
  instances
  (classes (make-hash-table)))

; Train a k-nearest-neighbour model on TRAINS and score it on TESTS.
; TESTS defaults to TRAINS; in that case TESTINGS never compares a row with
; itself (it skips neighbours that are EQL to the row being classified).
; Returns whatever TESTINGS returns.
(defun knn (k trains &optional (tests trains))
  (let ((wme (trainings
              (make-instance 'knn-data :k k :instances trains)
              trains)))
    (testings wme tests)))

; Read the header row (element 0 of ROWS), then walk the rows recording the
; per-column extremes and creating one abcd object per class value seen.
; Signals an error for any row whose length differs from ncols.
; NOTE(review): `mins` is seeded with *least*, the same value as `maxs`.
; Unless `min2` treats that seed specially, a running minimum started there
; would never move -- confirm against the definitions of min2 / *least*.
(defmethod trainings ((this knn-data) rows)
  (readHeaderRow (svref rows 0) this)
  (in knn-data this
    (setf maxs (make-array ncols :initial-element *least*)
          mins (make-array ncols :initial-element *least*))
    (dov+ (row rows this)
      (if (not (= ncols (length row)))
          (error "expected ~a cells, found ~a, in ~a"
                 ncols (length row) row)
          (dovn (cell n row)
            (setit (svref maxs n) (max2 cell it))
            (setit (svref mins n) (min2 cell it))
            (if (= n classid)
                (setf (gethash cell classes)
                      (make-instance 'abcd :target cell))))))))

; Classify every row in ROWS: pair each stored instance (other than the row
; itself) with its distance from the row, summarise the classes of the k
; nearest, and record actual vs. predicted via abcd-add.
; NOTE(review): `dov+` is a project macro; its third argument is presumably
; the loop's result form, which would make this return `classes` -- confirm.
(defmethod testings ((this knn-data) rows)
  (in knn-data this
    (dov+ (row rows classes)
      (let (neighbors
            (actual (svref row classid)))
        (dov+ (instance instances)
          (unless (eql row instance)
            (push (cons (dist row instance this) instance)
                  neighbors)))
        (let ((predicted (funcall summary
                                  (nearby-classes k this neighbors))))
          (abcd-add classes actual predicted))))))

; Return the class values of the K closest entries in NEIGHBORS.
;   k         : number of neighbours wanted
;   data      : object whose data-classid gives the class column index
;   neighbors : list of (distance . row) pairs; it is sorted destructively
; If fewer than K neighbours are available, all of them are used (the
; previous unclamped SUBSEQ signalled an error in that case).
(defun nearby-classes (k data neighbors)
  (flet ((label-of (pair)
           (svref (cdr pair) (data-classid data)))
         (nearest (pairs)
           (subseq pairs 0 (min k (length pairs))))
         (by-distance (pairs)
           (sort pairs #'< :key #'car)))
    (mapcar #'label-of (nearest (by-distance neighbors)))))

; Regression test on the symbolic weather data.
; The expected table is laid out one row per k.
; NOTE(review): this assumes `test` compares its two arguments without
; regard to whitespace layout -- confirm against the definition of `test`.
(deftest ?knn-weather ()
  (test
   (with-output-to-string (s)
     (dotimes+ (k 10)
       (?knn-test "data/weather.lisp" k s)))
   ;         pd  pf  prec f  acc         pd  pf  prec f  acc
   "1 | NO , 40, 44, 33, 36, 50 | YES , 56, 60, 62, 59, 50
2 | NO , 40, 44, 33, 36, 50 | YES , 56, 60, 62, 59, 50
3 | NO , 60, 22, 60, 60, 71 | YES , 78, 40, 78, 78, 71
4 | NO , 60, 22, 60, 60, 71 | YES , 78, 40, 78, 78, 71
5 | NO , 20, 0, 100, 33, 71 | YES , 100, 80, 69, 82, 71
6 | NO , 20, 0, 100, 33, 71 | YES , 100, 80, 69, 82, 71
7 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
8 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
9 | NO , 0, 0, 0, 0, 64 | YES , 100, 100, 64, 78, 64
10 | NO , 0, 0, 0, 0, 64 | YES , 100, 100, 64, 78, 64"
   ))

; Print (i result) for knn run on the credit data, once per value of i
; supplied by dotimes+. Not a deftest: nothing is compared.
(defun ?knn-credit ()
  (dotimes+ (i 10)
    (print `(,i ,(knn i (read1 "data/credit.lisp"))))))

; Regression test on the numeric weather data.
; NOTE(review): same whitespace assumption about `test` as in ?knn-weather.
(deftest ?knn-weather-num ()
  (test
   (with-output-to-string (s)
     (dotimes+ (k 10)
       (?knn-test "data/weather-num.lisp" k s)))
   ;         pd  pf  prec f  acc         pd  pf  prec f  acc
   "1 | NO , 80, 22, 67, 73, 79 | YES , 78, 20, 87, 82, 79
2 | NO , 80, 22, 67, 73, 79 | YES , 78, 20, 87, 82, 79
3 | NO , 0, 22, 0, 0, 50 | YES , 78, 100, 58, 67, 50
4 | NO , 0, 22, 0, 0, 50 | YES , 78, 100, 58, 67, 50
5 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
6 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
7 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
8 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
9 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
10 | NO , 0, 11, 0, 0, 57 | YES , 89, 100, 62, 73, 57
"
   ))

; Regression test on the crx data (classes + and -).
; NOTE(review): same whitespace assumption about `test` as in ?knn-weather.
(deftest ?knn-crx ()
  (test
   (with-output-to-string (s)
     (dotimes+ (k 10)
       (?knn-test "data/crx.lisp" k s)))
   "1 | + , 78, 15, 81, 79, 82 | - , 85, 22, 83, 84, 82
2 | + , 78, 15, 81, 79, 82 | - , 85, 22, 83, 84, 82
3 | + , 85, 13, 84, 85, 86 | - , 87, 15, 88, 87, 86
4 | + , 85, 13, 84, 85, 86 | - , 87, 15, 88, 87, 86
5 | + , 87, 12, 85, 86, 88 | - , 88, 13, 90, 89, 88
6 | + , 87, 12, 85, 86, 88 | - , 88, 13, 90, 89, 88
7 | + , 86, 14, 83, 85, 86 | - , 86, 14, 88, 87, 86
8 | + , 86, 14, 83, 85, 86 | - , 86, 14, 88, 87, 86
9 | + , 85, 13, 84, 85, 86 | - , 87, 15, 88, 88, 86
10 | + , 85, 13, 84, 85, 86 | - , 87, 15, 88, 88, 86
"
   ))

; Write one report line to stream S: K (via "~3s "), then each value
; yielded by dovalue over the result of knn on the data read from file F,
; each followed by a space, then a newline.
(defun ?knn-test (f k s)
  (format s "~3s " k)
  (dovalue (value (knn k (read1 f)))
    (format s "~a " value))
  (terpri s))