(defun unknownp (item) (eql item (wme-unknown *w*))) (defun lastitem (list) (first (last list))) (defun pow (num &optional (exp 2)) (let ((total 1)) (dotimes (x exp) (setf total (* total num))) total)) (defun memberp (item list) (member item list)) ;----------------------------------------------------------------------- ;input: variances-list that contains 4 items(maybe 5, but discards it): ; 1st=list of rows, 2nd=sum, 3rd=count(n),4th=sumSQ ;output: variances-list contains two items: ; 1st=list of rows, 2nd = variances (defun findVariances (variances vartotal) (let (myVariances) (dotimes (x (length variances)) (let ((sum 0) (n 0) (sumSq 0)) (push (list (first (nth x variances)) nil) myVariances) (dotimes (y (length variances)) (unless (eql y x) (incf sum (second (nth y variances))) ;sum (incf n (third (nth y variances))) ;n (incf sumSq (fourth (nth y variances))))) ;sumSq (if (< n 2) (setf n 2)) (setf (second (first myVariances)) (sqrt (/ (- sumSq (/ (* sum sum) n)) (- n 1)))))) (setf myVariances (first (sort myVariances #'< :key #'second))) (if (> (second myVariances) vartotal) nil myVariances))) ;----------------------------------------------------------------------- (defun findVarBin (columnNum vartotal coltype) (let (;1st=list of rows, 2nd=sum, 3rd=count(n),4th=sumSQ ;though when finished, 1st=list of rows with 2nd, 2nd=variance variances temp) (dotimes (row (length (therows))) (let (currentSplit goal cells ifstatement) (setf goal (lastitem (row-cells (nth row (therows))))) (setf cells (row-cells (nth row (therows)))) (if (equal coltype "num") (progn (setf currentSplit (floor (* 1.0 (/ row (/ (length (therows)) 10))))) (setf ifstatement #'(lambda () (nth currentSplit variances)))) (progn (setf currentSplit (position (nth columnNum cells) variances :key #'fifth)) (setf ifstatement #'(lambda () (member (nth columnNum (row-cells (nth row (therows)))) variances :key #'fifth))))) (if (funcall ifstatement) (progn (push (row-sortkey (nth row (therows))) (first (nth currentSplit variances))) (incf (second (nth currentSplit variances)) goal) ;sum (incf (third (nth currentSplit variances))) ;count (incf (fourth (nth currentSplit variances))(* goal goal))) ;sumSq (setf variances (append variances (list (list (list (row-sortkey (nth row (therows)))) goal 1 (* goal goal) (nth columnNum cells) ))))))) (setf variances (findVariances variances vartotal)) variances)) ;----------------------------------------------------------------------- (defun call_pp () (readarff "trainp.arff") (data "DummyData.lisp") (BAMBOO_pp "train" "trainp.arff") ;----------------------------- (readarff "testp.arff") (data "DummyData.lisp") (BAMBOO_pp "test" "testp.arff")) (defun BAMBOO_pp (&optional (type "train") (output "rr_dummyData.arff")) (let (DiscrCols NumCols ;temporarily removed rows for sorting perposes,handles unknowns removedRows variances (maxsize (sqrt (length (therows)))) temp (sum 0) (n 0) (sumSq 0) vartotal) ;go through each attribute/column number ;and record which is numeric and which is symbolic (dotimes (x (1- (length (thecols)))) (if (sym-p (nth x (thecols))) (setf DiscrCols(append DiscrCols (list x))) (setf NumCols (append NumCols (list x))))) ;logs numeric values (dolist (row (therows)) (setf temp (row-cells row)) (incf sum (lastitem (row-cells row))) (incf n) (incf sumSq (pow (lastitem (row-cells row)))) (dolist (x NumCols) (setf (nth x temp) (log (nth x temp) 2))) (setf (lastitem temp) (log (lastitem temp) 2))) ;ROW REMOVAL (if (equal type "train") (progn (if (< n 2) (setf n 2)) (setf vartotal (sqrt (/ (- sumSq (/ (* sum sum) n)) (- n 1)))) (dotimes (col (1- (length (thecols)))) (dolist (row (therows)) (setf temp (row-cells row)) (if (unknownp (nth col temp)) (push row removedRows))) (setf (therows) (remove '? (therows) :key 'row-cells :test 'member)) (setf temp (if (member col NumCols) (progn (sort (therows) #'< :key #'(lambda (x) (nth col (row-cells x)))) (findVarBin col vartotal "num")) (findVarBin col vartotal "disc"))) (if temp (push temp variances)) (if removedRows (setf (therows) (append (therows) removedRows)))) (row_removal variances maxsize vartotal))) (table2ARFF output) )nil) (defun row_removal (variances maxsize oldvar) (let ((sum 0) (n 0) (sumSq 0) newvar) (setf variances (remove t variances :key #'(lambda (x) (> (length (first x)) (- (length (therows)) maxsize))))) (setf variances (sort variances #'< :key 'second)) (dolist (row (therows)) (unless (member (row-sortkey row) (first (first variances))) (incf sum (lastitem (row-cells row))) (incf n) (incf sumSq (pow (lastitem (row-cells row)))))) (if (< n 2) (setf n 2)) (setf newvar (sqrt (/ (- sumSq (/ (* sum sum) n)) (- n 1)))) (if (> oldvar newvar) (progn (dolist (x (first (first variances))) (setf (therows) (remove x (therows) :key 'row-sortkey)) (dolist (y (rest variances)) (setf (first y) (remove x (first y))))) (setf variances (rest variances)) (if variances (row_removal variances maxsize newvar)))))) ;----------------------------------------------------------------------------------- ;learner (defun demoBAMBOO_l () (readARFF "rr_dumyyData,arff") (data "dummyData.lisp") (BAMBOO_l 3 "removed_rows.arff")) (defun call_l () (readARFF "trainp.arff") (data "dummyData.lisp") (BAMBOO_l 3 "testp.arff" (thetable))) (defun listmedian (list) (nth (1- (round (* 1.0 (/ (length list) 2)))) list)) (defun BAMBOO_l (k testCaseFile &optional (tbl (thetable))) ;----------------------------------------- ;LEARNER: K means ;----------------------------------------- (let (mMedians newRows) (readARFF testCaseFile) (data "dummyData.lisp") (dolist (row (therows)) ;per test row given (let (rows medians) (dotimes (x (length (table-rows tbl))) ;must look over each row (push (list (row-sortkey (nth x (table-rows tbl))) (eucliddist row (nth x (table-rows tbl)) (table-cols tbl) tbl)) rows)) (setf rows (sort rows #'< :key 'second)) (dotimes (round 10) ;repeat 10 times (let (selected (i 0) (sum 0) (n 0) (sumSq 0) temp (var 9999999) newvar) (loop while (and (< (length selected) k) (< i (length (table-rows tbl)))) do (setf temp (nth (position (first (nth i rows)) (table-rows tbl) :key 'row-sortkey) (table-rows tbl))) (if (> (random 100) 33) (progn (incf sum (lastitem (row-cells temp))) (incf n 1) (incf sumSq (pow (lastitem (row-cells temp)))) (push (nth i rows) selected))) (incf i)) (setf newvar (sqrt (/ (- sumSq (/ (* sum sum) n)) (- n 1)))) (loop while (and (<= newvar var) (< i (length (table-rows tbl)))) do (setf temp (nth (position (first (nth i rows)) (table-rows tbl) :key 'row-sortkey) (table-rows tbl))) (if (> (random 100) 33) (progn (setf var newvar) (incf sum (lastitem (row-cells temp))) (incf n 1) (incf sumSq (pow (lastitem (row-cells temp)))) (push (nth i rows) selected) (setf newvar (sqrt (/ (- sumSq (/ (* sum sum) n)) (- n 1)))))) (incf i)) (setf selected (rest selected)) (push (listmedian selected) medians))) (setf mMedians (append mMedians (list (first (listmedian (sort medians #'< :key 'second)))))) )) ;(print mMedians) (dotimes (x (length mMedians)) (setf (nth x mMedians) (lastitem (row-cells (nth (position (nth x mMedians) (table-rows tbl) :key 'row-sortkey) (table-rows tbl)))))) ;(print mMedians) )mMedians) ;----------------------------------------------------------------------------------- ;reads a given table and outputs it to an arff file (defun table2ARFF (&optional (outputFile "rr_dummyData.arff") (tbl (thetable))) ;now write to dummy lisp file dummyData.lisp, or whetever the output file is (with-open-file (out (make-pathname :name outputFile) :direction :output :if-exists :supersede) (format out "@relation ~a~%" (table-name tbl)) (dolist (x (table-arffAttr tbl)) (format out "@attribute ~a~%" x)) (format out "@data~%") (dolist (x (table-rows tbl)) (format out "~a" (first (row-cells x))) (dolist (y (rest (row-cells x))) (format out ",~a" y)) (format out "~%")))) ;reads a given arff file and outputs a lisp file where desired. (defun readARFF (filename &optional (outputFile "dummyData.lisp")) (let (line file splitfile (header "(deftable ") rows arffCols data (split #\Space)) (with-open-file (in (make-pathname :name filename) :direction :input) (setf line (string (read-line in nil 'eof))) (loop while (not (equal line "EOF")) do (setf file (append file (list line))) (setf line (string (read-line in nil 'eof))))) (dolist (x file) (let ((temp (breakString x split))) (if (equal (first temp) "@data") (setf split #\,)) (setf splitfile (append splitfile (list temp))))) (dotimes (y (length splitfile)) (let ((spacing " ") (x (nth y splitfile)) row) (if x (if data (let ((row "(!")) (dolist(z x) (setf row (concatenate 'string row " " z))) (setf rows (append rows (list (concatenate 'string row ")"))))) (progn (if (equal (first x) "@relation") (setf header (concatenate 'string header (second x)))) (if (equal (first x) "@attribute") (progn (setf arffCols (append arffCols (list (let (blah) (dolist (z (rest x)) (setf blah (concatenate 'string blah " " z))) blah)))) (if (equal (third x) "real") (setf spacing (concatenate 'string spacing "$"))) (if (not (equal (first (nth (1+ y) splitfile)) "@attribute")) (progn (setf spacing (concatenate 'string spacing "!")) (setf data t))) (setf header (concatenate 'string header spacing (second x))) (if data (progn (setf header (concatenate 'string header ")")) (incf y)))))))) )) ;now write to dummy lisp file dummyData.lisp, or whetever the output file is (with-open-file (out (make-pathname :name outputFile) :direction :output :if-exists :supersede) (format out "~a~%" header) (dolist (x arffCols) (format out "(arff \"~a\")~%" x)) (dolist (x rows) (format out "~a~%" x))) nil)) ;splits a string up into a list by a given demoinator (defun breakString (str &optional (char #\Space)) (let (split spacPos (rest (string-trim (string char) str))) (setf rest (string-trim (string #\Newline) str)) (if (not (setf spacPos (position char rest))) (setf split (list str))) (loop while spacPos do (setf split (append split (list (subseq rest 0 spacPos)))) (setf rest (string-trim (string char) (subseq rest (1+ spacPos)))) (setf spacPos (position char rest)) (if (not spacPos) (setf split (append split (list rest))))) split)) ;preprocessor (defun demoBAMBOO_pp () (readARFF "../servo.arff") (data "dummyData.lisp") (BAMBOO_pp)) #| ;learner (defun demoBAMBOO_l () (readARFF "rr_dummyData.arff") (data "dummyData.lisp") (BAMBOO_l 3 "../servo_loo.arff")) |#