(defparameter *every* 'allqzjx) ;; Basic learner wrapper (defun which2 (&optional (tbl (thetable)) (report t)) (train tbl) (learn tbl report)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Train wrapper (defun train (tbl) (dolist (col (thecols tbl)) (if (typep col 'num) (progn (setf (num-counts col) (make-hash-table :test 'equal)) (setf (num-min col) most-positive-fixnum) (setf (num-max col) most-negative-fixnum) (setf (wme-results2 *w*) nil)) (setf (sym-counts col) (make-hash-table :test 'equal)))) (dolist (row (therows tbl)) (how-manys (thecols tbl) ; get the column headers (row-cells row) ; get the cells (row-class row) ; get the class of this row ))) ;; how-many wrapper (defun how-manys (cols cells class) (labels ((worker (col cell) (how-many class (col-name col) cell (if (typep col 'sym) (sym-counts col) (num-counts col)) col ))) (mapcar #'worker cols cells))) ; run down cols and cells in parallel ;; Count col value pairs (defun how-many (class what cell hash col) (when (knownp cell) ; skip any cell labelled "?" (if (goalp what) (push cell (wme-results2 *w*))) (if (typep col 'num) (progn (inchNum `(,class ,what ,cell) hash cell col) (inch `(,*every* ,what ,cell) hash)) (progn (inch `(,class ,what ,cell) hash) (inch `(,*every* ,what ,cell) hash))))) ;; Increment a hash from 0 (defun inch (key hash) "increment a hash bucket from zero" (incf (gethash key hash 0))) ;; Increment a hash from 0 and track mins/maxs (defun inchNum (key hash val col) (incf (gethash key hash 0)) (if (>= val (num-max col)) (setf (num-max col) val)) (if (<= val (num-min col)) (setf (num-min col) val))) (defun pmo () (park-miller-randomizer)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun nwaytest () ;(data "LispData/xerces/04-xerces-c.lisp") (data "LispData/poi/03-poi-ab.lisp") (learner-loop (thetable) #'nwaylrn 1 2 "defective" t 10)) ;; N-way Train/Test (defun nwaylrn (set1 set2 disc bins goal logflg base) (let ((ranges) (res) (rule) (scor) (name1) (name2) (headr)) (train set1) (if (eq disc 1) (setf ranges (undisc2 bins set1))) (setf name1 (table-name set1)) (train set1) (learn set1 nil) (setf name2 (table-name set2)) (train set2) (if (eq disc 1) (setf ranges (undisc2 bins set2))) (setf res (test set2)) (setf headr "~12a ~12a ~5d ~5d ~5d ~5d ~5f ~5f ~5f ~5f ~3d ") (with-open-file (stream "guiout.txt" :direction :output :if-exists :append :if-does-not-exist :create) (if (equal goal "flawless") (progn (setf rule (nth 0 res)) (setf scor (nth 1 res))) (progn (setf rule (nth 2 res)) (setf scor (nth 3 res)))) (format stream headr name1 name2 (scores-A scor) (scores-B scor) (scores-C scor) (scores-D scor) (* 100 (scores-pd scor)) (* 100 (scores-pf scor)) (* 100 (scores-prec scor)) (* 100 (scores-acc scor)) bins) (printrule3 rule stream (first ranges) (nth 1 ranges) logflg base) (format stream "~%") (close stream)) )) ;; N-way gui wrapper (defun guinway (set logflg base beam lives sampl disc bins eval goal) (setf (config-lives (theconfig)) lives) (setf (config-beam (theconfig)) beam) (setf (config-samples (theconfig)) sampl) (setf (config-eval1 (theconfig)) eval) (data set) (if logflg (logger base)) (learner-loop (thetable) #'nwaylrn disc bins goal logflg base)) ;;Train/Test for the interface (defun guitest (set1 set2 logflg base beam lives sampl disc bins eval goal) (let ((ranges) (res) (rule) (scor) (name1) (name2) (headr)) (data set1) (setf (config-lives (theconfig)) lives) (setf (config-beam (theconfig)) beam) (setf (config-samples (theconfig)) sampl) (setf (config-eval1 (theconfig)) eval) (if logflg (logger base)) (train (thetable)) (if (eq disc 1) (setf ranges (undisc2 bins))) (setf name1 (table-name (thetable))) (train (thetable)) (learn (thetable) nil) (data set2) (if logflg (logger base)) (setf name2 (table-name (thetable))) (train (thetable)) (if (eq disc 1) (setf ranges (undisc2 bins))) (setf res (test (thetable))) (setf headr "~12a ~12a ~5d ~5d ~5d ~5d ~5f ~5f ~5f ~5f ~3d ") (with-open-file (stream "guiout.txt" :direction :output :if-exists :append :if-does-not-exist :create) (if (equal goal "flawless") (progn (setf rule (nth 0 res)) (setf scor (nth 1 res))) (progn (setf rule (nth 2 res)) (setf scor (nth 3 res)))) (format stream headr name1 name2 (scores-A scor) (scores-B scor) (scores-C scor) (scores-D scor) (* 100 (scores-pd scor)) (* 100 (scores-pf scor)) (* 100 (scores-prec scor)) (* 100 (scores-acc scor)) bins) (printrule3 rule stream (first ranges) (nth 1 ranges) logflg base) (format stream "~%") (close stream)) )) ;; Compound rules ;; Calling format: (customrule2 "LispData/ant/06-ant-d.lisp" 1 2 t 10 "flawless" '("CBO" 1 "NIT" 3 "WMC" 0)) ;; Output: ANT-D 91 257 1 2 .7722 1.087 66.67 26.5 2 $CBO=1$NIT=3$WMC=0 (defun customrule2 (set1 disc bins logflg base want whats &optional (target1 "guiout.txt")) (let ((i 0) (tmp) (rules (list)) (res) (rule) (scor) (name1) (headr)) (loop (setf tmp (concatenate 'string "$" (nth i whats))) (incf i) (setf rule (make-rule :class want :ands (list (make-ors :for (make-symbol tmp) :values (list (nth i whats)))) :score 0)) (push rule rules) (incf i) (when (>= i (length whats)) (return))) (setf rule (pop rules)) (dolist (x rules) (setf rule (combine rule x))) (data set1) (if logflg (logger base)) (train (thetable)) (if (eql disc 1) (undisc2 bins)) (setf name1 (table-name (thetable))) (setf scor (score rule t)) (setf headr "~12a ~5d ~5d ~5d ~5d ~5f ~5f ~5f ~5f ~3d ") (with-open-file (stream target1 :direction :output :if-exists :append :if-does-not-exist :create) (format stream headr name1 (scores-A scor) (scores-B scor) (scores-C scor) (scores-D scor) (* 100 (scores-pd scor)) (* 100 (scores-pf scor)) (* 100 (scores-prec scor)) (* 100 (scores-acc scor)) bins) (printrule2 rule stream) (format stream "~%") (close stream)) )) ;; Atomic custom rules (defun customrule (set1 disc bins logflg base want what value &optional (target1 "guiout.txt")) (let ( (ranges) (res) (rule) (scor) (name1) (headr) ) (setf what (concatenate 'string "$" what)) (setf rule (make-rule :class want :ands (list (make-ors :for (make-symbol what) :values (list value))) :score 0)) (data set1) (if logflg (logger base)) (train (thetable)) (if (eql disc 1) (setf ranges (undisc2 bins))) (setf name1 (table-name (thetable))) (setf scor (score rule t)) (setf headr "~12a ~5d ~5d ~5d ~5d ~5f ~5f ~5f ~5f ~3d ") (with-open-file (stream target1 :direction :output :if-exists :append :if-does-not-exist :create) (format stream headr name1 (scores-A scor) (scores-B scor) (scores-C scor) (scores-D scor) (* 100 (scores-pd scor)) (* 100 (scores-pf scor)) (* 100 (scores-prec scor)) (* 100 (scores-acc scor)) bins) (printrule2 rule stream) ;(printrule3 rule stream (first ranges) (nth 1 ranges) logflg base) (format stream "~%") (close stream)) )) ;; Train/Test Wrapper for rig (defun test2 (set1 set2 b &optional (target1 "dataC/testFlw.txt") (target2 "dataC/testDef.txt")) (let ((ranges) (res) (rule) (scor) (name1) (name2) (headr)) ;; Load train set (data set1) ;; Log data cells (logger 10) ;; Train for mins/maxes (train (thetable)) ;; Get bin ranges (setf ranges (undisc2 b)) ;; Remember set name (setf name1 (table-name (thetable))) ;; Train to up sym-counts (train (thetable)) ;; Learn recommendations (learn (thetable) nil) ;; Load test set (data set2) ;; Log data cells (logger 10) ;; Remember set name (setf name2 (table-name (thetable))) ;; Train for discretizer (train (thetable)) ;; Undisc test data so it matched reccomendations (undisc2 b) ;; Get the results and dump to a couple files ;; One flawless the other defective, as tbl rows (setf res (test (thetable))) (setf headr "~12a ~12a ~5d ~5d ~5d ~5d ~5f ~5f ~5f ~5f ~3d ~5d ") (with-open-file (stream target1 :direction :output :if-exists :append :if-does-not-exist :create) ;; Flawless (setf rule (nth 0 res)) (setf scor (nth 1 res)) (format stream headr name1 name2 (scores-A scor) (scores-B scor) (scores-C scor) (scores-D scor) (* 100 (scores-pd scor)) (* 100 (scores-pf scor)) (* 100 (scores-prec scor)) (* 100 (scores-acc scor)) b (scores-fits scor)) (printrule2 rule stream) ;(printrule3 rule stream (first ranges) (nth 1 ranges) t 10) (format stream "~%") (close stream)) (with-open-file (stream target2 :direction :output :if-exists :append :if-does-not-exist :create) ;; Defective (setf rule (nth 2 res)) (setf scor (nth 3 res)) (format stream headr name1 name2 (scores-A scor) (scores-B scor) (scores-C scor) (scores-D scor) (* 100 (scores-pd scor)) (* 100 (scores-pf scor)) (* 100 (scores-prec scor)) (* 100 (scores-acc scor)) b (scores-fits scor)) (printrule2 rule stream) ;(printrule3 rule stream (first ranges) (nth 1 ranges) t 10) (format stream "~%") (close stream)) )) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Test results on a tbl (requires learning) (defun test (tbl) (let ((scor) (rule) (ret (list))) (dolist (results (wme-results *w*)) (setf rule (first results)) (setf scor (score (first results) t)) (setf ret (append ret (list rule scor)))) ret )) ;; Edumacation, gonna learn me a book (learn1 wrapper) (defun learn (tbl report) (let ((defect (list)) (flawless (list)) (flg t) (tmp)) ;; For each class... only flawless and defective (dolist (target (theklasses tbl)) (if flg (progn (setf flawless (learn1 target tbl report)) (setf flg nil)) (setf defect (learn1 target tbl report)))) (setf (wme-results *w*) (list flawless defect)) )) ;; The real learnin (defun learn1 (target tbl report) (setf (livesleft) (config-lives (theconfig))) (setf (theeval) (config-eval1 (theconfig))) ;; Round 0 sets up contrast set, initial reccomendations (let ((which (round0 target tbl))) ;; Rounds tries combinations (setf which (rounds 1 target which tbl report)) which)) ;; b2/b+r setup (defun round0 (target tbl) (let (out tmprule (n (length (therows tbl)))) (labels ((worker (hash want m what class value &aux s) (if (eql class want) (if(setf s (b^2/b+r hash want m n what value)) (progn (setf tmprule (make-rule :class want :ands (list (make-ors :for what :values (list value))) :score (round s 0.01))) (setf out (append out (list tmprule))) out))))) (dolist (col (thecols tbl)) ; for every column (unless (col-goalp col) ; that's not the goal (dokeys (key (if (typep col 'num) (num-counts col) (sym-counts col))) ; for everything counted in that col (worker (if (typep col 'num) (num-counts col) ; get the hash table counts (sym-counts col)) (klass-name target) ; what class are we targetting? (klass-n target) ; how many of them do we have? (col-name col) ; what is the col name? (first key) ; what class is being counted? (third key) ; what value we looking at? )))) (sort out #'> :key #'rule-score)))) ;; Are you in order? (defun inorder-p (x y) (string< (write-to-string x) (write-to-string y))) ;; Get in order (defun reorder (rule) (setf (rule-ands rule) (sort (rule-ands rule) #'inorder-p :key #'ors-for)) (dolist (a (rule-ands rule) rule) (setf (ors-values a) (sort (ors-values a) #'inorder-p)))) ;adds new batch of rules to the old batch (defun beliefsinject (old new) ;(beliefsreview old) (let ((combined old) (willadd t)) (dolist (rule new combined) (setf rule (reorder rule)) (dolist (rule2 combined) (if (equalp (rule-ands rule) (rule-ands rule2)) (setf willadd nil))) (if willadd (setf combined (cons rule combined))) (setf willadd t)) ;(beliefsreview combined) combined)) ;; Rule printin loopage (defun beliefsreview (rules) (format t "Current rule list:~%") (dolist (rule rules) (format t "--Score ~a " (rule-score rule)) (printrule rule) (format t "~%"))) ;; Pretty print a rule (defun printrule (rule) (format t "~a" (rule-class rule)) (dolist (a (rule-ands rule)) (dolist (b (ors-values a)) (format t ",~a=~a" (ors-for a) b)))) ;; Rule to a stream (defun printrule2 (rule stream) (dolist (a (rule-ands rule)) (dolist (b (ors-values a)) (format stream "~a=~a" (ors-for a) b)))) ;; Rule to a stream with ranges (defun printrule3 (rule stream hash1 hash2 logflg base) (let ((colname) (key) (min) (max)) (dolist (a (rule-ands rule)) (dolist (b (ors-values a)) (setf colname (ors-for a)) (setf key `(,colname ,b)) (if logflg (progn (setf min (expt base (gethash key hash1 nil))) (setf max (expt base (gethash key hash2 nil)))) (progn (setf min (gethash key hash1 nil)) (setf max (gethash key hash2 nil)))) (format stream "~a=~a-~a " (ors-for a) min max))))) ;; Prune the rule list, off with its... legs (defun chop (rulelist) (setf rulelist (sort rulelist #'> :key #'rule-score)) (let ((maxscore (rule-score (car rulelist))) (n 0)) (dolist (rule rulelist) (if (< n (config-beam (theconfig))) (if (> (rule-score rule) (* maxscore (config-dull (theconfig)))) (incf n)))) (butlast rulelist (- (length rulelist) n)))) ;; The contrast magics (defun b^2/b+r (hash want m n what value) (let* ((every (gethash `(,*every* ,what ,value) hash 0)) (b0 (gethash `(,want ,what, value) hash 0)) (r0 (- every b0)) (b (/ b0 m)) ; ratio in target (r (/ r0 (- n m)))) ; ration everwhere else (if (> b r) ; in more better than rester (/ (* b b) ; b^2/(b+r) (+ b r (randf 0.0000001)))))) ;which = list of canidates(rules) from previous round, a stack structure ;tbl = the table, yay ;report = some boolean about whether it's reported? ;class = the klass we're targeting. (defun rounds (round class which tbl report) (let (newrules (maxscore (if (eql 1 round) 0 (rule-score (car which))))) (declare (ignore tbl)) (if report (beliefsreview which)) (normalize which) (format report "~%---------------------------------------------------------------------------------") (format report "~%class: ~a round: ~a max: ~a lives: ~a~%" (klass-name class) round maxscore (livesleft)) (format report "New rules:~%") (setf newrules (twos (config-samples (theconfig)) (explode which))) (if report (dolist (rule newrules) (format report " candidate: ") (printrule rule) (format report "~%"))) (setf which (beliefsinject which newrules)) (dolist (rule which) ;score all rules as scores have been changed by normalize (score rule)) (setf which (chop which)) (if report (beliefsreview which)) (if (> (rule-score (car which)) (* maxscore (config-more (theconfig)))) (setf (livesleft) (config-lives (theconfig))) (decf (livesleft))) (if (< (livesleft) 0) which (setf which (rounds (+ round 1) class which tbl report))) ) ) ;; Boom goes a list, exploded by score (defun explode (which) (let ((out (list))) (dolist (rule which) (dotimes (i (rule-score rule)) (setf out (append out (list rule))))) out)) ;; Normalize a list of rules score (defun normalize (which) (let ((sum 0)) (dolist (rule which) (incf sum (rule-score rule))) (dolist (rule which) (setf (rule-score rule) (floor (/ (* 100 (rule-score rule)) sum)))))) ;; Generates a list of rule pairs (defun twos (n explosionList) (let (tempCombine combinationList) (dotimes (x n) (setf tempCombine (two explosionList)) (setf combinationList (if (memberp tempCombine combinationList) combinationList (append combinationList (list tempCombine))))) combinationList)) ;; Picks two random unmatched rules (defun two (explosionList &optional (tries 0)) (let ((this (any explosionList)) (that (any explosionList)) new) (if (< tries 9) (progn (if (equalp this that) (two explosionList (+ 1 tries)) (progn (setf new (combine this that)) (if new new (two explosionList (+ 1 tries))))))))) ;; Is u a memba? (defun memberp (goal list) (let (found) (dolist (item list) (if (not found) (if (atom item) (setf found (equalp item goal)) (setf found (memberp goal item))))) found)) ;; Combines rules (defun combine (this that) (let ((orHash (make-hash-table)) hashkeylist (combinedRules (make-rule :class (rule-class this)))) ;;records the unique attributes and their values (dolist (orItem (rule-ands this)) (setf (gethash (ors-for orItem) orHash) (ors-values orItem)) (setf hashkeylist (append hashkeylist (list (ors-for orItem))))) (dolist (orItem (rule-ands that)) (setf (gethash (ors-for orItem) orHash) (if (gethash (ors-for orItem) orHash) (union (gethash (ors-for orItem) orHash) (ors-values orItem)) (progn (setf hashkeylist (append hashkeylist (list (ors-for orItem)))) (ors-values orItem))))) ;;puts those unique attributes/values into proper or values (dolist (orFor hashkeylist) (setf (rule-ands combinedRules) (append (rule-ands combinedRules) (list (make-ors :for orFor :values (gethash orFor orHash)))))) (setf (rule-ands combinedRules) (sort (rule-ands combinedRules) 'string-lessp :key #'ors-for)) (setf (rule-sorted combinedRules) t) combinedRules)) ;; Does a rule match a row? (defun matched (row rule) (let ((cols (thecols)) (values (row-cells row)) (match t) (colmatch nil)) (dotimes (n (- (length cols) 1) match) ;process each column/cell in the row (dolist (a (rule-ands rule)) (if (equal (symbol-name (col-name (nth n cols))) (symbol-name (ors-for a))) ;part of the rule pertaining to the current column (progn (dolist (b (ors-values a)) (if (eql (nth n values) b) ;column matches the rule (setf colmatch t))) (if colmatch (setf colmatch nil) ;reset for next collumn (setf match nil)))))))) ;column did not match ;; a b c d confusion matrix maths (defun score (rule &optional (modret nil)) (let ((s (make-scores)) (target (rule-class rule))) (dolist (row (therows)) ;for each row (if (equal (row-class row) target) (if (matched row rule) ;row is of the target class (incf (scores-d s)) ;rule predicts row correclty (incf (scores-b s))) ;rule predicts row incorrectly (if (matched row rule) ;row is not in target class (incf (scores-c s)) ;rule predicts row incorrectly (incf (scores-a s))))) ;rule predicts row correctly (let ((a (scores-a s)) (b (scores-b s)) (c (scores-c s)) (d (scores-d s))) (setf (scores-fits s) (+ c d)) (setf (scores-pd s) (/ d (+ d b (randf 0.0000001)))) (setf (scores-pf s) (/ c (+ a c (randf 0.0000001)))) (setf (scores-prec s) (/ d (+ d c (randf 0.0000001)))) (setf (scores-acc s) (/ (+ a d) (+ a b c d (randf 0.0000001)))) (setf (scores-lift s) (/ (* (scores-prec s) (+ a b c d)) (+ b d (randf 0.0000001)))) (setf (scores-support s) (/ (+ c d) (+ a b c d (randf 0.0000001))))) (setf (rule-score rule) (score1 s)) (if modret s rule))) ;; Which2 rules quality scoring (defun score1 (abcds) (let ((acc (scores-acc abcds)) (pd (scores-pd abcds)) (pf (scores-pf abcds)) (prec (scores-prec abcds)) (lift (scores-lift abcds)) (support (scores-support abcds)) (fits (scores-fits abcds))) (cond ((<= fits (config-overfitted (theconfig))) 0) ((eql (theeval) 1) pd) ((eql (theeval) 2) (* 2 pd (/ prec (+ pd prec (randf 0.0000001)))) ) ((eql (theeval) 3) (* 2 pd (- 1 pf) (/ (+ pd (- 1 pf) (randf 0.0000001)))) ) ((eql (theeval) 4) (* support 2 pd (- 1 pf) (/ (+ pd (- 1 pf) (randf 0.0000001))))) ((eql (theeval) 5) (* support acc)) ((eql (theeval) 6) (* support 2 pd (/ prec (+ pd prec (randf 0.0000001))))) ((eql (theeval) 7) (* 2 pd (- 1 pf) (/ (+ pd (- 1 pf) (randf 0.0000001)))) ) ((eql (theeval) 8) (* support lift)) (t (* support 2 pd (/ prec (+ pd prec (randf 0.0000001)))))) ))