;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ABOUT THIS DOCUMENT ; Here are some notes on using Timm's lisp code. ; In the following definition of a table of data "deftable" ; starts a new table and "!" writes a new row of data into ; that table. (deftable weather forecast temp humidty windy !play) ; in deftable, !xx denotes a class (the dependent variable). ; everything else are the independent variables and are of two ; types: "sym" (for symbol) and "num" (for numeric). ; $xx denotes a "num" attribute. All other attributes are "sym". (! sunny hot high FALSE no) (! sunny hot high TRUE no) (! overcast hot high FALSE yes) (! rainy mild high FALSE yes) (! rainy cool normal FALSE yes) (! rainy cool normal TRUE no) (! overcast cool normal TRUE yes) (! sunny mild high FALSE no) (! sunny cool normal FALSE yes) (! rainy mild normal FALSE yes) (! sunny mild normal TRUE yes) (! overcast mild high TRUE yes) (! overcast hot normal FALSE yes) (! rainy mild high TRUE no) ; What data structures are needed to store the above? ; There are two answers to this question. ; For proj1a 1b 1c, the answer is "it does not matter". ; But for project2, you need to understand this stuff since ; the following code is a backbone system (on top of which, ; you can build which2). ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; GETTING THIS CODE ; The following code is an assembly of stuff that you can find at ; ; cd $HOME/svns/csx73/lisp101 ; svn export http://unbox.org/wisp/var/timm/10/dm/lisp101/ml ; To load this code ; cd $HOME/svns/csx73/lisp101/ml ; emacs boot.lisp ; ; then load boot.lisp into SLIME ; To modify this code ; ; edit boot.lisp ; ; add in your own files ; FILE LIST ; --------- ; abcd.lisp ; util, ignore, for now ; bestof.lisp ; util ; boot.lisp ; list of files ; data.lisp ; routines for deftable and "!" ; structs.lisp ; defines structs and the *W* variable ; which2.lisp ; sample code to get you started with proj2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; HIGH-LEVEL STUFF ;When the above is loaded, there is a global *w* storing the result. ;THis global is of type "wme". ;; from structs.lisp (defparameter *w* nil) (defun w0 () (setf *w* (make-wme))) ;; from data.lisp (defun data (&optional f) (w0) ; reset the global *w* (load (or f (thefile))) ; load the file, or the default file (funcall (wme-ready *w*)) ; prep (funcall (wme-run *w*)) ; learn (funcall (wme-report *w*)) ; report ) ; For example > (data "../data/discrete-lisp/weather.lisp") T > (thetable) #S(TABLE :NAME WEATHER :ROWS (#S(ROW :CELLS (SUNNY HOT HIGH TRUE NO) :CLASS NO :UTILITY 0 :SORTKEY 0.05486242722621558d0) #S(ROW :CELLS (RAINY MILD HIGH TRUE NO) :CLASS NO :UTILITY 0 :SORTKEY 0.15162094871921356d0) #S(ROW :CELLS (SUNNY MILD HIGH FALSE NO) :CLASS NO :UTILITY 0 :SORTKEY 0.16896725912164381d0) #S(ROW :CELLS (SUNNY HOT HIGH FALSE NO) :CLASS NO :UTILITY 0 :SORTKEY 0.3374376731603196d0) #S(ROW :CELLS (RAINY COOL NORMAL TRUE NO) :CLASS NO :UTILITY 0 :SORTKEY 0.34725111281542576d0) #S(ROW :CELLS (OVERCAST HOT NORMAL FALSE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.0762189353172944d0) #S(ROW :CELLS (SUNNY MILD NORMAL TRUE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.1621534186087104d0) #S(ROW :CELLS (RAINY MILD NORMAL FALSE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.2642953673986543d0) #S(ROW :CELLS (SUNNY COOL NORMAL FALSE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.2826687920405857d0) #S(ROW :CELLS (OVERCAST COOL NORMAL TRUE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.3493395062490854d0) #S(ROW :CELLS (OVERCAST HOT HIGH FALSE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.3827964523910197d0) #S(ROW :CELLS (RAINY COOL NORMAL FALSE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.4209536138992274d0) #S(ROW :CELLS (OVERCAST MILD HIGH TRUE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.4224535227656765d0) #S(ROW :CELLS (RAINY MILD HIGH FALSE YES) :CLASS YES :UTILITY 0 :SORTKEY 1.4498501279678888d0)) :KLASSES (#S(KLASS :NAME NO :N 5) #S(KLASS :NAME YES :N 9)) :COLS (#S(SYM :NAME FORECAST :GOALP NIL :COUNTS {hash of 0 items}) #S(SYM :NAME TEMP :GOALP NIL :COUNTS {hash of 0 items}) #S(SYM :NAME HUMIDTY :GOALP NIL :COUNTS {hash of 0 items}) #S(SYM :NAME WINDY :GOALP NIL :COUNTS {hash of 0 items}) #S(SYM :NAME !PLAY :GOALP #\! :COUNTS {hash of 0 items})) :RESULTS NIL) ; My code has a bunch of accessors to simplify getting to "the" last ; table loaded: (defmacro thetable () `(wme-table *w*)) (defmacro thecols (&optional tbl) `(table-cols (or ,tbl (wme-table *w*)))) (defmacro thename (&optional tbl) `(table-name (or ,tbl (wme-table *w*)))) (defmacro therows (&optional tbl) `(table-rows (or ,tbl (wme-table *w*)))) (defmacro theklasses (&optional tbl) `(table-klasses (or ,tbl (wme-table *w*)))) ; But you can't understand the code unless you look under the hood ; at the structs they came from. So.... ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Under the hood ;; from structs.lisp (defstruct wme (goal #\!) (num #\$) (unknown #\?) (file "../discrete-lisp/weather.lisp") (utility-function #'zero) (! #'defrow) (ready #'sort-rows) (run #'noop) (report #'noop) table ) #| you won't get the above unless you know the data structures STRUCTURES ========== Wme with goal = char ; if col.name has this char, then this is a class num = char ; if col.name has this char, then this is a numeric column unknown = char; if any item in row.cells is this char then this value is unknown file = string; place to load a file ! = thing to be called when we see "(! a d c )" ready = thing to do to prep the table run = thing to do to process the table report = thing to do to report the result table = Table Table with name = atom rows = list of Row klasses = list of Klass ;only one per class in rows cols = list of Col results = list of Result Row with cells = list of atom ; and #cells = #cols class = atom utility = number sortKey = number Klass with name = atom ; n = number ; stores how many rows with this Klass name in Table Col with name isa atom goalp isa boolean; true if this is a class attribute Sym isa Col with counts = hashtable ; counts of (value in column in class) Num isa Col with n = number sum = number sumsq = number min = number max = number |# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; How to count the frequencies in the above table? ; Note- if you understand the above structures, this code ; will make sense to you ;; from which2.lisp (defun train (tbl) (dolist (row (therows tbl)) (how-manys (thecols tbl) ; get the column headers (row-cells row) ; get the cells (row-class row) ; get the class of this row ))) (defun how-manys (cols cells class) (labels ((worker (col cell) (how-many class (col-name col) cell (sym-counts col)))) (mapcar #'worker cols cells))) ; run down cols and cells in parallel (defun how-many (class what cell hash) (when (knownp cell) ; skip any cell labelled "?" (inch `(,class ,what ,cell) hash) (inch `(,*every* ,what ,cell) hash))) (defun inch (key hash) "increment a hash bucket from zero" (incf (gethash key hash 0))) (defun !how-manys1 () (reset-seed) (data "../data/discrete-lisp/weather.lisp") (train (thetable)) (with-output-to-string (s) (dolist (col (thecols)) (showh (sym-counts col) :stream s)))) > (!how-manys1) "(ALLQZJX FORECAST OVERCAST) = 4 (ALLQZJX FORECAST RAINY) = 5 (ALLQZJX FORECAST SUNNY) = 5 (NO FORECAST RAINY) = 2 (NO FORECAST SUNNY) = 3 (YES FORECAST OVERCAST) = 4 (YES FORECAST RAINY) = 3 (YES FORECAST SUNNY) = 2 (ALLQZJX TEMP COOL) = 4 (ALLQZJX TEMP HOT) = 4 (ALLQZJX TEMP MILD) = 6 (NO TEMP COOL) = 1 (NO TEMP HOT) = 2 (NO TEMP MILD) = 2 (YES TEMP COOL) = 3 (YES TEMP HOT) = 2 (YES TEMP MILD) = 4 (ALLQZJX HUMIDTY HIGH) = 7 (ALLQZJX HUMIDTY NORMAL) = 7 (NO HUMIDTY HIGH) = 4 (NO HUMIDTY NORMAL) = 1 (YES HUMIDTY HIGH) = 3 (YES HUMIDTY NORMAL) = 6 (ALLQZJX WINDY FALSE) = 8 (ALLQZJX WINDY TRUE) = 6 (NO WINDY FALSE) = 2 (NO WINDY TRUE) = 3 (YES WINDY FALSE) = 6 (YES WINDY TRUE) = 3 (ALLQZJX !PLAY NO) = 5 (ALLQZJX !PLAY YES) = 9 (NO !PLAY NO) = 5 (YES !PLAY YES) = 9" ; What's this "ALLQZKK" nonsense? Well, sometimes it is useful ; to count column range frequencies in EVERY class. So we make ; up a class name (something using the rarest letters- as defined ; by the point scores in SCRABBLE QZJX) and count all (column range) ; pairs in that majic EVERY class. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Using the above, lets sort all the ranges according ; to their ability to distinquish one class from all the others. ; In the following code, if there are N classes in the system, ; then we make each one the "target". Our goal then is to ; divide the data into ; a) "target1, rest1" (where "rest1" is everything but "target1") ; b) "target2, rest2" (where "rest2" is everything but "target2") ; etc (defun learn (tbl report) (dolist (target (theklasses tbl)) (learn1 target tbl report) )) (defun learn1 (target tbl report) (let ((which (round0 target tbl))) (rounds target which tbl report))) (defun round0 (target tbl) "returns a sorted list of triples (score col value) where SCORE is higher if (col value) is more common in the BEST target class rather than the REST" (let (out (n (length (therows tbl)))) ; the total number of rows is "n" (labels ((worker (hash want m ; the number of rows for this class is "m" what class value &aux s) (if (eql class want) (if (setf s (b^2/b+r hash want m n what value)) (push (list (round s 0.01) what value) out))))) (dolist (col (thecols tbl)) ; for every column (unless (col-goalp col) ; that's not the goal (dokeys (key (sym-counts col)) ; for everything counted in that col (worker (sym-counts col) ; get the hash table counds (klass-name target) ; what class are we targetting? (klass-n target) ; how many of them do we have? (col-name col) ; what is the col name? (first key) ; what class is being counted? (third key) ; what value we looking at? )))) (sort out #'> :key #'first)))) (defun b^2/b+r (hash want m n what value) (let* ((every (gethash `(,*every* ,what ,value) hash 0)) (b0 (gethash `(,want ,what, value) hash 0)) (r0 (- every b0)) (b (/ b0 m)) ; ratio in target (r (/ r0 (- n m)))) ; ration everwhere else (if (> b r) ; in more better than rester (/ (* b b) ; b^2/(b+r) (+ b r (randf 0.0000001)))))) ; add a pinch to dodge div/0 errors ;;; so does the above all work? well, we need a test rig (defun which2 (&optional (tbl (thetable)) (report t)) (train tbl) (learn tbl report) ) (defun rounds (class which tbl report) (declare (ignore tbl)) (format report ";;; ~a~%" (klass-name class)) (dolist (one which) (format report " ~a~%" one))) (defun !learn1 () (reset-seed) (data "../data/discrete-lisp/weather.lisp") (with-output-to-string (s) (which2 (thetable) s))) (deftest !learn () (test (!learn1) ";;; NO (56 HUMIDTY HIGH) ;; best predictor for not playing golf (44 FORECAST SUNNY) (39 WINDY TRUE) (26 TEMP HOT) (22 FORECAST RAINY) ;;; YES (51 HUMIDTY NORMAL) ;; best predictor for playing golf (44 FORECAST OVERCAST) (42 WINDY FALSE) (23 TEMP MILD) (21 TEMP COOL)"))