;;note the pima-diabetes relation at the bottom of the file ;;all of this code creates a relation based on whatever data set you pass it ;; however it must be in same format as the pima-diabetes ;; ;;you can create a new relation by the new function ;;> (new (pima-daibetes)) ;; then you can access all aspects of the database with commands like ;;> (rel-rows *the-relation*) ;; also the discretizer function takes non-discrete data and make it discrete ;; thats why (rel-buffer *the-relation*) shows you thae actual data ;; and (rel-rows *the-relation* only shows values between 1 and 10 ;; ;;good luck (defparameter *the-relation* nil) (defun new (&optional (r (make-rel))) "Create a fresh 'the-relation'." (setf *the-relation* r)) ;;; structs (defstruct rel "A relation stores rows of data and meta-knowledge about each column and some derived data." (name "relation") buffer dims (diagonal 0) bins columns (width 0) ; width = number of columns rows (height 0) ; height = number of rows ) (defstruct feature "Features are either continuous or discrete" name values goalp) (defstruct (continuous (:include feature)) "Continuous features are characterized by the min and max value" (max most-negative-double-float) (min most-positive-double-float) (n 0)) (defstruct (discrete (:include feature)) "Discrete features are characterized by their range of symbols." range) ;;; macro stuff ;; general macos (defmacro o (x) "print a named value; e.g. (let ((a 22)) (o a)) ==> [a]=[22]" `(progn (format t "[~a]=[~a] " (quote ,x) ,x) ,x)) (defmacro oo (&rest l) "print a list of names values; e.g. (let ((aa 22) (b 33)) (oo a b)) ==> [a]=22;[b]=[33]" `(progn ,@(mapcar #'(lambda(x) `(o ,x)) l) (terpri))) (defmacro inc> (new old) `(if (> ,new ,old) (setf ,old ,new))) (defmacro inc< (new old) `(if (< ,new ,old) (setf ,old ,new))) (defmacro doitems ((one n list &optional out) &body body ) `(let ((,n 0)) (dolist (,one ,list ,out) (incf ,n) ,@body))) (defmacro doitems1 ((one n list &optional out) &body body ) `(let ((,n 1)) (dolist (,one ,list ,out) (incf ,n) ,@body))) ;;; lib stuff ;; misc (defun ?elt (choices) (elt choices (floor (random (length choices))))) (defun ?quantity (min max) (if (> min max) (?quantity max min) (+ min (random (+ 1 (- max min)))))) (defun sum (l) (let ((s 0)) (dolist (x l s) (incf s x)))) (defun countlist (l) (let (out) (doitems (one n l (reverse out)) (push (cons (1- n) one) out)))) (defstruct counts class sample frequency) (defun h2l (h) (let (all) (labels ((keep (k v) (push (make-counts :class (second k) :sample (fourth k) :frequency v) all))) (maphash #'keep h) all))) ;;;; nearest neighbor stuff (defun distance1 (x y col max) "in this system, all ranges are numbered 1..N and '0' denotes a wild card" (cond ((zerop x) (?QUANTITY 0 10));;BUG? should this be 1..10 ((zerop y) (?QUANTITY 0 10)) ((numericp col) (- x y)) ((eql x y) 0) (t max))) (defun distance (l1 l2 max &optional (cols (rel-columns *the-relation*))) "Returns a number 0..1" (let ((sum 0)) (mapc #'(lambda (one two col) (let* ((delta (distance1 one two col max)) (pow (expt delta 2))) (incf sum pow))) l1 l2 cols) (/ (sqrt sum) max))) ;;; relation definiton and access stuff (defmacro defrelation (name &body body) "Return a discretiezed relation, with all the ranges converted to integer indexes 1...N" `(defun ,name (&key (*the-relation* *the-relation*) (discretizer #'equal-width)(bins 10));;changed from eq-width (relation ,name) (setf (rel-bins *the-relation*) bins) ,@body ;;dat gets passed in at runtime (funcall discretizer *the-relation*) (ranges *the-relation*))) (defmacro relation (x) "define a new relation. make it 'the' relation." `(progn (new) (setf (rel-name *the-relation*) ',x))) (defun defattr (new) "Add a new attribute to the current relation'." (or *the-relation* (new)) (incf (rel-width *the-relation*)) (setf (rel-columns *the-relation*) (append (rel-columns *the-relation*) (list new)))) (defmacro attribute (name &rest args) "Redine a new attribute. Discrete attributes have more than one symbol after the attribute name." `(if (rest ',args) (defsomes ',name nil ',args) (defnums ',name nil))) (defmacro attribute! (name &rest args) "Define a discrete goal attribute" `(defsomes ',name t ',args)) (defun defnums (x goalp) "Define a numeric attribute" (defattr (make-continuous :name x :goalp goalp))) (defun numericp (column) "Recognize a discrete attribute." (eq (type-of column) 'continuous)) (defun defsomes (x goalp l) "Define a discrete attribute." (defattr (make-discrete :name x :range l :goalp goalp))) (defmacro data (&rest args) "Define a new piece of data." `(data1 ',args)) (defun setbins (numb) "sets the number of bins" (setf (rel-bins *the-relation*) numb)) (defun data1 (row) "Add a row to the buffer of the current realtion, updating some counters along the way." (let ((width (rel-width *the-relation*))) (labels ((min-max (datum column) (when (numericp column) (inc> datum (continuous-max column)) (inc< datum (continuous-min column))))) (cond ((eql width (length row)) (mapcar #'min-max row (rel-columns *the-relation*)) (push row (rel-buffer *the-relation*)) (incf (rel-height *the-relation*))) (t (warn "wrong width ~a~%" row)))))) (defun equal-width (&optional (*the-relation* *the-relation*)) "Convert all numerics to a range 1..numbins for (min to max)/bins." (let ((bins (rel-bins *the-relation*))) (labels ((fudge (x col) "need to move 0..N-1 to N. Can't blow number of bins" (min bins (1+ (binned1 x col)))) (binned1 (x col) (floor (/ (- x (continuous-min col)) (/ (- (continuous-max col) (continuous-min col)) bins)))) (binned (x col) (if (numericp col) (fudge x col) (1+ (position x (discrete-range col)))))) (dolist (one (rel-buffer *the-relation*) *the-relation*) (let* ((old (rel-columns *the-relation*)) (new (mapcar #'binned one old))) (push new (rel-rows *the-relation*))))))) (defun ranges (&optional (*the-relation* *the-relation*)) (let (dims (bins (rel-bins *the-relation*)) (cols (rel-columns *the-relation*))) (labels ((counts (col) (if (numericp col) bins (length (discrete-range col))))) (let ((dims (mapcar #'counts (butlast cols)))) (setf (rel-dims *the-relation*) dims (rel-diagonal *the-relation*) (sqrt (sum dims)))))) *the-relation*) (defrelation pima-diabetes (attribute preg real) (attribute plas real) (attribute pres real) (attribute skin real) (attribute insu real) (attribute mass real) (attribute pedi real) (attribute age real) (attribute! class tested_negative tested_positive) (data 1 89 66 23 94 28.1 0.167 21 tested_negative) (data 1 85 66 29 0 26.6 0.351 31 tested_negative) (data 5 116 74 0 0 25.6 0.201 30 tested_negative) (data 6 148 72 35 0 33.6 0.627 50 tested_positive) (data 8 183 64 0 0 23.3 0.672 32 tested_positive) (data 0 137 40 35 168 43.1 2.288 33 tested_positive) (data 3 78 50 32 88 31 0.248 26 tested_positive) )