;;;;compiled under: ;;;;slime 1:20070927-3 ;;;;SBCL 1:1.0.11.0-1 ;;;;Peter Santiago, June 2nd, 2008 ;;;;Program note: the input data array is assumed to be AxB, where each row represents the B dimensions of one point and there are A points (defun accept-array (datarr) "accept-array: tests that datarr is an array" (typep datarr 'array)) ;end accept-array (defun quicksort-discrete-array (data z) "quicksort-discrete-array performs a quicksort on the last element in each row, such that the matrix is eventually sorted by the class characteristic." (let ((pivotvector (make-array (array-dimension data 1) :initial-element nil)) (lesser (make-array (list (array-dimension data 0) (array-dimension data 1)) :initial-element nil)) (greater (make-array (list (array-dimension data 0) (array-dimension data 1)) :initial-element nil)) (rows.lesser 0) (rows.greater 0) (x (array-dimension data 0)) (y (array-dimension data 1))) (if (<= x 1) (return-from quicksort-discrete-array data)) (if (<= z 1) (return-from quicksort-discrete-array data)) (print x) ;;set pivot ;(print 'pass+1) ;(print lesser) ;(print greater) (dotimes (j y) (setf (aref pivotvector j) (aref data (floor (- x 1) 2) j))) ;;iteration! ;(print 'more) (dotimes (i x) ;(print 'arr) ;(print (aref data i (- y 1))) (cond ((< (aref data i (- y 1)) (aref pivotvector (- y 1))) ;(print 'err) (dotimes (j y) (setf (aref lesser rows.lesser j) (aref data i j))) (setf rows.lesser (+ rows.lesser 1))) ((> (aref data i (- y 1)) (aref pivotvector (- y 1))) ;(print 'eer) (dotimes (j y) (setf (aref greater rows.greater j) (aref data i j))) (setf rows.greater (+ rows.greater 1))))) ;;adjust matrices ;(print 'here) (setf lesser (adjust-array lesser (list rows.lesser y))) (print lesser) (print (array-dimension lesser 0)) (setf greater (adjust-array greater (list rows.greater y))) (print greater) (print (array-dimension greater 0)) ;;recursion (array-concat (quicksort-discrete-array lesser (- z 1)) pivotvector (quicksort-discrete-array greater (- z 1))))) ;(format t "~a~%~a~%~a~%" (quicksort-discrete-array lesser (- z 1)) pivotvector (quicksort-discrete-array greater (- z 1))))) (defun array-concat (a1 vector a3) "array-append: returns a new array with all elements of a1 vector a3 in it. Note: the row size must be the same for all arrays" (let ((arr (make-array (list (+ (array-dimension a1 0) 1 (array-dimension a3 0)) (array-dimension a1 1)) :initial-element nil)) (currow -1)) (dotimes (i (array-dimension a1 0)) (setf currow (+ currow 1)) (dotimes (j (array-dimension a1 1)) (setf (aref arr currow j) (aref a1 i j)))) (setf currow (+ currow 1)) (dotimes (j (array-dimension vector 0)) (setf (aref arr currow j) (aref vector j))) (dotimes (i (array-dimension a3 0)) (setf currow (+ currow 1)) (dotimes (j (array-dimension a3 1)) (setf (aref arr currow j) (aref a3 i j)))) arr)) (defun set-distance-matrix(datarray) "set-distance-matrix: Creates and returns an array representing the distances between all objects" (let ((distancearray (make-array (list (array-dimension datarray 0) (array-dimension datarray 0)) :initial-element 0)) (sum 0)) (dotimes (i (array-dimension datarray 0) distancearray) (dotimes (j (array-dimension datarray 0)) (setf sum 0) (dotimes (k (array-dimension datarray 1)) (setf sum (+ sum (expt (- (aref datarray i k) (aref datarray j k)) 2)))) (setf (aref distancearray i j) (sqrt sum)))))) ;end set-distance-matrix (defun set-distance-matrix2 (data1 data2) "set-distance-matrix2: Creates and returns an array representing the distances between objects of two sets" (let ((distancearray (make-array (list (array-dimension data1 0) (array-dimension data2 0)) :initial-element 0)) (sum 0)) (dotimes (i (array-dimension distancearray 0) distancearray) (dotimes (j (array-dimension distancearray 1)) (setf sum 0) (dotimes (k (array-dimension data1 1)) (setf sum (+ sum (expt (- (aref data1 i k) (aref data2 j k)) 2)))) (setf (aref distancearray i j) (sqrt sum)))))) ;end set-distance-matrix2 (defun reduce-dist(oldist xmatrix column) "reduce-dist:reduces the distancematrix using another magic equation" (let ((reducedist (make-array (list (array-dimension oldist 0) (array-dimension oldist 1)) :initial-element 0))) (dotimes (i (array-dimension oldist 0) reducedist) (dotimes (j (array-dimension oldist 1)) (setf (aref reducedist i j) (sqrt (- (expt (aref oldist i j) 2) (expt (- (aref xmatrix i column) (aref xmatrix j column)) 2)))))))) ;end reduce-dist (defun dist(distmatrix a) "returns the number of the object furthest from a, according to the distmatrix" (let ((b 0) (temp 0)) (dotimes (i (array-dimension distmatrix 0) b) (cond ((> (aref distmatrix a i) temp) (setf temp (aref distmatrix a i)) (setf b i)))))) ;end dist (defun choose-distance-objects (dist) "choose-distant-objects: chooses the two longest away object thingies through ancient magic" (let ((o.b 0) (o.a 0) (vlist 0)) (dotimes (i 5) ;5 is the number of iterations suggested by Faloutsos & Lin (setf o.a (dist dist o.b)) (setf o.b (dist dist o.a))) (setf vlist (list o.a o.b)))) ;end choose-distant-objects (defun fastmap (k datamatrix &optional (distancematrix (set-distance-matrix datamatrix)) (x (make-array (list (array-dimension datamatrix 0) k) :initial-element nil)) (pa (make-array (list 2 k) :initial-element nil)) (cnum -1)) ;k is the desired number of dimensions, datamatrix if the data; all the rest are optional and generally computed by fastmap for subsequent passes "fastmap: does important things; generally awesome." (let ((templist 0) ;for storing the output of choose-distance-objects (dprimematrix 0) ;for storing the reduced distancematrix (o.a 0) ;stores object a (o.b 0) ;stores object b (x.i 0)) ;stores the calculated value of x.i (a bit extraneous, but I think it helps with readability) ;;1 base case conditional (if (<= k 0) (return-from fastmap x)) ;returns the new coordinates of objects (setf cnum (+ cnum 1)) ;This is here because that's how faloutsos and lin organized their algorithm ;;2 choose pivot objects (setf templist (choose-distance-objects distancematrix)) (setf o.a (first templist)) (setf o.b (second templist)) ;;3 record the ids of the pivot objects (setf (aref pa 0 cnum) o.a) (setf (aref pa 1 cnum) o.b) ;;4 distance something (if (= (aref distancematrix o.a o.b) 0) (dotimes (i (array-dimension x 0)) (setf (aref x i cnum) 0))) ;;5 project the objects on the line(o.a,o.b) (dotimes (i (array-dimension datamatrix 0)) (setf x.i (/ (- (+ (expt (aref distancematrix o.a i) 2) (expt (aref distancematrix o.a o.b) 2)) (expt (aref distancematrix o.b i) 2)) (* 2 (aref distancematrix o.a o.b)))) (setf (aref x i cnum) x.i)) ;;6 consider the projections of the objects on a hyper-plaine perpendicular to the line (o.a,o.b); find new distance function (setf dprimematrix (reduce-dist distancematrix x cnum)) (fastmap (- k 1) datamatrix dprimematrix x pa cnum))) ;end fastmap (defun stress (data1 data2) "stress: checks the difference between two nxn matrices. The primary purpose of this function is to ensure distance between the original and 'fastmapped' objects are relatively maintained" (let ((dist1 (set-distance-matrix data1)) (dist2 (set-distance-matrix data2)) (computed (make-array (list (array-dimension data1 0) (array-dimension data1 1)) :initial-element nil))) (dotimes (i (array-dimension computed 0) computed) (dotimes (j (array-dimension computed 1)) (setf (aref computed i j) (- (aref dist1 i j) (aref dist2 i j))))))) ;end stress (that seems unlikely) (defun k-means (k data) "k-means: performs a k-means clustering." (let ((memberof (make-array (array-dimension data 0) :initial-element nil)) ;holds current cluster membership (oldmemberof (make-array (array-dimension data 0) :initial-element nil)) ;holds old cluster membership (num.in.cluster (make-array k :initial-element 0)) ;holds number of objects per cluster, to facilitate centroid calculation (centroids (make-array (list k (array-dimension data 1)) :initial-element 0)) ;holds centroid data (distances (make-array (list (array-dimension data 0) k) :initial-element nil)) ;holds distance matrix (tempmin nil)) ;holds temporary minimum value. Used when recalculating cluster membership ;;initial-partition (dotimes (i (array-dimension data 0)) (setf (aref memberof i) (mod i k)) (setf (aref num.in.cluster (mod i k)) (+ (aref num.in.cluster (mod i k)) 1))) ;;initial-centroids (dotimes (i (array-dimension data 0)) (dotimes (j (array-dimension data 1)) (setf (aref centroids (aref memberof i) j) (+ (/ (aref data i j) (aref num.in.cluster (aref memberof i))) (aref centroids (aref memberof i) j))))) ;;iteration: run at a max # of points. Exit once membership remains constant (dotimes (iteration (array-dimension data 0) memberof) ;(format t "Iteration number: ~a~%" iteration) (if (equalp oldmemberof memberof) (return memberof)) ;;set the oldmemberof (dotimes (i (array-dimension memberof 0)) (setf (aref oldmemberof i) (aref memberof i))) ;;calculate distances, determine nearest centroid & membership (setf distances (set-distance-matrix2 data centroids)) (dotimes (i (array-dimension distances 0)) (setf tempmin (aref distances i 0)) (dotimes (j (array-dimension distances 1)) (cond ((< (aref distances i j) tempmin) (setf tempmin (aref distances i j)) (setf (aref memberof i) j))))) ;;calculate number of items per cluster (setf num.in.cluster (make-array k :initial-element 0)) (dotimes (i (array-dimension memberof 0)) (setf (aref num.in.cluster (aref memberof i)) (+ (aref num.in.cluster (aref memberof i)) 1))) ;;calculate mean (setf centroids (make-array (list k (array-dimension data 1)) :initial-element 0)) (dotimes (i (array-dimension data 0)) (dotimes (j (array-dimension data 1)) (setf (aref centroids (aref memberof i) j) (+ (/ (aref data i j) (aref num.in.cluster (aref memberof i))) (aref centroids (aref memberof i) j)))))) (print num.in.cluster) memberof)) (defun save-item (filename item) "save-item: allows the variable item to be stored in the file at filename as a lisp object" (with-open-file (out filename :direction :output :if-exists :supersede) (with-standard-io-syntax (print item out)))) ;end save-item (defun load-item (filename) "load-item: allows the file at filename to be parsed and stored" (let ((x 0)) (with-open-file (in filename) (with-standard-io-syntax (setf x (read in)) x)))) ;end load-item (defun make-plottable (matrix filename) "make-plottable: formats the given data matrix and throws it into the specified file so that it is easily readable by gnuplot" (with-open-file (out filename :direction :output :if-exists :supersede) (with-standard-io-syntax (dotimes (i (array-dimension matrix 0)) (dotimes (j (array-dimension matrix 1)) (format out "~a " (aref matrix i j))) (format out "~%")))) 'done) ;end save-item