;;;;compiled under: ;;;;slime 1:20070927-3 ;;;;SBCL 1:1.0.11.0-1 ;;;;Peter Santiago, June 2nd, 2008 ;;;;Program note: the input data array is assumed to be AxB, where each row represents the B dimensions of one point and there are A points (load "fastmap-kmeans-ancillary.lisp") (defun k-means (k data) "k-means: performs a k-means clustering." (let ((memberof (make-array (array-dimension data 0) :initial-element nil)) ;holds current cluster membership (oldmemberof (make-array (array-dimension data 0) :initial-element nil)) ;holds old cluster membership (num.in.cluster (make-array k :initial-element 0)) ;holds number of objects per cluster, to facilitate centroid calculation (centroids (make-array (list k (array-dimension data 1)) :initial-element 0)) ;holds centroid data (distances (make-array (list (array-dimension data 0) k) :initial-element nil)) ;holds distance matrix (tempmin nil) ;holds temporary minimum value. Used when recalculating cluster membership (finaldata (adjust-array data (list (array-dimension data 0) (+ (array-dimension data 1) 1))))) ;final thingie to be returned ;;initial-partition (dotimes (i (array-dimension data 0)) (setf (aref memberof i) (mod i k)) (setf (aref num.in.cluster (mod i k)) (+ (aref num.in.cluster (mod i k)) 1))) ;;initial-centroids (dotimes (i (array-dimension data 0)) (dotimes (j (array-dimension data 1)) (setf (aref centroids (aref memberof i) j) (+ (/ (aref data i j) (aref num.in.cluster (aref memberof i))) (aref centroids (aref memberof i) j))))) ;;iteration: run at a max # of points. Exit once membership remains constant (dotimes (iteration (array-dimension data 0) memberof) ;(format t "Iteration number: ~a~%" iteration) (if (equalp oldmemberof memberof) (return memberof)) ;;set the oldmemberof (dotimes (i (array-dimension memberof 0)) (setf (aref oldmemberof i) (aref memberof i))) ;;calculate distances, determine nearest centroid & membership (setf distances (set-distance-matrix2 data centroids)) (dotimes (i (array-dimension distances 0)) (setf tempmin (aref distances i 0)) (dotimes (j (array-dimension distances 1)) (cond ((< (aref distances i j) tempmin) (setf tempmin (aref distances i j)) (setf (aref memberof i) j))))) ;;calculate number of items per cluster (setf num.in.cluster (make-array k :initial-element 0)) (dotimes (i (array-dimension memberof 0)) (setf (aref num.in.cluster (aref memberof i)) (+ (aref num.in.cluster (aref memberof i)) 1))) ;;calculate mean (setf centroids (make-array (list k (array-dimension data 1)) :initial-element 0)) (dotimes (i (array-dimension data 0)) (dotimes (j (array-dimension data 1)) (setf (aref centroids (aref memberof i) j) (+ (/ (aref data i j) (aref num.in.cluster (aref memberof i))) (aref centroids (aref memberof i) j)))))) (dotimes (i (array-dimension data 0)) (setf (aref finaldata i (- (array-dimension finaldata 1) 1)) (aref memberof i))) finaldata)) ;end k-means (deftest test-k-means() (check (let ((x (k-means 3 (make-array '(4 2) :initial-contents '((0 0) (1 1) (500 500) (600 600)))))) (and (= (aref x 0 2) (aref x 1 2)) (= (aref x 2 2) (aref x 3 2))))))