(ns code.learners.k_means
  (:use (code.utils utils preprocess))
  (:use (incanter core stats)))

;; NOTE: `centroid`, `k-majority` and `shuffle1` are not defined in this file;
;; they are presumably provided by the code.utils namespaces pulled in above.

(defn find-nearest-cluster
  "Return the cluster from `clusters` whose centroid is nearest to `one`.

  one      - the sample to locate.
  clusters - sequence of clusters; `centroid` is called on each of them.
  dist     - distance function, called as (dist one cluster-centroid).

  When several clusters are equally near, the earliest one in `clusters`
  wins (sort-by is stable). Returns nil when `clusters` is empty."
  [one clusters dist]
  (let [scored (map #(vector (dist one (centroid %)) %) clusters)]
    (second (first (sort-by first scored)))))

(defn k-means-classifier
  "Classify the single sample `one` against a set of `clusters`.

  The nearest cluster is found with `find-nearest-cluster`. If it has exactly
  one row, the last element of that cluster is returned (presumably the class
  label stored in the last column -- confirm against the data layout).
  Otherwise the kn = 1 + floor(sqrt(rows)) members closest to `one` are
  selected, their last column is extracted, and `k-majority` is applied to
  those values."
  [one clusters dist]
  (let [nearest-cluster (find-nearest-cluster one clusters dist)]
    (if (= (nrow nearest-cluster) 1)
      (last nearest-cluster)
      (let [kn        (inc (int (Math/sqrt (nrow nearest-cluster))))
            k-nearest (map second
                           (take kn
                                 (sort-by first
                                          (map #(vector (dist one %) %)
                                               nearest-cluster))))
            ;; Transposing turns the last column into the last row.
            klass     (last (to-vect (trans k-nearest)))]
        (k-majority klass)))))

;; Not referenced anywhere in this file; kept because other namespaces may
;; still refer to it.
(def CLUSTERS [])

(defn Membership
  "Pair the sample `one` with its closest centroid.

  centroids - sequence of candidate centroids.
  one       - the sample to assign.
  dist      - distance function, called as (dist centroid one).

  Returns [one closest-centroid]. Ties go to the earliest centroid in
  `centroids`; closest-centroid is nil when `centroids` is empty."
  [centroids one dist]
  (let [distances (map #(vector % (dist % one)) centroids)
        closest-centroid (first (first (sort-by second distances)))]
    [one closest-centroid]))

(defn MakeClusters
  "Partition `dataset` into clusters, one per centroid.

  Each element of `dataset` is assigned to its closest centroid (see
  `Membership`); the members of each centroid are then packed into a matrix.
  Returns a vector of those matrices, in the order of `centroids`.

  Centroids that attract no members are skipped, so the result may contain
  fewer clusters than there are centroids."
  [centroids dataset dist]
  (let [members (vec (map #(Membership centroids % dist) dataset))]
    (vec
      (remove nil?
              (for [cent centroids
                    :let [rows (map first
                                    (filter #(= cent (second %)) members))]
                    ;; Never hand an empty row list to `matrix`.
                    :when (seq rows)]
                (matrix rows))))))

(defn k-means
  "Run k-means starting from `centroids` and return the final clusters.

  centroids - initial centroids.
  dataset   - the samples to cluster.
  dist      - distance function, passed through to `MakeClusters`.
  max-iter  - optional upper bound on the number of assignment passes
              (default 100). It guarantees termination when `dist` is such
              that the centroids oscillate instead of converging.

  Each pass re-assigns the samples to the CURRENT centroids and recomputes
  the centroids from the resulting clusters. Iteration stops when the
  centroids no longer change or `max-iter` passes have been made."
  ([centroids dataset dist]
   (k-means centroids dataset dist 100))
  ([centroids dataset dist max-iter]
   (loop [C    centroids
          iter 1]
     ;; Cluster against the loop variable C, not the initial `centroids`,
     ;; otherwise every pass reproduces the first assignment.
     (let [clusters      (MakeClusters C dataset dist)
           new-centroids (map centroid clusters)]
       (if (or (= new-centroids C) (>= iter max-iter))
         clusters
         (recur new-centroids (inc iter)))))))

(defn k-means-model
  "Build a k-means model: cluster `D` using the first `k` elements of
  (shuffle1 D) as initial centroids and `dist` as the distance function.
  Returns the clusters produced by `k-means`."
  [k D dist]
  (k-means (take k (shuffle1 D)) D dist))