(ns code.privacy.summarize_data
  (:use (code.utils utils))
  (:use (code.learners k_means))
  (:use (incanter core stats)))

(defn summarize
  "Clusters `data` into `k` groups by calling `k-means-model` with
  `distance`, then returns a lazy sequence containing the result of
  `mysummary` applied to each cluster (with the same `distance`).

  `k-means-model` and `mysummary` are not defined in this file; they are
  expected to come from the namespaces pulled in by the `:use` clauses."
  [k data distance]
  (map #(mysummary % distance)
       (k-means-model k data distance)))

(defn iscovered?
  "Returns true when the row `one` falls inside at least one entry of
  `summary`, false otherwise (including when `summary` is empty).

  The last element of `one` is dropped before measuring (presumably it is
  the class label -- confirm against callers). For each summary entry,
  `(first entry)` is the reference point handed to `distance` and
  `(second entry)` is the largest distance still counted as covered.

  `idx` is accepted for interface compatibility but is not used."
  [one summary distance idx]
  (let [features (butlast one)] ; invariant across summary entries
    (boolean
      (some (fn [entry]
              (<= (distance features (first entry)) (second entry)))
            summary))))

(defn conflict
  "Returns, as a percentage (double), the share of rows in `test-data`
  whose `iscovered?` result is equal to `idx`. Returns 0.0 for empty
  `test-data` instead of dividing by zero.

  NOTE(review): `iscovered?` yields only true/false, so this is non-zero
  only when `idx` is itself a boolean -- confirm what callers pass."
  [test-data summary distance idx]
  (let [all (count test-data)]
    (if (zero? all)
      0.0
      (let [fight (count (filter #(= idx (iscovered? % summary distance idx))
                                 test-data))]
        (* 100.0 (/ fight all))))))

(defn coverage
  "Returns the percentage (double) of rows in `test-data` for which
  `iscovered?` is true. Returns 0.0 for empty `test-data`.

  Covered rows are counted rather than summed: `iscovered?` returns
  booleans, and booleans cannot be added with `+`."
  [test-data summary distance idx]
  (let [total (count test-data)]
    (if (zero? total)
      0.0
      (let [covered (count (filter #(iscovered? % summary distance idx)
                                   test-data))]
        (* (/ covered total) 100.0)))))

(defn new-test
  "Splits `test-data` by coverage and writes both halves to disk:
  rows for which `iscovered?` is false go to \"train.csv\", rows for which
  it is true go to \"test.csv\". Row order is preserved within each file.

  Returns a two-element vector holding the results of the two `save`
  calls, train first and test second."
  [test-data summary distance idx]
  (let [;; Pair every row with its coverage flag so `iscovered?` runs only
        ;; once per row; the lazy seq caches its realized elements.
        flagged    (map (fn [row] [row (iscovered? row summary distance idx)])
                        test-data)
        test-rows  (map first (filter second flagged))
        train-rows (map first (remove second flagged))]
    [(save (matrix train-rows) "train.csv")
     (save (matrix test-rows) "test.csv")]))