(ns code.privacy.privacy
  (:use (code.utils utils preprocess))
  (:use (code.pls cliff ncliff))
  (:use (incanter core stats)))

;; ---------------------------------------------------------------------------
;; Private helpers
;; ---------------------------------------------------------------------------

(defn- candidate-rows
  "Returns [m] when the matrix m has exactly one row, otherwise m itself.
  Neighbour pools are passed through this before being given to get-nearest."
  [m]
  ;; NOTE(review): presumably a lone row has to be handed over as a
  ;; one-element collection of rows -- confirm against get-nearest.
  (if (= (nrow m) 1) [m] m))

(defn- jiggle-toward-neighbour
  "Shared driver for jiggle2, jiggle3 and jiggle4.

  Y is split with (group-by last Y); the first group is bound to `clean` and
  the second to `buggy`. For every row of X a neighbour is looked up with
  get-nearest: in `buggy` when the row's last value is (= ... 0), in `clean`
  otherwise. `perturb` is called with the row and the neighbour, both without
  their last value, and returns the new attribute values. The row's original
  last value is appended unchanged. Returns the rewritten rows as a matrix."
  [X Y perturb]
  (let [separate-Y (group-by last Y)
        buggy (matrix (second (second separate-Y)))
        clean (matrix (second (first separate-Y)))]
    (matrix
      (vec
        (map
          (fn [row]
            ;; NOTE(review): (= label 0) is false for the double 0.0, so rows
            ;; whose label is stored as a double always use `clean` -- confirm
            ;; this is what callers expect.
            (let [pool (if (= (last row) 0) buggy clean)
                  near-x (first (get-nearest row (candidate-rows pool) numeric))
                  new-x (perturb (butlast row) (butlast near-x))]
              (conj (vec new-x) (last row))))
          X)))))

(defn- rows-not-pruned
  "Returns, as a matrix, the rows of data for which (member? row pruned) is
  falsey."
  [data pruned]
  (matrix (remove #(member? % pruned) data)))

;; ---------------------------------------------------------------------------
;; Single-value perturbation
;; ---------------------------------------------------------------------------

(defn jiggle1
  "Returns a version of x according to the normal distribution of the data.

  Draws one value from a normal distribution with the mean and standard
  deviation of mycol and takes its absolute value. When x has no fractional
  part the drawn value is rounded with Math/round; otherwise it is returned
  as drawn."
  [x mycol]
  ;; zero? is used instead of (not= 0 ...): the difference is a double, and
  ;; (= 0 0.0) is false in Clojure, so the old test was true for every x.
  (let [fractional? (not (zero? (- x (Math/floor x))))
        newx (abs (sample-normal 1 :mean (mean mycol) :sd (sd mycol)))]
    (if fractional? newx (Math/round newx))))

(defn jiggle
  "Returns a version of x perturbed by a small distance.

  The sorted result of (efb mycol 10) is scanned to pick `xfar`: the last bin
  value whose absolute distance from x is greater than the bin value before it
  (-10000 for the first bin). When xfar equals x a value sampled from mycol is
  returned; otherwise x is moved up or down (chosen at random) by
  (rand-int (- x xfar)) and the absolute value of the result is returned."
  [x mycol]
  (let [bincol (sort (efb mycol 10))
        ;; NOTE(review): `prev` is updated with the previous bin value, not
        ;; the previous distance -- kept as found, confirm the intent.
        xfar (loop [xm bincol
                    prev -10000
                    result nil]
               (if (empty? xm)
                 result
                 (recur (rest xm)
                        (first xm)
                        (if (> (abs (- x (first xm))) prev)
                          (first xm)
                          result))))
        xfar-dist (rand-int (- x xfar))
        ;; Both candidates are computed before one is chosen, as before.
        newx (if (= xfar x)
               (sample mycol :size 1 :replacement false)
               (abs (+ x xfar-dist)))
        newx-1 (if (= xfar x)
                 (sample mycol :size 1 :replacement false)
                 (abs (- x xfar-dist)))]
    (if (= (rand-int 2) 0) newx newx-1)))

(defn jiggle0
  "Returns a randomly chosen value of mycol that is not equal to x, or x
  itself when mycol holds no such value."
  [x mycol]
  (let [unique-x (filter #(not= % x) mycol)]
    (if (empty? unique-x)
      x
      (first (shuffle unique-x)))))

;; ---------------------------------------------------------------------------
;; Row perturbation toward a nearest neighbour
;; ---------------------------------------------------------------------------

(defn jiggle2
  "Returns the rows of X as a matrix with every attribute a (all values but
  the last) replaced by a + |a - n| / 2, where n is the matching attribute of
  the neighbour found by get-nearest among the rows of Y. The last value of
  each row is kept."
  [X Y]
  (jiggle-toward-neighbour
    X Y
    (fn [attrs near-attrs]
      (map #(+ %1 (/ (abs (- %1 %2)) 2)) attrs near-attrs))))

(defn jiggle3
  "Same as jiggle2 but with a step of |a - n| / 4 instead of |a - n| / 2."
  [X Y]
  (jiggle-toward-neighbour
    X Y
    (fn [attrs near-attrs]
      (map #(+ %1 (/ (abs (- %1 %2)) 4)) attrs near-attrs))))

(defn jiggle4
  "Like jiggle2, but each attribute's step is |a - n| * r with a fresh
  r = (+ 0.15 (rand 0.35)), and the whole row is moved either up or down,
  chosen at random once per row."
  [X Y]
  (jiggle-toward-neighbour
    X Y
    (fn [attrs near-attrs]
      ;; Both sequences are lazy; only the chosen one is ever realised.
      (let [new-x (map #(+ %1 (* (abs (- %1 %2)) (+ 0.15 (rand 0.35))))
                       attrs near-attrs)
            new-x1 (map #(- %1 (* (abs (- %1 %2)) (+ 0.15 (rand 0.35))))
                        attrs near-attrs)]
        (if (= (rand-int 2) 0) new-x new-x1)))))

(defn jiggle5
  "Returns the rows of X as a matrix, perturbed value by value.

  X is grouped by the last value of each row and the groups are sorted by
  that value; the first group is `clean`, the second `buggy`. A row whose last
  value equals that of the first `clean` row gets its neighbour from `buggy`,
  any other row from `clean`. One factor (+ 0.15 (rand b)) is drawn per row.
  Every index that is a member of sav keeps its value. Every other index is
  moved, up or down at random, by |v - n| * factor. A downward move that ends
  below 0 is replaced by the absolute value of (result + column minimum)."
  [X sav b]
  (let [separate-X (sort-by first (group-by last X))
        buggy (matrix (second (second separate-X)))
        clean (matrix (second (first separate-X)))
        ;; Columns of X, built at most once and only if a column minimum is
        ;; actually needed.
        columns (delay (to-vect (trans X)))]
    (matrix
      (vec
        (map
          (fn [row]
            (let [pool (if (= (last row) (last (first clean))) buggy clean)
                  near-x (first (get-nearest row (candidate-rows pool) numeric))
                  nudge (+ 0.15 (rand b))]
              (mapv
                (fn [idx]
                  (let [v (nth row idx)]
                    (if (member? idx sav)
                      v
                      (if (= (rand-int 2) 0)
                        ;; NOTE(review): the previous code also computed this
                        ;; value clamped to the column maximum but returned
                        ;; the unclamped one; the returned value is unchanged
                        ;; here -- confirm whether clamping was intended.
                        (+ v (* (abs (- v (nth near-x idx))) nudge))
                        (let [small (- v (* (abs (- v (nth near-x idx))) nudge))]
                          ;; Alternative condition left by the author:
                          ;; (< small (apply min (nth (to-vect (trans X)) x1)))
                          (if (< small 0)
                            (abs (+ small (apply min (nth @columns idx))))
                            small))))))
                (range (count row)))))
          X)))))

(defn jiggle6
  "Returns the rows of X as a matrix, perturbed value by value.

  X is grouped by the value at index att. For each row a pool of rows is
  built from the groups that are not= to the row's own group (as a matrix),
  and the neighbour is the first result of get-nearest over that pool. Every
  index that is a member of sav keeps its value. Every other index is moved,
  up or down at random, by |v - n| * (+ 0.15 (rand b)), with a fresh factor
  for every value."
  [X sav att b]
  (let [groups (group-by #(nth % att) X)
        separate-X (map second groups)
        sx (map first groups)
        ;; Memoized so the pool for a label is built once, not once per row.
        ;; NOTE(review): the exclusion compares a matrix with a group of rows
        ;; using not= -- kept as found; confirm it matches in the Incanter
        ;; version in use.
        best-others (memoize
                      (fn [sav-label]
                        (let [best (matrix (nth separate-X
                                                (FindPos1 sav-label sx 0)))]
                          (matrix (apply concat
                                         (filter #(not= best %) separate-X))))))
        ;; b=0.3501, 0.7501
        nudge (fn [] (+ 0.15 (rand b)))]
    (matrix
      (vec
        (map
          (fn [row]
            (let [one (best-others (nth row att))
                  near-x (first (get-nearest row (candidate-rows one) numeric))]
              (mapv
                (fn [idx]
                  (let [v (nth row idx)]
                    (if (member? idx sav)
                      v
                      ;; Direction is drawn before the factor, as before.
                      (let [move (if (= (rand-int 2) 0) + -)]
                        (move v (* (abs (- v (nth near-x idx))) (nudge)))))))
                (range (count row)))))
          X)))))

;; ---------------------------------------------------------------------------
;; Whole-dataset entry points
;; ---------------------------------------------------------------------------

(defn get-fakes
  "Returns data changed by a small distance: every value in every column
  except the last is replaced by (jiggle value column), and the last column
  is bound back on unchanged."
  [data]
  (let [last-col (dec (ncol data))
        indy (sel data :cols (range last-col))
        klass (sel data :cols last-col)
        faked (vec (map (fn [col] (map #(jiggle % col) col))
                        (trans indy)))]
    (bind-columns (trans faked) klass)))

(defn myprivatize-data
  "Calls (ncliff n data 9 deci-n 9) to obtain `pruned`. Returns the rows of
  data that are not members of `pruned`, changed by get-fakes, followed by
  (jiggle2 pruned data). The third and fifth arguments are ignored."
  [n data _ deci-n _]
  (let [pruned (ncliff n data 9 deci-n 9)
        unpruned (rows-not-pruned data pruned)]
    (bind-rows (get-fakes unpruned) (jiggle2 pruned data))))

(defn ourprivatize-data
  "Calls (ncliff n data 9 deci-n 9) and returns the result perturbed by
  jiggle3 against data. The third and fifth arguments are ignored."
  [n data _ deci-n _]
  (let [pruned (ncliff n data 9 deci-n 9)]
    (jiggle3 pruned data)))

(defn ourprivatize-data1
  "Returns (jiggle5 data sav b)."
  [data sav b]
  (jiggle5 data sav b))

(defn ourprivatize-data2
  "Returns (get-fakes data)."
  [data]
  (get-fakes data))

(defn nprivatize-data
  "Calls (ncliff1 n data 9 deci-n 9) to obtain `pruned`. Returns the rows of
  data that are not members of `pruned`, changed by get-fakes, followed by
  `pruned` unchanged. The third and fifth arguments are ignored."
  [n data _ deci-n _]
  (let [pruned (ncliff1 n data 9 deci-n 9)
        unpruned (rows-not-pruned data pruned)]
    (bind-rows (get-fakes unpruned) pruned)))

(defn oprivatize-data
  "Calls (ocliff n data 9 deci-n 9) to obtain `pruned`. Returns the rows of
  data that are not members of `pruned`, changed by get-fakes, followed by
  `pruned` unchanged. The third and fifth arguments are ignored."
  [n data _ deci-n _]
  (let [pruned (ocliff n data 9 deci-n 9)
        unpruned (rows-not-pruned data pruned)]
    (bind-rows (get-fakes unpruned) pruned)))