(ns code.privacy.query_sad
  (:use (code.utils utils))
  (:use (incanter core stats)))

(defn sad
  "Sensitive attribute disclosure (sad) of a single sensitive value inside
  a query result.

  data-k  - the group of rows left after applying the query (may be nil).
  klass?  - the sensitive attribute value being measured; compared after
            coercion with `int`.
  att     - column index of the sensitive attribute within each row.

  Rows are grouped by their raw value in column att; the size of the first
  group whose key int-coerces to klass? is divided by the row count.
  Returns a double: 1.0 or 0.0 for a single-row result, 0.0 when data-k is
  nil, empty, or contains no matching group."
  [data-k klass? att]
  (let [n (nrow data-k)]
    (cond
      ;; Single row: disclosure is all-or-nothing.
      (= n 1)
      (if (= (int (nth (first data-k) att)) (int klass?)) 1.0 0.0)

      (or (nil? data-k) (= n 0))
      0.0

      :else
      (let [target (int klass?)
            groups (group-by #(nth % att) data-k)
            ;; first group whose (int-coerced) key is the requested value
            hit    (first (filter #(= (int (key %)) target) groups))]
        (if hit
          (/ (count (val hit)) (double n))
          0.0)))))

;; If all queries return no instances, sad is zero and the attacker uses the
;; distribution of the entire privatized data set for the best guess.
(defn zero-data
  "Fallback best guess used by sad+ when every sensitive value has the same
  sad score.

  Sorts the entries of (compress x) by their first element and returns the
  second element of the entry with the greatest first element.
  NOTE(review): compress comes from code.utils.utils; presumably it yields
  [count value] pairs, so this would be the most frequent value - confirm."
  [x]
  (second (first (reverse (sort-by first (compress x))))))

(defn sad+
  "Computes sad statistics for every distinct sensitive value.

  data-k      - rows matching the query; could come from the original data
                or from the privatized data.
  all-data-k  - the complete data set data-k was filtered from; only used
                for the fallback guess when all sad scores are equal.
  bin-data    - the efb of the original data, used as an aid to measure sad.
  att         - column number of the sensitive attribute being measured.

  Returns a flat sequence holding, for each distinct value v of column att
  in bin-data (ascending), these seven items:
    v, best-guess value, sad of the best guess, share of the top-scoring
    value in bin-data, (sad data-k v), (sad bin-data v), and the difference
    (sad bin-data v) - (sad data-k v)."
  [data-k all-data-k bin-data att]
  (let [sa-col     (nth (trans bin-data) att)
        ans        (sort (to-vect (extract-unique sa-col)))
        result0    (map #(vector % (sad data-k % att)) ans)
        top        (first (reverse (sort-by second result0)))
        result1    (first top)                 ; best guess SA
        result2    (second top)                ; highest SAD
        ;; true when no value stands out in the query result
        all-equal? (apply = (map second result0))
        result-one (if all-equal?
                     (int (zero-data (nth (trans all-data-k) att)))
                     result1)
        result-two (if all-equal?
                     ;; SAD for the fallback SA in the entire data set
                     (/ (count (filter #(= result-one %) sa-col))
                        (count bin-data))
                     result2)
        ;; SAD for the top-scoring SA in the entire data set
        result3    (/ (count (filter #(= result1 %) sa-col))
                      (count bin-data))]
    (mapcat (fn [v]
              (let [sad-k   (sad data-k v att)
                    sad-bin (sad bin-data v att)]
                [v result-one result-two result3
                 sad-k sad-bin (- sad-bin sad-k)]))
            ans)))

(defn query-score1
  "Scores a query against the binned original data.

  query     - sequence of [column-index value] pairs; a row matches when
              every listed column equals the paired value.
  bin-data  - binned original data handed to sad+ as the reference.
  data-k    - data set the query is run against.
  att       - column number of the sensitive attribute being tested.

  Returns the result of sad+ on the matching rows."
  [query bin-data data-k att]
  (loop [one-att query
         d       data-k]
    (if (empty? one-att)
      ;; sad+ takes four arguments: the matching rows, the full data set
      ;; they were drawn from, the binned reference data and the column.
      (sad+ d data-k bin-data att)
      (let [clause (first one-att)
            attnum (first clause)
            qval   (second clause)]
        (recur (rest one-att)
               (filter #(= (nth % attnum) qval) d))))))

(defn query-score3
  "Scores a query against the privatized data.

  The query is from the binned data; the binned values are kept in order so
  that instances matching the query can be extracted from the privatized
  data.

  query     - sequence of [column-index value] pairs taken from the binned
              data.
  bin-data  - binned original data.
  data-k    - privatized data the query is run against.
  att       - column number of the sav being tested.

  For each pair, a row is kept when its value v in that column satisfies
  lower <= v < value, where lower is the entry located just before value in
  the sorted binned column. All rows are kept for a pair whose value equals
  its own lower entry or equals -10000.

  Returns the result of sad+ on the matching rows."
  [query bin-data data-k att]
  (let [bdata   (map #(apply vector (sort %)) (to-vect (trans bin-data)))
        ;; Entry of the sorted binned column that precedes query-val.
        ;; NOTE(review): FindPos1 comes from code.utils.utils; presumably it
        ;; returns the position of query-val in att-col - confirm.
        get-bin (fn [attnum query-val]
                  (let [att-col (nth bdata attnum)
                        x       (FindPos1 query-val att-col 0)]
                    (nth att-col (if (= x 0) 0 (dec x)))))]
    (loop [one-att query
           d       data-k]
      (if (empty? one-att)
        (sad+ d data-k bin-data att)
        (let [clause (first one-att)
              attnum (first clause)
              qval   (second clause)]          ; value from query
          (recur (rest one-att)
                 (filter (fn [row]
                           (or (= qval (get-bin attnum qval))
                               (= qval -10000)
                               (and (< (nth row attnum) qval) ; value from privatized data
                                    (>= (nth row attnum)
                                        (get-bin attnum qval)))))
                         d)))))))

;; nb to self = suppress the class instead?
(defn query-scores
  "Runs the scoring function qs12 over every query and tabulates the results.

  queries      - collection of queries.
  bin-data     - binned original data, passed through to qs12.
  data-k       - data set passed through to qs12.
  qs12         - scoring function called as (qs12 query bin-data data-k att),
                 e.g. query-score1 or query-score3.
  att          - column number of the sensitive attribute.
  n            - value repeated on every output row.
  beforeafter  - value repeated on every output row.

  Returns a matrix with one row per query whose columns are: 1-based query
  number, beforeafter, n, followed by columns 1, 2 and 3 of the qs12
  results."
  [queries bin-data data-k qs12 att n beforeafter]
  (let [ans   (matrix (map #(qs12 % bin-data data-k att) queries))
        ; ans (trans (map sort (to-vect (trans ans1))))
        nrows (nrow ans)
        x     (matrix (repeat nrows beforeafter))
        x1    (matrix (range 1 (inc nrows)))
        y     (matrix (repeat nrows n))]
    (bind-columns x1 x y (sel ans :cols [1 2 3]))))