(ns code.privacy.query_generator
  (:use (code.utils utils preprocess))
  (:use (code.privacy datafly))
  (:use (incanter core stats)))

(defn get-query
  "Builds one random query of strength `n` against `data-k`.

  data-k - the data the query is drawn from (columns are attributes).
  n      - number of attributes the query constrains.
  X      - column indices (0-based) that must NOT be queried, i.e. the
           sensitive attributes. The class attribute is expected to be
           included in X.

  Returns a vector of [column-index value] pairs: `n` columns sampled
  without replacement from the columns not in X, each paired with one
  value sampled from that column's `extract-unique` result."
  [data-k n X]
  (let [no-of-attributes (ncol data-k)
        candidate-cols   (notX X (range no-of-attributes))
        ;; One entry per column of data-k, indexed by column number.
        attribute-values (mapv extract-unique (trans data-k))
        attributes       (sample candidate-cols :size n :replacement false)
        ;; `attributes` is not always a collection: the nil-`nrow` guard
        ;; covers the case where `sample` hands back a bare column index
        ;; (presumably when a single item is drawn -- confirm against the
        ;; Incanter version in use).
        att              (if (nil? (nrow attributes))
                           [attributes]
                           (vec attributes))]
    (mapv (fn [col]
            [col (sample (nth attribute-values col)
                         :size 1
                         :replacement false)])
          att)))

(defn query-worth
  "Returns true when at least one row of `data-k` satisfies every
  [column-index value] pair in `query`, false otherwise.

  An empty `query` is considered satisfied whenever `data-k` is non-empty."
  [query data-k]
  (let [matching-rows (fn [rows [col value]]
                        (filter #(= (nth % col) value) rows))]
    ;; Narrow the row set one constraint at a time; the query is worth
    ;; keeping only if something survives every constraint.
    (boolean (seq (reduce matching-rows data-k query)))))

(defn get-queries
  "Main entry point: collects distinct, satisfiable queries of strength `n`.

  data-k - the data queries are generated from and checked against.
  num1   - stop once this many consecutive attempts have failed to add a
           new query (a candidate is rejected when it is already in the
           result or matches no row of data-k).
  n      - query strength, passed through to `get-query`.
  X      - column indices excluded from querying, passed to `get-query`.

  Returns a vector of queries, each a vector of [att-num att-val] pairs.
  Collection also stops as soon as 1000 queries have been gathered."
  [data-k num1 n X]
  (let [max-queries 1000]
    (loop [pre        nil
           terminator 0
           result     []]
      (if (or (= terminator num1)
              (= (count result) max-queries))
        result
        (let [que       (get-query data-k n X)
              accepted? (and (not (member? que result))
                             (query-worth que data-k))]
          (recur result
                 ;; `pre` is the result as it stood before the previous
                 ;; attempt, so an unchanged result means that attempt failed.
                 (if (= pre result) (inc terminator) 0)
                 (if accepted? (conj result que) result)))))))

(defn store-queries
  "Generates query sets of strength 1, 2 and 4 for `data` and saves each
  one as a comma-delimited file whose name starts with `filename`.

  q        - passed through to `datafly-qis` together with `data` and
             [10 20]; the columns it returns are the ones queries may
             touch (every other column is excluded).
  data     - the source data; queries are generated against
             `(efb2 data)` minus its last row, transposed, with the last
             column of `data` bound back on.
  filename - prefix for the output file names.

  Returns a vector holding the results of the three `save` calls."
  [q data filename]
  (let [;; Computed once and shared by all three query sets. This assumes
        ;; `datafly-qis` is deterministic for fixed arguments -- TODO confirm.
        qis         (datafly-qis data q [10 20])
        excluded    (remove #(member? % qis) (range (ncol data)))
        data-k      (bind-columns (trans (butlast (efb2 data)))
                                  (sel data :cols (dec (ncol data))))
        queries-for (fn [strength]
                      (get-queries data-k 1000 strength excluded))
        one         (queries-for 1)
        two         (queries-for 2)
        four        (queries-for 4)]
    [(save (matrix (apply concat one))
           (str "./results/queries/" filename "1.csv")
           :delim \,)
     (save (matrix (map #(apply concat %) two))
           (str "./results/queries/" filename "2.csv")
           :delim \,)
     ;; NOTE(review): this file goes to ./code/privacy/queries/ while the
     ;; other two go to ./results/queries/ -- confirm whether that is
     ;; intentional before changing it.
     (save (matrix (map #(apply concat %) four))
           (str "./code/privacy/queries/" filename "4.csv")
           :delim \,)]))