(ns code.experiments.within
  (:import (javaapplication1 Main))
  ;(:import (weightedscore Main))
  (:use (code.utils utils preprocess))
  (:use (code.pls cliff ncliff))
  (:use (code.data data))
  (:use (code.privacy privacy))
  (:use (incanter io core stats)))

;; Single shared instance of the Java experiment driver. It is used by
;; within-weka (start_single_experiment, uci_output_result) and
;; run-within-weka (clear_experiment).
(def exp1 (new Main))

(defn within-experiment
  "Generates the train/test/treatment CSV files for a within-data
  cross-validation experiment.

  Accepts a list of treatments (for example privacy algorithms or
  prototype learners) and, for every fold, writes under
  <result-pathname><data>/ :
    train-<i>.csv        the untreated training fold
    test-<i>.csv         the test fold
    <treatment>-<i>.csv  the training fold after applying the treatment
  where i counts folds starting at 1. With the default :num of 10 this is
  intended to give 100 files per treatment (10*10 cross-validation).

  Arguments:
    treatments       vector of algorithm (function) names as symbols, e.g.
                     ['morph1 'morph3 'morph10 'morph20 'morph40 'swap10
                      'swap20 'swap40 'datafly2 'datafly4]
                     Each is eval'd and called as
                     (f n train sav r pro opro distance-fn).
    data             the data name as a symbol, e.g. 'ant13. Both `data` and
                     `<data>-header` are resolved with eval.
    distance-fn      could be 'numeric' or 'cat'; handed unchanged to each
                     treatment.
    result-pathname  output directory prefix (string ending in a slash).

  Options:
    :sav  (default [10 20]) for privacy algorithms: the columns that are not
          altered during privatization. The default is specific to the
          PROMISE defect data: 10 is the sensitive attribute and 20 is the
          class attribute (this must always be on the list).
    :num  (default 10) indicates a 10*10 cross-validation experiment.
    :n    (default 10) indicates the number of bins for efb.
    :r    (default [0.1501 0.3501]) handed unchanged to each treatment.
    :pro  (default ncliff) indicates the new cliff for numeric data.
    :opro (default ocliff) indicates the original cliff for numeric data.

  Returns nil; the function is run for its file-writing side effects.

  Example:
    (within-experiment ['morph1 'morph3 'morph10 'morph20 'morph40 'swap10
                        'swap20 'swap40 'datafly2 'datafly4]
                       'ant13 numeric \"./results/within/\")"
  [treatments data distance-fn result-pathname & options]
  (let [opts      (when options (apply assoc {} options))
        sav       (or (:sav opts) [10 20])
        num-folds (or (:num opts) 10)
        n         (or (:n opts) 10)
        r         (or (:r opts) [0.1501 0.3501])
        pro       (or (:pro opts) ncliff)
        opro      (or (:opro opts) ocliff)
        out-dir   (str result-pathname data "/")
        ;; NOTE(review): `data` and the treatment symbols are resolved with
        ;; eval; only pass symbols from trusted code, never external input.
        all-folds (apply concat
                         (map #(folds num-folds %)
                              (make-random-data (pre-data (eval data)) num-folds)))
        ;; The header var does not change between folds, so resolve it once.
        header    (eval (symbol (str data "-header")))]
    (doseq [[i fold] (map vector (iterate inc 1) all-folds)]
      (save (:train fold) (str out-dir "train-" i ".csv") :header header)
      (save (:test fold) (str out-dir "test-" i ".csv") :header header)
      (doseq [treatment treatments]
        (save ((eval treatment) n (:train fold) sav r pro opro distance-fn)
              (str out-dir treatment "-" i ".csv")
              :header header)))
    nil))

(defn within-weka
  "Runs one weka classifier over every treatment file of every run.

  For each run i in 1..:runs and each treatment, calls
  start_single_experiment on exp1 with
    train file  <result-pathname><data>/<treatment>-<i>.csv.arff
    test file   <result-pathname><data>/test-<i>.csv.arff
    prefix      \"<data>,<klassifier>,<position of treatment in treatments>,\"
  and finally calls uci_output_result on exp1 with the data name.

  Arguments:
    treatments       vector of symbols. To evaluate the untreated training
                     data, 'train may need to be added to this vector.
    data             a symbol, e.g. 'ant13.
    result-pathname  e.g. /home/ourprivacy/results/within/

  Options:
    :klassifier (default 4) a number representing a classifier from weka.
                The numbers [0 3 4 5] are understood to correspond to the
                names [nb lr rf knn]; the names are not used by the code and
                are recorded here only to know what the numbers mean.
    :runs       (default 100) number of runs, i.e. the highest <i> for which
                files exist.

  Returns [per-run-results uci-output], where per-run-results is a vector
  (one entry per run) of vectors (one entry per treatment) of whatever
  start_single_experiment returned.

  Example:
    (within-weka ['train 'morph1 'morph3 'morph10 'morph20 'morph40 'swap10
                  'swap20 'swap40 'datafly2 'datafly4]
                 'ant13 \"/home/fayola/ourprivacy/results/within/\")"
  [treatments data result-pathname & options]
  (let [opts       (when options (apply assoc {} options))
        klassifier (or (:klassifier opts) 4)
        runs       (or (:runs opts) 100)
        dir        (str result-pathname data "/")
        ;; One weka experiment: a single treatment in a single run.
        treatment-unit
        (fn [treatment run]
          (let [train-file (str dir treatment "-" run ".csv.arff")
                test-file  (str dir "test-" run ".csv.arff")
                prefix     (str data "," klassifier ","
                                (FindPos1 treatment treatments 0) ",")]
            (. exp1 start_single_experiment train-file test-file prefix klassifier)))
        ;; All treatments for one run. `vec` forces the side-effecting calls
        ;; to happen here, in treatment order.
        treatment-units
        (fn [run]
          (vec (map #(treatment-unit % run) treatments)))
        ;(treatment-units 1)
        ans (vec (map treatment-units (range 1 (inc runs))))]
    [ans (. exp1 uci_output_result (str data))]))

;(run-within-weka 'ant13 "/home/fayola/ourprivacy/results/within/")
(defn run-within-weka
  "Runs within-weka for `data` over the standard treatment list ('train plus
  the morph/swap/datafly variants) and then calls clear_experiment on exp1.

  Options:
    :klassifier (default 0) weka classifier number handed to within-weka.

  NOTE(review): earlier code computed a :klassifiers (plural) value but never
  used it; that option is still not acted upon here - confirm the intent
  before adding a loop over several classifiers."
  [data result-pathname & options]
  (let [opts       (when options (apply assoc {} options))
        klassifier (or (:klassifier opts) 0)]
    (within-weka ['train 'morph1 'morph3 'morph10 'morph20 'morph40
                  'swap10 'swap20 'swap40 'datafly2 'datafly4]
                 data result-pathname
                 :klassifier klassifier)
    (. exp1 clear_experiment)))

;reports
(defn report-file
  "Reads a comma-delimited file with a header row and returns it as a matrix.

  result-csv-name is a string that is appended to incanter-home, e.g.
  \"data/uci/iris.dat\" or \"ant-test/ant13-output-result1.csv\".
  NOTE(review): other call sites in this file pass absolute paths; confirm
  that incanter-home is set so those resolve as intended."
  [result-csv-name]
  (to-matrix (read-dataset (str incanter-home result-csv-name)
                           :delim \,
                           :header true)))

;(extract-acc-data4 "/home/fayola/ourprivacy/results/within/results/nb/ant13-output-result.csv" [2 3] 'acc)
(defn extract-acc-data4
  "Returns a one-row matrix of per-group medians from a result file.

  Loads result-csv-name through report-file, keeps only the columns listed in
  `a`, groups the rows by the first kept column, and for each group (in
  ascending key order) takes the median of the last kept column as a float.

  a                vector of column indices, e.g. [3 5] (5 = pd, within).
  performance-name accepted for a uniform extract-fn signature; not used."
  [result-csv-name a performance-name]
  (let [selected (sel (report-file result-csv-name) :cols a)
        by-key   (group-by #(nth % 0) selected)
        medians  (vec (map (fn [[_ rows]]
                             ;; last row of the transpose = last kept column
                             (float (get-median (last (trans rows)))))
                           (sort-by first by-key)))]
    (matrix [medians])))

;; Result files (naive Bayes) for each data set, in reporting order.
(def rcns
  ["/home/fayola/ourprivacy/results/within/results/nb/ant13-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/arc-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/camel10-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/poi15-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/redaktor-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/skarbonka-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/tomcat-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/velocity14-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/xalan24-output-result.csv"
   "/home/fayola/ourprivacy/results/within/results/nb/xerces12-output-result.csv"])

;(ead4 "/home/fayola/ourprivacy/results/within/results/nb/" 'nb rcns extract-acc-data4 [2 4] 'pd)
(defn ead4
  "Applies extract-fn to every file in result-csv-names and saves the
  collected results, with a leading 1-based index column, space-delimited to
  <result-pathname><performance-name>-<klassifier-name>.txt.

  extract-fn is called as (extract-fn file a performance-name)."
  [result-pathname klassifier-name result-csv-names extract-fn a performance-name]
  (let [result (vec (map #(extract-fn % a performance-name) result-csv-names))]
    (save (bind-columns (matrix (range 1 (inc (nrow result))))
                        (matrix result))
          (str result-pathname performance-name "-" klassifier-name ".txt")
          :delim \space)))

;a "/home/fayola/ourprivacy/results/privacy/results/query-results-all-m-1.txt"
;b "/home/fayola/ourprivacy/results/within/results/nb/pd-nb.txt"
(defn priv-vs-ipr
  "Loads the two report files `a` and `b` through report-file and returns
  them as the pair [priv ipr].

  test-names is accepted but not used by the current body; it was consumed
  only by the disabled code kept in the comment block after this function."
  [a b test-names]
  (let [priv (report-file a)
        ipr  (report-file b)]
    [priv ipr]))
;        clumps (map #(matrix (vector %1 %2)) priv ipr)
;        ans (fn [dat test-name]
;              (loop [d dat result nil]
;                (if (empty? d)
;                  result
;                  (recur
;                    (rest d)
;                    (do (println (first d)) (println " ") (println " "))))))]
;    (loop [test-name test-names clump (rest clumps) result []]
;      (if (empty? test-name)
;        result
;        (recur
;          (rest test-name)
;          (rest clump)
;          (binding [*out* (java.io.FileWriter. (str "/home/fayola/ourprivacy/results/within/results/"(first test-name)"-priv-vs-ipr.txt"))]
;;
;            (ans (first clump) (first test-name))
;            (println (second clump))
;            (flush)))))))

(defn run-priv-vs-ipr
  "Calls priv-vs-ipr on the fixed privacy query-results file and the nb pd
  results file, passing the standard list of data-set names."
  []
  (let [test-names ['ant13 'arc 'camel10 'poi15 'redaktor 'skarbonka
                    'tomcat 'velocity14 'xalan24 'xerces12]]
    (priv-vs-ipr
      "/home/fayola/ourprivacy/results/privacy/results/query-results-all-m-1.txt"
      "/home/fayola/ourprivacy/results/within/results/nb/pd-nb.txt"
      test-names)))