(ns code.report.privacy_report
  (:use (code.utils utils preprocess))
  (:use (code.pls cliff ncliff))
  (:use (code.data data))
  (:use (code.defect utils search1 cliff_java))
  (:use (code.privacy privacy))
  (:use (incanter core stats io)))

;; ---------------------------------------------------------------------------
;; CSV -> matrix
;; ---------------------------------------------------------------------------

;(def incanter-home (System/getProperty "incanter.home"))
;; NOTE(review): `incanter-home` is referenced below but its definition above is
;; commented out; presumably it is supplied by one of the :use'd namespaces --
;; confirm before relying on this file standalone.

(defn report-file
  "Reads the comma-delimited CSV (with a header row) located at
  `incanter-home` + `result-csv-name` and returns it converted with `to-matrix`.

  result-csv-name - path string appended to `incanter-home`, e.g.
                    \"data/uci/iris.dat\" or
                    \"ant-test/ant13-output-result1.csv\"."
  [result-csv-name]
  (to-matrix (read-dataset (str incanter-home result-csv-name)
                           :delim \,
                           :header true)))

;; ---------------------------------------------------------------------------
;; Per-learner performance tables
;; ---------------------------------------------------------------------------

;; Notes carried over from the original author:
;;   - column 1 in the csv is the machine learner: [0 3 4 5] = [nb lr rf knn]
;;   - column 2 in the csv is the training data
;;   - `fm` is the column selection, e.g. [1 2 6]
;;   - measure-string is e.g. "fm"
;;   - example input:
;;     "./code/report/report-2011-11-29-1/ant13-test/ant13-output-result1.csv"
;; NOTE(review): the original notes say "column 6 is fmeasure" here but
;; "fm=7" next to run-epd; the two disagree -- confirm which layout is current.

(defn extract-performance-data
  "Builds one table per machine learner from a result CSV and saves each table
  as a space-delimited file.

  result-csv-name - CSV path handed to `report-file`.
  fm              - column indices passed to `sel` (:cols); after selection the
                    first column is used as the learner code and the second as
                    the training-data id.
  test-name       - name embedded in the output file names.
  test-file-name  - directory prefix for the output files.
  measure-string  - measure label embedded in the output file names.

  For each learner code (0 = nb, 3 = lr, 4 = rf, 5 = knn) the selected rows are
  grouped by their second column; every group becomes one output row made of
  the group key followed by the last row of the transposed group. Output files
  are named <test-file-name>/<learner>-<measure-string>-<test-name>.dat.

  Returns a vector with the results of the four `save` calls, in the order
  nb, lr, rf, knn."
  [result-csv-name fm test-name test-file-name measure-string]
  (let [selected   (sel (report-file result-csv-name) :cols fm)
        by-learner (group-by first selected)
        ;; Matrix cells are doubles while the learner codes are integer
        ;; literals, so compare with `==` (numeric equality); `=` treats
        ;; 0 and 0.0 as different on Clojure 1.3 and later.
        learner-rows (fn [code]
                       (matrix (second (first (filter #(== code (first %))
                                                      by-learner)))))
        by-train-data (fn [code]
                        (group-by #(nth % 1) (learner-rows code)))
        ;; One output row per training-data group, ordered by group key.
        tbl (fn [group-td-data]
              (matrix
               (vec (map (fn [entry]
                           (vec (cons (first entry)
                                      (last (trans (second entry))))))
                         (sort-by first group-td-data)))))
        out-file (fn [learner]
                   (str test-file-name "/" learner "-" measure-string "-" test-name ".dat"))
        ;; Group every learner up front so nothing is written if any of the
        ;; groupings fails.
        td-nb  (by-train-data 0)
        td-lr  (by-train-data 3)
        td-rf  (by-train-data 4)
        td-knn (by-train-data 5)]
    ;; Original author's note on the table layout:
    ;; traindata,orig,priv,pcliff1,pcliff2,pcliff4
    [(save (tbl td-nb)  (out-file "nb")  :delim \space)
     (save (tbl td-lr)  (out-file "lr")  :delim \space)
     (save (tbl td-rf)  (out-file "rf")  :delim \space)
     (save (tbl td-knn) (out-file "knn") :delim \space)]))

;; ---------------------------------------------------------------------------
;; Batch driver
;; ---------------------------------------------------------------------------

;; Measure columns (original author's note): acc=4, pd=5, prec=6, fm=7, pf=8
;; Example: (run-epd "fm" 7)

(defn run-epd
  "Runs `extract-performance-data` for every data set of the
  report-2012-02-01-1 report.

  measure-string - measure label used in the output file names, e.g. \"fm\".
  ms-num         - CSV column index of that measure; the selection passed on
                   is [1 2 ms-num].

  For a data set named <ds> the input is <root>/<ds>-test/<ds>-output-result.csv
  and the output directory is <root>/<ds>-test.

  Returns a vector with one `extract-performance-data` result per data set, in
  the order listed below."
  [measure-string ms-num]
  (let [report-root "./code/report/report-2012-02-01-1/"
        data-sets   '[ant13 arc camel10 poi15 redaktor
                      skarbonka tomcat velocity14 xalan24 xerces12]]
    ;; `vec` forces the lazy seq so every file is written before returning.
    (vec (map (fn [data-set]
                (let [test-dir (str report-root data-set "-test")]
                  (extract-performance-data
                   (str test-dir "/" data-set "-output-result.csv")
                   [1 2 ms-num]
                   data-set
                   test-dir
                   measure-string)))
              data-sets))))