(ns code.experiments.cbr_cases
  (:import (javaapplication1 Main))
  (:use (code.utils utils preprocess))
  (:use (code.pls cliff ncliff))
  (:use (code.data data))
  (:use (code.privacy morph_v2))
  (:use (code.experiments within))
  (:use (incanter io core stats)))

;; Shared instance of the Java experiment driver used by the *-weka functions.
(def exp2 (new Main))

;; This experiment generates test cases.
;; A test case = median and spread of each independent attribute,
;; %defects, acc, prec, pd, pf and fm.

;; Symbols naming the data sets used to build the case base.
(def data-bank
  ['ant13 'ant14 'ant15 'ant16 'ant17
   'camel10 'camel12 'camel14 'camel16
   'e-learning
   'forrest06 'forrest07 'forrest08
   'intercafe
   'ivy11 'ivy14 'ivy20
   'jedit32 'jedit40 'jedit41 'jedit42 'jedit43
   'kalkulator
   'log4j10 'log4j11 'log4j12
   'lucene20 'lucene22 'lucene24
   'nier
   'pbeans1 'pbeans2
   'pdf
   'poi15 'poi20 'poi25 'poi30
   'sklebagd 'szybkafucha 'termoproject 'wsp
   'xalan24 'xalan25 'xalan26 'xalan27
   'xerces 'xerces12 'xerces13 'xerces14
   'zuzel])

;; Symbols naming the data sets held out for testing.
(def test-bank
  ['arc 'berek 'ckjm 'redaktor 'serapion 'skarbonka
   'synapse10 'systemdata 'tomcat 'workflow])

(defn meta-case
  "Summarise one data set as a single meta-case.

  `data` is a form (normally a symbol from `data-bank`/`test-bank`) that is
  `eval`-ed to obtain the data set. The last column is treated as the class
  column; all other columns are the independent attributes.

  Returns `trans` applied to the concatenation of:
    - the rounded percentage of rows whose class value is > 0,
    - the number of class values,
    - `get-median` of every attribute column,
    - `get-spread` of every attribute column."
  [data]
  (let [dat         (eval data)
        class-col   (dec (ncol dat))
        vars        (sel dat :cols (range 0 class-col))
        klass       (sel dat :cols class-col)
        defects     (Math/round (* 100.0 (/ (count (filter #(> % 0) klass))
                                            (nrow dat))))
        ;; transpose once so that each element is one attribute column
        columns     (trans vars)
        var-medians (map #(get-median %) columns)
        var-spreads (map #(get-spread %) columns)]
    (trans (apply concat [[defects] [(count klass)] var-medians var-spreads]))))

(defn meta-cases
  "Build a matrix with one meta-case (see `meta-case`) per entry of `bank?`,
  prefixed by a column holding the 0-based position of the entry in `bank?`."
  [bank?]
  (let [idx (matrix (range 0 (count bank?)))]
    (bind-columns idx (matrix (map #(meta-case %) bank?)))))

(defn performance-case
  "Write the CSV files needed for one model under `result-pathname`/`model-name`/
  (they are converted to ARFF afterwards, outside this function).

  Files written, in order:
    - <model-name>.csv    : `pre-data` of `model-data`
    - m-<model-name>.csv  : `morph10` applied to `pre-data` of `model-data`
    - <name>.csv          : `pre-data` of every data set named in `test-bank`
    - <name>.csv          : `pre-data` of every data set named in `datas`

  The header for each file is looked up by evaluating the symbol
  `<name>-header`. `result-pathname` is concatenated directly with
  `model-name`, so it is expected to end with a path separator.

  Returns `[test-bank-results datas-results]`, the values returned by `save`
  for the two groups of test files."
  [datas model-data model-name result-pathname]
  (let [out-dir    (str result-pathname model-name "/")
        header-of  (fn [nm] (eval (symbol (str nm "-header"))))
        save-named (fn [nm]
                     (save (pre-data (eval nm))
                           (str out-dir nm ".csv")
                           :header (header-of nm)))]
    ;; training data as-is
    (save (pre-data model-data)
          (str out-dir model-name ".csv")
          :header (header-of model-name))
    ;; privatised (morphed) training data
    (save (morph10 10 (pre-data model-data) [10 20] [0.1501 0.3501] ncliff 9 numeric)
          (str out-dir "m-" model-name ".csv")
          :header (header-of model-name))
    ;; `doall` forces the writes to happen here instead of whenever (if ever)
    ;; the caller happens to realize the lazy sequences.
    [(doall (map save-named test-bank))
     (doall (map save-named datas))]))

; (run-performance-case velocity14 'velocity14 data-bank)
; (run-performance-case tomcat 'tomcat data-bank)
(defn run-performance-case
  "Run `performance-case` for `model-data`/`model-name` against the data sets
  named in `bank?`, writing into the fixed results directory."
  [model-data model-name bank?]
  (performance-case bank? model-data model-name "/home/fayola/cbrprivacy/results/cbr/"))

(defn performance-case-weka
  "Use arffs for finding performance; acc, prec, pd, pf, fm.
  Treatment is model.

  `data` is a form (normally a symbol) that is `eval`-ed to obtain the test
  data set; its printed form is also used to build the test file name
  `<result-pathname>/<data>.csv.arff`. The training file is
  `<result-pathname>/<model-name>.csv.arff`. `model-data` is accepted for
  call-site compatibility but not used.

  Recognised keyword options:
    :klassifier  classifier id passed to the Java driver (default 0)
    :valid       label passed to `cbr_output_result` (default `model-name`)
  Any other options (e.g. :klassifiers, :klassifier-names) are accepted and
  ignored.

  Returns a vector of the results of `start_single_experiment` and
  `cbr_output_result` on `exp2`."
  [data model-data model-name result-pathname & options]
  (let [opts       (when options (apply assoc {} options))
        klassifier (or (:klassifier opts) 0)
        valid      (or (:valid opts) model-name)
        dat        (eval data)
        klass      (sel dat :cols (dec (ncol dat)))
        defects    (Math/round (* 100.0 (/ (count (filter #(> % 0) klass))
                                           (nrow dat))))
        dir        (str result-pathname "/")
        train_file (str dir model-name ".csv.arff")
        test_file  (str dir data ".csv.arff")
        ;; leading CSV fields handed to the Java driver: name, %defects, size
        prefix     (str data "," defects "," (count klass) ",")]
    [(. exp2 start_single_experiment train_file test_file prefix klassifier)
     (. exp2 cbr_output_result (str valid))]))

;accuracy pd precision f-measure pf
; (run-performance-case-weka ant15 'ant15 velocity14 'velocity14 "/home/fayola/cbrprivacy/results/cbr/velocity14")
(defn run-performance-case-weka
  "Run `performance-case-weka` with classifier 0 and an output label derived
  from `type`:
    0 -> <model-name>
    1 -> m-<model-name>
    2 -> test-<model-name>
    anything else -> m-test-<model-name>

  `options` are accepted for call-site compatibility but are not forwarded."
  [data model-data model-name result-pathname type & options]
  ;; The original nesting closed the `str` for type 0 too late, yielding
  ;; \"<model>m-test-<model>\" for type 0 and nil for every other type.
  (let [label (cond
                (= type 0) (str model-name)
                (= type 1) (str "m-" model-name)
                (= type 2) (str "test-" model-name)
                :else      (str "m-test-" model-name))]
    (performance-case-weka data model-data model-name result-pathname
                           :klassifier 0 :valid label)))

; (map #(run-performance-case-weka % velocity14 'velocity14 "/home/fayola/cbrprivacy/results/cbr/velocity14" 0) data-bank)
; (map #(run-performance-case-weka % tomcat 'tomcat "/home/fayola/cbrprivacy/results/cbr/tomcat" 0) data-bank)
; (map #(run-performance-case-weka % velocity14 'm-velocity14 "/home/fayola/cbrprivacy/results/cbr/velocity14" 1) data-bank)
; (map #(run-performance-case-weka % tomcat 'm-tomcat "/home/fayola/cbrprivacy/results/cbr/tomcat" 1) data-bank)
; (map #(run-performance-case-weka % velocity14 'velocity14 "/home/fayola/cbrprivacy/results/cbr/velocity14" 2) test-bank)
; (map #(run-performance-case-weka % tomcat 'tomcat "/home/fayola/cbrprivacy/results/cbr/tomcat" 2) test-bank)
; (map #(run-performance-case-weka % velocity14 'm-velocity14 "/home/fayola/cbrprivacy/results/cbr/velocity14" 3) test-bank)
; (map #(run-performance-case-weka % tomcat 'm-tomcat "/home/fayola/cbrprivacy/results/cbr/tomcat" 3) test-bank)
; (. exp2 clear_experiment)

; (report-file "/home/fayola/cbrprivacy/results/cbr/results/nb/velocity14-output-result.csv")
; (cbr-privacy "/home/fayola/cbrprivacy/results/cbr/results/nb/" numeric "velocity14-output-result.csv" "test-velocity14-output-result.csv")
; (cbr-privacy "/home/fayola/cbrprivacy/results/cbr/results/nb/" numeric "m-velocity14-output-result.csv" "m-test-velocity14-output-result.csv")
; (cbr-privacy "/home/fayola/cbrprivacy/results/cbr/results/nb/" numeric "tomcat-output-result.csv" "test-tomcat-output-result.csv")
; (cbr-privacy "/home/fayola/cbrprivacy/results/cbr/results/nb/" numeric "m-tomcat-output-result.csv" "m-test-tomcat-output-result.csv")
(defn cbr-privacy
  "For every data set in `test-bank`, look up the nearest meta-case among the
  `data-bank` meta-cases and compare the stored result for that case with the
  result actually obtained on the test set.

  `brain` and `valid` are file names under `result-pathname`, both read with
  `report-file`; `brain` is indexed by the nearest case and `valid` by the
  position of the test set in `test-bank`. `distance-fn` is passed through to
  `get-nearest`.

  Returns a vector with one entry per test set:
    [test-name [before after changes]]
  where `before` is the entry of `brain` selected by the nearest case, `after`
  is the entry of `valid` for the test set, and `changes` is the element-wise
  percentage difference 100 * (before - after) / after."
  [result-pathname distance-fn brain valid]
  (let [;; rows are reversed so the index column ends up last in each row
        train    (matrix (map reverse (meta-cases data-bank)))
        test     (meta-cases test-bank)
        cbr      (report-file (str result-pathname "/" brain))
        isvalid? (report-file (str result-pathname "/" valid))
        ;; `zero?` instead of `(= a 0)`: `=` is false for 0.0 vs 0, which let a
        ;; floating-point zero through to the division.
        change   (fn [a b]
                   (let [a1 (if (zero? a) 0.000001 a)]
                     (* 100 (/ (- b a) a1))))]
    (loop [tt test
           i 0
           result []]
      (if (empty? tt)
        result
        (recur (rest tt)
               (inc i)
               (conj result
                     (vector (nth test-bank i)
                             ;(let [adjust0 (map first (get-nearest-k (reverse (rest (rest (first tt)))) train distance-fn 3))
                             ;      adjust1 (map last adjust0)
                             ;      adjust2 (map mean (trans (map #(nth cbr %) adjust1)))]
                             ;  adjust2)
                             (let [;; drop the first three columns (index, %defects, size)
                                   ;; and reverse to line up with the reversed train rows
                                   query   (reverse (rest (rest (rest (first tt)))))
                                   ;; NOTE(review): assumes the last value of the nearest
                                   ;; train row is its data-bank index - confirm against get-nearest
                                   nearest (last (first (get-nearest query train distance-fn)))
                                   before  (nth cbr nearest)
                                   after   (nth isvalid? i)]
                               [before after (map #(change %1 %2) after before)]))))))))