;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; CONTRAST SET LEARNING FUNTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (ns forensics.ContrastSetLearning (:use (forensics utils EqualFrequencyBinning)) (:use (incanter core))) (declare new-csl) (defn csl-seh-gro [data] (let [result (new-csl data) ranges (first result) ranks result support (last result) max-ranks (map #(apply max %) ranks) titles '(xvvv yvvv sdvv muvv tauv Like Time Cost)] (dotimes [one (- (count ranks) 0)] (println "| "(nth titles one) " |" (map #(Math/round (* 10 (/ % (nth max-ranks one)))) (nth ranks one)))))) (defn csl-eve [data] (let [result (new-csl data) ranges (first result) ranks result support (last result) max-ranks (map #(apply max %) ranks) titles '(lamb nvvv f-va Like Time Cost)] (dotimes [one (- (count ranks) 0)] (println "| "(nth titles one) " |" (map #(Math/round (* 10 (/ % (nth max-ranks one)))) (nth ranks one)))))) (defn csl-wal [data] (let [result (new-csl data) ranges (first result) ranks result support (last result) max-ranks (map #(apply max %) ranks) titles '(flvv pvvv Like Time Cost)] (dotimes [one (- (count ranks) 0)] (println "| "(nth titles one) " |" (map #(Math/round (* 10 (/ % (nth max-ranks one)))) (nth ranks one)))))) (defn BestRest [scored-data b r] (let [data (sort-by last scored-data) bt (apply vector (take b data)) rt (apply vector (drop (count bt) data))] [bt rt])) (def *bins* []) (defn EqualFrequencyBinning ([data] (EqualFrequencyBinning data 10)) ([data nb] (let [cols (reverse (pop (reverse (Transpose data))))] (doseq [col cols] (def *bins* (conj *bins* (efb (sort col) nb)))) (let [result *bins*] (def *bins* []) ;reset result)))) (defn bin-range [lst-of-bin-vals] ;result of unique-compress EFB (def *bin-range* [(list (- (first lst-of-bin-vals) 1) (first lst-of-bin-vals))]) (dotimes [one (- (count lst-of-bin-vals) 1)] (def *bin-range* (conj *bin-range* (list (nth lst-of-bin-vals one) (nth lst-of-bin-vals (inc one)))))) (let [result *bin-range*] (def *bin-range* []) ;returns min max ranges result)) (defn rank-range [range var-best-data var-rest-data b r N] (let [Pb (/ b N) Pr (/ r N) freqEbest (if (= (first range) (second range)) (/ (count var-best-data) b) (/ (count (remove #(= % 'f) (map #(if (<= % (second range)) (if (> % (first range)) % 'f) 'f) var-best-data))) b)) freqErest (if (= (first range) (second range)) (/ (count var-rest-data) r) (/ (count (remove #(= % 'f) (map #(if (<= % (second range)) (if (> % (first range)) % 'f) 'f) var-rest-data))) r)) likebestE (* freqEbest Pb 1.0) likerestE (* freqErest Pr 1.0) supported-rank (if (and (= likebestE 0) (= likerestE 0)) 0.0 (/ (Math/pow likebestE 2) (+ likebestE likerestE)))] supported-rank)) (defn rank-ranges [ranges var-best-data var-rest-data b r N] (map #(rank-range % var-best-data var-rest-data b r N) ranges)) (defn get-range-ranks [ranges best-data rest-data b r N] (let [ranges-best-rest (map list ranges best-data rest-data) range-ranks (map #(rank-ranges (first %) (second %) (last %) b r N) ranges-best-rest)] range-ranks)) ;--------------------------------------------- ;These support functions are not for the output ;I just wanted to see how many points were in ;each bin (defn support [range var-data] (let [no-of-points (count (filter #(and (> % (first range)) (<= % (second range))) var-data))] no-of-points)) (defn supports [ranges var-data] (map #(support % var-data) ranges)) (defn get-range-support [ranges all-attributes] (let [ranges-attributes (map list ranges all-attributes) range-support (map #(supports (first %) (second %)) ranges-attributes)] range-support)) ;---------------------------------------------- (defn new-csl [data] (let [N (count data) all-attributes (Transpose data) all-min (map #(apply min %) all-attributes) b (* 0.1 N) ;b = best r (* 0.9 N) ;r = rest b-r (BestRest data b r) ;b-r = best-rest best-data (Transpose (first b-r)) rest-data (Transpose (second b-r)) breaks (map #(unique-compress %) (map #(compress %) (EqualFrequencyBinning data))) breaks-min (map #(cons %1 %2) all-min breaks) ;breaks-min = minimum vals added to front ranges (map #(bin-range %) breaks-min) range-ranks (get-range-ranks ranges best-data rest-data b r N) range-support (get-range-support ranges all-attributes)] ; (map list ranges range-ranks range-support))) (map list ranges range-ranks))) ; range-ranks))