;(ns us.menzies.utils) (ns forensics.utils (:use (incanter core))) ;---------Distance Functions------------------------ ;Distance for discrete data (defn EuclideanDistance [[Oi Oj _]] (let [k (if (= (last Oi) (last Oj)) 0 1)] (Math/sqrt (apply + (map #(Math/pow % 2) (conj (map - (butlast Oi) (butlast Oj)) k )))))) (defn EuclideanDistance2 [[Oi Oj _]] (Math/sqrt (apply + (map #(if (= %1 %2) 0 1) Oi Oj)))) (def memo-EuclideanDistance (memoize EuclideanDistance)) ;Distance for continuous data (defn EuclideanDistance1 [[Oi Oj _]] ; (let [k (if (= (last Oi) (last Oj)) 0 1)] (Math/sqrt (apply + (map #(Math/pow % 2) (map - Oi Oj) )))) ;Euclidean Distance between projections (defn ProjectionED [[Oi Oj xi xj]] (Math/sqrt (- (Math/pow (EuclideanDistance [Oi Oj]) 2) (Math/pow (- xi xj) 2)))) ;--------------------------------------------------- ;-------------Create random dataset for CSL-------------- (declare Transpose) (def *csl-random* []) (defn csl-random ([model-function] (csl-random model-function 1000 [1 0 0] 1 1)) ([model-function samples] (csl-random model-function samples [1 0 0] 1 1)) ([model-function samples sweet-spot] (csl-random model-function samples sweet-spot 1 1)) ([model-function samples sweet-spot time-factor cost-factor] (let [data (model-function samples) new-data (Transpose (butlast (Transpose data))) lr (last (Transpose data))] (doseq [one lr] (def *csl-random* (conj *csl-random* [one (* (/ (Math/round (rand 300)) 300) time-factor) (* (/ (Math/round (rand 300)) 300) cost-factor)]))) ; (Math/round (rand 300)) ; (Math/round (rand 100))]))) (let [result (map #(conj % (EuclideanDistance1 [sweet-spot %])) *csl-random*)] (def *csl-random* []) (vector new-data (apply vector result)))))) (defn csl1 [model-function] (let [ltc (csl-random model-function)] (loop [i 0 C [] model (first ltc) l (second ltc)] (if (< i (count (first ltc))) (recur (inc i) (conj C (concat (first model) (first l))) (rest model) (rest l)) C)))) ;---------------------------------------------------------- ;-------------Qc dataset for CSL-------------- (declare Transpose) (def *csl-q* []) (let [sweet-spot [1 0 0]] (defn csl-q [data time-factor cost-factor] ; [1 0 0] (let [new-data (Transpose (butlast (Transpose data))) lr (last (Transpose data))] (doseq [one lr] (def *csl-q* (conj *csl-q* [one (* (/ (Math/round (rand 300)) 300) time-factor) (* (/ (Math/round (rand 300)) 300) cost-factor)]))) (let [result (map #(conj % (EuclideanDistance1 [sweet-spot %])) *csl-q*)] (def *csl-q* []) (vector new-data (apply vector result))))) (defn csl2 [data time-factor cost-factor] (let [ltc (csl-q data time-factor cost-factor)] (loop [i 0 C [] model (first ltc) l (second ltc)] (if (< i (count (first ltc))) (recur (inc i) (conj C (concat (first model) (first l))) (rest model) (rest l)) C)))) ) ;---------------------------------------------------------- ;-------------Normalize-------------- (def *norm* []) (defn normalize [data] (doseq [one (Transpose data)] (def *norm* (conj *norm* (map #(/ % (apply max one)) one)))) (let [result *norm*] (def *norm* []) (apply vector (map #(apply vector %) (Transpose result))))) ;---------------------------------------------------------- (defn mean1 [lst] (map #(/ % (count lst)) (apply #'map (cons #'+ lst)))) (defn FindPos [val lst counter] (if (= (not-empty lst) nil) counter (if (= val (first lst)) (inc counter) (FindPos val (rest lst) (inc counter))))) (defn Transpose [data] (apply vector (apply #'map (cons #'vector data)))) ;----------compress---------------------- (defn n-elts [elt n] (if (> n 1) (list n elt) (list 1 elt))) (defn compr [elt n lst] (if (empty? lst) (list (n-elts elt n)) (let [after (first lst)] (if (= after elt) (compr elt (+ n 1) (rest lst)) (cons (n-elts elt n) (compr after 1 (rest lst))))))) (defn compress [lst] (if (or (= (count lst) 0) (= (count lst) 1)) lst (compr (first lst) 1 (rest lst)))) (defn unique-compress [coll] ; coll = (compress coll) (map #(second %) coll)) ;------------------------------------------- ;----------- Used by seheult and grove------ (defn nr [minimum maximum] (+ minimum (rand (- maximum minimum)))) ;------------------------------------------ (defn tab [n] (dotimes [_ n] (print " "))) (defn print-tree ([tree] (print-tree tree 0)) ([tree depth] (println "") (tab (+ depth 1)) (println (first tree)) (let [subtrees (rest tree) c1 (count subtrees)] (loop [subtree subtrees c 0] (tab (+ depth 1)) (print " " '= (first (first subtree))) (if (not (vector? (second (first subtree)))) (println " " '=> (second (first subtree))) (print-tree (second (first subtree)) (+ depth 5))) (if (>= c (- c1 1)) 'done (recur (rest subtree) (inc c))))))) (defn member? [item lst] (if (empty? lst) 'nil (if (= item (first lst)) true (member? item (rest lst))))) (defn shuffle [X] (let [len (count X)] (loop [rand-indices []] (let [rdi (rand-int len)] (if (= (count (remove #(= 'nil %) rand-indices)) len) (let [ri (remove #(= 'nil %) rand-indices)] (apply vector (map #(nth X %) ri))) (recur (conj rand-indices (if (not (member? rdi rand-indices)) rdi)))))))) (defn folds [D n] ; n = 10 (let [break (Math/ceil (/ (count D) (* 1.0 n)))] (loop [n 0 folds (transient [])] (if (= n 10) (persistent! folds) (recur (inc n) (conj! folds (if (= n 9) (sel D :rows (range (* n break) (nrow D))) (sel D :rows (range (* n break) (* (inc n) break)))))))))) (defn acc [want got] (/ (count (filter #(not= nil %) (map #(if (= %1 %2) 1) want got))) (* 1.0 (count want)))) (defn abcd [want got goal] (loop [w want g got d 0 c 0 b 0 a 0] (if (empty? w) [a b c d] (recur (rest w) (rest g) (if (= (first g) goal) (if (= (first w) goal) (inc d) d) d) (if (= (first g) goal) (if (not= (first w) goal) (inc c) c) c) (if (not= (first g) goal) (if (= (first w) goal) (inc b) b) b) (if (not= (first g) goal) (if (not= (first w) goal) (inc a) a) a))))) (defn abcd-stat [want got goal] (let [abcds (abcd want got goal) a (nth abcds 0) b (nth abcds 1) c (nth abcds 2) d (nth abcds 3) acc (* 1.0 (/ (+ a d) (+ a b c d))) pd (if (and (= b 0) (= d 0)) 0 (* 1.0 (/ d (+ b d))))] pd)) (defn abcd-stats [want got goals] (loop [g goals result []] (if (empty? g) result (recur (rest g) (conj result (abcd-stat want got (first g))))))) (defn ug1 [D] (let [C (apply vector (sort (last (Transpose D))))] (loop [c C result (transient [(first C)])] (let [elt (first c)] (if (empty? c) (persistent! result) (recur (rest c) (conj! result (if (not= elt (second c)) (second c))))))))) (defn unique-goals [D] (remove #(= 'nil %) (ug1 D))) (defn shrink1 [col D] ;answer to compress (let [klass (last (Transpose D))] (loop [c col result (transient [])] (if (empty? c) (persistent! result) (recur (rest c) (conj! result (count (filter #(= (first c) %) klass)))))))) (defn shrink [col D] (apply vector (reverse (sort-by last (map #(vector %1 %2) col (shrink1 col D)))))) (defn majority-class [shrk magic-n] (let [t (take 2 shrk)] (if (<= magic-n (if (= (second (second t)) 0) 100 (* 100 (/ (- (second (first t)) (second (second t))) (second (second t)))))) 'true 'false))) (defn prototype-sort [D] (loop [g (unique-goals D) result (transient [])] (if (empty? g) (persistent! result) (recur (rest g) (conj! result (apply vector (remove #(= 'nil %) (map #(if (= (first g) (last %)) %) D))))))))