(ns code.pls.cliff
  (:use (code.utils utils preprocess))
  (:use (incanter core stats)))

(defn get-criteria-only
  "Returns the criteria for dataset D as a vector, one entry per pair
  produced by `my-best-rest`. Each entry is the result of calling
  `get-ranks-only` with D and the two elements of the pair.
  Used for numeric data."
  [D]
  (mapv (fn [pair] (get-ranks-only D (first pair) (second pair)))
        (my-best-rest D)))

(defn calculate-inst-rank
  "Returns the rank value of an instance by multiplying the individual
  ranks of each attribute value in the instance.

  one   - the attribute values of one instance, in column order.
  crit0 - indexable collection; position i holds the entries for column i,
          where each entry's first element is an attribute value and its
          second element is that value's rank.

  Attribute values with no matching entry contribute nothing to the
  product; if nothing matches at all the result is 1 (the empty product)."
  [one crit0]
  (->> one
       (map-indexed (fn [idx value]
                      (filter #(= value (first %)) (nth crit0 idx))))
       (apply concat)
       (map second)
       (apply *)))

(defn rank-instances
  "Ranks the instances of every group returned by `mygroup`.

  Returns a seq with one element per group; each element is a seq of
  [instance rank] pairs ordered from highest to lowest rank. The rank is
  computed by `calculate-inst-rank` on the instance without its last
  element.

  Group i is ranked against criteria set i. The previous implementation
  bound a cursor over the criteria but never advanced or read it, so every
  group was ranked against the first criteria set.
  NOTE(review): assumes `get-criteria-only` and `mygroup` return parallel
  collections of equal length (as `cliff` also assumes) - confirm."
  [D]
  (let [crits  (get-criteria-only D)
        groups (mygroup D)]
    (map (fn [group crit]
           (->> group
                (map #(vector % (calculate-inst-rank (butlast %) crit)))
                (sort-by second)
                reverse))
         groups
         crits)))

(defn select-numeric-instances
  "Keeps the top-ranked fraction n of each ranked group of D and returns
  the kept instances as a single matrix.

  For a group of size k, ceil(n * k) instances are taken from the front of
  the ranking produced by `rank-instances`."
  [D n]
  (->> (rank-instances D)
       (mapcat (fn [ranked]
                 (take (Math/ceil (* n (count ranked))) ranked)))
       (map first)
       matrix))

(defn get-criteria
  "Returns the criteria for dataset D as a vector, one entry per pair
  produced by `my-best-rest`. Each entry is the result of calling
  `rank-vals` with D and the two elements of the pair."
  [D]
  (mapv (fn [pair] (rank-vals D (first pair) (second pair)))
        (my-best-rest D)))

(defn select-instances
  "Narrows inst by applying the criteria in crit one after another.

  Each criterion is a sequence whose first element is the required value
  and whose last element is the column index to test. Filtering stops when
  the criteria are exhausted or the current selection becomes empty, and
  the selection from the step before is returned.

  NOTE(review): when the criteria run out while the selection is still
  non-empty, the result of the final filter is discarded (the previous
  selection is returned). Behaviour is kept as-is; confirm this is intended."
  [crit inst]
  (loop [remaining crit
         prev      inst
         current   inst]
    (if (or (empty? remaining) (empty? current))
      prev
      (let [criterion (first remaining)
            wanted    (first criterion)
            col       (last criterion)]
        (recur (rest remaining)
               current
               (filter #(= (nth % col) wanted) current))))))

(defn select-instances1
  "Builds a target from crit (the first element of every criterion, after
  sorting the criteria by their last element) and returns the first result
  of `get-nearest` for that target within inst using distance."
  [crit inst distance]
  (let [target (map first (sort-by last crit))]
    (first (get-nearest target inst distance))))

(defn cliff1
  "Distance-based variant of `cliff`: for each group of D, picks an
  instance via `select-instances1` and returns the unique selections.

  The first and last arguments are ignored.

  Fix: this previously called the 2-argument `select-instances` with three
  arguments, which throws an arity error as soon as the result is realised."
  [_ D distance type]
  (let [crits (get-criteria D)
        insts (mygroup D)
        ans   (map #(select-instances1 %1 %2 distance) crits insts)]
    (extract-unique ans)))

(defn cliff
  "Selects instances from each group of D with `select-instances`, stacks
  the per-group selections row-wise and returns the unique rows.

  Only D is used; the other three arguments are ignored."
  [_ D _ type]
  (let [crits    (get-criteria D)
        insts    (mygroup D)
        selected (map select-instances crits insts)
        ;; bind-rows is given matrices only, so coerce plain seqs first.
        as-mats  (mapv #(if (matrix? %) % (matrix %)) selected)]
    (extract-unique (apply bind-rows as-mats))))

(defn ocliff1
  "Returns a pruned list of instances by running `cliff` directly on data.

  Only data is used; n and the three trailing arguments are ignored."
  [n data _ _ _]
  (cliff 9 data 9 'z))

(defn ocliff
  "Returns a pruned list of instances. Designed for numeric data.

  data - original data; the last column is treated as the class.
  n    - passed to `efb2` when discretizing the independent columns
         (the number of bins, per the original documentation).
  The three trailing arguments are ignored.

  The independent columns are discretized, `cliff` is run on the
  discretized data, and every original row whose discretized counterpart
  equals a selected row is returned (unique rows, as a matrix)."
  [n data _ _ _]
  (let [class-col        (dec (ncol data))
        klass            (sel data :cols class-col)
        ind-vars         (sel data :cols (range class-col))
        discretized-data (bind-columns (trans (efb2 ind-vars n)) klass)
        cliff-data       (extract-unique (cliff 9 discretized-data 9 'z))
        ;; Discretized rows paired positionally with the original rows.
        row-pairs        (map vector discretized-data data)
        matches          (for [proto cliff-data
                               [disc-row orig-row] row-pairs
                               :when (= proto disc-row)]
                           orig-row)]
    (extract-unique (matrix matches))))