(ns code.pls.ncliff
  (:use (code.pls cliff))
  (:use (code.utils utils preprocess))
  (:use (incanter core stats)))

(defn ncliff1
  "Returns a pruned list of instances. Designed for discrete data.

  Arguments:
    n      - number of bins (accepted for signature parity with ncliff;
             not used here because the data is taken as-is).
    data   - original data.
    deci-n - % of instances selected; forwarded to
             select-numeric-instances.
    The third and fifth positional arguments are ignored.

  Returns the result of extract-unique applied to the instances chosen by
  select-numeric-instances."
  [n data _ deci-n _]
  (extract-unique (select-numeric-instances data deci-n)))

(defn ncliff
  "Returns a pruned list of instances. Designed for numeric data.

  Arguments:
    n      - number of bins, forwarded to efb2 when discretizing the
             independent variables.
    data1  - original data.
    deci-n - % of instances selected; forwarded to
             select-numeric-instances.
    The third and fifth positional arguments are ignored.

  Steps:
    1. Rebuild the data set from columns 0-9, (efb column-10) and
       columns 11-20 of data1.
    2. Split off the last column as the class and discretize the
       remaining columns with efb2.
    3. Select instances on the discretized data.
    4. Map every selected discretized row back to the original
       (non-discretized) rows found at the same positions.

  Returns extract-unique applied to the matrix of matched original rows.

  NOTE(review): the column indices 10 and 11-20 are hard-coded, so data1
  must have at least 21 columns -- confirm against callers."
  [n data1 _ deci-n _]
  (let [data             (bind-columns
                          (sel data1 :cols (range 10))
                          (matrix (efb (sel data1 :cols 10)))
                          (sel data1 :cols (range 11 21)))
        class-col        (dec (ncol data))
        klass            (sel data :cols class-col)
        ind-vars         (sel data :cols (range class-col))
        discretized-data (bind-columns (trans (efb2 ind-vars n)) klass)
        cliff-data       (extract-unique
                          (select-numeric-instances discretized-data deci-n))
        ;; Walk the discretized rows and the original rows in parallel and
        ;; keep each original row whose discretized form equals `selected`.
        matching-rows    (fn [selected]
                           (remove nil?
                                   (map (fn [disc-row orig-row]
                                          (when (= selected disc-row)
                                            orig-row))
                                        discretized-data
                                        data)))
        result           (mapcat matching-rows cliff-data)]
    (extract-unique (matrix result))))