#!/bin/bash
# /* vim: set filetype=sh : */ -*- sh -*-
#
# Experiment configuration: output/data locations, data sets, learners and
# pre-processors. Every setting uses ${Var=default}, so a value already
# present in the environment (including an empty one) is kept as-is.

DESTDIR=${DESTDIR="/srv/bronze/dj"}
MyHome=${MyHome="${HOME}"}

# Q0: where will the output be stored?
Safe=${Safe="${DESTDIR}/var/weka"}
BinDir=${BinDir="${MyHome}/bin"}

# Q1: where is your data?
Data=${Data="${MyHome}/var/data/discrete"}

# Q2: what data sets will we run?
# A2: only those with binary discrete classes (25 data sets are listed
#     below; the original note said 26).
#     Names are relative to $Data and are used without the .arff suffix.
Datums=${Datums="a2b/audiology a2b/auto-mpg a2b/breast-cancer \
a2b/breast-cancer-wisconsin c2d/credit-a c2d/diabetes e2i/ecoli e2i/flag \
e2i/hayes-roth e2i/heart-c e2i/heart-h e2i/hepatitis e2i/imports-85 e2i/iris \
j2p/kr-vs-kp j2p/letter j2p/mushroom q2s/segment q2s/sick q2s/splice \
q2s/soybean t2z/vowel t2z/wine t2z/wdbc t2z/waveform-5000"}
#Datum=${Datums=" a2b/breast-cancer c2d/diabetes e2i/heart-c e2i/iris
#                j2p/mushroom q2s/soybean t2z/vowel t2z/wine"}
# A2': just a couple
#Datums=" c2d/diabetes a2b/breast-cancer q2s/sonar t2z/vote "

# Q3: what learners will you try?
# Each learner is a function taking <train.arff> <test.arff>.
#Learners=${Learners="bayes nbk"}
Learners=${Learners="bayes"}

# $wBayes is deliberately left unquoted so it can expand to several words.
# NOTE(review): wBayes, wttp and gotwant are not defined in this file;
# presumably they come from the $Functions file sourced below -- confirm.
bayes() { wttp "$1" "$2" $wBayes | gotwant; }
#nbk() { wttp $1 $2 $wNbk | gotwant ; }

# Pull in shared helper definitions when $Functions names an existing file.
if [ -f "$Functions" ]; then
  . "$Functions"
fi

# Q4: what pre-processors will you use?
# Each pre-processor is a command/function taking one .arff path and writing
# the processed data to stdout.
Preps=${Preps="cat disctree3 fayyadIrani pkid tbin"}
#Preps=${Preps="cat disctreea disctreeb disctreec disctreed fayyadIrani pkid tbin"}
#Preps=${Preps="cat disctreeb2 disctreeb3 disctreeb4 fayyadIrani pkid tbin"}
#Preps=${Preps="cat disctreea disctreeb disctreec disctreed disctreee disctreef disctreeg disctreeh disctreei disctreej disctreek disctreel disctreem disctreen disctreeo disctreep fayyadIrani pkid tbin"}
# The line below used to be live, but it could never take effect: Preps is
# always set by the assignment above, and disctree2/disctree4 are commented
# out further down. Kept only as a record of that alternative.
#Preps=${Preps="disctree2 disctree3 disctree4"}

#disctreea() { dtree3 Pass=1 $1 Pass=2 $1; }
#disctree2() { dtree2 DynTree=1 Pass=1 $1 Pass=2 $1; }
# Hands the same file to dtree3 twice, once after Pass=1 and once after Pass=2.
disctree3() { dtree3 DynTree=1 Pass=1 "$1" Pass=2 "$1"; }
#disctree4() { dtree4 DynTree=1 Pass=1 $1 Pass=2 $1; }
#disctreec() { dtree3 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreed() { dtree3 DynTree=1 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreee() { dtree2 MaxSize=15 GrowthMult=2 Pass=1 $1 Pass=2 $1; }
#disctreef() { dtree2 MaxSize=15 GrowthMult=2 DynTree=1 Pass=1 $1 Pass=2 $1; }
#disctreeg() { dtree2 MaxSize=15 GrowthMult=2 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreeh() { dtree2 MaxSize=15 GrowthMult=2 DynTree=1 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreei() { dtree2 IncSize=50 MaxSize=15 GrowthMult=2 Pass=1 $1 Pass=2 $1; }
#disctreej() { dtree2 IncSize=50 MaxSize=15 GrowthMult=2 DynTree=1 Pass=1 $1 Pass=2 $1; }
#disctreek() { dtree2 IncSize=50 MaxSize=15 GrowthMult=2 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreel() { dtree2 IncSize=50 MaxSize=15 GrowthMult=2 DynTree=1 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreem() { dtree2 IncSize=32 MaxSize=15 GrowthMult=2 Pass=1 $1 Pass=2 $1; }
#disctreen() { dtree2 IncSize=32 MaxSize=15 GrowthMult=2 DynTree=1 Pass=1 $1 Pass=2 $1; }
#disctreeo() { dtree2 IncSize=32 MaxSize=15 GrowthMult=2 SizeChoice=1 Pass=1 $1 Pass=2 $1; }
#disctreep() { dtree2 IncSize=32 MaxSize=15 GrowthMult=2 DynTree=1 SizeChoice=1 Pass=1 $1 Pass=2 $1; }

# Hands the same file to tenbins three times (Pass=1, Pass=2, Pass=3).
tbin() { tenbins Pass=1 "$1" Pass=2 "$1" Pass=3 "$1"; }
# Thin wrapper so wpkid can be listed in $Preps under the name "pkid".
pkid() { wpkid "$1"; }

# Q5: how many repeats?
#Repeats=${Repeats=3}
Repeats=${Repeats=10}

# Q6: how many bins?
#Bins=${Bins=5}
Bins=${Bins=10}

# All right then. Let's go!
# Ensure the safe place exists; all results are written below it.
mkdir -p "$Safe" || { echo "cannot create $Safe" >&2; exit 1; }

# Make a sandbox where only you will play. Stop right away if it cannot be
# created or entered, so scratch files never land in some other directory.
Tmp=$(mktemp -d) || { echo "cannot create a temporary directory" >&2; exit 1; }
# Leave nothing behind when you quit: the EXIT trap removes the sandbox, and
# HUP/INT/QUIT/TERM are turned into a normal exit so that the EXIT trap runs
# and the script does not carry on inside a directory it has just deleted.
trap 'rm -rf -- "$Tmp"' EXIT
trap 'exit 1' HUP INT QUIT TERM
cd "$Tmp" || { echo "cannot enter $Tmp" >&2; exit 1; }

#######################################
# Runs the cross-validation experiment: for every data set, pre-processor,
# repeat, bin, learner and goal, prints the last line of abcd's report.
# Globals (read): Datums Preps Learners Repeats Bins Data Safe BinDir
# Outputs: one header line per data set plus one line per combination on
#          stdout; the command trace enabled by `set -x` goes to stderr.
# Files:   data.arff and results.csv in the current directory, and
#          $Safe/results.csv (overwritten on every learner run).
#######################################
main() {
  local datum prep repeats seed bin goals n1 n2 learn goal b4
  set -x
  for datum in $Datums; do
    echo "#data,learner,prep,train,test,repeats,bin,goal,a,b,c,d,accuracy,pd,pf,precision,bal"
    for prep in $Preps; do
      "$prep" "${Data}/${datum}.arff" > data.arff
      for ((repeats = 1; repeats <= Repeats; repeats++)); do
        # One seed per repeat, shared by all bins of that repeat.
        seed=$RANDOM
        for ((bin = 1; bin <= Bins; bin++)); do
          # NOTE(review): someArff presumably writes train.arff and test.arff
          # into the current directory (both are read just below) -- confirm.
          someArff --seed "$seed" --bins "$Bins" --bin "$bin" < data.arff
          goals=$(classes --brief < data.arff)
          n1=$(instances train.arff)
          n2=$(instances test.arff)
          for learn in $Learners; do
            "$learn" train.arff test.arff > results.csv
            cp results.csv "$Safe/results.csv"
            # $goals is left unquoted on purpose: one iteration per word.
            for goal in $goals; do
              b4="$datum,$learn,$prep,$n1,$n2,$repeats,$bin,$goal"
              "${BinDir}/abcd" -g "$goal" -p "${b4}" -d 1 < results.csv | tail -n 1
              #abcd Prefix="${b4}" False="${one}" True="${two}"
            done
          done
        done
      done
    done
  done
}

# The process id keeps the output files of concurrent runs apart.
Log=$$
Start=$(date +%H:%M:%S%t%m/%d/%Y)

# Results go to xval.$Log (and the terminal); stderr, including the trace
# from `set -x`, goes to xval.err.$Log.
(main | tee "$Safe/xval.$Log") 2> "$Safe/xval.err.$Log"

#cat $Safe/xval.$Log | sort -t, -n -k 17,17 | malign > $Safe/myresults.$Log.csv
doExtras "$Safe/xval.$Log" | sort -t, -n -k 18,18 | malign > "$Safe/myresults.$Log.csv"

# In the header printed by main, column 3 is prep, 13 accuracy, 14 pd,
# 16 precision and 17 bal.
echo "Computing acc comparison"
winlosstie --input "$Safe/myresults.$Log.csv" --fields 17 --perform 13 --key 3 --95 --high > "$Safe/crossvalaccmw.$Log.csv"
echo "Computing pd comparison"
winlosstie --input "$Safe/myresults.$Log.csv" --fields 17 --perform 14 --key 3 --95 --high > "$Safe/crossvalpdmw.$Log.csv"
echo "Computing prec comparison"
winlosstie --input "$Safe/myresults.$Log.csv" --fields 17 --perform 16 --key 3 --95 --high > "$Safe/crossvalprecmw.$Log.csv"
echo "Computing bal comparison"
winlosstie --input "$Safe/myresults.$Log.csv" --fields 17 --perform 17 --key 3 --95 --high > "$Safe/crossvalbalmw.$Log.csv"
echo "Computing npf comparison"
# npf comparison: ranks the values of --key column 3 on --perform column 18.
# NOTE(review): this call passes --fields 19 while the other winlosstie calls
# in this script pass --fields 17 -- confirm which is intended.
winlosstie --input "$Safe/myresults.$Log.csv" --fields 19 --perform 18 --key 3 --95 --high > "$Safe/crossvalnpfmw.$Log.csv"
#echo "Computing fmeas comparison"
#winlosstie --input $Safe/myresults.$Log.csv --fields 19 --perform 19 --key 3 --95 --high > $Safe/crossvalfmeasmw.$Log.csv

End=$(date +%H:%M:%S%t%m/%d/%Y)

# $Start and $End stay unquoted: word splitting turns the tab produced by %t
# into a single space in the printed line.
echo $Start "to" $End
# Append the timing line to the results file under $Safe. The bare relative
# name used previously resolved against the current directory, which at this
# point is the temporary sandbox removed on exit, so the line was lost.
echo $Start "to" $End >> "$Safe/myresults.$Log.csv"

# Tell the caller where the raw cross-validation output is.
echo "$Safe/xval.$Log"