# At class on Tuesday, the following technology was
# identified as required for Project2.
#   1) running jrip
#        see jrip and jrip10 in minerc
#   2) 1R discretization
#   3) c45rules
#        I can't find c45rules in weka.
#        Use "PART" instead:
#          weka.classifiers.rules.PART -M 2 -C 0.25 -Q 1
#        For more on "PART", see witten&frank p409
#   4) script for the wrapper
#        see the demo below
###################################
# to use this demo
#   copy this file to $HOME/opt/our/minerc
#   bash our minerrc
#   . fssdemo
#   worker1001
###################################
#
# This file is meant to be sourced, so it deliberately sets no shell options
# (they would leak into the caller's shell).
#
# Names used here but defined elsewhere (presumably by minerc -- confirm):
#   variables: Weka, Tmp, Here, Safe
#   commands:  blab, blabln, discretizeViaFayyadIrani, removeAttributes,
#              makeTrainTest, classes, gotwant, abcd, and each learner (nb)

#######################################
# Takes some input arff file $1 and re-orders the attributes, left to
# right, most interesting to most dull, ranking them via infoGain.
# Globals:   Weka (read)
# Arguments: $1 - input arff file
# Outputs:   the re-ordered arff on stdout; also leaves tmp.arff in the
#            current directory
#######################################
rankViaInfoGain() {
  blab "<"
  # $Weka is intentionally unquoted: it is used as a command prefix and
  # appears to hold several words (assumption -- confirm against minerc).
  $Weka weka.filters.supervised.attribute.AttributeSelection \
    -S "weka.attributeSelection.Ranker -T -2.7976931348623157E308 -N -1" \
    -E "weka.attributeSelection.InfoGainAttributeEval" \
    -i "$1" -o tmp.arff
  cat tmp.arff
}

#######################################
# Same contract as rankViaInfoGain, but selects attributes with the
# wrapper method (WrapperSubsetEval around NaiveBayes, BestFirst search).
# Warning: slow!
# Globals:   Weka (read)
# Arguments: $1 - input arff file
# Outputs:   the filtered arff on stdout; also leaves tmp.arff in the
#            current directory
#######################################
rankViaWrapper() {
  blab "<"
  # $Weka is intentionally unquoted (see rankViaInfoGain).
  $Weka weka.filters.supervised.attribute.AttributeSelection \
    -E "weka.attributeSelection.WrapperSubsetEval -B weka.classifiers.bayes.NaiveBayes -F 5 -T 0.01 -R 1" \
    -S "weka.attributeSelection.BestFirst -D 1 -N 5" \
    -i "$1" -o tmp.arff
  cat tmp.arff
}

#######################################
# Replaces every numeric attribute value in an arff stream by its natural
# log. Values below 0.00001 are first clamped to 0.00001; cells containing
# "?" are left untouched. Lines containing "@" are copied through as-is.
# Data rows with two or fewer fields are dropped.
# Arguments: none (reads the arff from stdin)
# Outputs:   the transformed arff on stdout, data rows comma-separated
#######################################
logNumbers() {
  gawk '
    BEGIN { Min = 0.00001; OFS = ","; IGNORECASE = 1 }

    # Count attributes; remember the positions of the numeric ones.
    /@attribute/ { Attr++ }
    /@attribute/ && $3 ~ /numeric|real|integer/ { Num[Attr] = Attr }

    # From the line after @data onwards, split records on commas.
    /@data/ { In = 1; FS = "," }

    # Header lines are copied verbatim.
    /@/ { print; next }

    In && NF > 2 {
      for (I = 1; I <= Attr; I++)
        if (I in Num)
          if ($I !~ /\?/) {
            if (($I + 0) < Min) { Bad = 1 } else { Bad = 0 }
            if (Bad) $I = Min
            $I = log($I)
          }
      print $0
    }
  ' -
}

#### some workers

#######################################
# Demo experiment: for the diabetes data set, rank attributes via infoGain
# on the raw and the discretized data, keep progressively fewer attributes
# (removeAttributes is called with 6, 4 and 2), and for each configuration
# run every learner over repeated train/test splits, reporting abcd
# statistics per class.
# Globals:   Tmp (working directory), Here (data root and final
#            directory), Safe (log directory) -- all read
# Arguments: none
# Outputs:   csv result rows on stdout, duplicated to
#            $Safe/worker1001.log
# Returns:   1 if $Tmp cannot be entered; otherwise the status of the
#            final cd to $Here
#######################################
worker1001() {
  # Bail out rather than scatter scratch files in whatever directory the
  # caller happens to be in.
  cd "$Tmp" || return 1

  local bins=2
  local repeats=2
  local learners="nb"
  local data="$Here/minerc.lib/arffs/uci/discrete/diabetes.arff"
  local one file stem x Attrs R Seed bin goals Learner goal

  # $data, $learners and $goals are intentionally unquoted below: each is
  # a whitespace-separated list.
  for one in $data; do
    cp -- "$one" raw.arff
    file=$(basename "$one")
    stem=${file%%.*}

    logNumbers < raw.arff > logged.arff
    discretizeViaFayyadIrani raw.arff > discrete.arff
    discretizeViaFayyadIrani logged.arff > loggedDiscrete.arff

    for x in raw discrete; do # logged loggedDiscrete; do
      rankViaInfoGain "$x.arff" > ranked.arff
      #rankViaWrapper "$x.arff" > ranked.arff

      for Attrs in 6 4 2; do
        # removes all attributes from $Attrs to 8
        removeAttributes "$Attrs" 8 ranked.arff > "ranked${Attrs}.arff"
        blab "$stem $x $Attrs "
        echo "#file,x,attrs,bin,learner,goal,a,b,c,d,acc,pd,pf,prec,g"

        for ((R = 1; R <= repeats; R++)); do
          Seed=$RANDOM
          for ((bin = 1; bin <= bins; bin++)); do
            blab "$bin"
            # NOTE(review): the literal "10 1" is passed regardless of
            # $bins/$bin -- confirm against makeTrainTest that this is
            # intended.
            makeTrainTest "$Seed" 10 1 < "ranked${Attrs}.arff"
            goals=$(classes --brief < test.arff)

            # Iterate over the local learner list declared above (the
            # loop used to read an unrelated, unset name and so ran
            # zero learners).
            for Learner in $learners; do
              "$Learner" train.arff test.arff | gotwant > results.dat
              for goal in $goals; do
                abcd --goal "$goal" \
                  --prefix "$file,$x,$Attrs,$bin,$Learner,$goal" \
                  --decimals 1 < results.dat
              done
            done
          done
        done
        blabln
      done
    done
  done | tee "$Safe/worker1001.log"

  cd "$Here"
}