#!/bin/bash
# ColumnPruner with M5P as the target learner, but without logging.
#
# Inputs taken from the calling environment (none of them is defined here):
#   Weka          expanded unquoted as the command that launches Weka classes
#                 (it may consist of several words, e.g. a java invocation).
#   Num           first argument handed to arffWithNums.
#   Seed          value handed to the wrapper's -R option.
#   arffWithNums  command or function; its stdout is captured as the
#                 converted ARFF file.
#   ./attributes  its output for trainN.arff is used as the index of the
#                 last attribute.
#
# Files written in the current directory:
#   testN.arff, trainN.arff, M5PWrapperNoLogResults,
#   M5PWrapperNoLog_Train.arff, M5PWrapperNoLog_Test.arff
#
# NOTE(review): no `set -e` / `exit` is used because this fragment relies on
# names supplied by its caller and may be sourced -- confirm before adding.

#######################################
# Build the attribute range handed to Weka's Remove filter.
#
# Reads the first "Selected attributes:" line of a wrapper results file
# (for example "Selected attributes: 5,1,3 : 3"), sorts the listed indices
# numerically and appends the last two attribute indices. Those two are
# always emitted exactly once, even if the wrapper ignored or listed them.
#
# Arguments:
#   $1 - path of the wrapper results file
#   $2 - index of the last attribute (integer >= 2)
# Outputs:
#   Writes the comma-separated range (e.g. "1,3,5,16,17") to stdout.
# Returns:
#   0 on success; 1 if $2 is not a usable index or the results file holds
#   no "Selected attributes:" line (nothing is written to stdout then).
#######################################
_m5p_wrapper_attribute_range() {
  local results_file=$1
  local last=${2//[[:space:]]/}
  local keep line list attr
  local -a picked=()

  if ! [[ $last =~ ^[0-9]+$ ]] || (( 10#$last < 2 )); then
    printf 'invalid last attribute index: "%s"\n' "$2" >&2
    return 1
  fi
  last=$(( 10#$last ))
  keep=$(( last - 1 ))

  line=$(grep -m 1 -- 'Selected attributes:' "$results_file") || return 1

  list=${line#*Selected attributes:}  # drop the label
  list=${list%%:*}                    # drop the trailing " : <count>"
  list=${list//[[:space:]]/}          # drop any remaining blanks

  while IFS= read -r attr; do
    [[ $attr =~ ^[0-9]+$ ]] || continue
    attr=$(( 10#$attr ))
    # The last two attributes are appended below; skip them here so that
    # they never show up twice in the range.
    if (( attr == keep || attr == last )); then
      continue
    fi
    picked+=("$attr")
  done < <(tr ',' '\n' <<<"$list" | sort -n)

  picked+=("$keep" "$last")

  local IFS=,
  printf '%s\n' "${picked[*]}"
}

ColumnPruner="M5PWrapperNoLog"

arffWithNums "$Num" test.arff > testN.arff
arffWithNums "$Num" train.arff > trainN.arff

# Run the wrapper.
# $Weka is deliberately left unquoted so that it can expand to several words.
# shellcheck disable=SC2086
$Weka weka.attributeSelection.WrapperSubsetEval \
  -S "weka.attributeSelection.BestFirst -D 1 -N 5" \
  -I trainN.arff \
  -B weka.classifiers.trees.M5P \
  -F 5 -T 0.01 -R "$Seed" -- -M 4.0 \
  > M5PWrapperNoLogResults

LastAttribute=$(./attributes trainN.arff)

# This is required to ensure that the last two attributes (in this case KLOC
# and ActualEffort) are included. KLOC is sometimes ignored by the wrapper,
# which is unacceptable: the wrapper has no concept of KLOC as a special
# attribute.
if M5PWrapperNoLogAttributes=$(
  _m5p_wrapper_attribute_range M5PWrapperNoLogResults "$LastAttribute"
); then
  # Filter the attributes: the same range is applied to the training and the
  # test set so that both end up with identical columns.
  # shellcheck disable=SC2086
  $Weka weka.filters.unsupervised.attribute.Remove \
    -R "$M5PWrapperNoLogAttributes" -V \
    -i trainN.arff -o "${ColumnPruner}_Train.arff"
  # shellcheck disable=SC2086
  $Weka weka.filters.unsupervised.attribute.Remove \
    -R "$M5PWrapperNoLogAttributes" -V \
    -i testN.arff -o "${ColumnPruner}_Test.arff"
else
  printf '%s: could not determine the selected attributes; skipping the Remove filter\n' \
    "$ColumnPruner" >&2
fi