###########################################################################
# COMBA 2 experiment driver (bash).
#
# Defines the data sets, preprocessors, learners and error measures of an
# effort-estimation experiment, plus the loops that combine them.
#
# The file sets PS1 and its `make` helper re-sources comba.bash, so it is
# presumably meant to be sourced into an interactive shell (which is why no
# `set -e` style options are enabled here).
#
# Expects $Here to be set by the caller (it is not set anywhere in this
# file); it is used to locate weka.jar, supportCode/ and results/.
# External tools invoked: java (WEKA), octave, gawk, sed.
###########################################################################

Java="$Here"
# Kept as a plain string on purpose: every call site expands $Weka unquoted
# so that it word-splits into the command and its options.
Weka="nice -n 20 java -Xmx2048M -cp $Java/weka.jar "
Octave="/Applications/Octave.app/Contents/Resources/bin/octave"
Seed=1
tempARFF="./temp.arff"
tempARFF2="./temp2.arff"
tempARFF3="./temp3.arff"

###########################################################################
# Experiments #

# Reset every experiment list and every name list to an empty global array.
# Plain assignments are used instead of `declare -a`: inside a function
# `declare` creates a *local* variable, which would leave the global name
# arrays holding stale entries from a previous experiment.
initializeTestArrays() {
  dataList=()
  splitList=()
  preprocessorList=()
  learnerList=()
  errorList=()
  dNames=()
  sNames=()
  pNames=()
  lNames=()
  eNames=()
}

# Configure and run one experiment.
# Arguments: $1 - task identifier; only "ee" (effort estimation) is handled.
# Globals:   task (written; read later by the learners).
selectExperiment() {
  task=$1
  initializeTestArrays

  # Effort Estimation
  if [ "$task" = "ee" ]; then
    # Each entry names a selector function; entries with an argument are
    # stored as one string and word-split when they are invoked.
    dataList=(
      d_albrecht
      # d_china
      d_cocomo81
      d_cocomo81e
      d_cocomo81o
      d_cocomo81s
      d_desharnais
      d_desharnaisL1
      d_desharnaisL2
      d_desharnaisL3
      d_finnish
      d_kemerer
      d_maxwell
      d_miyazaki94
      d_nasa93center1
      d_nasa93center2
      d_nasa93center5
      d_sdr
      d_telecom1
    )

    splitList=(loo)

    preprocessorList=(
      pp_none
      pp_log
      pp_normalize
      "pp_widthNbin 3"
      "pp_widthNbin 5"
      "pp_freqNbin 3"
      "pp_freqNbin 5"
      pp_pca
    )

    learnerList=(
      l_zeroR
      l_slreg
      l_plsr
      l_nnet
      "l_nNearN 1"
      "l_nNearN 5"
      # "l_cart y"
      # "l_cart n" # produces broken results
    )

    # Run the experiment
    runExperiment

    errorList=(e_ar e_mre e_mer e_bre e_ibre)

    # Collect error measures
    runErrors
    echo "Errors Collected"

    # Do paired Wilcoxon signed-rank test
    #runSigtests

    # Compute results
    computeFinalResults
    echo "Done"
  fi
}

# Append a summary table to ./results.csv: one header row with a column per
# preprocessor_learner combination, then one row per data set whose cells
# are produced by sumWSRT.
# Globals: pNames, lNames, dNames, sNames (read);
#          data_name, split, preprocessor_name, learner_name (written).
computeFinalResults() {
  local results=./results.csv
  local ds sp pp lrn

  # First header cell; it belongs in the CSV itself, followed by a comma so
  # the remaining headers line up with the data columns.
  printf 'data,' >> "$results"
  for pp in "${pNames[@]}"; do
    for lrn in "${lNames[@]}"; do
      printf '%s_%s,' "$pp" "$lrn" >> "$results"
    done
  done
  echo >> "$results"

  for ds in "${dNames[@]}"; do
    data_name=$ds
    printf '%s,' "$data_name" >> "$results"
    for sp in "${sNames[@]}"; do
      split=$sp
      for pp in "${pNames[@]}"; do
        preprocessor_name=$pp
        for lrn in "${lNames[@]}"; do
          learner_name=$lrn
          sumWSRT
        done
      done
    done
    echo >> "$results"
  done
}

# Count the 1 / 0 / -1 entries in the first column of one .mww file and add
# them to the running wins / ties / losses totals.
# Arguments: $1 - file to tally.
tallyWTL() {
  # The counters start at zero here; sumWTL does the accumulation.  Seeding
  # gawk with the running totals as well would count them twice.
  # shellcheck disable=SC2046 # the three counts must split into $1 $2 $3
  sumWTL $(gawk -v w=0 -v t=0 -v l=0 \
    '{if($1 == 1){ w = w + 1;} if($1 == 0){ t = t + 1;} if($1 == -1){ l = l + 1; }} END{print w " " t " " l}' \
    "$1")
}

# Append one cell (losses / (wins + ties + losses), followed by a comma) to
# ./results.csv for the current data_name / split / preprocessor_name /
# learner_name, tallied over every error measure in eNames.  For "mre" the
# ".mwwmed" and ".mwwpred" files are tallied as well.
# Globals: wins, ties, losses, sumwlt, error_name (written).
sumWSRT() {
  local prefix="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}_"
  local err mww

  wins=0
  ties=0
  losses=0
  for err in "${eNames[@]}"; do
    error_name=$err
    mww="${prefix}${error_name}.mww"
    tallyWTL "$mww"
    if [ "$err" = "mre" ]; then
      tallyWTL "${mww}med"
      tallyWTL "${mww}pred"
    fi
  done

  sumwlt=$(( wins + ties + losses ))
  # The cell is left empty when nothing was tallied (avoids dividing by 0).
  printf '%s,' \
    "$(gawk -v l="$losses" -v s="$sumwlt" 'BEGIN { if (s > 0) print l / s }')" \
    >> ./results.csv
}

# Add $1 / $2 / $3 to the global wins / ties / losses counters.
# Missing arguments count as 0.
sumWTL() {
  wins=$(( ${1:-0} + wins ))
  ties=$(( ${2:-0} + ties ))
  losses=$(( ${3:-0} + losses ))
}

# Run the Wilcoxon comparisons between (preprocessor, learner) combinations
# for every data set, split and error measure.  For "mre" the median and
# pred25 variants are run too, writing to files suffixed "med" and "pred".
# NOTE(review): the pair enumeration (second preprocessor index >= first,
# second learner index >= first) includes each combination against itself
# and omits some cross pairs; kept as found -- confirm this is intended.
runSigtests() {
  local ds sp err
  local pCount lCount p1 p2 l1 l2
  local f1 w1 w2

  for ds in "${dNames[@]}"; do
    data_name=$ds
    for sp in "${sNames[@]}"; do
      split=$sp
      for err in "${eNames[@]}"; do
        error_name=$err
        pCount=${#pNames[@]}
        lCount=${#lNames[@]}
        for (( p1 = 0; p1 < pCount; p1++ )); do
          for (( p2 = p1; p2 < pCount; p2++ )); do
            for (( l1 = 0; l1 < lCount; l1++ )); do
              for (( l2 = l1; l2 < lCount; l2++ )); do
                # First combination: error file and .mww file.
                preprocessor_name=${pNames[p1]}
                learner_name=${lNames[l1]}
                setOutfile3
                setOutfileW
                f1=$outfile3
                w1=$outfileW

                # Second combination; its error file stays in $outfile3.
                preprocessor_name=${pNames[p2]}
                learner_name=${lNames[l2]}
                setOutfile3
                setOutfileW
                w2=$outfileW

                echo "$w1" "$w2" # for debug
                wilcoxonSRT "$f1" "$outfile3" "$w1" "$w2"
                if [ "$error_name" = "mre" ]; then
                  wilcoxonSRTmd "$f1" "$outfile3" "${w1}med" "${w2}med"
                  wilcoxonSRTpred25 "$f1" "$outfile3" "${w1}pred" "${w2}pred"
                fi
              done
            done
          done
        done
      done
    done
  done
}

# Compute every configured error measure for every data set / split /
# preprocessor / learner combination and record the short names of all list
# entries in dNames / sNames / pNames / lNames / eNames.
# The lists are walked by value, so gaps in their indices (for example a
# commented-out entry) neither repeat nor drop an entry, and the name arrays
# are filled densely.
# Globals: split, infile (written); the selectors write the *_name values.
runErrors() {
  local ds sp pp lrn err
  local dIdx=0 sIdx pIdx lIdx eIdx

  for ds in "${dataList[@]}"; do
    # Entries are invoked unquoted so "name arg" splits into command + arg.
    $ds
    dNames[dIdx]=$data_name
    sIdx=0
    for sp in "${splitList[@]}"; do
      split=$sp
      sNames[sIdx]=$split
      pIdx=0
      for pp in "${preprocessorList[@]}"; do
        $pp
        pNames[pIdx]=$preprocessor_name
        lIdx=0
        for lrn in "${learnerList[@]}"; do
          $lrn
          lNames[lIdx]=$learner_name
          # The learner's prediction file is the input of every measure.
          setOutfile
          infile=$outfile
          eIdx=0
          for err in "${errorList[@]}"; do
            $err
            eNames[eIdx]=$error_name
            errorEval
            eIdx=$(( eIdx + 1 ))
          done
          lIdx=$(( lIdx + 1 ))
        done
        pIdx=$(( pIdx + 1 ))
      done
      sIdx=$(( sIdx + 1 ))
    done
    dIdx=$(( dIdx + 1 ))
  done
}

# Run every data set / split / preprocessor / learner combination through
# runEval, then remove the temporary ARFF files.
# Globals: currentpp (reset per data set), split (written).
runExperiment() {
  local ds sp pp lrn

  # Every result file is written below ./results.
  mkdir -p ./results

  for ds in "${dataList[@]}"; do
    $ds
    # Force the first preprocessor of each data set to run.
    currentpp=""
    for sp in "${splitList[@]}"; do
      split=$sp
      for pp in "${preprocessorList[@]}"; do
        $pp
        for lrn in "${learnerList[@]}"; do
          $lrn
          runEval
        done
      done
    done
  done

  # Perform cleanup; -f because not every run creates all three files.
  rm -f -- "$tempARFF" "$tempARFF2" "$tempARFF3"
}

# Compute the current error measure unless its output file already exists.
errorEval() {
  # Check if run exists
  setOutfile2
  if [ ! -e "$outfile2" ]; then
    errorMeasure
  fi
}

# Run the current preprocessor (only when it differs from the one that last
# ran) and the current learner, unless the result file already exists.
runEval() {
  # Check if run exists
  setOutfile
  if [ ! -e "$outfile" ]; then
    echo "Performing" "$data_name" "$split" "$preprocessor_name" "$learner_name"
    if [ "$currentpp" != "$preprocessor_name" ]; then
      echo "Preprocessing"
      # Call preprocessor
      preprocessor
      echo "Done Preprocessing"
    fi
    # Call learner
    echo "Learning"
    learner
    echo "Done Learning"
  fi
}

# Measure an ARFF file.
# Arguments: $1 - path of the ARFF file.
# Globals:   numberInstances - count of lines after "@data" that gawk
#                              treats as true (non-empty, not a bare 0);
#            numberFeatures  - field count of the first such line, once
#                              commas are turned into spaces, minus one
#                              (the class column).
sizeData() {
  # determine size of data
  numberInstances=$(gawk '
    $1 == "@data" { pr = 1; next }
    pr && $0 { n++ }
    END { print n + 0 }' "$1")
  # determine number of features not including class
  numberFeatures=$(sed 's/,/ /g' "$1" \
    | gawk 'BEGIN{ pr=0; } {if($1 == "@data") pr=1; else if (pr && $0) {a=NF; pr = 0;}} END{ print a - 1;}')
}

# Filter stdin to "actual,predicted" lines: fields 2 and 3 of every line
# after the fifth.  Lines where either field is empty or zero are dropped.
formatFile() {
  # First column is actual, second is predicted
  gawk '{ if (NR > 5 && $2 && $3) print $2 "," $3; }'
}

# Variant of formatFile used for SimpleCart output: colons are turned into
# spaces first, then fields 3 and 5 are printed.
# NOTE(review): the guard tests field 7 but field 5 is printed -- confirm.
formatFile2() {
  sed 's/:/ /g' | gawk '{ if (NR > 5 && $3 && $7) print $3 "," $5; }'
}

# outfile: prediction file of the current combination.
setOutfile() {
  outfile="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}.csv"
}

# outfile2: error-measure file of the current combination.
setOutfile2() {
  outfile2="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}_${error_name}.csv"
}

# outfile3: same file as outfile2 but rooted at $Here instead of "." .
setOutfile3() {
  outfile3="${Here}/results/${data_name}_${split}_${preprocessor_name}_${learner_name}_${error_name}.csv"
}

# outfileW: Wilcoxon result file (.mww) of the current combination.
setOutfileW() {
  outfileW="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}_${error_name}.mww"
}

###########################################################################
# Data Sets #
# Each selector sets data (ARFF path) and data_name (short name used in
# result file names).

d_albrecht() {
  data="./data/albrecht.arff"
  data_name=albrecht
}

d_china() {
  data="./data/china.arff"
  data_name=china
}

d_cocomo81() {
  data="./data/cocomo81.arff"
  data_name=cocomo81
}

d_cocomo81e() {
  data="./data/cocomo81e.arff"
  data_name=cocomo81e
}

d_cocomo81o() {
  data="./data/cocomo81o.arff"
  data_name=cocomo81o
}

d_cocomo81s() {
  data="./data/cocomo81s.arff"
  data_name=cocomo81s
}

d_desharnais() {
  data="./data/desharnais.arff"
  data_name=desharnais
}

d_desharnaisL1() {
  data="./data/desharnaisL1.arff"
  data_name=desharnaisL1
}

d_desharnaisL2() {
  data="./data/desharnaisL2.arff"
  data_name=desharnaisL2
}

d_desharnaisL3() {
  data="./data/desharnaisL3.arff"
  data_name=desharnaisL3
}

d_finnish() {
  data="./data/finnish.arff"
  data_name=finnish
}

d_kemerer() {
  data="./data/kemerer.arff"
  data_name=kemerer
}

d_maxwell() {
  data="./data/maxwell.arff"
  data_name=maxwell
}

d_miyazaki94() {
  data="./data/miyazaki94.arff"
  data_name=miyazaki94
}

d_nasa93center1() {
  data="./data/nasa93_center_1.arff"
  data_name=nasa93center1
}

d_nasa93center2() {
  data="./data/nasa93_center_2.arff"
  data_name=nasa93center2
}

d_nasa93center5() {
  data="./data/nasa93_center_5.arff"
  data_name=nasa93center5
}

d_sdr() {
  data="./data/sdr.arff"
  data_name=sdr
}

d_telecom1() {
  data="./data/telecom1.arff"
  data_name=telecom1
}

###########################################################################
# Preprocessors #
# Each selector sets preprocessor_name and (re)defines preprocessor(), which
# filters $data into $tempARFF and records itself in currentpp.

# None #
pp_none() {
  preprocessor_name=none
  preprocessor() {
    currentpp=none
    $Weka weka.filters.AllFilter -i "$data" -o "$tempARFF"
    # $data > $tempARFF
  }
}

# Logarithmic #
pp_log() {
  preprocessor_name=log
  # 0.434294482 is log10(e)
  preprocessor() {
    currentpp=log
    $Weka weka.filters.unsupervised.attribute.MathExpression -R last \
      -E "ifelse(A=0,0,(log(A))/0.434294482)" -i "$data" -o "$tempARFF"
  }
}

# Normalization #
pp_normalize() {
  preprocessor_name=norm
  preprocessor() {
    currentpp=norm
    $Weka weka.filters.unsupervised.instance.Normalize -c last \
      -i "$data" -o "$tempARFF"
  }
}

# n-Bin Equal Frequency Discretization #
# Arguments: $1 - number of bins.
pp_freqNbin() {
  preprocessor_name="freq${1}bin"
  ppVar=$1
  preprocessor() {
    currentpp=$preprocessor_name
    $Weka weka.filters.unsupervised.attribute.Discretize -B "$ppVar" -F \
      -c last -i "$data" -o "$tempARFF"
  }
}

# n-Bin Equal Width Discretization #
# Arguments: $1 - number of bins.
pp_widthNbin() {
  preprocessor_name="width${1}bin"
  ppVar=$1
  preprocessor() {
    currentpp=$preprocessor_name
    $Weka weka.filters.unsupervised.attribute.Discretize -B "$ppVar" \
      -c last -i "$data" -o "$tempARFF"
  }
}

# Principal Component Analysis #
pp_pca() {
  preprocessor_name=pca
  preprocessor() {
    currentpp=pca
    $Weka weka.filters.unsupervised.attribute.PrincipalComponents -D \
      -c last -i "$data" -o "$tempARFF"
  }
}

###########################################################################
# Learners #
# Each selector sets learner_name and (re)defines learner(), which reads the
# preprocessed $tempARFF and writes "actual,predicted" lines to $outfile.
# Only task "ee" with split "loo" is implemented: cross-validation is run
# with as many folds (-x) as sizeData counted instances.

# ZeroR #
l_zeroR() {
  learner_name=ZeroR
  learner() {
    # Capabilities text from WEKA
    # Class -- Date class, Numeric class, Nominal class, Missing class values, Binary class
    # Attributes -- Numeric attributes, Unary attributes, Relational attributes, Binary attributes, Date attributes, String attributes, Empty nominal attributes, Missing values, Nominal attributes
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.classifiers.rules.ZeroR -t "$tempARFF" -s "$Seed" \
        -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Simple Linear Regression #
l_slreg() {
  learner_name=SLReg
  learner() {
    # Capabilities text from WEKA
    # Class -- Date class, Missing class values, Numeric class
    # Attributes -- Date attributes, Numeric attributes
    # min # of instances: 1
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.filters.supervised.attribute.NominalToBinary -A -c last \
        -i "$tempARFF" -o "$tempARFF2"
      $Weka weka.classifiers.functions.SimpleLinearRegression -t "$tempARFF2" \
        -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Partial Least Squares Regression #
l_plsr() {
  learner_name=PlSR
  learner() {
    # Capabilities text from WEKA
    # Class -- Date class, Missing class values, Numeric class
    # Attributes -- Date attributes, Missing values, Numeric attributes
    # min # of instances: 1
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      # Defined before its first call.  Runs PLS with $numberFeatures
      # components; while the result file is empty it retries with one
      # component fewer, and falls back to ZeroR once none are left.
      internal() {
        $Weka weka.classifiers.functions.PLSClassifier -t "$tempARFF2" \
          -s "$Seed" -x "$numberInstances" -p 0 \
          -filter "weka.filters.supervised.attribute.PLSFilter -M -P none -C $numberFeatures" \
          | formatFile > "$outfile"
        if [ ! -s "$outfile" ]; then
          numberFeatures=$(( numberFeatures - 1 ))
          if [ "$numberFeatures" -gt 0 ]; then
            internal
          else
            $Weka weka.classifiers.rules.ZeroR -t "$tempARFF2" -s "$Seed" \
              -x "$numberInstances" -p 0 | formatFile > "$outfile"
          fi
        fi
      }

      $Weka weka.filters.supervised.attribute.NominalToBinary -A -c last \
        -i "$tempARFF" -o "$tempARFF2"
      sizeData "$tempARFF2"
      internal
    fi
  }
}

# Neural Net #
l_nnet() {
  learner_name=nnet
  learner() {
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.classifiers.functions.MultilayerPerceptron -N 50 \
        -t "$tempARFF" -s "$Seed" -x "$numberInstances" -p 0 \
        | formatFile > "$outfile"
    fi
  }
}

# Analogy Based Estimation - n Nearest Neighbor #
# Arguments: $1 - number of neighbours (passed to IBk as -K).
l_nNearN() {
  learner_name="nn${1}"
  lVar=$1
  learner() {
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.classifiers.lazy.IBk -K "$lVar" -t "$tempARFF" -s "$Seed" \
        -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Simple CART #
# Arguments: $1 - "y" runs SimpleCart with -U, "n" without it.
l_cart() {
  learner_name="CART${1}"
  lVar=$1
  learner() {
    # Capabilities text from WEKA
    # Class -- Nominal class, Binary class
    # Attributes -- Binary attributes, Missing values, Numeric attributes, Nominal attributes, Empty nominal attributes, Unary attributes
    # min # of instances: 1
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      $Weka weka.filters.unsupervised.attribute.NumericToNominal \
        -i "$tempARFF" -o "$tempARFF2"
      sizeData "$tempARFF2"
      if [ "$lVar" = "y" ]; then
        $Weka weka.classifiers.trees.SimpleCart -U -t "$tempARFF2" -s "$Seed" \
          -x "$numberInstances" -p 0 | formatFile2 > "$outfile"
      fi
      if [ "$lVar" = "n" ]; then
        $Weka weka.classifiers.trees.SimpleCart -t "$tempARFF2" -s "$Seed" \
          -x "$numberInstances" -p 0 | formatFile2 > "$outfile"
      fi
      # No usable output: fall back to ZeroR on the untransformed data.
      if [ ! -s "$outfile" ]; then
        sizeData "$tempARFF"
        $Weka weka.classifiers.rules.ZeroR -t "$tempARFF" -s "$Seed" \
          -x "$numberInstances" -p 0 | formatFile > "$outfile"
      fi
    fi
  }
}

###########################################################################
# Error Calculators #
# Each selector sets error_name and (re)defines errorMeasure(), which turns
# the "actual,predicted" lines of $infile into one error value per line in
# $outfile2.

# Shared body of every errorMeasure.
# Arguments: $1 - gawk program; it sees actual as $1 and predicted as $2.
# NOTE(review): rawfile is not set anywhere in this file.  While it is
# unset the original test `[ -N $rawfile ]` was always true, so the measure
# always ran; that outcome is preserved explicitly here.  errorEval already
# skips measures whose output file exists.
applyErrorFormula() {
  if [ ! -e "$outfile" ] || [ -z "$rawfile" ] || [ -N "$rawfile" ]; then
    sed 's/,/ /g' "$infile" | gawk "$1" > "$outfile2"
  fi
}

# Absolute Residual Error #
e_ar() {
  error_name=ar
  errorMeasure() {
    applyErrorFormula '{if($1 && $2) a = $1 - $2; print (a >= 0) ? a : -a; }'
  }
}

# Magnitude of Relative Error #
e_mre() {
  error_name=mre
  errorMeasure() {
    applyErrorFormula '{if($1 && $2) a = $1 - $2; print (a >= 0) ? a / $1 : -a / $1; }'
  }
}

# Magnitude of Error Relative to the Estimate #
e_mer() {
  error_name=mer
  errorMeasure() {
    applyErrorFormula '{if($1 && $2) a = $1 - $2; print (a >= 0) ? a / $2 : -a / $2; }'
  }
}

# Balanced Relative Error #
e_bre() {
  error_name=bre
  errorMeasure() {
    applyErrorFormula '{if($1 && $2) a = $1 - $2; if ($1 > $2) { print (a >= 0) ? a / $2 : -a / $2;} else {print (a >= 0) ? a / $1 : -a / $1;}}'
  }
}

# Inverted Balanced Relative Error #
e_ibre() {
  error_name=ibre
  errorMeasure() {
    applyErrorFormula '{if($1 && $2) a = $1 - $2; if ($1 < $2) { print (a >= 0) ? a / $2 : -a / $2;} else {print (a >= 0) ? a / $1 : -a / $1;}}'
  }
}

###########################################################################
# Wilcoxon Signed-Rank Tests #

# Shared driver of the three Wilcoxon wrappers below.
# Arguments: $1 - Octave function to call (looked up under
#                 $Here/supportCode)
#            $2 - X values file
#            $3 - Y values file
#            $4 - file that receives the negated result
#            $5 - file that receives the result as printed
# The result is the third field of Octave's output.  Nothing is appended and
# 1 is returned when that field is not an integer, so a failed Octave run
# cannot be recorded as a tie.
# NOTE(review): the original comments read "1 is loss, 0 is tie, -1 is win",
# while sumWSRT counts 1 as a win -- confirm which convention is intended.
runWilcoxonTest() {
  local octaveFn=$1 xFile=$2 yFile=$3 negatedOut=$4 plainOut=$5
  local call="${octaveFn}(\"${xFile}\",\"${yFile}\")"
  local codeDir="${Here}/supportCode"
  local verdict

  verdict=$(printf '%s\n' "$call" \
    | "$Octave" -q --path "$codeDir" \
    | gawk '{print $3}')
  if ! [[ $verdict =~ ^-?[0-9]+$ ]]; then
    echo "$octaveFn: unexpected Octave output for $xFile vs $yFile" >&2
    return 1
  fi
  echo "$verdict" >> "$plainOut"
  echo "$(( 0 - verdict ))" >> "$negatedOut"
}

# Wilcoxon Signed-Rank Test mean comparison #
# $1 = X values file, $2 = Y values file, $3/$4 = result files for X / Y.
wilcoxonSRT() {
  runWilcoxonTest wilcoxonSRT "$1" "$2" "$3" "$4"
}

# Wilcoxon Signed-Rank Test median comparison #
# $1 = X values file, $2 = Y values file, $3/$4 = result files for X / Y.
wilcoxonSRTmd() {
  runWilcoxonTest wilcoxonSRTmed "$1" "$2" "$3" "$4"
}

# Wilcoxon Signed-Rank Test pred25 comparison #
# $1 = X values file, $2 = Y values file, $3/$4 = result files for X / Y.
wilcoxonSRTpred25() {
  runWilcoxonTest wilcoxonSRTpred25 "$1" "$2" "$3" "$4"
}

###########################################################################
# WEKA Wrapper for Debug #
# Runs $Weka with all given arguments, each forwarded as one word.
weka() {
  $Weka "$@"
}

# Make override for debug #
# Changes to $Here and re-sources comba.bash.  Shadows the `make` command
# in the shell this file is sourced into.
make() {
  if [ -z "$Here" ]; then
    echo "make: Here is not set" >&2
    return 1
  fi
  cd "$Here" || return
  . comba.bash
}

# CSV to ARFF conversion #
# Arguments: $1 - path without extension; reads $1.csv, writes $1.arff.
c2a2() {
  local base=$1
  local converter="weka.core.converters.CSVLoader"
  $Weka "$converter" "${base}.csv" > "${base}.arff"
}

# System Opening Messages #
echo "COMBA 2 by Vincent Rogers and William Sica"
echo ""
PS1="COMBA> "