# Directory that holds weka.jar.
# NOTE(review): $Here is not assigned anywhere in the visible part of this
# file; presumably the caller sets it before sourcing -- confirm.
Java="$Here"

# WEKA launcher. Deliberately a plain string: every call site expands it
# unquoted so that it word-splits into the command and its options.
Weka="nice -n 20 java -Xmx2048M -cp $Java/weka.jar "
Seed=1

# Scratch ARFF files shared by the preprocessors and learners.
tempARFF="./temp.arff"
tempARFF2="./temp2.arff"
tempARFF3="./temp3.arff"

###########################################################################
# Experiments #

# Reset the five experiment lists to empty global arrays.
# Globals written: dataList splitList preprocessorList learnerList errorList
initializeTestArrays() {
  # Plain assignments act on the global scope. `declare -a` inside a
  # function would only create function-local arrays that disappear on
  # return, leaving the globals unset instead of initialised.
  dataList=()
  splitList=()
  preprocessorList=()
  learnerList=()
  errorList=()
}

# Populate the experiment lists for the requested task and run it.
# Arguments: $1 - task identifier; only "ee" (effort estimation) is handled.
# Globals written: task, plus everything initializeTestArrays resets.
selectExperiment() {
  task=$1

  initializeTestArrays

  # Effort Estimation
  if [ "$task" = "ee" ]; then
    # Initialize datalist (index 1 is intentionally left out)
    dataList[0]=d_albrecht
    #dataList[1]=d_china
    dataList[2]=d_cocomo81
    dataList[3]=d_cocomo81e
    dataList[4]=d_cocomo81o
    dataList[5]=d_cocomo81s
    dataList[6]=d_desharnais
    dataList[7]=d_desharnaisL1
    dataList[8]=d_desharnaisL2
    dataList[9]=d_desharnaisL3
    dataList[10]=d_finnish
    dataList[11]=d_kemerer
    dataList[12]=d_maxwell
    dataList[13]=d_miyazaki94
    dataList[14]=d_nasa93center1
    dataList[15]=d_nasa93center2
    dataList[16]=d_nasa93center5
    dataList[17]=d_sdr
    dataList[18]=d_telecom1

    # Initialize splitList
    splitList[0]=loo

    # Initialize preprocessorList
    preprocessorList[0]=pp_none
    preprocessorList[1]=pp_log
    preprocessorList[2]=pp_normalize
    preprocessorList[3]="pp_widthNbin 3"
    preprocessorList[4]="pp_widthNbin 5"
    preprocessorList[5]="pp_freqNbin 3"
    preprocessorList[6]="pp_freqNbin 5"
    preprocessorList[7]=pp_pca

    # Initialize learnerList
    learnerList[0]=l_zeroR
    learnerList[1]=l_slreg
    learnerList[2]=l_plsr
    learnerList[3]=l_nnet
    learnerList[4]="l_nNearN 1"
    learnerList[5]="l_nNearN 5"
    learnerList[6]="l_cart y"
    learnerList[7]="l_cart n"

    # Run the experiment
    runExperiment
  fi
}

# Run every data set x split x preprocessor x learner combination.
# Each list entry is a command (function name plus optional arguments) that
# selects the corresponding component; runEval then performs the run.
# Globals read:    dataList splitList preprocessorList learnerList temp files
# Globals written: currentpp, split (plus whatever the selectors set)
runExperiment() {
  local dataset_cmd split_kind pp_cmd learner_cmd

  # Iterating over values (rather than 0..count-1) also works when a list
  # has gaps in its indices.
  for dataset_cmd in "${dataList[@]}"; do
    # Entries are expanded unquoted on purpose: "pp_widthNbin 3" must split
    # into a function name and its argument.
    $dataset_cmd
    # Force the first preprocessor of each data set to be (re)applied.
    currentpp=""
    for split_kind in "${splitList[@]}"; do
      split=$split_kind
      for pp_cmd in "${preprocessorList[@]}"; do
        $pp_cmd
        for learner_cmd in "${learnerList[@]}"; do
          $learner_cmd
          runEval
        done
      done
    done
  done

  # Perform cleanup. -f: not every run creates all three scratch files, and
  # a missing one must not turn a successful experiment into a failure.
  rm -f -- "$tempARFF" "$tempARFF2" "$tempARFF3"
}

# Execute the currently selected combination unless its result file exists.
# The preprocessor is only invoked when it differs from the one that was
# last applied (tracked in currentpp, which the preprocessor itself sets).
runEval() {
  # Check if run exists
  setOutfile
  if [ ! -e "$outfile" ]; then
    echo "Performing" $data_name $split $preprocessor_name $learner_name
    if [ "$currentpp" != "$preprocessor_name" ]; then
      echo "Preprocessing"
      # Call preprocessor
      preprocessor
      echo "Done Preprocessing"
    fi
    # Call learner
    echo "Learning"
    learner
    echo "Done Learning"
  fi
}

# Measure an ARFF file.
# Arguments: $1 - path to the ARFF file
# Globals written:
#   numberInstances - count of non-empty lines after the "@data" line
#   numberFeatures  - field count of the first such line (commas treated as
#                     separators) minus one for the class; -1 if no data row
sizeData() {
  # determine size of data
  numberInstances=$(gawk 'BEGIN{ pr=0; } {if($1 == "@data") pr=1; else if (pr && $0) print $0}' "$1" | wc -l | gawk '{print $1}')
  # determine number of features not including class
  numberFeatures=$(sed 's/\,/\ /g' "$1" | gawk 'BEGIN{ pr=0; } {if($1 == "@data") pr=1; else if (pr && $0) {a=NF; pr = 0;}} END{ print a - 1;}')
}

# Filter stdin (WEKA "-p 0" prediction listing) to "actual,predicted" CSV.
# Skips the first five lines and any row whose 2nd or 3rd field is empty.
# NOTE(review): the awk truth test also drops rows where either field is
# numerically 0 -- confirm that is intended.
formatFile() {
  # First column is actual, second is predicted
  gawk '{ if (NR > 5 && $2 && $3) print $2 "," $3; }'
}

# Variant of formatFile for listings whose fields contain ':'; the colons are
# turned into spaces first, then fields 3 and 5 are emitted.
# NOTE(review): the guard tests field 7 while field 5 is printed -- confirm
# which one is intended.
formatFile2() {
  sed 's/:/\ /g' | gawk '{ if (NR > 5 && $3 && $7) print $3 "," $5; }'
}

# Compose the result path for the current combination.
# Globals read:    data_name split preprocessor_name learner_name
# Globals written: outfile
setOutfile() {
  outfile="./results2/${data_name}_${split}_${preprocessor_name}_${learner_name}.csv"
}

###########################################################################
# Data Sets #
# Each selector sets `data` (ARFF path) and `data_name` (label used in the
# result file name).

d_albrecht() {
  data="./data/albrecht.arff"
  data_name=albrecht
}

d_china() {
  data="./data/china.arff"
  data_name=china
}

d_cocomo81() {
  data="./data/cocomo81.arff"
  data_name=cocomo81
}

d_cocomo81e() {
  data="./data/cocomo81e.arff"
  data_name=cocomo81e
}

d_cocomo81o() {
  data="./data/cocomo81o.arff"
  data_name=cocomo81o
}

d_cocomo81s() {
  data="./data/cocomo81s.arff"
  data_name=cocomo81s
}

d_desharnais() {
  data="./data/desharnais.arff"
  data_name=desharnais
}

d_desharnaisL1() {
  data="./data/desharnaisL1.arff"
  data_name=desharnaisL1
}

d_desharnaisL2() {
  data="./data/desharnaisL2.arff"
  data_name=desharnaisL2
}

d_desharnaisL3() {
  data="./data/desharnaisL3.arff"
  data_name=desharnaisL3
}

d_finnish() {
  data="./data/finnish.arff"
  data_name=finnish
}

d_kemerer() {
  data="./data/kemerer.arff"
  data_name=kemerer
}

d_maxwell() {
  data="./data/maxwell.arff"
  data_name=maxwell
}

d_miyazaki94() {
  data="./data/miyazaki94.arff"
  data_name=miyazaki94
}

d_nasa93center1() {
  data="./data/nasa93_center_1.arff"
  data_name=nasa93center1
}

d_nasa93center2() {
  data="./data/nasa93_center_2.arff"
  data_name=nasa93center2
}

d_nasa93center5() {
  data="./data/nasa93_center_5.arff"
  data_name=nasa93center5
}

d_sdr() {
  data="./data/sdr.arff"
  data_name=sdr
}

d_telecom1() {
  data="./data/telecom1.arff"
  data_name=telecom1
}

###########################################################################
# Preprocessors #
# Each selector sets `preprocessor_name` and (re)defines `preprocessor`, which
# filters $data into $tempARFF and records itself in `currentpp`.

# None #
pp_none() {
  preprocessor_name=none
  preprocessor() {
    currentpp=none
    $Weka weka.filters.AllFilter -i "$data" -o "$tempARFF"
    # $data > $tempARFF
  }
}

# Logarithmic #
pp_log() {
  preprocessor_name=log
  #log(e) = 0.434294482
  preprocessor() {
    currentpp=log
    $Weka weka.filters.unsupervised.attribute.MathExpression -R last -E "ifelse(A=0,0,(log(A))/0.434294482)" -i "$data" -o "$tempARFF"
  }
}

# Normalization #
pp_normalize() {
  preprocessor_name=norm
  preprocessor() {
    currentpp=norm
    $Weka weka.filters.unsupervised.instance.Normalize -c last -i "$data" -o "$tempARFF"
  }
}

# n-Bin Equal Frequency Discretization #
# Arguments: $1 - number of bins
pp_freqNbin() {
  preprocessor_name="freq${1}bin"
  ppVar=$1
  preprocessor() {
    currentpp=$preprocessor_name
    $Weka weka.filters.unsupervised.attribute.Discretize -B "$ppVar" -F -c last -i "$data" -o "$tempARFF"
  }
}

# n-Bin Equal Width Discretization #
# Arguments: $1 - number of bins
pp_widthNbin() {
  preprocessor_name="width${1}bin"
  ppVar=$1
  preprocessor() {
    currentpp=$preprocessor_name
    $Weka weka.filters.unsupervised.attribute.Discretize -B "$ppVar" -c last -i "$data" -o "$tempARFF"
  }
}

# Principal Component Analysis #
pp_pca() {
  preprocessor_name=pca
  preprocessor() {
    currentpp=pca
    $Weka weka.filters.unsupervised.attribute.PrincipalComponents -D -c last -i "$data" -o "$tempARFF"
  }
}

###########################################################################
# Learners #
# Each learner selector sets `learner_name` and (re)defines `learner`, which
# evaluates on the preprocessed data and writes "actual,predicted" rows to
# $outfile. Only task "ee" with split "loo" is implemented; for that split
# the fold count (-x) is set to the number of instances.

# ZeroR #
l_zeroR() {
  learner_name=ZeroR
  learner() {
    # Capabilities text from WEKA
    # Class -- Date class, Numeric class, Nominal class, Missing class values, Binary class
    # Attributes -- Numeric attributes, Unary attributes, Relational attributes, Binary attributes, Date attributes, String attributes, Empty nominal attributes, Missing values, Nominal attributes
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.classifiers.rules.ZeroR -t "$tempARFF" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Simple Linear Regression #
l_slreg() {
  learner_name=SLReg
  learner() {
    # Capabilities text from WEKA
    # Class -- Date class, Missing class values, Numeric class
    # Attributes -- Date attributes, Numeric attributes
    # min # of instances: 1
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      # The learner only accepts numeric attributes, so binarise nominals first.
      $Weka weka.filters.supervised.attribute.NominalToBinary -A -c last -i "$tempARFF" -o "$tempARFF2"
      $Weka weka.classifiers.functions.SimpleLinearRegression -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Partial Least Squares Regression #
l_plsr() {
  learner_name=PlSR
  learner() {
    # Capabilities text from WEKA
    # Class -- Date class, Missing class values, Numeric class
    # Attributes -- Date attributes, Missing values, Numeric attributes
    # min # of instances: 1
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      $Weka weka.filters.supervised.attribute.NominalToBinary -A -c last -i "$tempARFF" -o "$tempARFF2"
      sizeData "$tempARFF2"

      # Try PLS with $numberFeatures components; whenever the run yields no
      # predictions, retry with one component fewer, and fall back to ZeroR
      # once no components are left.
      # The helper has to be defined BEFORE it is invoked: calling it first
      # made the very first PLSR run fail with "command not found".
      internal() {
        $Weka weka.classifiers.functions.PLSClassifier -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 -filter "weka.filters.supervised.attribute.PLSFilter -M -P none -C $numberFeatures" | formatFile > "$outfile"
        if [ ! -s "$outfile" ]; then
          numberFeatures=$((numberFeatures - 1))
          if [ "$numberFeatures" -gt 0 ]; then
            internal
          else
            $Weka weka.classifiers.rules.ZeroR -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
          fi
        fi
      }
      internal
    fi
  }
}

# Neural Net #
l_nnet() {
  learner_name=nnet
  learner() {
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.classifiers.functions.MultilayerPerceptron -N 50 -t "$tempARFF" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Analogy Based Estimation - n Nearest Neighbor #
# Arguments: $1 - number of neighbours (passed to IBk as -K)
l_nNearN() {
  learner_name="nn$1"
  lVar=$1
  learner() {
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      sizeData "$tempARFF"
      $Weka weka.classifiers.lazy.IBk -K "$lVar" -t "$tempARFF" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
    fi
  }
}

# Simple CART #
# Arguments: $1 - "y" adds the -U switch to SimpleCart, "n" omits it
l_cart() {
  learner_name="CART$1"
  lVar=$1
  learner() {
    # Capabilities text from WEKA
    # Class -- Nominal class, Binary class
    # Attributes -- Binary attributes, Missing values, Numeric attributes, Nominal attributes, Empty nominal attributes, Unary attributes
    # min # of instances: 1
    if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
      # SimpleCart needs a nominal class, so convert the numeric data first.
      $Weka weka.filters.unsupervised.attribute.NumericToNominal -i "$tempARFF" -o "$tempARFF2"
      sizeData "$tempARFF2"
      case "$lVar" in
        y)
          $Weka weka.classifiers.trees.SimpleCart -U -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 | formatFile2 > "$outfile"
          ;;
        n)
          $Weka weka.classifiers.trees.SimpleCart -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 | formatFile2 > "$outfile"
          ;;
      esac
      # No usable predictions (or no CART variant selected): fall back to
      # ZeroR on the unconverted data.
      if [ ! -s "$outfile" ]; then
        sizeData "$tempARFF"
        $Weka weka.classifiers.rules.ZeroR -t "$tempARFF" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
      fi
    fi
  }
}

###########################################################################
# WEKA Wrapper for Debug #
# Runs the WEKA launcher with all given arguments, each passed through intact
# (previously anything beyond the ninth argument was silently dropped).
weka() {
  $Weka "$@"
}

# Make override for debug #
# Re-sources comba.bash from $Here; gives up if that directory is unreachable
# instead of sourcing whatever comba.bash the current directory offers.
make() {
  cd "$Here" || return
  . comba.bash
}

# CSV to ARFF conversion #
# Arguments: $1 - path stem; reads <stem>.csv and writes <stem>.arff
c2a2() {
  local stem=$1
  $Weka weka.core.converters.CSVLoader "${stem}.csv" > "${stem}.arff"
}

# System Opening Messages #
echo "COMBA 2 by Vincent Rogers and William Sica"
echo ""
PS1="COMBA> "