# Directory that holds weka.jar. $Here is not assigned in this part of the
# file, so it must already be set when these lines run.
Java="$Here"
# Command prefix for every WEKA invocation: reduced scheduling priority, a
# 2048 MB Java heap and weka.jar on the classpath. Callers expand it unquoted
# so that it splits into separate words.
Weka="nice -n 20 java -Xmx2048M -cp $Java/weka.jar "
# Octave interpreter used by the Wilcoxon signed-rank test wrappers.
Octave="/Applications/Octave.app/Contents/Resources/bin/octave"
# Value passed to WEKA classifiers through their -s option.
Seed=1
# Scratch ARFF files in the current directory: preprocessors write
# $tempARFF, some learners write $tempARFF2, and runExperiment deletes all
# three when it finishes.
tempARFF="./temp.arff"
tempARFF2="./temp2.arff"
tempARFF3="./temp3.arff"

###########################################################################
# Experiments #
# Reset every experiment array to an empty, global indexed array.
#   dataList, splitList, preprocessorList, learnerList, errorList
#       hold the function names (plus arguments) that make up an experiment.
#   dNames, sNames, pNames, lNames, eNames
#       hold the short names that runErrors records for result file names.
initializeTestArrays()
{
	# Plain "name=()" assignments are used on purpose: "declare -a" inside a
	# function creates function-local variables that disappear on return, so
	# it can neither create nor clear the arrays the other functions use.
	dataList=()
	splitList=()
	preprocessorList=()
	learnerList=()
	errorList=()

	# The name arrays are cleared as well so that entries recorded by an
	# earlier experiment cannot leak into the next one.
	dNames=()
	sNames=()
	pNames=()
	lNames=()
	eNames=()
}

# Configure and run the experiment named by $1.
#   $1 - experiment identifier, stored in the global $task. Only "ee"
#        (effort estimation) is defined; for any other value the arrays are
#        reset and nothing else happens.
selectExperiment()
{
	task=$1
	initializeTestArrays

	if [ "$task" != "ee" ]; then
		return 0
	fi

	# Data sets. Slot 1 (d_china) is disabled, so the array keeps the sparse
	# indices 0,2..18.
	dataList=(
		[0]=d_albrecht
		[2]=d_cocomo81
		[3]=d_cocomo81e
		[4]=d_cocomo81o
		[5]=d_cocomo81s
		[6]=d_desharnais
		[7]=d_desharnaisL1
		[8]=d_desharnaisL2
		[9]=d_desharnaisL3
		[10]=d_finnish
		[11]=d_kemerer
		[12]=d_maxwell
		[13]=d_miyazaki94
		[14]=d_nasa93center1
		[15]=d_nasa93center2
		[16]=d_nasa93center5
		[17]=d_sdr
		[18]=d_telecom1
	)

	# Train/test split strategies
	splitList=([0]=loo)

	# Preprocessors (entries may carry an argument after the function name)
	preprocessorList=(
		[0]=pp_none
		[1]=pp_log
		[2]=pp_normalize
		[3]="pp_widthNbin 3"
		[4]="pp_widthNbin 5"
		[5]="pp_freqNbin 3"
		[6]="pp_freqNbin 5"
		[7]=pp_pca
	)

	# Learners. "l_cart y" and "l_cart n" are disabled; the latter produces
	# broken results.
	learnerList=(
		[0]=l_zeroR
		[1]=l_slreg
		[2]=l_plsr
		[3]=l_nnet
		[4]="l_nNearN 1"
		[5]="l_nNearN 5"
	)

	# Produce the raw predictions first; the error list is only filled in
	# afterwards.
	runExperiment

	# Error measures
	errorList=(
		[0]=e_ar
		[1]=e_mre
		[2]=e_mer
		[3]=e_bre
		[4]=e_ibre
	)

	# Derive the error files from the predictions
	runErrors
	echo "Errors Collected"

	# Paired Wilcoxon signed-rank tests are currently disabled
	#runSigtests

	# Summarize everything into the results table
	computeFinalResults

	echo "Done"
}

# Append the summary table to ./results.csv.
# The header row is "data," followed by one "<preprocessor>_<learner>," cell
# per combination. Each following row starts with the data set name and gets
# one cell per split/preprocessor/learner combination; the cell itself is
# written by sumWSRT, which reads the globals data_name, split,
# preprocessor_name and learner_name set here.
# Reads: dNames, sNames, pNames, lNames.
computeFinalResults()
{
	local table=./results.csv
	local ppLabel lrnLabel

	# The first header cell belongs in the same file as the rest of the
	# table and needs its own trailing comma so the columns line up with
	# the "<data set>," cell of every data row.
	printf 'data,' >> "$table"
	for ppLabel in "${pNames[@]}"; do
		for lrnLabel in "${lNames[@]}"; do
			printf '%s_%s,' "$ppLabel" "$lrnLabel" >> "$table"
		done
	done
	echo >> "$table"

	for data_name in "${dNames[@]}"; do
		printf '%s,' "$data_name" >> "$table"
		for split in "${sNames[@]}"; do
			for preprocessor_name in "${pNames[@]}"; do
				for learner_name in "${lNames[@]}"; do
					sumWSRT
				done
			done
		done
		echo >> "$table"
	done
}
# Count the 1 / 0 / -1 entries of one comparison file and add them to the
# global wins / ties / losses counters through sumWTL.
#   $1 - path of a .mww (or .mwwmed / .mwwpred) file
_sumWSRTcount()
{
	if [ ! -r "$1" ]; then
		echo "sumWSRT: cannot read $1" >&2
		return 0
	fi
	# The awk counters start at zero for every file. Seeding them with the
	# running totals and then adding the totals again in sumWTL would count
	# every earlier file twice. Left unquoted so the three numbers become
	# three arguments.
	sumWTL $(gawk '{if($1 == 1){ w = w + 1;} if($1 == 0){ t = t + 1;} if($1 == -1){ l = l + 1; }} END{print w + 0 " " t + 0 " " l + 0}' "$1")
}

# Append one cell to ./results.csv for the current configuration: the
# fraction of -1 entries among all entries of its comparison files,
# followed by a comma.
# Reads:  data_name, split, preprocessor_name, learner_name, eNames.
# Writes: wins, ties, losses, sumwlt, error_name (all global).
# For the "mre" error the ".mwwmed" and ".mwwpred" files are counted too.
# When no entries are found at all the cell is left empty.
sumWSRT()
{
	local stem="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}_"
	local mww ratio

	wins=0
	ties=0
	losses=0
	for error_name in "${eNames[@]}"; do
		mww="${stem}${error_name}.mww"
		_sumWSRTcount "$mww"
		if [ "$error_name" = "mre" ]; then
			_sumWSRTcount "${mww}med"
			_sumWSRTcount "${mww}pred"
		fi
	done

	sumwlt=$(( wins + ties + losses ))
	ratio=""
	if [ "$sumwlt" -gt 0 ]; then
		# BEGIN needs no input file, so results.csv is not read back while
		# it is being appended to.
		ratio=$(gawk -v l="$losses" -v s="$sumwlt" 'BEGIN{print l/s}')
	else
		echo "sumWSRT: no comparison results for ${stem}*" >&2
	fi
	printf '%s,' "$ratio" >> ./results.csv
}
# Add three counts to the global wins / ties / losses counters.
#   $1 - wins to add, $2 - ties to add, $3 - losses to add
# A missing argument or an unset counter is treated as 0, so an incomplete
# count line leaves the running totals intact instead of blanking them.
sumWTL()
{
	wins=$(( ${1:-0} + ${wins:-0} ))
	ties=$(( ${2:-0} + ${ties:-0} ))
	losses=$(( ${3:-0} + ${losses:-0} ))
}
# Run the Wilcoxon comparisons between pairs of (preprocessor, learner)
# configurations for every data set, split and error measure.
# Reads:  dNames, sNames, eNames, pNames, lNames.
# Writes: data_name, split, error_name, preprocessor_name, learner_name,
#         pSize, lSize, f1, w1..w4 (global) and whatever setOutfile3 /
#         setOutfileW set.
# NOTE(review): the second learner index always starts at the first learner
# index, even when the second preprocessor differs from the first, and a
# configuration is also compared with itself - confirm this is the intended
# set of pairs.
runSigtests()
{
	local ppA ppB lrnA lrnB

	for data_name in "${dNames[@]}"; do
		for split in "${sNames[@]}"; do
			for error_name in "${eNames[@]}"; do
				pSize=${#pNames[@]}
				lSize=${#lNames[@]}
				for (( ppA = 0; ppA < pSize; ppA++ )); do
					for (( ppB = ppA; ppB < pSize; ppB++ )); do
						for (( lrnA = 0; lrnA < lSize; lrnA++ )); do
							for (( lrnB = lrnA; lrnB < lSize; lrnB++ )); do
								# First configuration of the pair
								preprocessor_name=${pNames[$ppA]}
								learner_name=${lNames[$lrnA]}
								setOutfile3
								setOutfileW
								f1=$outfile3
								w1=$outfileW

								# Second configuration; its error file
								# stays in $outfile3
								preprocessor_name=${pNames[$ppB]}
								learner_name=${lNames[$lrnB]}
								setOutfile3
								setOutfileW
								w2=$outfileW

								echo $w1 $w2 # for debug
								wilcoxonSRT $f1 $outfile3 $w1 $w2

								# mre additionally gets the median and
								# pred25 comparisons
								if [ "$error_name" = "mre" ]; then
									w3="${w1}med"
									w4="${w2}med"
									wilcoxonSRTmd $f1 $outfile3 $w3 $w4
									w3="${w1}pred"
									w4="${w2}pred"
									wilcoxonSRTpred25 $f1 $outfile3 $w3 $w4
								fi
							done
						done
					done
				done
			done
		done
	done
}
# Compute every error measure for every data set / split / preprocessor /
# learner combination and record the short names of everything visited.
# Reads:  dataList, splitList, preprocessorList, learnerList, errorList.
#         Each entry is a function name, optionally followed by arguments;
#         entries are expanded unquoted on purpose so the arguments split
#         off.
# Writes: dNames, sNames, pNames, lNames, eNames (rebuilt from scratch and
#         densely indexed from 0), split, infile, plus whatever the called
#         functions set.
# Calls:  setOutfile and errorEval once per combination / error measure.
runErrors()
{
	local dKey sKey pKey lKey eKey
	local dPos sPos pPos lPos ePos

	dNames=()
	sNames=()
	pNames=()
	lNames=()
	eNames=()

	# The lists are walked through their real indices. Counting from 0 to
	# the element count breaks on a sparse list: an unused slot would be
	# "run" as an empty command and the highest entries would never be
	# reached.
	dPos=0
	for dKey in "${!dataList[@]}"; do
		${dataList[$dKey]}
		dNames[$dPos]=$data_name
		dPos=$(( dPos + 1 ))
		sPos=0
		for sKey in "${!splitList[@]}"; do
			split=${splitList[$sKey]}
			sNames[$sPos]=$split
			sPos=$(( sPos + 1 ))
			pPos=0
			for pKey in "${!preprocessorList[@]}"; do
				${preprocessorList[$pKey]}
				pNames[$pPos]=$preprocessor_name
				pPos=$(( pPos + 1 ))
				lPos=0
				for lKey in "${!learnerList[@]}"; do
					${learnerList[$lKey]}
					lNames[$lPos]=$learner_name
					lPos=$(( lPos + 1 ))
					# The learner's raw prediction file is the input of
					# every error measure.
					setOutfile
					infile=$outfile
					ePos=0
					for eKey in "${!errorList[@]}"; do
						${errorList[$eKey]}
						eNames[$ePos]=$error_name
						ePos=$(( ePos + 1 ))
						errorEval
					done
				done
			done
		done
	done
}

# Produce the raw prediction file of every data set / split / preprocessor /
# learner combination by calling runEval for each of them, then delete the
# scratch ARFF files.
# Reads:  dataList, splitList, preprocessorList, learnerList (function names,
#         optionally followed by arguments; expanded unquoted on purpose so
#         the arguments split off), tempARFF, tempARFF2, tempARFF3.
# Writes: split, currentpp (cleared for every data set so the first
#         preprocessor is always applied to the new data).
runExperiment()
{
	local dsFn ppKey lrnKey

	for dsFn in "${dataList[@]}"; do
		$dsFn
		currentpp=""
		for split in "${splitList[@]}"; do
			# Walk the real indices so a sparse list is handled correctly.
			for ppKey in "${!preprocessorList[@]}"; do
				${preprocessorList[$ppKey]}
				for lrnKey in "${!learnerList[@]}"; do
					${learnerList[$lrnKey]}
					runEval
				done
			done
		done
	done

	# Perform cleanup. -f: not every scratch file is created by every run
	# (for example when all results already exist), and a missing one must
	# not turn a successful experiment into a failure.
	rm -f -- "$tempARFF" "$tempARFF2" "$tempARFF3"
}

# Compute the current error measure unless its output file already exists.
# setOutfile2 sets $outfile2 from the current configuration; errorMeasure is
# whatever the last e_* function defined.
errorEval()
{
	#Check if run exists
	setOutfile2
	# Quoted so a path containing spaces is tested as one file name rather
	# than making the test fail and the measure be skipped.
	if [ ! -e "$outfile2" ]; then
		errorMeasure
	fi
}
# Run the current learner on the current data set unless its result file
# already exists. The preprocessor is only re-run when it differs from the
# one recorded in $currentpp.
# Reads: data_name, split, preprocessor_name, learner_name, currentpp, and
#        the preprocessor / learner functions defined by the last pp_* and
#        l_* calls.
runEval()
{
	#Check if run exists
	setOutfile
	# Quoted so a path containing spaces is tested as one file name.
	if [ -e "$outfile" ]; then
		return 0
	fi

	echo "Performing" $data_name $split $preprocessor_name $learner_name
	if [ "$currentpp" != "$preprocessor_name" ]; then
		echo "Preprocessing"
		# Call preprocessor
		preprocessor
		echo "Done Preprocessing"
	fi
	# Call learner
	echo "Learning"
	learner
	echo "Done Learning"
}

# Measure an ARFF file.
#   $1 - path of the ARFF file
# Sets numberInstances to the number of lines after the "@data" line that
# awk treats as true (non-empty), and numberFeatures to the field count of
# the first such line minus one, i.e. not counting the class column
# (-1 when there is no such line).
sizeData()
{
	local dataRows

	# determine size of data
	dataRows=$(gawk '$1 == "@data" { inData = 1; next } inData && $0 { print $0 }' $1 | wc -l)
	# Arithmetic evaluation strips the padding some wc versions print.
	numberInstances=$(( dataRows ))

	# determine number of features not including class
	numberFeatures=$(sed 's/,/ /g' $1 | gawk '
		$1 == "@data" { armed = 1; next }
		armed && $0   { width = NF; armed = 0 }
		END           { print width - 1 }')
}

# Filter WEKA prediction output read from stdin into "actual,predicted"
# lines: the first five lines are skipped, and a line is kept only when its
# second and third fields are both true for awk (non-empty and non-zero).
formatFile()
{
	# First column is actual, second is predicted
	gawk 'NR > 5 && $2 && $3 { print $2 "," $3 }'
}

# Variant of formatFile for output whose fields contain ':' separators:
# every ':' becomes a blank, the first five lines are skipped, and fields 3
# and 5 are printed as "f3,f5" for lines whose fields 3 and 7 are both true
# for awk.
# NOTE(review): the filter tests field 7 but prints field 5 - confirm this
# is intended.
formatFile2()
{
	tr ':' ' ' | gawk 'NR > 5 && $3 && $7 { print $3 "," $5 }'
}

# Set $outfile to the raw prediction file of the current configuration:
# ./results/<data>_<split>_<preprocessor>_<learner>.csv
setOutfile()
{
	outfile="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}.csv"
}

# Set $outfile2 to the error file of the current configuration:
# ./results/<data>_<split>_<preprocessor>_<learner>_<error>.csv
setOutfile2()
{
	outfile2="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}_${error_name}.csv"
}
# Set $outfile3 to the error file of the current configuration, rooted at
# $Here instead of the current directory:
# $Here/results/<data>_<split>_<preprocessor>_<learner>_<error>.csv
setOutfile3()
{
	outfile3="${Here}/results/${data_name}_${split}_${preprocessor_name}_${learner_name}_${error_name}.csv"
}
# Set $outfileW to the Wilcoxon result file of the current configuration:
# ./results/<data>_<split>_<preprocessor>_<learner>_<error>.mww
setOutfileW()
{
	outfileW="./results/${data_name}_${split}_${preprocessor_name}_${learner_name}_${error_name}.mww"
}
###########################################################################
# Data Sets #
# Each d_* function selects one data set: it sets $data_name to the short
# name used in result file names and $data to the ARFF file to read.
d_albrecht()      { data_name=albrecht;      data="./data/${data_name}.arff"; }
d_china()         { data_name=china;         data="./data/${data_name}.arff"; }
d_cocomo81()      { data_name=cocomo81;      data="./data/${data_name}.arff"; }
d_cocomo81e()     { data_name=cocomo81e;     data="./data/${data_name}.arff"; }
d_cocomo81o()     { data_name=cocomo81o;     data="./data/${data_name}.arff"; }
d_cocomo81s()     { data_name=cocomo81s;     data="./data/${data_name}.arff"; }
d_desharnais()    { data_name=desharnais;    data="./data/${data_name}.arff"; }
d_desharnaisL1()  { data_name=desharnaisL1;  data="./data/${data_name}.arff"; }
d_desharnaisL2()  { data_name=desharnaisL2;  data="./data/${data_name}.arff"; }
d_desharnaisL3()  { data_name=desharnaisL3;  data="./data/${data_name}.arff"; }
d_finnish()       { data_name=finnish;       data="./data/${data_name}.arff"; }
d_kemerer()       { data_name=kemerer;       data="./data/${data_name}.arff"; }
d_maxwell()       { data_name=maxwell;       data="./data/${data_name}.arff"; }
d_miyazaki94()    { data_name=miyazaki94;    data="./data/${data_name}.arff"; }
# The nasa93 files are named differently from their short names.
d_nasa93center1() { data_name=nasa93center1; data="./data/nasa93_center_1.arff"; }
d_nasa93center2() { data_name=nasa93center2; data="./data/nasa93_center_2.arff"; }
d_nasa93center5() { data_name=nasa93center5; data="./data/nasa93_center_5.arff"; }
d_sdr()           { data_name=sdr;           data="./data/${data_name}.arff"; }
d_telecom1()      { data_name=telecom1;      data="./data/${data_name}.arff"; }
###########################################################################
# Preprocessors #
# None #
# Select the pass-through preprocessor: sets preprocessor_name to "none" and
# (re)defines preprocessor, which copies $data to $tempARFF through WEKA's
# AllFilter and records itself in $currentpp.
pp_none()
{
	preprocessor_name=none
	preprocessor()
	{
		local -a filterArgs
		currentpp=none
		# $data > $tempARFF
		filterArgs=(weka.filters.AllFilter -i $data -o $tempARFF)
		$Weka "${filterArgs[@]}"
	}
}
# Logarithmic #
# Select the logarithmic preprocessor: sets preprocessor_name to "log" and
# (re)defines preprocessor, which runs WEKA's MathExpression filter from
# $data to $tempARFF and records itself in $currentpp.
pp_log()
{
	preprocessor_name=log
	preprocessor()
	{
		local -a filterArgs
		currentpp=log
		#log(e) = 0.434294482
		filterArgs=(
			weka.filters.unsupervised.attribute.MathExpression
			-R last
			-E "ifelse(A=0,0,(log(A))/0.434294482)"
			-i $data
			-o $tempARFF
		)
		$Weka "${filterArgs[@]}"
	}
}
# Normalization #
# Select the normalizing preprocessor: sets preprocessor_name to "norm" and
# (re)defines preprocessor, which runs WEKA's instance Normalize filter from
# $data to $tempARFF and records itself in $currentpp.
pp_normalize()
{
	preprocessor_name=norm
	preprocessor()
	{
		local -a filterArgs
		currentpp=norm
		filterArgs=(weka.filters.unsupervised.instance.Normalize -c last -i $data -o $tempARFF)
		$Weka "${filterArgs[@]}"
	}
}
# n-Bin Equal Frequency Discretization #
# Select the Discretize preprocessor with the -F option.
#   $1 - number of bins
# Sets preprocessor_name to "freq<N>bin" and ppVar to N, and (re)defines
# preprocessor, which runs WEKA's Discretize filter with -B $ppVar -F from
# $data to $tempARFF and records the current preprocessor_name in $currentpp.
pp_freqNbin()
{
	preprocessor_name="freq${1}bin"
	ppVar=$1
	preprocessor()
	{
		local -a filterArgs
		currentpp=$preprocessor_name
		filterArgs=(
			weka.filters.unsupervised.attribute.Discretize
			-B $ppVar -F -c last
			-i $data -o $tempARFF
		)
		$Weka "${filterArgs[@]}"
	}
}
# n-Bin Equal Width Discretization #
# Select the Discretize preprocessor without the -F option.
#   $1 - number of bins
# Sets preprocessor_name to "width<N>bin" and ppVar to N, and (re)defines
# preprocessor, which runs WEKA's Discretize filter with -B $ppVar from
# $data to $tempARFF and records the current preprocessor_name in $currentpp.
pp_widthNbin()
{
	preprocessor_name="width${1}bin"
	ppVar=$1
	preprocessor()
	{
		local -a filterArgs
		currentpp=$preprocessor_name
		filterArgs=(
			weka.filters.unsupervised.attribute.Discretize
			-B $ppVar -c last
			-i $data -o $tempARFF
		)
		$Weka "${filterArgs[@]}"
	}
}
# Principal Component Analysis #
# Select the PCA preprocessor: sets preprocessor_name to "pca" and
# (re)defines preprocessor, which runs WEKA's PrincipalComponents filter from
# $data to $tempARFF and records itself in $currentpp.
pp_pca()
{
	preprocessor_name=pca
	preprocessor()
	{
		local -a filterArgs
		currentpp=pca
		filterArgs=(weka.filters.unsupervised.attribute.PrincipalComponents -D -c last -i $data -o $tempARFF)
		$Weka "${filterArgs[@]}"
	}
}
###########################################################################
# Learners #
# ZeroR #
# Select the ZeroR learner: sets learner_name to "ZeroR" and (re)defines
# learner. For task "ee" with split "loo", learner runs WEKA's ZeroR on
# $tempARFF with as many folds as instances and writes the formatted
# predictions to $outfile; otherwise it does nothing.
l_zeroR()
{
	learner_name=ZeroR
	learner()
	{
		# Capabilities text from WEKA
		# Class -- Date class, Numeric class, Nominal class, Missing class values, Binary class
		# Attributes -- Numeric attributes, Unary attributes, Relational attributes, Binary attributes, Date attributes, String attributes, Empty nominal attributes, Missing values, Nominal attributes

		if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
			# One fold per instance
			sizeData $tempARFF
			$Weka weka.classifiers.rules.ZeroR \
				-t $tempARFF -s $Seed -x $numberInstances -p 0 |
				formatFile > $outfile
		fi
	}
}
# Simple Linear Regression #
# Select simple linear regression: sets learner_name to "SLReg" and
# (re)defines learner. For task "ee" with split "loo", learner converts
# $tempARFF to $tempARFF2 with the NominalToBinary filter, runs
# SimpleLinearRegression on the result with as many folds as instances and
# writes the formatted predictions to $outfile; otherwise it does nothing.
l_slreg()
{
	learner_name=SLReg
	learner()
	{
		# Capabilities text from WEKA
		# Class -- Date class, Missing class values, Numeric class
		# Attributes -- Date attributes, Numeric attributes
		# min # of instances: 1
		[ "$task" = "ee" ] || return 0
		[ "$split" = "loo" ] || return 0

		sizeData $tempARFF
		$Weka weka.filters.supervised.attribute.NominalToBinary \
			-A -c last -i $tempARFF -o $tempARFF2

		$Weka weka.classifiers.functions.SimpleLinearRegression \
			-t $tempARFF2 -s $Seed -x $numberInstances -p 0 |
			formatFile > $outfile
	}
}
# Partial Least Squares Regression #
# Select partial least squares regression: sets learner_name to "PlSR" and
# (re)defines learner. For task "ee" with split "loo", learner
#   1. converts $tempARFF to $tempARFF2 with the NominalToBinary filter,
#   2. runs PLSClassifier with $numberFeatures components and writes the
#      formatted predictions to $outfile,
#   3. while that output is empty, retries with one component less,
#   4. falls back to ZeroR once no component count is left.
# Otherwise learner does nothing.
# The retry is a plain loop. It used to be a nested helper function that was
# called one line before it was defined, so the first PLSR run of a session
# failed with "command not found" and left no result file.
l_plsr()
{
	learner_name=PlSR
	learner()
	{
		# Capabilities text from WEKA
		# Class -- Date class, Missing class values, Numeric class
		# Attributes -- Date attributes, Missing values, Numeric attributes
		# min # of instances: 1
		if [ "$task" = "ee" ]; then
			if [ "$split" = "loo" ]; then
				$Weka weka.filters.supervised.attribute.NominalToBinary -A -c last -i "$tempARFF" -o "$tempARFF2"
				sizeData "$tempARFF2"
				while :; do
					$Weka weka.classifiers.functions.PLSClassifier -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 -filter "weka.filters.supervised.attribute.PLSFilter -M -P none -C $numberFeatures" | formatFile > "$outfile"
					# Done as soon as any prediction was produced
					if [ -n "$(cat "$outfile")" ]; then
						break
					fi
					numberFeatures=$(( numberFeatures - 1 ))
					if [ "$numberFeatures" -le 0 ]; then
						$Weka weka.classifiers.rules.ZeroR -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 | formatFile > "$outfile"
						break
					fi
				done
			fi
		fi
	}
}
# Neural Net #
# Select the neural net learner: sets learner_name to "nnet" and (re)defines
# learner. For task "ee" with split "loo", learner runs WEKA's
# MultilayerPerceptron (-N 50) on $tempARFF with as many folds as instances
# and writes the formatted predictions to $outfile; otherwise it does
# nothing.
l_nnet()
{
	learner_name=nnet
	learner()
	{
		if [ "$task" = "ee" ] && [ "$split" = "loo" ]; then
			# One fold per instance
			sizeData $tempARFF
			$Weka weka.classifiers.functions.MultilayerPerceptron -N 50 \
				-t $tempARFF -s $Seed -x $numberInstances -p 0 |
				formatFile > $outfile
		fi
	}
}
# Analogy Based Estimation - n Nearest Neighbor #
# Select the n-nearest-neighbour learner.
#   $1 - number of neighbours
# Sets learner_name to "nn<N>" and lVar to N, and (re)defines learner. For
# task "ee" with split "loo", learner runs WEKA's IBk with -K $lVar on
# $tempARFF with as many folds as instances and writes the formatted
# predictions to $outfile; otherwise it does nothing.
l_nNearN()
{
	learner_name="nn${1}"
	lVar=$1
	learner()
	{
		[ "$task" = "ee" ] || return 0
		[ "$split" = "loo" ] || return 0

		sizeData $tempARFF
		$Weka weka.classifiers.lazy.IBk -K $lVar \
			-t $tempARFF -s $Seed -x $numberInstances -p 0 |
			formatFile > $outfile
	}
}
# Simple CART #
# Select the SimpleCart learner.
#   $1 - "y" to pass -U to SimpleCart, "n" to run it without -U
# Sets learner_name to "CART<$1>" and lVar to $1, and (re)defines learner.
# For task "ee" with split "loo", learner converts $tempARFF to $tempARFF2
# with NumericToNominal, runs SimpleCart on the result and writes the
# predictions (through formatFile2) to $outfile. When $outfile ends up empty
# or missing it falls back to ZeroR on $tempARFF. Otherwise learner does
# nothing.
l_cart()
{
	learner_name="CART${1}"
	lVar=$1
	learner()
	{
		# Capabilities text from WEKA
		# Class -- Nominal class, Binary class
		# Attributes -- Binary attributes, Missing values, Numeric attributes, Nominal attributes, Empty nominal attributes, Unary attributes
		# min # of instances: 1

		[ "$task" = "ee" ] || return 0
		[ "$split" = "loo" ] || return 0

		$Weka weka.filters.unsupervised.attribute.NumericToNominal -i $tempARFF -o $tempARFF2
		sizeData $tempARFF2
		case "$lVar" in
			y)
				$Weka weka.classifiers.trees.SimpleCart -U -t $tempARFF2 -s $Seed -x $numberInstances -p 0 | formatFile2 > $outfile
				;;
			n)
				$Weka weka.classifiers.trees.SimpleCart -t $tempARFF2 -s $Seed -x $numberInstances -p 0 | formatFile2 > $outfile
				;;
		esac
		# No usable CART output: fall back to ZeroR on the unconverted data
		if [ -z "$(cat $outfile)" ]; then
			sizeData $tempARFF
			$Weka weka.classifiers.rules.ZeroR -t $tempARFF -s $Seed -x $numberInstances -p 0 | formatFile > $outfile
		fi
	}
}
###########################################################################
# Error Calculators #
# Absolute Residual Error #
# Select the absolute residual error: sets error_name to "ar" and
# (re)defines errorMeasure, which reads "actual,predicted" lines from
# $infile and writes |actual - predicted| per line to $outfile2.
# Lines whose actual or predicted field is empty or zero are skipped, so a
# skipped line can no longer repeat the value computed for the line before.
e_ar()
{
	error_name=ar
	errorMeasure()
	{
	# NOTE(review): $rawfile is not assigned anywhere in the visible part of
	# this file; while it is unset, the unquoted "[ -N ]" is always true and
	# the measure is always computed. The test is deliberately left unquoted:
	# quoting it would turn it into "always false" - confirm the intent.
	if [ ! -e $outfile ] || [ -N $rawfile ]; then
		sed 's/,/ /g' "$infile" | gawk '$1 && $2 { a = $1 - $2; print ((a >= 0) ? a : -a) }' > "$outfile2"
	fi
	}
}
# Magnitude of Relative Error #
# Select the magnitude of relative error: sets error_name to "mre" and
# (re)defines errorMeasure, which reads "actual,predicted" lines from
# $infile and writes |actual - predicted| / actual per line to $outfile2.
# Lines whose actual or predicted field is empty or zero are skipped, which
# also keeps a zero actual value from causing a division by zero.
e_mre()
{
	error_name=mre
	errorMeasure()
	{
	# NOTE(review): $rawfile is not assigned anywhere in the visible part of
	# this file; while it is unset, the unquoted "[ -N ]" is always true and
	# the measure is always computed. The test is deliberately left unquoted:
	# quoting it would turn it into "always false" - confirm the intent.
	if [ ! -e $outfile ] || [ -N $rawfile ]; then
		sed 's/,/ /g' "$infile" | gawk '$1 && $2 { a = $1 - $2; print ((a >= 0) ? a / $1 : -a / $1) }' > "$outfile2"
	fi
	}
}
# Magnitude of Error Relative to the Estimate #
# Select the magnitude of error relative to the estimate: sets error_name to
# "mer" and (re)defines errorMeasure, which reads "actual,predicted" lines
# from $infile and writes |actual - predicted| / predicted per line to
# $outfile2.
# Lines whose actual or predicted field is empty or zero are skipped, which
# also keeps a zero estimate from causing a division by zero.
e_mer()
{
	error_name=mer
	errorMeasure()
	{
	# NOTE(review): $rawfile is not assigned anywhere in the visible part of
	# this file; while it is unset, the unquoted "[ -N ]" is always true and
	# the measure is always computed. The test is deliberately left unquoted:
	# quoting it would turn it into "always false" - confirm the intent.
	if [ ! -e $outfile ] || [ -N $rawfile ]; then
		sed 's/,/ /g' "$infile" | gawk '$1 && $2 { a = $1 - $2; print ((a >= 0) ? a / $2 : -a / $2) }' > "$outfile2"
	fi
	}
}
# Balanced Relative Error #
# Select the balanced relative error: sets error_name to "bre" and
# (re)defines errorMeasure, which reads "actual,predicted" lines from
# $infile and writes |actual - predicted| divided by the smaller of the two
# values per line to $outfile2 (by the predicted value when actual >
# predicted, by the actual value otherwise).
# Lines whose actual or predicted field is empty or zero are skipped, which
# also rules out a division by zero.
e_bre()
{
	error_name=bre
	errorMeasure()
	{
	# NOTE(review): $rawfile is not assigned anywhere in the visible part of
	# this file; while it is unset, the unquoted "[ -N ]" is always true and
	# the measure is always computed. The test is deliberately left unquoted:
	# quoting it would turn it into "always false" - confirm the intent.
	if [ ! -e $outfile ] || [ -N $rawfile ]; then
		sed 's/,/ /g' "$infile" | gawk '$1 && $2 { a = $1 - $2; if (a < 0) a = -a; if ($1 > $2) print a / $2; else print a / $1 }' > "$outfile2"
	fi
	}
}
# Inverted Balanced Relative Error #
# Select the inverted balanced relative error: sets error_name to "ibre" and
# (re)defines errorMeasure, which reads "actual,predicted" lines from
# $infile and writes |actual - predicted| divided by the larger of the two
# values per line to $outfile2 (by the predicted value when actual <
# predicted, by the actual value otherwise).
# Lines whose actual or predicted field is empty or zero are skipped, so a
# skipped line can no longer print a value based on the line before.
e_ibre()
{
	error_name=ibre
	errorMeasure()
	{
	# NOTE(review): $rawfile is not assigned anywhere in the visible part of
	# this file; while it is unset, the unquoted "[ -N ]" is always true and
	# the measure is always computed. The test is deliberately left unquoted:
	# quoting it would turn it into "always false" - confirm the intent.
	if [ ! -e $outfile ] || [ -N $rawfile ]; then
		sed 's/,/ /g' "$infile" | gawk '$1 && $2 { a = $1 - $2; if (a < 0) a = -a; if ($1 < $2) print a / $2; else print a / $1 }' > "$outfile2"
	fi
	}
}
###########################################################################
# Wilcoxon Signed-Rank Test mean comparison#
# Run the Octave function wilcoxonSRT on two value files and record the
# outcome for both sides.
#   $1 = X values file
#   $2 = Y values file
#   $3 = result file that receives the negated outcome
#   $4 = result file that receives the outcome
# The outcome is the third whitespace-separated field of Octave's output.
# Sets the globals passArgs, progLoc and wsrt.
wilcoxonSRT()
{
	passArgs="wilcoxonSRT(\"$1\",\"$2\")"
	progLoc="$Here/supportCode"
	wsrt=$(echo $passArgs | $Octave -q --path $progLoc | gawk '{print $3}')
	echo $wsrt >> $4
	echo $(expr 0 - $wsrt) >> $3
	# 1 is loss for w1, 0 is tie, -1 is win for w1
}
# Wilcoxon Signed-Rank Test median comparison#
# Run the Octave function wilcoxonSRTmed on two value files and record the
# outcome for both sides.
#   $1 = X values file
#   $2 = Y values file
#   $3 = result file that receives the negated outcome
#   $4 = result file that receives the outcome
# The outcome is the third whitespace-separated field of Octave's output.
# Sets the globals passArgs, progLoc and wsrt.
wilcoxonSRTmd()
{
	passArgs="wilcoxonSRTmed(\"$1\",\"$2\")"
	progLoc="$Here/supportCode"
	wsrt=$(echo $passArgs | $Octave -q --path $progLoc | gawk '{print $3}')
	echo $wsrt >> $4
	echo $(expr 0 - $wsrt) >> $3
	# 1 is loss, 0 is tie, -1 is win
}
# Wilcoxon Signed-Rank Test pred25 comparison#
# Run the Octave function wilcoxonSRTpred25 on two value files and record
# the outcome for both sides.
#   $1 = X values file
#   $2 = Y values file
#   $3 = result file that receives the negated outcome
#   $4 = result file that receives the outcome
# The outcome is the third whitespace-separated field of Octave's output.
# Sets the globals passArgs, progLoc and wsrt.
wilcoxonSRTpred25()
{
	passArgs="wilcoxonSRTpred25(\"$1\",\"$2\")"
	progLoc="$Here/supportCode"
	wsrt=$(echo $passArgs | $Octave -q --path $progLoc | gawk '{print $3}')
	echo $wsrt >> $4
	echo $(expr 0 - $wsrt) >> $3
	# 1 is loss, 0 is tie, -1 is win
}
###########################################################################

# WEKA Wrapper for Debug #
# Run WEKA with the given arguments, using the $Weka command prefix.
# Every argument is forwarded unchanged: there is no limit of nine, and
# arguments that contain blanks are not split up.
weka()
{
	# $Weka itself stays unquoted: it is a multi-word command prefix.
	$Weka "$@"
}

# Make override for debug #
# Reload comba.bash from the $Here directory into the current shell.
# Note that this shadows the make program for the rest of the session.
# Returns non-zero without sourcing anything when $Here cannot be entered,
# so a script from some unrelated directory is never loaded by accident.
make()
{
	cd "$Here" || return 1
	# "./" keeps the shell from searching $PATH for a file of this name.
	. ./comba.bash
}

# CSV to ARFF conversion #
# Convert a CSV file to ARFF with WEKA's CSVLoader.
#   $1 - path without extension: reads "$1.csv" and writes "$1.arff"
# Returns the converter's exit status, or 1 when no path is given. Paths
# that contain blanks are supported.
c2a2()
{
	if [ -z "$1" ]; then
		echo "usage: c2a2 <path-without-extension>" >&2
		return 1
	fi
	local stem=$1
	local incsv="${stem}.csv"
	local outarff="${stem}.arff"
	local converter="weka.core.converters.CSVLoader"
	# $Weka stays unquoted: it is a multi-word command prefix.
	$Weka "$converter" "$incsv" > "$outarff"
}

# System Opening Messages #
# Printed every time this file is loaded, including reloads through make.
echo "COMBA 2 by Vincent Rogers and William Sica"
echo ""
# Replace the prompt of the shell that loaded this file.
PS1="COMBA> "