# Directory expected to contain weka.jar. It mirrors $Here, which is not
# assigned in this part of the file and is expected to be set elsewhere.
Java="${Here}"
# Command prefix used for every WEKA call: java run under "nice -n 20" with a
# 2048 MB maximum heap and weka.jar on the classpath. The value is a whole
# command line (it ends with a space), so it is always expanded unquoted.
Weka="nice -n 20 java -Xmx2048M -cp ${Java}/weka.jar "
# Seed handed to the classifiers through their -s option.
Seed="1"
# Scratch ARFF files, relative to the current directory.
tempARFF=./temp.arff
tempARFF2=./temp2.arff
tempARFF3=./temp3.arff

###########################################################################
# Experiments #
# Resets the five experiment lists (dataList, splitList, preprocessorList,
# learnerList, errorList) to empty indexed arrays.
# Globals written: the five lists named above.
# Arguments: none.
initializeTestArrays()
{
	# Drop any previous value (and attributes) first.
	unset dataList splitList preprocessorList learnerList errorList

	# Plain array assignments are used instead of "declare -a": inside a
	# function "declare" creates variables local to that function, which
	# disappear on return and leave the caller with nothing declared.
	dataList=()
	splitList=()
	preprocessorList=()
	learnerList=()
	errorList=()
}

# Configures and runs the experiment selected by $1.
# Arguments: $1 - task identifier; only "ee" (effort estimation) is handled.
# Globals written: task, dataList, splitList, preprocessorList, learnerList.
# Any other task value resets the lists and then does nothing.
selectExperiment()
{
	task=$1
	initializeTestArrays

	if [[ $task != ee ]]; then
		return 0
	fi

	# Data sets. Index 1 (d_china) is commented out, so the array is sparse.
	dataList=(
		[0]=d_albrecht
		#[1]=d_china
		[2]=d_cocomo81
		[3]=d_cocomo81e
		[4]=d_cocomo81o
		[5]=d_cocomo81s
		[6]=d_desharnais
		[7]=d_desharnaisL1
		[8]=d_desharnaisL2
		[9]=d_desharnaisL3
		[10]=d_finnish
		[11]=d_kemerer
		[12]=d_maxwell
		[13]=d_miyazaki94
		[14]=d_nasa93center1
		[15]=d_nasa93center2
		[16]=d_nasa93center5
		[17]=d_sdr
		[18]=d_telecom1
	)

	# Splits.
	splitList=([0]=loo)

	# Preprocessors; an entry may carry an argument after the function name.
	preprocessorList=(
		[0]=pp_none
		[1]=pp_log
		[2]=pp_normalize
		[3]="pp_widthNbin 3"
		[4]="pp_widthNbin 5"
		[5]="pp_freqNbin 3"
		[6]="pp_freqNbin 5"
		[7]=pp_pca
	)

	# Learners; same "function [argument]" convention.
	learnerList=(
		[0]=l_zeroR
		[1]=l_slreg
		[2]=l_plsr
		[3]=l_nnet
		[4]="l_nNearN 1"
		[5]="l_nNearN 5"
		[6]="l_cart y"
		[7]="l_cart n"
	)

	runExperiment
}

# Runs every data set x split x preprocessor x learner combination.
# Globals read:    dataList, splitList, preprocessorList, learnerList,
#                  tempARFF, tempARFF2, tempARFF3
# Globals written: split, currentpp (plus whatever the selected data set,
#                  preprocessor and learner functions set)
# Each list entry is "functionName [argument]"; calling it selects that data
# set / preprocessor / learner, after which runEval does the actual work.
runExperiment()
{
	local dataEntry splitEntry ppEntry learnerEntry

	for dataEntry in "${dataList[@]}"
	do
		# Entries are expanded unquoted on purpose so that an entry such as
		# "pp_widthNbin 3" splits into a function name and its argument.
		$dataEntry
		# New data set: force the next preprocessor to run again.
		currentpp=""
		for splitEntry in "${splitList[@]}"
		do
			split=$splitEntry
			# Iterate over the values rather than indices 0..size-1, so that
			# a list with a gap (a commented-out entry) is still fully run.
			for ppEntry in "${preprocessorList[@]}"
			do
				$ppEntry
				for learnerEntry in "${learnerList[@]}"
				do
					$learnerEntry
					runEval
				done
			done
		done
	done

	# Perform cleanup. -f: some of the scratch files may never have been
	# created (for instance when every result already existed).
	rm -f -- "$tempARFF" "$tempARFF2" "$tempARFF3"
}

# Evaluates the currently selected data set / split / preprocessor / learner
# combination, unless its result file already exists.
# Globals read: outfile (set by setOutfile), data_name, split,
#               preprocessor_name, learner_name, currentpp
# Calls the preprocessor() and learner() hooks installed by the pp_* / l_*
# selector functions.
runEval()
{
	setOutfile

	# Nothing to do when this run has already produced its result file.
	[ ! -e $outfile ] || return 0

	echo Performing $data_name $split $preprocessor_name $learner_name

	# Preprocess only when the temp file was not already produced by this
	# preprocessor (preprocessor() records itself in currentpp).
	if [[ $currentpp != "$preprocessor_name" ]]; then
		echo "Preprocessing"
		preprocessor
		echo "Done Preprocessing"
	fi

	echo "Learning"
	learner
	echo "Done Learning"
}

# Measures an ARFF file.
# Arguments: $1 - path of the ARFF file
# Globals written:
#   numberInstances - number of non-empty lines after the "@data" line
#   numberFeatures  - number of values on the first such line, minus one
#                     (i.e. not counting the class); -1 when there is no data
sizeData()
{
	local counts

	# Single pass over the file. Rows are tested with $0 != "" rather than
	# by truth value, so a row that is numerically zero is not skipped.
	# Commas are turned into blanks before counting so that "1,2" and
	# "1, 2" are counted the same way.
	counts=$(gawk '
		$1 == "@data" { inData = 1; next }
		inData && $0 != "" {
			rows++
			if (!haveWidth) {
				firstRow = $0
				gsub(/,/, " ", firstRow)
				width = split(firstRow, cells)
				haveWidth = 1
			}
		}
		END { print rows + 0, width - 1 }
	' "$1")

	numberInstances=${counts%% *}
	numberFeatures=${counts##* }
}

# Filter (stdin -> stdout): turns prediction output into "actual,predicted"
# CSV lines.
# The first five input lines are skipped; from every later line that has at
# least three whitespace-separated fields, fields 2 and 3 are printed.
# First column is actual, second is predicted.
formatFile()
{
	# NF >= 3 is used instead of testing $2 and $3 for truth: a value of 0
	# is false in awk, which would silently drop rows whose actual or
	# predicted value is zero.
	gawk 'NR > 5 && NF >= 3 { print $2 "," $3 }'
}

# Filter (stdin -> stdout): like formatFile, for output whose values are
# written as "index:label".
# Every ":" is replaced by a blank, the first five lines are skipped, and
# from each later line that then has at least seven fields, fields 3 and 5
# are printed as "actual,predicted".
# NOTE(review): lines with fewer than seven fields are dropped, exactly as
# before; presumably those are rows without an error marker -- confirm that
# dropping them is intended.
formatFile2()
{
	# Field presence is checked through NF rather than by truth value, so a
	# label or score of 0 no longer causes the row to be discarded.
	gawk '{ gsub(/:/, " ") } NR > 5 && NF >= 7 { print $3 "," $5 }'
}

# Builds the result file path for the current combination.
# Globals read:    data_name, split, preprocessor_name, learner_name
# Globals written: outfile, of the form
#                  ./results2/<data>_<split>_<preprocessor>_<learner>.csv
setOutfile()
{
	outfile="./results2/${data_name}_${split}_${preprocessor_name}_${learner_name}.csv"
}

###########################################################################
# Data Sets #
# Selects the albrecht data set: sets data_name and the ARFF path in data.
d_albrecht()
{
	data_name="albrecht"
	data="./data/${data_name}.arff"
}
# Selects the china data set: sets data_name and the ARFF path in data.
d_china()
{
	data_name="china"
	data="./data/${data_name}.arff"
}
# Selects the cocomo81 data set: sets data_name and the ARFF path in data.
d_cocomo81()
{
	data_name="cocomo81"
	data="./data/${data_name}.arff"
}
# Selects the cocomo81e data set: sets data_name and the ARFF path in data.
d_cocomo81e()
{
	data_name="cocomo81e"
	data="./data/${data_name}.arff"
}
# Selects the cocomo81o data set: sets data_name and the ARFF path in data.
d_cocomo81o()
{
	data_name="cocomo81o"
	data="./data/${data_name}.arff"
}
# Selects the cocomo81s data set: sets data_name and the ARFF path in data.
d_cocomo81s()
{
	data_name="cocomo81s"
	data="./data/${data_name}.arff"
}
# Selects the desharnais data set: sets data_name and the ARFF path in data.
d_desharnais()
{
	data_name="desharnais"
	data="./data/${data_name}.arff"
}
# Selects the desharnaisL1 data set: sets data_name and the ARFF path in data.
d_desharnaisL1()
{
	data_name="desharnaisL1"
	data="./data/${data_name}.arff"
}
# Selects the desharnaisL2 data set: sets data_name and the ARFF path in data.
d_desharnaisL2()
{
	data_name="desharnaisL2"
	data="./data/${data_name}.arff"
}
# Selects the desharnaisL3 data set: sets data_name and the ARFF path in data.
d_desharnaisL3()
{
	data_name="desharnaisL3"
	data="./data/${data_name}.arff"
}
# Selects the finnish data set: sets data_name and the ARFF path in data.
d_finnish()
{
	data_name="finnish"
	data="./data/${data_name}.arff"
}
# Selects the kemerer data set: sets data_name and the ARFF path in data.
d_kemerer()
{
	data_name="kemerer"
	data="./data/${data_name}.arff"
}
# Selects the maxwell data set: sets data_name and the ARFF path in data.
d_maxwell()
{
	data_name="maxwell"
	data="./data/${data_name}.arff"
}
# Selects the miyazaki94 data set: sets data_name and the ARFF path in data.
d_miyazaki94()
{
	data_name="miyazaki94"
	data="./data/${data_name}.arff"
}
# Selects the nasa93 center 1 data set. The file name uses underscores
# (nasa93_center_1.arff) while data_name does not.
d_nasa93center1()
{
	data_name="nasa93center1"
	data="./data/nasa93_center_1.arff"
}
# Selects the nasa93 center 2 data set. The file name uses underscores
# (nasa93_center_2.arff) while data_name does not.
d_nasa93center2()
{
	data_name="nasa93center2"
	data="./data/nasa93_center_2.arff"
}
# Selects the nasa93 center 5 data set. The file name uses underscores
# (nasa93_center_5.arff) while data_name does not.
d_nasa93center5()
{
	data_name="nasa93center5"
	data="./data/nasa93_center_5.arff"
}
# Selects the sdr data set: sets data_name and the ARFF path in data.
d_sdr()
{
	data_name="sdr"
	data="./data/${data_name}.arff"
}
# Selects the telecom1 data set: sets data_name and the ARFF path in data.
d_telecom1()
{
	data_name="telecom1"
	data="./data/${data_name}.arff"
}
###########################################################################
# Preprocessors #
# None #
# Selects the "none" preprocessor: sets preprocessor_name and (re)defines the
# global preprocessor() hook.
pp_none()
{
	preprocessor_name="none"

	# Runs WEKA's AllFilter from $data into $tempARFF and records itself in
	# currentpp. ${Weka} holds a whole command line and is therefore
	# expanded unquoted.
	preprocessor()
	{
		currentpp="none"
		${Weka} weka.filters.AllFilter \
			-i ${data} \
			-o ${tempARFF}
	}
}
# Logarithmic #
# Selects the "log" preprocessor: sets preprocessor_name and (re)defines the
# global preprocessor() hook.
pp_log()
{
	preprocessor_name="log"

	# Runs WEKA's MathExpression filter (with "-R last") from $data into
	# $tempARFF and records itself in currentpp.
	# The expression maps 0 to 0 and every other value A to
	# log(A)/0.434294482. 0.434294482 is log10(e), so the division turns a
	# base-10 logarithm into a natural one.
	# NOTE(review): that reading assumes MathExpression's log() is base 10;
	# confirm against the WEKA version in use.
	preprocessor()
	{
		currentpp="log"
		${Weka} weka.filters.unsupervised.attribute.MathExpression \
			-R last \
			-E "ifelse(A=0,0,(log(A))/0.434294482)" \
			-i ${data} \
			-o ${tempARFF}
	}
}
# Normalization #
# Selects the "norm" preprocessor: sets preprocessor_name and (re)defines the
# global preprocessor() hook.
pp_normalize()
{
	preprocessor_name="norm"

	# Runs WEKA's instance Normalize filter (class = last attribute) from
	# $data into $tempARFF and records itself in currentpp.
	preprocessor()
	{
		currentpp="norm"
		${Weka} weka.filters.unsupervised.instance.Normalize \
			-c last \
			-i ${data} \
			-o ${tempARFF}
	}
}
# n-Bin Equal Frequency Discretization #
# Selects n-bin equal-frequency discretization.
# Arguments: $1 - number of bins
# Globals written: preprocessor_name ("freq<n>bin"), ppVar (the bin count),
#                  and the preprocessor() hook.
pp_freqNbin()
{
	ppVar=$1
	preprocessor_name="freq${1}bin"

	# Runs WEKA's Discretize filter with -F and $ppVar bins from $data into
	# $tempARFF; ppVar and preprocessor_name are read when the hook runs.
	preprocessor()
	{
		currentpp=${preprocessor_name}
		${Weka} weka.filters.unsupervised.attribute.Discretize \
			-B ${ppVar} -F \
			-c last \
			-i ${data} \
			-o ${tempARFF}
	}
}
# n-Bin Equal Width Discretization #
# Selects n-bin equal-width discretization.
# Arguments: $1 - number of bins
# Globals written: preprocessor_name ("width<n>bin"), ppVar (the bin count),
#                  and the preprocessor() hook.
pp_widthNbin()
{
	ppVar=$1
	preprocessor_name="width${1}bin"

	# Runs WEKA's Discretize filter with $ppVar bins from $data into
	# $tempARFF; ppVar and preprocessor_name are read when the hook runs.
	preprocessor()
	{
		currentpp=${preprocessor_name}
		${Weka} weka.filters.unsupervised.attribute.Discretize \
			-B ${ppVar} \
			-c last \
			-i ${data} \
			-o ${tempARFF}
	}
}
# Principal Component Analysis #
# Selects the "pca" preprocessor: sets preprocessor_name and (re)defines the
# global preprocessor() hook.
pp_pca()
{
	preprocessor_name="pca"

	# Runs WEKA's PrincipalComponents filter (options -D -c last) from $data
	# into $tempARFF and records itself in currentpp.
	preprocessor()
	{
		currentpp="pca"
		${Weka} weka.filters.unsupervised.attribute.PrincipalComponents \
			-D \
			-c last \
			-i ${data} \
			-o ${tempARFF}
	}
}
###########################################################################
# Learners #
# ZeroR #
# Selects the ZeroR learner: sets learner_name and (re)defines the global
# learner() hook.
l_zeroR()
{
	learner_name="ZeroR"

	# Capabilities text from WEKA
	#   Class      -- Date class, Numeric class, Nominal class,
	#                 Missing class values, Binary class
	#   Attributes -- Numeric, Unary, Relational, Binary, Date, String,
	#                 Empty nominal and Nominal attributes, Missing values
	#
	# Only task "ee" with split "loo" is handled: the fold count (-x) is set
	# to the number of instances in $tempARFF, and the formatted predictions
	# go to $outfile.
	learner()
	{
		if [[ $task == ee && $split == loo ]]; then
			sizeData $tempARFF
			${Weka} weka.classifiers.rules.ZeroR \
				-t $tempARFF -s $Seed -x $numberInstances -p 0 \
				| formatFile > $outfile
		fi
	}
}
# Simple Linear Regression #
# Selects the simple linear regression learner: sets learner_name and
# (re)defines the global learner() hook.
l_slreg()
{
	learner_name="SLReg"

	# Capabilities text from WEKA
	#   Class      -- Date class, Missing class values, Numeric class
	#   Attributes -- Date attributes, Numeric attributes
	#   min # of instances: 1
	#
	# Only task "ee" with split "loo" is handled. Nominal attributes are
	# first converted with NominalToBinary ($tempARFF -> $tempARFF2); the
	# regression then runs on $tempARFF2 with as many folds as $tempARFF has
	# instances.
	learner()
	{
		if [[ $task == ee && $split == loo ]]; then
			sizeData $tempARFF
			${Weka} weka.filters.supervised.attribute.NominalToBinary \
				-A -c last -i $tempARFF -o $tempARFF2

			${Weka} weka.classifiers.functions.SimpleLinearRegression \
				-t $tempARFF2 -s $Seed -x $numberInstances -p 0 \
				| formatFile > $outfile
		fi
	}
}
# Partial Least Squares Regression #
# Selects the partial least squares regression learner: sets learner_name and
# (re)defines the global learner() hook.
l_plsr()
{
	learner_name=PlSR

	# Capabilities text from WEKA
	#   Class      -- Date class, Missing class values, Numeric class
	#   Attributes -- Date attributes, Missing values, Numeric attributes
	#   min # of instances: 1
	#
	# Only task "ee" with split "loo" is handled.
	# Globals read:    task, split, Weka, Seed, tempARFF, tempARFF2, outfile
	# Globals written: numberInstances, numberFeatures (via sizeData; the
	#                  latter is decremented while retrying)
	learner()
	{
		if [ "$task" != "ee" ] || [ "$split" != "loo" ]; then
			return 0
		fi

		# $Weka holds a whole command line, so it is expanded unquoted.
		$Weka weka.filters.supervised.attribute.NominalToBinary -A -c last -i "$tempARFF" -o "$tempARFF2"
		sizeData "$tempARFF2"

		# Try PLS with numberFeatures components; whenever that produces no
		# predictions, retry with one component fewer. Once no components
		# are left, fall back to ZeroR.
		# (This used to be a recursive helper that was invoked one line
		# before it was defined, so the first run in a session failed with
		# "command not found" and wrote no result.)
		while :
		do
			$Weka weka.classifiers.functions.PLSClassifier -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 \
				-filter "weka.filters.supervised.attribute.PLSFilter -M -P none -C $numberFeatures" \
				| formatFile > "$outfile"
			if [ -s "$outfile" ]; then
				break
			fi

			numberFeatures=$((numberFeatures - 1))
			if [ "$numberFeatures" -le 0 ]; then
				$Weka weka.classifiers.rules.ZeroR -t "$tempARFF2" -s "$Seed" -x "$numberInstances" -p 0 \
					| formatFile > "$outfile"
				break
			fi
		done
	}
}
# Neural Net #
# Selects the neural net learner: sets learner_name and (re)defines the
# global learner() hook.
l_nnet()
{
	learner_name="nnet"

	# Only task "ee" with split "loo" is handled: runs WEKA's
	# MultilayerPerceptron (-N 50) on $tempARFF with as many folds as there
	# are instances, and writes the formatted predictions to $outfile.
	learner()
	{
		if [[ $task == ee && $split == loo ]]; then
			sizeData $tempARFF
			${Weka} weka.classifiers.functions.MultilayerPerceptron -N 50 \
				-t $tempARFF -s $Seed -x $numberInstances -p 0 \
				| formatFile > $outfile
		fi
	}
}
# Analogy Based Estimation - n Nearest Neighbor #
# Selects the n-nearest-neighbour learner.
# Arguments: $1 - number of neighbours
# Globals written: learner_name ("nn<n>"), lVar (the neighbour count), and
#                  the learner() hook.
l_nNearN()
{
	lVar=$1
	learner_name="nn${1}"

	# Only task "ee" with split "loo" is handled: runs WEKA's IBk with
	# -K $lVar on $tempARFF with as many folds as there are instances, and
	# writes the formatted predictions to $outfile.
	learner()
	{
		if [[ $task == ee && $split == loo ]]; then
			sizeData $tempARFF
			${Weka} weka.classifiers.lazy.IBk -K $lVar \
				-t $tempARFF -s $Seed -x $numberInstances -p 0 \
				| formatFile > $outfile
		fi
	}
}
# Simple CART #
# Selects the SimpleCart learner.
# Arguments: $1 - "y" to pass -U to SimpleCart, "n" to omit it
# Globals written: learner_name ("CART<arg>"), lVar (the argument), and the
#                  learner() hook.
l_cart()
{
	lVar=$1
	learner_name="CART${1}"

	# Capabilities text from WEKA
	#   Class      -- Nominal class, Binary class
	#   Attributes -- Binary, Numeric, Nominal, Empty nominal and Unary
	#                 attributes, Missing values
	#   min # of instances: 1
	#
	# Only task "ee" with split "loo" is handled. The data is converted with
	# NumericToNominal ($tempARFF -> $tempARFF2) before SimpleCart runs.
	# When $outfile ends up empty, ZeroR on the unconverted $tempARFF is
	# used instead.
	learner()
	{
		if [[ $task == ee && $split == loo ]]; then
			${Weka} weka.filters.unsupervised.attribute.NumericToNominal \
				-i $tempARFF -o $tempARFF2
			sizeData $tempARFF2

			case "$lVar" in
				y)
					${Weka} weka.classifiers.trees.SimpleCart -U \
						-t $tempARFF2 -s $Seed -x $numberInstances -p 0 \
						| formatFile2 > $outfile
					;;
				n)
					${Weka} weka.classifiers.trees.SimpleCart \
						-t $tempARFF2 -s $Seed -x $numberInstances -p 0 \
						| formatFile2 > $outfile
					;;
			esac

			if [ -z "$(cat $outfile)" ]; then
				sizeData $tempARFF
				${Weka} weka.classifiers.rules.ZeroR \
					-t $tempARFF -s $Seed -x $numberInstances -p 0 \
					| formatFile > $outfile
			fi
		fi
	}
}
###########################################################################

# WEKA Wrapper for Debug #
# Debug wrapper: runs WEKA (the $Weka command line) with the given arguments.
# Arguments: any number of arguments, passed through unchanged.
weka()
{
	# $Weka holds a whole command line and is expanded unquoted on purpose.
	# "$@" forwards every argument as given, rather than only the first
	# nine re-split on whitespace.
	$Weka "$@"
}

# Make override for debug #
# Debug override of "make": changes to $Here and re-sources comba.bash.
# Returns: cd's failure status when $Here cannot be entered (in which case
#          nothing is sourced), otherwise the status of sourcing comba.bash.
make()
{
	# Quoted so that a path containing blanks works; stop on failure so that
	# a comba.bash from some other directory is never sourced by accident.
	cd "$Here" || return
	. comba.bash
}

# CSV to ARFF conversion #
# CSV to ARFF conversion.
# Arguments: $1 - path without extension; reads <$1>.csv, writes <$1>.arff
# Returns:   the exit status of the WEKA converter.
c2a2()
{
	local incsv="$1.csv"
	local outarff="$1.arff"
	local converter="weka.core.converters.CSVLoader"

	# $Weka holds a whole command line and is expanded unquoted on purpose.
	# The converter writes straight into the output file (no intermediate
	# "cat"), so its exit status is what the caller sees.
	$Weka "$converter" "$incsv" > "$outarff"
}

# System Opening Messages #
# Print the banner followed by one empty line, then switch the prompt.
printf '%s\n\n' "COMBA 2 by Vincent Rogers and William Sica"
PS1='COMBA> '