#!/bin/bash
#
# Run a linear-calibration effort-estimation experiment: split the converted
# data into random train/test sets, run the learner $Iterations times, then
# summarize the accumulated results with ./calc.
#
# Example:
#   ./effort data/subsets/coc81_all.csv.converted COC81 precise log/seed.log name simpleLC -1- /tmp/effort
#
# Argument Key
#   1  Filename
#   2  Effort Multiplier Method (COC81 or COCII)
#   3  Effort Multiplier Precision (precise or rounded)
#   4  Seed Filename
#   5  Name as source:part
#   6  Learner
#   7  TargetEMs - set this to -a-b-c- where a, b, and c are the column
#      numbers of the effort multipliers you want to use
#   8  Path (scratch/output directory -- it is DELETED and recreated)

#Variables
Filename=$1
Iterations=30
TestSets=10
#Path=/tmp/effort
Path=$8

# Recreate the scratch directory.  ${Path:?} aborts the script instead of
# running an unbounded "rm -rf" if argument 8 was omitted or empty.
rm -rf -- "${Path:?path argument (\$8) is required}"
mkdir -p -- "$Path"

LogFile=$Path/lc.log
#LogFile=/tmp/effort/lc.log
#SeedFile=$Path/seed.log
SeedFile=$4
TestFile=$Path/test.tmp
TrainFile=$Path/train.tmp
#gawk 'END{print NR;}' $Path/train.tmp > $Path/trainCount.tmp
#ConvertedFile=$Path/converted.csv
#ConvertedFile=/tmp/converted.csv
ConvertedFile="$Filename.converted"
StatsFile="$Path/stats.tmp"

#Preprocess data files
#gawk -f convertCSV.awk Method=$2 Style=$3 $Filename > $ConvertedFile
rm -f -- "$LogFile"
rm -f -- "$SeedFile"
rm -f "/tmp/corr.log"

#Run the algorithm many times
for ((i = 1; i <= Iterations; i++)); do
  # Randomize the data into a fresh train/test split for this iteration.
  rm -f -- "$TrainFile" "$TestFile"
  Seed=$RANDOM
  gawk -f randomize.awk -v Seed="$Seed" -v TestSets="$TestSets" \
    -v TrainFile="$TrainFile" -v TestFile="$TestFile" "$ConvertedFile"

  #if [ $6 == "attrLC" ] ; then
  #  gawk -f attrLC.awk Pass=1 TargetEMs=$7 $TrainFile Pass=2 TargetEMs=$7 $TestFile Pass=3 TargetEMs=$7 $TestFile >> $LogFile
  #fi
  #if [ $6 == "simpleLC" ] ; then
  #  gawk -f simpleLC.awk Pass=1 TargetEMs=$7 $TrainFile Pass=2 TargetEMs=$7 $TestFile >> $LogFile
  #fi

  # Pass 1 trains on the training split, Pass 2 evaluates on the test split;
  # per-iteration results accumulate in $LogFile.
  gawk -f simpleLC.awk Pass=1 TargetEMs="$7" "$TrainFile" \
    Pass=2 TargetEMs="$7" "$TestFile" >> "$LogFile"

  # Record the seed so this iteration's split can be reproduced.
  echo "$Seed" >> "$SeedFile"
done

./calc "$LogFile" > "$StatsFile"
#gawk -f stats.awk $LogFile > $StatsFile
#gawk -f subsetDisplay.awk $StatsFile
# Number of training rows, taken from the last iteration's split.
Train=$(gawk 'END{print NR;}' "$TrainFile")

#this subset should be taken out
# Number of effort-multiplier columns: total CSV fields minus the two
# non-EM columns.
Subset=$(gawk 'BEGIN{FS=",";} NR==1{print NF-2;}' "$TestFile")

#gawk -f subsetDisplay.awk Name=$5 Train=$Train Test=$TestSets Method=$2 Numbers=$3 Subset=$Subset Learn=$6 Columns=$7 AvgCorr=$AvgCorr /tmp/effort/stats.tmp

if [[ "$6" == "attrLC" ]]; then
  #AvgCorr=$(gawk 'BEGIN{sum=0;} {sum+=$1} END{print sum/NR;}' /tmp/corr.log)

  #Calculate Standard Deviation of Attribute
  AttrDev=$(gawk -f columnStDev.awk Columns="$7" "$ConvertedFile")

  #Calculate Entropy of Attribute (using discrete formula because the
  #effort multipliers are discrete)
  AttrEntropy=$(gawk -f columnEntropy.awk Columns="$7" "$ConvertedFile")

  gawk -f attrDisplay.awk Name="$5" Train="$Train" Test="$TestSets" \
    Method="$2" Numbers="$3" Subset="$Subset" Learn="$6" Columns="$7" \
    AttrDev="$AttrDev" AttrEntropy="$AttrEntropy" "$StatsFile"
fi

if [[ "$6" == "simpleLC" ]]; then
  gawk -f simpleLCdisplay.awk Name="$5" Train="$Train" Test="$TestSets" \
    Method="$2" Numbers="$3" Subset="$Subset" Learn="$6" Columns="$7" \
    "$StatsFile"
fi

#Delete the temporary files
#rm -r $Path