#!/bin/bash
#
# This file runs competing learners on the given subset of data.
#
# Usage: <this script> Path Filename
#   Path     - base directory. The script reads Path/data/Filename.numeric and
#              writes under Path/data/{test,train,v_test,v_train} and Path/log.
#              The script does not create these directories.
#   Filename - base name of the data set.
#
# For each iteration the data is split by randomize.awk, basic local
# calibration is run through ./effort, and, depending on the Do* switches
# below, the cocomostV2 and cocomostV3 learners are run as well.
# Result rows are written to stdout as comma-separated values.
#
# Helpers expected in the current directory: randomize.awk, simpleLC.awk,
# experts.awk, ./effort, ./lcFssFilter, ./calc.

if (( $# < 2 )); then
  printf 'Usage: %s Path Filename\n' "${0##*/}" >&2
  exit 2
fi

Path=$1
Filename=$2
#SeedFile=$Path"/"$Filename".seed"
ConvertedFile="$Path/data/$Filename.numeric"

Iterations=30
#Iterations=100
TestSets=1
DoCocomostV2=1
DoCocomostV3=0
Experts=4
#M="0.8"
Mprime="0.2"

# Subset string handed to ./effort for basic local calibration.
AllEMs="-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-"

# Run the algorithm many times
for ((i = 1; i <= Iterations; i++)); do
  # Randomize Data
  TestFile="$Path/data/test/$Filename.numeric.$i"
  TrainFile="$Path/data/train/$Filename.numeric.$i"
  Seed=$RANDOM
  gawk -f randomize.awk -v Seed="$Seed" -v TestSets="$TestSets" \
    -v TrainFile="$TrainFile" -v TestFile="$TestFile" "$ConvertedFile"

  # Basic Local Calibration.
  # The row prefix is printed without a newline; ./effort runs right after it.
  LogFile="$Path/log/$Filename.LC.$i"
  printf '%s,%s,%s,LC,%s,' "$Filename" "$i" "$Seed" "$AllEMs"
  ./effort "$ConvertedFile" "$Path" "BasicLC" "$TestFile" "$TrainFile" \
    "$AllEMs" "$LogFile"

  if ((DoCocomostV2 == 1)); then
    TargetEMs=$(./lcFssFilter "$TrainFile")
    LogFile="$Path/log/$Filename.cocomostV2.$i"
    printf '%s,%s,%s,cocomostV2,%s,' "$Filename" "$i" "$Seed" "$TargetEMs"
    # TargetEMs is quoted so that an empty result cannot shift LogFile into
    # its argument position.
    ./effort "$ConvertedFile" "$Path" "BasicLC" "$TestFile" "$TrainFile" \
      "$TargetEMs" "$LogFile"
  fi

  if ((DoCocomostV3 == 1)); then
    # This loop creates the experts' guesses
    ExpertFile="$Path/log/$Filename.experts.$i"

    # Start from an empty file: the loop below appends, so records left over
    # from an earlier run would otherwise be fed to experts.awk as well.
    : > "$ExpertFile"

    # Size of each virtual test set. It depends only on ConvertedFile and
    # Mprime, so it is computed once rather than once per expert.
    vTestSets=$(gawk -v M="$Mprime" 'END{print int(NR*M)}' "$ConvertedFile")

    for ((j = 1; j <= Experts; j++)); do
      # Set Variables
      vTestFile="$Path/data/v_test/$Filename.numeric.$i.$j"
      vTrainFile="$Path/data/v_train/$Filename.numeric.$i.$j"
      vSeed=$RANDOM
      LogFile="$vTrainFile.log"

      # Generate the vTestFile and vTrainFile from the TrainFile
      gawk -f randomize.awk -v Seed="$vSeed" -v TestSets="$vTestSets" \
        -v TrainFile="$vTrainFile" -v TestFile="$vTestFile" "$TrainFile"

      # Find the Best Evaluated Subset from the vTrainFile.
      TargetEMs=$(./lcFssFilter "$vTrainFile")

      # Try local calibration with the virtual dataset in order to evaluate
      # the expert.
      gawk -f simpleLC.awk Pass=1 TargetEMs="$TargetEMs" "$vTrainFile" \
        Pass=2 TargetEMs="$TargetEMs" "$vTrainFile" > "$LogFile"
      #gawk -f simpleLC.awk Pass=1 TargetEMs=$TargetEMs $vTrainFile Pass=2 TargetEMs=$TargetEMs $vTestFile > $LogFile

      # Find the Pred30 of this Expert (third comma-separated field).
      Stats=$(./calc "$LogFile")
      # Left unquoted to keep the original behavior: multi-line output is
      # joined into a single line before gawk sees it.
      # shellcheck disable=SC2086
      Pred30=$(echo $Stats | gawk 'BEGIN{FS=","} NR==1{print $3}')

      # Let the expert make a guess at the actual Test Set
      # NOTE(review): pass 2 reads $TrainFile, not $TestFile, although the
      # estimate is later compared with a value taken from $TestFile.
      # Confirm against simpleLC.awk whether $TestFile was intended.
      TestResult=$(gawk -f simpleLC.awk Pass=1 TargetEMs="$TargetEMs" "$vTrainFile" \
        Pass=2 TargetEMs="$TargetEMs" "$TrainFile")
      # Left unquoted for the same reason as Stats above.
      # shellcheck disable=SC2086
      Estimate=$(echo $TestResult | gawk 'BEGIN{FS=","} NR==1{print $1}')

      # Record the expert's estimate, weight (as pred30), randomization seed,
      # and subset to a file.
      printf '%s,%s,%s,%s\n' "$Estimate" "$Pred30" "$vSeed" "$TargetEMs" \
        >> "$ExpertFile"
    done

    # Now generate an estimate from the combined wisdom of the experts
    ExpertEstimate=$(gawk -f experts.awk "$ExpertFile")
    # NR==1 is a comparison. The previous NR=1 was an assignment, which is
    # true for every record and printed the last field of every line.
    Actual=$(gawk 'BEGIN{FS=","} NR==1{print $NF}' "$TestFile")
    Mre=$(gawk -v e="$ExpertEstimate" -v a="$Actual" \
      'BEGIN{re=(e-a)/a;mre=re<0?-1*re:re;print 100*mre;}')
    printf '%s,%s,%s,cocomostV3,N/A,%s,%s,%s\n' \
      "$Filename" "$i" "$Seed" "$ExpertEstimate" "$Actual" "$Mre"
  fi

  #--------------Old Cocomost--------------
  #if (($DoCocomost==1))
  #then
  #  IdealsFile=$Path"/log/"$Filename".ideals."$i
  #  gawk -f findIdealsByLC.awk Pass=1 TargetEMs="-" $TrainFile Pass=2 TargetEMs="-" $TrainFile > $IdealsFile
  #  TargetEMs=$(./idealsFssFilter $TrainFile $IdealsFile)
  #  LogFile=$Path"/log/"$Filename".cocomost."$i
  #  echo -n $Filename","$i","$Seed",cocomost,"$TargetEMs","
  #  ./effort $ConvertedFile $Path "BasicLC" $TestFile $TrainFile $TargetEMs $LogFile
  #fi
  #----------------------------------------
done