#! /bin/bash

# Positional arguments:
#   $1  Train    - training data file
#   $2  Test     - test data file
#   $3  SeedFile - file that receives the random seed drawn for each expert
# Without the two data files every gawk call below would silently fall back
# to reading stdin, so refuse to start instead.
if (($# < 2)); then
  printf 'Usage: %s TRAIN_FILE TEST_FILE SEED_FILE\n' "${0##*/}" >&2
  exit 2
fi
Train=$1
Test=$2
SeedFile=$3
#SeedFile="log/committee.seed"

# Boost selects the training set used from here on:
#   0          -> use $Train unchanged
#   otherwise  -> run the boosting pipeline and use its output file
Boost=3
if ((Boost == 0)); then
  TrainLocal=$Train
else
  TrainLocal="log/boost.tmp"
  BoostAmount="0.5"
  Subset="-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-"
  # full_lc.awk is given the training file twice (Pass=1, then Pass=2).
  # Its output is sorted numerically using a key that starts at
  # comma-separated field 19, then handed to boost.awk together with the
  # Boost and Amount settings.
  # NOTE(review): -k19 keys on field 19 through end of line; if only field 19
  # was intended this should be -k19,19 - confirm before changing.
  gawk -f full_lc.awk Pass=1 TargetEMs="$Subset" "$Train" Pass=2 TargetEMs="$Subset" "$Train" |
    sort -g -t"," -k19 |
    gawk -f boost.awk Boost="$Boost" Amount="$BoostAmount" > "$TrainLocal"
fi

# TODO: try running lc on every iteration.
# TODO: stack learners to search for the best configuration values.

# This version of the committee handles exactly one test record. Count the
# records in $Test and stop with a failure status for anything else, so that
# callers never mistake the diagnostic for a result line.
TestCount=$(gawk 'END{print NR}' "$Test")
if ((TestCount > 1)); then
  echo "This quick and dirty version of committee was written to work on only one test record." >&2
  exit 1
fi
if ((TestCount < 1)); then
  # Covers an empty or unreadable test file; without a record there is no
  # actual value to compare the estimate against.
  echo "No test record found in '$Test'." >&2
  exit 1
fi

# ---- Committee configuration ----

# Number of experts, i.e. iterations of the expert loop.
ExpertNum=10
# Data each expert is scored on: 1 = $TrainLocal, 2 = the expert's own sample.
Option=1
# Per-expert training sample; rewritten on every iteration.
vTrain="vtrain.tmp"
# Fraction of the $TrainLocal records sampled for each expert.
X=0.9
# Sample size: record count of $TrainLocal times X, rounded to nearest integer.
Num=$(gawk -v X="$X" 'END{print int((NR*X)+0.5)}' "$TrainLocal")
#Num=$(gawk  -v X=$X 'END{print int((NR*X)+0.5)}' $Train)
# One "estimate,goodness" line per expert is collected here.
# NOTE(review): fixed, predictable path in /tmp - concurrent runs overwrite
# each other's results; consider mktemp if nothing else reads this file.
ExpertFile="/tmp/experts.tmp"
#PredLevel=50
# 1 = weight each expert's estimate by its Goodness; otherwise plain average.
UseWeights=1
Goodness=0  #Default Value
TargetEMs="-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-" #Default Value
# 1 = let ./lcFssFilter choose TargetEMs for every expert.
UseCocomost=1

# Start from empty seed and expert files; the loop only appends to them.
: > "$SeedFile"
: > "$ExpertFile"

# Build the committee. Each iteration:
#   1. draws a seed and records it in $SeedFile,
#   2. writes a random sample of Num records from $TrainLocal to $vTrain,
#   3. optionally picks TargetEMs with ./lcFssFilter,
#   4. optionally scores the expert (Goodness),
#   5. predicts the test record and appends "estimate,goodness" to $ExpertFile.
for ((i = 1; i <= ExpertNum; i++)); do

  #Set up the vTrain and vTest files for the new Expert
  Seed=$RANDOM
  echo "$Seed" >> "$SeedFile"
  : > "$vTrain"
  # Every record is stored under a random key and the first Num entries of
  # gawk's array traversal are written out. CONVFMT is widened because the
  # numeric key is converted to a string subscript; more digits make two
  # records colliding on the same key far less likely.
  gawk -v Seed="$Seed" -v TFile="$vTrain" -v Num="$Num" '
    BEGIN { CONVFMT = "%.20g" }
    NR == 1 { srand(Seed) }
    { Dat[rand()] = $0 }
    END {
      c = 0
      for (I in Dat) {
        if (c < Num) {
          print Dat[I] >> TFile
        }
        c++
      }
    }' "$TrainLocal"
  #gawk -v Seed=$Seed -v TFile=$vTrain -v Num=$Num 'BEGIN{CONVFMT="%.20g"}NR==1{srand(Seed);}{Dat[rand()]=$0}END{c=0;for(I in Dat){if (c<Num){print Dat[I]>>TFile}c++}}' $Train

  # Pick the data the expert is scored on (see Option).
  if ((Option == 1)); then
    vTest=$TrainLocal
    #vTest=$Train
  elif ((Option == 2)); then
    vTest=$vTrain
  fi

  #Use the COCOMOST filter
  if ((UseCocomost == 1)); then
    TargetEMs=$(./lcFssFilter "$vTrain")
  fi

  #Evaluate the expert
  if ((UseWeights == 1)); then
    # Goodness = 1 / (mean of output column 3 + 1), so a smaller mean gives a
    # larger weight.
    # NOTE(review): column 3 of simplelc.awk output is presumably a per-record
    # error measure - confirm against simplelc.awk.
    Goodness=$(
      gawk -f simplelc.awk Pass=1 TargetEMs="$TargetEMs" "$vTrain" Pass=2 TargetEMs="$TargetEMs" "$vTest" |
        gawk 'BEGIN{FS=",";Sum=0;}{Sum+=$3}END{print Sum/NR}' |
        gawk '{print 1/($1+1)}'
    )
    #gawk  -v Pred=$PredLevel 'BEGIN{FS=",";p=0;}{if($3<Pred){p++}}END{print p}'
  fi

  #The expert makes a prediction on the test file
  Result=$(gawk -f simplelc.awk Pass=1 TargetEMs="$TargetEMs" "$vTrain" Pass=2 TargetEMs="$TargetEMs" "$Test")
  # The estimate is the first comma-separated field of the (single) result line.
  Estimate=$(gawk -F, 'NR==1{print $1}' <<<"$Result")
  printf '%s,%s\n' "$Estimate" "$Goodness" >> "$ExpertFile"

done

#Now average the expert results for the estimate.  Compare to actual.
# Prints one line to stdout: Estimate,Actual,MRE
#   Estimate - goodness-weighted mean of the expert estimates when
#              UseWeights is 1, otherwise their plain mean
#   Actual   - last comma-separated field of the first record in $Test
#   MRE      - 100 * |Estimate - Actual| / Actual
if ((UseWeights == 1)); then
  # Print nothing when the weights sum to zero instead of dividing by zero.
  WeightedAvg=$(gawk 'BEGIN{FS=",";n=0;d=0;}{n+=($1*$2);d+=$2;}END{if (d != 0) print n/d}' "$ExpertFile")
  FinalEstimate=$WeightedAvg
else
  # Print nothing when there are no expert lines instead of dividing by zero.
  Avg=$(gawk 'BEGIN{FS=",";Sum=0;}{Sum+=$1}END{if (NR != 0) print Sum/NR}' "$ExpertFile")
  FinalEstimate=$Avg
fi

# An empty estimate means no expert produced a usable result; reporting a
# row in that case would present a made-up error figure as a real one.
if [[ -z "$FinalEstimate" ]]; then
  echo "No usable expert results in '$ExpertFile'; cannot compute an estimate." >&2
  exit 1
fi

Actual=$(gawk -F, 'NR==1{print $NF}' "$Test")
# a+0 forces a numeric test so an empty or non-numeric actual is caught too.
# In that case the MRE column is left empty and a warning goes to stderr.
MRE=$(gawk -v e="$FinalEstimate" -v a="$Actual" 'BEGIN{
  if (a+0 == 0) {
    print "Actual value is zero or missing; MRE is undefined." > "/dev/stderr"
    exit
  }
  re=(e-a)/a;mre=re<0?-1*re:re;print 100*mre}')
#echo "#Estimate,Actual,MRE"
printf '%s,%s,%s\n' "$FinalEstimate" "$Actual" "$MRE"