#!/usr/bin/env bash
#
# Evaluate the methods found in a results CSV on each COC81 / NASA93 subset
# and aggregate the per-subset scores.
#
# Usage: <script> Data MethodCol EstimateCol ActualCol
#   Data         CSV file; the rows of a subset are the lines that contain
#                the subset name
#   MethodCol    column number holding the method name
#   EstimateCol  column number holding the estimated value
#   ActualCol    column number holding the actual value
#
# Output is written to a directory named after Data (leading path and up to
# two trailing extensions removed):
#   <subset>.csv                                  one row per method
#   coc81_aggregate.csv, nasa93_aggregate.csv,
#   all_aggregate.csv                             totals / means per method
#
# Requires gawk plus the `mwu` and `baker_parametrics` commands on PATH.
# NOTE(review): the original carried commented-out `gawk -f mwu.awk` and
# `gawk -f baker_parametrics.awk` calls, so those two commands are presumably
# wrappers around these awk files -- confirm.
#
# `set -e` / `pipefail` are intentionally not enabled: a subset that matches
# no rows must still yield a header-only file instead of aborting the run.

set -u

# Confidence level handed to mwu.
readonly Confidence=95

readonly -a Coc81=(
  coc81_all coc81_kind_min coc81_lang_ftn coc81_mode_e coc81_kind_max
  coc81_mode_org coc81_lang_mol
)
readonly -a Nasa93=(
  nasa93_all nasa93_project_gro nasa93_cat2_missionplanning
  nasa93_cat2_avionicsmonitoring nasa93_mode_semidetached nasa93_project_sts
  nasa93_forg_g nasa93_center_5 nasa93_year_1975 nasa93_year_1980
  nasa93_mode_embedded nasa93_center_2
)
# Every subset, in the order in which they are evaluated and aggregated.
readonly -a All=(
  coc81_all nasa93_all coc81_kind_min coc81_lang_ftn coc81_mode_e
  coc81_kind_max coc81_mode_org coc81_lang_mol nasa93_project_gro
  nasa93_cat2_missionplanning nasa93_cat2_avionicsmonitoring
  nasa93_mode_semidetached nasa93_project_sts nasa93_forg_g nasa93_center_5
  nasa93_year_1975 nasa93_year_1980 nasa93_mode_embedded nasa93_center_2
)

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage line to stderr and abort.
usage() {
  printf 'Usage: %s Data MethodCol EstimateCol ActualCol\n' "${0##*/}" >&2
  exit 2
}

#######################################
# Print a file name without its leading path and without up to two
# trailing extensions (e.g. dir/results.csv.gz -> results).
# Arguments: $1 - path
# Outputs:   the stripped name on stdout
#######################################
get_name() {
  local base=${1##*/}
  base=${base%.*}
  printf '%s\n' "${base%.*}"
}

#######################################
# Score every method on one subset and write $DataName/<subset>.csv.
# Globals:   Data, MethodCol, EstimateCol, ActualCol, Confidence,
#            DataName, TmpDir (all read)
# Arguments: $1 - subset name
# Outputs:   a CSV with a "#Method,..." header and one row per method:
#            the method's mwu row, wins minus losses, then columns 2-5 of
#            its baker_parametrics row
#######################################
evaluate_subset() {
  local subset=$1
  local out="$DataName/$subset.csv"
  local rows="$TmpDir/rows.tmp"
  local nonpar="$TmpDir/mwu.tmp"
  local param="$TmpDir/param.tmp"
  local names="$TmpDir/names.tmp"
  local name a b wins_minus_losses

  # Select the rows of this subset once; both evaluations read them.
  grep -F -- "$subset" "$Data" > "$rows"

  # Nonparametric evaluation on the magnitude of relative error,
  # |estimate - actual| / actual. High=0 because a lower error is better
  # (in mwu, High=1 means that a higher performance value wins).
  # NOTE: a row whose actual value is 0 makes gawk abort this stage with a
  # division-by-zero error.
  gawk -v a="$MethodCol" -v b="$EstimateCol" -v c="$ActualCol" \
    'BEGIN{FS=OFS=","}{Re=($b-$c)/$c;Mre=Re<0?-1*Re:Re;print $a,Mre;}' \
    "$rows" \
    | mwu Fields=2 Key=1 Performance=2 High=0 Confidence="$Confidence" \
    > "$nonpar"

  # Parametric evaluation on (method, estimate, actual) triples.
  gawk -v a="$MethodCol" -v b="$EstimateCol" -v c="$ActualCol" \
    'BEGIN{FS=OFS=","}{print $a,$b,$c}' "$rows" \
    | baker_parametrics > "$param"

  # Distinct method names reported by the parametric evaluation; lines
  # containing "#" and the "key" / "Method" header rows are ignored.
  gawk 'BEGIN{FS=OFS=","}
        !/#/ && $1!="key" && $1!="Method" {name[$1]++}
        END{for (x in name){print x}}' "$param" > "$names"

  # Join both result sets on the method name. Exactly one row per method
  # is expected in each of the two result files.
  {
    printf '%s\n' \
      "#Method,Ties,Wins,Losses,Wins-Losses,MMRE,MedianMRE,Pred30,Correlation"
    while IFS= read -r name; do
      a=$(gawk -v name="$name" \
        'BEGIN{FS=OFS=","}$1==name{print $0}' "$nonpar")
      wins_minus_losses=$(printf '%s\n' "$a" \
        | gawk 'BEGIN{FS=OFS=","}{print $3-$4}')
      b=$(gawk -v name="$name" \
        'BEGIN{FS=OFS=","}$1==name{print $2,$3,$4,$5}' "$param")
      printf '%s,%s,%s\n' "$a" "$wins_minus_losses" "$b"
    done < "$names"
  } > "$out"
}

#######################################
# Combine per-subset result files into one aggregate CSV: the count columns
# (Ties, Wins, Losses, Wins-Losses) are summed per method, the remaining
# columns are averaged. Methods are emitted in sorted order.
# Globals:   DataName (read)
# Arguments: $1 - output file; $2.. - subset names to combine
#######################################
aggregate() {
  local out=$1
  shift
  local subset

  for subset in "$@"; do
    # Drop the header line of each per-subset file.
    gawk 'NR>1{print $0}' < "$DataName/$subset.csv"
  done | gawk '
    BEGIN {
      FS = OFS = ","
      print "Method,Ties,Wins,Losses,Wins-Losses,MMRE,MedianMRE,Pred30,Correlation"
    }
    {
      Ties[$1] += $2; Wins[$1] += $3; Losses[$1] += $4; Minus[$1] += $5
      MMRE[$1] += $6; Med[$1]  += $7; Pred[$1]   += $8; Corr[$1]  += $9
    }
    END {
      n = 0
      for (x in Minus) tmp[++n] = x
      # Without data rows only the header is emitted; this also avoids
      # dividing by zero below.
      if (n == 0) exit
      n = asort(tmp)
      # Rows per method, i.e. the number of input files when every method
      # occurs exactly once in each of them.
      num = NR / n
      for (i = 1; i <= n; i++) {
        m = tmp[i]
        printf("%s,%d,%d,%d,%d,", m, Ties[m], Wins[m], Losses[m], Minus[m])
        printf("%.2f,%.2f,%.2f,%.3f\n",
               MMRE[m] / num, Med[m] / num, Pred[m] / num, Corr[m] / num)
      }
    }' > "$out"
}

# --- argument handling -------------------------------------------------------

(( $# >= 4 )) || usage

Data=$1
MethodCol=$2
EstimateCol=$3
ActualCol=$4

[[ -r "$Data" ]] || die "cannot read data file: $Data"
for Col in "$MethodCol" "$EstimateCol" "$ActualCol"; do
  [[ "$Col" =~ ^[0-9]+$ ]] || die "column numbers must be integers, got: $Col"
done
for Tool in gawk mwu baker_parametrics; do
  command -v "$Tool" > /dev/null || die "required command not found: $Tool"
done

DataName=$(get_name "$Data")
[[ -n "$DataName" ]] || die "cannot derive an output directory name from: $Data"
mkdir -p -- "$DataName" || die "cannot create output directory: $DataName"

# Scratch files live in a private directory that is removed on every exit
# path, including interruption.
TmpDir=$(mktemp -d) || die "cannot create temporary directory"
trap 'rm -rf -- "$TmpDir"' EXIT

# --- per-subset evaluation ---------------------------------------------------

for Subset in "${All[@]}"; do
  printf 'evaluating %s\n' "$Subset"
  evaluate_subset "$Subset"
done

# --- aggregation -------------------------------------------------------------

printf '%s\n' "aggregating coc81"
aggregate "$DataName/coc81_aggregate.csv" "${Coc81[@]}"

printf '%s\n' "aggregating nasa93"
aggregate "$DataName/nasa93_aggregate.csv" "${Nasa93[@]}"

printf '%s\n' "aggregating all"
aggregate "$DataName/all_aggregate.csv" "${All[@]}"