#!/bin/bash
#
# Boosting3 experiment driver.
#
# Steps, in order:
#   1. Recreate the scratch directory tmp/Boosting3/data.
#   2. Print the CSV header line for the experiment output.
#   3. Copy the full coc81 and nasa93 datasets into the scratch directory and
#      run convertcsv.awk over them.
#   4. Carve subsets out of each dataset with csvsubset.awk and run
#      convertcsv.awk over every subset.
#   5. Invoke boosting_oversample_3 on every *.csv.numeric file.
#
# NOTE(review): boosting_oversample_3 is not defined in this file; it is
# assumed to be on PATH or defined by the calling environment, and presumably
# emits the rows that belong under the header printed below -- confirm.
#
# All paths are relative to the current working directory.

MyRoot="../../../../trunk/"
Path="tmp/Boosting3"

# Start from a clean scratch directory. -f keeps a first run quiet when the
# directory does not exist yet; :? aborts rather than expanding to an empty
# operand should Path ever be unset or blank.
rm -rf -- "${Path:?}"

DataPath="${Path}/data"
mkdir -p -- "$DataPath"

Method="COC81"
Style="precise"

# output the first line of the csv output describing the field names
echo "#FILENAME,I,LEARNER,ESTIMATE,ACTUAL,MRE"

#Seed=$RANDOM
#echo "#SEED="$Seed

#######################################
# Run convertcsv.awk over one dataset in the scratch directory.
# Globals:   MyRoot, DataPath, Method, Style (read)
# Arguments: $1 - dataset stem; reads  $DataPath/<stem>.csv
#                               writes $DataPath/<stem>.csv.numeric
#######################################
convert_to_numeric() {
  # Method=... and Style=... are awk command-line variable assignments.
  gawk -f "${MyRoot}baker_lc/convertcsv.awk" \
    Method="$Method" Style="$Style" \
    "${DataPath}/$1.csv" > "${DataPath}/$1.csv.numeric"
}

#######################################
# Run csvsubset.awk over a source CSV to produce one subset file.
# Globals:   MyRoot, DataPath (read)
# Arguments: $1 - source CSV path
#            $2 - value passed to the awk script as Col
#            $3 - value passed to the awk script as Want
#            $4 - output stem; writes $DataPath/<stem>.csv
#######################################
make_subset() {
  gawk -f "${MyRoot}baker_lc/csvsubset.awk" \
    Col="$2" Want="$3" "$1" > "${DataPath}/$4.csv"
}

#######################################
# Run the boosting experiment on one converted dataset.
# Globals:   DataPath (read); Train, Base (written)
# Arguments: $1 - dataset stem; uses $DataPath/<stem>.csv.numeric
#######################################
run_experiment() {
  # Train and Base are intentionally left global, exactly as the original
  # straight-line script set them, in case boosting_oversample_3 is a shell
  # function that reads them -- TODO confirm and make them local if not.
  Train="${DataPath}/$1.csv.numeric"
  Base=$(basename "$Train")
  boosting_oversample_3 "$Train" "$Base"
}

Coc81Source="${MyRoot}data/coc81modetypelangtype.csv"
Nasa93Source="${MyRoot}data/nasa93.csv"

# Subset tables: one "<Col> <Want> <output stem>" triple per entry. The stem is
# spelled out because it does not always match the column name (nasa93 column
# "projectname" is written to nasa93_project_*).
Coc81Subsets=(
  "kind min coc81_kind_min"
  "lang ftn coc81_lang_ftn"
  "mode e coc81_mode_e"
  "kind max coc81_kind_max"
  "mode org coc81_mode_org"
  "lang mol coc81_lang_mol"
)

Nasa93Subsets=(
  "projectname gro nasa93_project_gro"
  "cat2 missionplanning nasa93_cat2_missionplanning"
  "cat2 avionicsmonitoring nasa93_cat2_avionicsmonitoring"
  "mode semidetached nasa93_mode_semidetached"
  "projectname sts nasa93_project_sts"
  "forg g nasa93_forg_g"
  "center 5 nasa93_center_5"
  "year 1975 nasa93_year_1975"
  "year 1980 nasa93_year_1980"
  "mode embedded nasa93_mode_embedded"
  "center 2 nasa93_center_2"
)

# Preprocess the full coc81 and nasa93 datasets
cp -- "$Coc81Source" "${DataPath}/coc81_all.csv"
convert_to_numeric "coc81_all"
cp -- "$Nasa93Source" "${DataPath}/nasa93_all.csv"
convert_to_numeric "nasa93_all"

# Make the subsets of coc81 data
for Spec in "${Coc81Subsets[@]}"; do
  read -r Col Want Stem <<< "$Spec"
  make_subset "$Coc81Source" "$Col" "$Want" "$Stem"
done

# Convert the subsets of coc81 data to numbers
for Spec in "${Coc81Subsets[@]}"; do
  read -r Col Want Stem <<< "$Spec"
  convert_to_numeric "$Stem"
done

# Make the subsets of nasa93 data
for Spec in "${Nasa93Subsets[@]}"; do
  read -r Col Want Stem <<< "$Spec"
  make_subset "$Nasa93Source" "$Col" "$Want" "$Stem"
done

# Convert the subsets of nasa93 data to numbers
for Spec in "${Nasa93Subsets[@]}"; do
  read -r Col Want Stem <<< "$Spec"
  convert_to_numeric "$Stem"
done

# try experiment on full datasets
run_experiment "coc81_all"
run_experiment "nasa93_all"

# try experiment on coc81 data subsets
for Spec in "${Coc81Subsets[@]}"; do
  read -r Col Want Stem <<< "$Spec"
  run_experiment "$Stem"
done

# try experiment on nasa93 data subsets
for Spec in "${Nasa93Subsets[@]}"; do
  read -r Col Want Stem <<< "$Spec"
  run_experiment "$Stem"
done