#!/bin/bash
#
# Experiment driver: for every CSV dataset in the current directory, repeatedly
# split it into train/test ARFF files and source the method scripts found in
# the sandbox directory against them. Everything the run prints to stdout is
# written to the log file.
#
# Usage:
#   <script> Here Sandbox Repeats Use Pred SpecialMethods NormalMethods \
#            ColumnPrunerMethods Log
#
#   Here                 $1 - not referenced in this file; presumably read by
#                             the sourced method scripts (TODO confirm)
#   Sandbox              $2 - directory holding the scripts that get sourced
#   Repeats              $3 - number of train/test splits per dataset
#   Use                  $4 - passed to splitIntoTrainTest.awk as TestSize
#   Pred                 $5 - not referenced in this file; presumably read by
#                             the sourced method scripts (TODO confirm)
#   SpecialMethods       $6 - space-separated script names, run only on the
#                             full attribute set
#   NormalMethods        $7 - space-separated script names, run on the full
#                             set and on every pruned attribute set
#   ColumnPrunerMethods  $8 - space-separated column-pruner script names
#   Log                  $9 - file that receives the stdout of generate
#
# NOTE: all variables are intentionally global and keep their original names.
# The method scripts are sourced (".") into this shell, so they can see these
# variables (e.g. ColumnPruner, numberOfAttributes, Num, Training, stem).

Here=$1
Sandbox=$2
Repeats=$3
Use=$4
Pred=$5
SpecialMethods=$6
NormalMethods=$7
ColumnPrunerMethods=$8
Log=$9

# Datasets with fewer than Use+10 instances are skipped.
Min=$((Use + 10))

Drivers="precise proximal"
Trainings="Subset Superset"
Weka="java -Xmx1024M -cp weka.jar"

#######################################
# Convert a CSV file to ARFF on stdout via ./csv2arff.
# Globals:   stem (read) - used as the ARFF relation name
# Arguments: $1 - CSV file to convert
# Outputs:   whatever ./csv2arff writes to stdout
#######################################
csvToArff() {
  ./csv2arff --ranges rules.config --relation "$stem" "$1"
}

#######################################
# Run ./cocNums on an ARFF file using the numbers from <driver>.config.
# Arguments: $1 - driver name (the file "$1.config" is passed as --numbers)
#            $2 - ARFF file to process
# Outputs:   whatever ./cocNums writes to stdout
#######################################
arffWithNums() {
  ./cocNums --numbers "$1.config" "$2"
}

#######################################
# Main experiment loop over every *.csv file in the current directory.
# Globals:   Min, Repeats, Use, Trainings, Drivers, Sandbox, SpecialMethods,
#            NormalMethods, ColumnPrunerMethods (read);
#            i, j, mainDataset, stem, Size, R, Seed, Training, Num, Method,
#            ColumnPruner, numberOfAttributes (written)
# Arguments: none
# Outputs:   stdout of the sourced method scripts and helper tools
# Side effects: creates/overwrites mainDataset.arff, subset.arff, train.arff,
#            trainN.arff, testN.arff, tunings, rankings.dat and the subset/
#            directory in the current directory.
#######################################
generate() {
  for i in *.csv; do
    # An unmatched glob stays literal; do not feed "*.csv" to the tools.
    [[ -e $i ]] || continue

    # "<name>_<anything>.csv" belongs to the superset "<name>_all.csv".
    mainDataset=${i%%_*}_all.csv
    stem=${mainDataset%%.*}
    csvToArff "$mainDataset" > mainDataset.arff

    stem=${i%%.*}
    csvToArff "$i" > subset.arff

    Size=$(./instances subset.arff)
    [ "$Size" -lt "$Min" ] && continue

    ./blab "\n$stem "
    for ((R = 1; R <= Repeats; R++)); do
      ./blab "$R "
      Seed=$RANDOM

      # Generates one test file and two train files:
      #  - "Subset" (manual stratification): the train file is drawn from
      #    the same subset the test file was drawn from;
      #  - "Superset": the train file is drawn from the superset of that
      #    subset.
      # The loop below expects the results as test.arff,
      # trainSubset.arff and trainSuperset.arff.
      gawk -f splitIntoTrainTest.awk "Seed=$Seed" "TestSize=$Use" \
        subset.arff mainDataset.arff

      # shellcheck disable=SC2086 # list variables are split on purpose
      for Training in $Trainings; do
        cp "train$Training.arff" train.arff

        for Num in $Drivers; do
          arffWithNums "$Num" test.arff > testN.arff
          arffWithNums "$Num" train.arff > trainN.arff
          numberOfAttributes=$(./attributes testN.arff)

          #---------- No Column Pruner
          ColumnPruner="None"

          #---------- LC with pre-defined settings
          . "$Sandbox/lcSimple" testN.arff

          #---------- Only Full Attribute Methods
          for Method in $SpecialMethods; do
            . "$Sandbox/$Method" trainN.arff testN.arff
          done

          #---------- Any Attribute Methods
          for Method in $NormalMethods; do
            . "$Sandbox/$Method" trainN.arff testN.arff
          done

          #---------- Local Wrapper
          ColumnPruner="LocalWrapper"
          sed 's/ //g' "$Num.config" > tunings
          ./localwrapper tunings train.arff > rankings.dat
          rm -rf -- subset
          mkdir -p subset/train subset/test
          cat rankings.dat train.arff \
            | gawk -f split.awk Stem="subset/train/$stem"
          cat rankings.dat test.arff \
            | gawk -f split.awk Stem="subset/test/$stem"

          #---------- Any Attribute Methods For Local Wrapper
          for j in subset/train/*.arff; do
            # Skip the literal pattern when split.awk produced no files.
            [[ -e $j ]] || continue
            numberOfAttributes=$(./attributes "$j")
            arffWithNums "$Num" "$j" > trainN.arff
            # The matching test file has the same basename under subset/test.
            arffWithNums "$Num" "subset/test/${j##*/}" > testN.arff
            for Method in $NormalMethods; do
              . "$Sandbox/$Method" trainN.arff testN.arff
            done
          done

          #---------- All Other Column Pruners combined with other methods
          for ColumnPruner in $ColumnPrunerMethods; do
            . "$Sandbox/$ColumnPruner"
            for Method in $NormalMethods; do
              . "$Sandbox/$Method" \
                "${ColumnPruner}_Train.arff" "${ColumnPruner}_Test.arff"
            done
          done
        done
      done
    done
  done
}

generate > "$Log"