#!/bin/bash
#
# Cross-validate the Which and Tar3 learners on every *.data file in a
# directory and record how they compare.
#
# Arguments:
#   $1 - Location of directory to cross-validate the learners on.
#   $2 - Location of the output of the comparison (used as a path prefix:
#        results for a data set go to "$2<stem>").
#   $3 - Location of the Tar3 and Which learners (used as a path prefix in
#        front of the learner executable names).
#   $4 - Sampling rate.
#
# Helper programs ./makeTrainTest, ./getRules, ./compare and ./plotsort are
# invoked relative to the current directory, and temporary train*/test*/
# trules*/wrules* files plus comLog are written there as well.

if (( $# < 4 )); then
  printf 'Usage: %s <data-dir> <output-prefix> <learner-prefix> <sample-rate>\n' \
    "${0##*/}" >&2
  exit 2
fi

## For each data file that was created in the new directory.
for file in "$1"/*.data; do
  # An unmatched glob stays literal; skip it so an empty directory is a no-op.
  [[ -e "$file" ]] || continue

  ## Do a 10-way cross-validation on the data.
  stem=$(basename "$file" .data)

  echo "Creating $4 Train-Test sets with $1/$stem.data to test the learners on."
  ./makeTrainTest seed="$RANDOM" sample="$4" file="$stem" "$1/$stem.data"

  # For each of the 10 samples run Which and Tar3 on 90% train, then test
  # their rules on 10% test. Score them and store that score along with
  # other statistics.
  for i in 0 1 2 3 4 5 6 7 8 9; do
    ## Create some local temp files.
    cp "$1/$stem.cfg" "train$i.cfg"
    cp "$1/$stem.cfg" "test$i.cfg"
    cp "$1/$stem.names" "train$i.names"
    cp "$1/$stem.names" "test$i.names"
    # Presumably makeTrainTest left its splits as train<i><stem> and
    # test<i><stem> in the current directory -- verify against that tool.
    mv "train$i$stem" "train$i.data"
    mv "test$i$stem" "test$i.data"

    echo " Tar3 pass $i on $stem."
    "${3}t3" "train$i" | ./getRules Pass=2 > "trules$i"

    echo " Which pass $i on $stem."
    # NOTE(review): the original spelled this command "$33", which bash
    # expands as "${3}3" (an executable literally named "3" under the $3
    # prefix). Kept as-is; confirm that is the intended Which binary name.
    "${3}3" 100 "train$i" l 1 | ./getRules Pass=1 > "wrules$i"

    ## Compare Which and Tar3 at each of the 10 runs.
    echo " Comparing using test$i"
    # comLog is overwritten on every pass; only the last run's log survives.
    ./compare "test$i" "wrules$i" "trules$i" "$2$stem" l "$i" > comLog

    ## Clean up the temp files.
    rm -f -- "trules$i"
    rm -f -- "wrules$i"
    rm -f -- "train$i"*
    rm -f -- "test$i"*
  done

  ## Sort the 10 runs on each data set by how great Which's performance
  ## over Tar3 was.
  echo "Sorting the compare'd output information by difference( Which - Tar )."
  ./plotsort "$2$stem"
done