#!/bin/bash
#
# Generates 10 experts from the training data.
#
# Steps:
#   1. Convert the raw COC81 CSV to its numeric form with convertcsv.awk.
#   2. Print a header line of the COC81 attribute names to stdout.
#   3. Hold out one randomly chosen row as the test file; all remaining
#      rows become the training file.
#   4. Run ./cocomostExperts on the training file with a random seed.
#
# All relative paths (MyRoot, ./cocomostExperts) are resolved from the
# parent of the directory the script is started in, because of the `cd ..`.
# Requires gawk.

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

MyRoot="../../../trunk/"

cd .. || die "cannot cd to parent directory"

# Per-user scratch directory. Fall back to `id -un` when $USER is not set.
WorkDir="/tmp/${USER:-$(id -un)}"
mkdir -p -- "$WorkDir" || die "cannot create $WorkDir"

# Set up data to demonstrate usage
#Data="$WorkDir/nasa93_all.csv.numeric"
Data="$WorkDir/coc81_all.csv.numeric"
RawData="${MyRoot}data/coc81.csv"
Train="$WorkDir/train.csv"
Test="$WorkDir/test.csv"

[[ -r "$RawData" ]] || die "cannot read raw data file: $RawData"

# Start from empty train/test files; the split below appends to them.
: > "$Train"
: > "$Test"

#echo "Preprocessing data file"
gawk -f "${MyRoot}baker_lc/convertcsv.awk" \
  Method="COC81" Style="precise" "$RawData" > "$Data" \
  || die "convertcsv.awk failed on $RawData"
#echo "Preprocessed data stored at $Data"

# Print a header line of the attribute names.
# IMPORTANT - This is only for COC81 attributes and would need updated for COCII
# Only the first line of the raw file is read and its last two columns are
# skipped. Names are matched exactly and joined with commas, so the line is
# well formed no matter which column the last kept attribute is in.
gawk -v Wanted="rely,data,cplx,time,stor,virt,turn,acap,aexp,pcap,vexp,lexp,modp,tool,sced" '
  BEGIN {
    FS = ","
    n = split(Wanted, names, ",")
    for (k = 1; k <= n; k++) {
      keep[names[k]] = 1
    }
  }
  NR == 1 {
    sep = ""
    for (i = 1; i <= NF - 2; i++) {
      if ($i in keep) {
        printf("%s%s", sep, $i)
        sep = ","
      }
    }
    if (sep != "") {
      printf("\n")
    }
    exit
  }
' "$RawData"

# Number of rows in the preprocessed data.
N=$(gawk 'END { print NR }' "$Data")
(( N > 0 )) || die "no rows in preprocessed data file: $Data"

# Pick the held-out row uniformly from 1..N. $RANDOM is in 0..32767, so
# RANDOM * N / 32768 is in 0..N-1; adding 1 guarantees that a row is always
# selected (an index of 0 would leave the test file empty).
i=$(( RANDOM * N / 32768 + 1 ))

# Row i goes to the test file, every other row to the training file.
gawk -v X="$i" -v Train="$Train" -v Test="$Test" '
  NR == X { print $0 >> Test; next }
          { print $0 >> Train }
' "$Data"
#echo "Training file generated and stored at $Train"
#echo "Test file generated and stored at $Test"
#echo "This may take a few minutes because COCOMOST has to run N+1 times and there are a lot of I/O operations to make the training files."
#echo ""

# Run range
#./range $Train $Test

Seed=$RANDOM
#echo "Seed=$Seed"

#Arguments: TrainingFile, Seed, NumberOfExperts, TestSizeOfExperts
./cocomostExperts "$Train" "$Seed" 10 0.33