# warning: requires at least 5MB of free disk
##########################################################################
# /* vim: set filetype=sh : */
# ourmine : a simple learning environment for data mining
# Copyright (C) 2007, Tim Menzies, tim@menzies.us, http://menzies.us
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##########################################################################
# for more info on command-line weka stuff, see
# http://www.cs.waikato.ac.nz/~remco/weka_bn/node13.html
# Directory this file was sourced from; reload() and the demos cd back to it.
Here=$(pwd)
#### generic stuff
# reload: go back to the start directory ($Here) and source the rc file
# named by $Ourrc (set outside this section of the file).
# Returns non-zero without sourcing anything if $Here cannot be entered.
reload() {
  cd "$Here" || return
  . "$Ourrc"
}
# show NAME: print the shell variable(s) whose name starts with NAME, or,
# if there is no such variable, the definition of the function called NAME.
# Uses the output of `set`, which in bash lists variables and functions.
show() {
  local goal1="^$1"
  local com="/^$1 /,/^}/{print}"
  # grep -q replaces the old scratch file /tmp/debug: nothing is written.
  if set | grep -- "$goal1" | grep -q "="; then
    set | grep -- "$goal1"
  else
    set | gawk "$com"
  fi
}
# blab TEXT...: write TEXT to stderr with no trailing newline.
# Backslash escapes such as \n are expanded; % is printed literally
# (the text is no longer used as the printf format string).
blab() { printf '%b' "$*" >&2; }
# blabln TEXT...: like blab, but ends the message with a newline.
# Backslash escapes are expanded; % is printed literally.
blabln() { printf '%b\n' "$*" >&2; }
#### initialization stuff
# setup: initialise the session -- variables first, then the directories
# that are derived from them.
setup() {
  setUpVars; setUpDirs
}
# lcsee: alias ls to "ls --color".
lcsee() {
  alias ls='ls --color'
}
# build: put $Here/minerc.lib at the front of gawk's library search path.
build() {
  AWKPATH="$Here/minerc.lib:$AWKPATH"
  export AWKPATH
}
# setUpVars: define the prompt, search paths and the global settings
# (Bins, Repeats, Learners, Data, ...) used by the workers and demos.
setUpVars() {
  alias ls="ls -G"
  PROMPT_COMMAND='echo -ne "\033]0;${HOSTNAME}: `pwd`\007"'
  PS1="Our MINE! \W#\!> "
  Ourmine="$HOME/opt/ourmine"
  PATH="$Ourmine/bin:$HOME/bin:$PATH"
  Safe=$Ourmine/var/safe
  # $Our is not set anywhere in this file; fall back to $Ourmine so that
  # Dirs does not silently become "/lib/arffs". A caller-provided $Our wins.
  Dirs="${Our:-$Ourmine}/lib/arffs"
  LibUrl="http://unbox.org/wisp/trunk/our/minerc.lib/lib.zip"
  export AWKPATH="$Ourmine/lib:$AWKPATH"
  Bins=10
  Repeats=2
  Learners="nb nbk"
  # NOTE(review): Data uses lib/arff while Dirs and the demos use lib/arffs;
  # confirm which directory lib.zip really unpacks to.
  Data="$Ourmine/lib/arff/uci/discrete/a*.arff
$Ourmine/lib/arff/uci/discrete/s*.arff"
  Audit="pgawk --profile=$HOME/tmp/awkprof.out --dump-variables=$HOME/tmp/awkvars.out --lint"
}
# setUpDirs: create the working directories, make a fresh private temp
# directory ($Tmp), define the $Weka command line, fetch the support
# library if lib.zip is absent, and copy weka.jar into $Tmp.
# Returns non-zero if the temp area cannot be created.
setUpDirs() {
  mkdir -p "$HOME/tmp" "/tmp/$USER" || return
  # Template form of mktemp works with both GNU and BSD mktemp.
  Tmp=$(mktemp -d "/tmp/$USER/tmp.XXXXXXXXXX") || return
  # Expanded unquoted by the learners, so it must stay one flat string.
  Weka="nice -19 java -Xmx1024M -cp $Tmp/weka.jar "
  mkdir -p "$Ourmine/lib" # for support code
  mkdir -p "$Ourmine/bin" # for our executables
  mkdir -p "$HOME/bin"    # for your executables
  mkdir -p "$Safe"        # for stuff you want to keep around
  if [ ! -f "$Ourmine/lib/lib.zip" ]; then
    downloads
  fi
  # I had too much trouble with pathname syntax problems
  # on mac, windows, linux, etc. So now I just copy weka.jar
  # to the working directory (no need for pathnames)
  cp "$Ourmine/lib/weka.jar" "$Tmp"
}
# downloads: fetch $LibUrl into $Ourmine/lib as lib.zip and unpack it there,
# with command tracing switched on for the duration.
# Returns the status of the fetch/unpack; nothing is downloaded if the
# library directory cannot be entered.
downloads() {
  local status
  set -x
  (
    cd "$Ourmine/lib" || exit
    wget -O lib.zip "$LibUrl" && unzip -o lib.zip
  )
  status=$?
  set +x
  return "$status"
}
#### stuff for the turkey experiment
# setUpSeds: write the sed script $Tmp/etc/seds that maps the various
# spellings of metric names onto one vocabulary (used by prep).
# Creates $Tmp/etc first; nothing else in this file does.
setUpSeds() {
  mkdir -p "$Tmp/etc" || return
  cat <<'EOF' > "$Tmp/etc/seds"
s/loccodeandcomment/loc_code_and_comment/
s/locodeandcomment/loc_code_and_comment/
s/locandcomment/loc_code_and_comment/
s/essential_complexity/ev(g)/
s/cyclomatic_complexity/v(g)/
s/halstead_length/n/
s/halstead_level/l/
s/num_operators/n1/
s/num_operands/n2/
s/unique_operands/uniq_opnd/
s/unique_operators/uniq_op/
s/halstead_content/i/
s/halstead_error_est/b/
s/halstead_prog_time/t/
s/halstead_effort/e/
s/halstead_difficulty/d/
s/halstead_volume/v/
s/loc_comments/loc_comment/
s/design_complexity/iv(g)/
s/locomment/loc_comment/
s/loc_total/loc/
s/locode/loc/
s/[\t ]c[\t ]/ defects /
s/[\t ]problems[\t ]/ defects /
s/branchcout/branch_count/
s/total_op[\t ]/n1 /
s/total_opnd/n2/
s/{no,yes}/{false,true}/
EOF
}
# prep: lower-case every $Dirs/mdp/*.arff, run it through the sed script
# $Tmp/etc/seds, and store the result under the same name in $Tmp/arff.
prep() {
  local i
  mkdir -p "$Tmp/arff" || return
  for i in "$Dirs"/mdp/*.arff; do
    [ -e "$i" ] || continue # glob matched nothing
    tr A-Z a-z < "$i" |
      sed -f "$Tmp/etc/seds" \
        > "$Tmp/arff/$(basename "$i")"
  done
}
# classes [-b|--brief]: read an ARFF file on stdin and print, for every
# value found in the last column of the data section, "count,value"
# (or just the value with --brief). Output order is unspecified.
# Only words that START with "-" are treated as options.
classes() {
  local brief=0
  while [[ $1 == -* ]]; do
    case $1 in
      -b|--brief) brief=1 ;;
      *) blabln "'"$1"' unknown\n usage cat file | classes [options]"
         return 1 ;;
    esac
    shift 1
  done
  gawk '
    BEGIN { OFS = FS = ","
            IGNORECASE = 1
            Brief = 0 }
    { gsub(/#.*/, "") }              # drop "#" comments
    /^[ \t]*$/ { next }
    Data && NF > 1 { Freq[$NF]++ }   # tally the last field of data rows
    /@data/ { Data = 1 }             # rows after this line are data
    END {
      for (N in Freq)
        if (Brief) { print N } else { print Freq[N], N }
    }
  ' Brief="$brief" -
}
# intersectAttributes FILE...: list the attribute names that occur in
# every one of the given ARFF files (one per line, unspecified order).
# All arguments are passed to gawk; previously only the first file was.
intersectAttributes() {
  gawk '
    BEGIN { IGNORECASE = 1; OFS = "," }
    FNR == 1     { Files++ }        # a new input file starts
    /@attribute/ { Got[$2]++ }      # $2 is the attribute name
    END {
      for (A in Got)
        if (Got[A] >= Files)
          print A
    }' "$@"
}
# shared: print the attributes common to all $Tmp/arff/*.arff files,
# sorted, with "defects" moved to the end of the list.
shared() {
  # was "intesectAttributes" (typo), which made the list always empty
  intersectAttributes "$Tmp"/arff/*.arff |
    sort |
    grep -v defects
  echo defects
}
# some ATTRS FILE: generate an arff file that only contains certain
# attributes. ATTRS is handed to some.awk as its Some variable.
some() {
  local wanted=$1
  local arff=$2
  gawk -f some.awk -v Some="$wanted" $arff
}
# makeshare: for each $Tmp/arff/*.arff write a copy restricted to the
# shared attributes into $Tmp/shared, echoing each input path.
# Sets the global Shared to the attribute list.
makeshare() {
  local i
  Shared=$(shared)
  mkdir -p "$Tmp/shared" || return
  for i in "$Tmp"/arff/*.arff; do
    [ -e "$i" ] || continue # glob matched nothing
    echo "$i"
    some "$Shared" "$i" > "$Tmp/shared/$(basename "$i")"
  done
}
# report INDENT SHOW: stdin is split into blank-line separated paragraphs;
# the paragraphs whose numbers are listed in SHOW (comma separated, 1-based)
# are printed in that order, each line indented by INDENT spaces.
report() {
  gawk 'BEGIN { RS = ""; FS = "\n" }
    NR == 1 { M = split(Show, Shows, ",") }
    { R[++N] = indent($0) }
    END {
      print " "
      for (r = 1; r <= M; r++) printf("\n%s", R[Shows[r]])
      print ""
    }
    # n copies of chr (default: one blank)
    function str(n, chr,   out) {
      chr = chr ? chr : " "
      while (n-- > 0) out = out chr
      return out
    }
    # The parameter used to be called "str", the same name as the function
    # above; current gawk rejects a parameter named after a function.
    function indent(para,   i, out) {
      for (i = 1; i <= NF; i++)
        out = out str(Indent, " ") $i "\n"
      return out
    }
  ' Show="$2" Indent="$1" -
}
#### end inter intra stuff
#### misc utils
# makeTrainTest SEED BINS BIN [FILE]: split an ARFF file into train.arff
# and test.arff via someArff. The data is read from FILE when given
# (the workers call it that way), otherwise from stdin as before.
makeTrainTest() {
  if [ -n "$4" ]; then
    someArff --seed "$1" --bins "$2" --bin "$3" < "$4"
  else
    someArff --seed "$1" --bins "$2" --bin "$3"
  fi
}
# gotwant: filter whitespace-separated prediction lines on stdin into
# comma-separated pairs: fields 2,3 of 3-field lines and fields 2,4 of
# 4-field lines. All other lines are dropped.
gotwant() {
  gawk '
    BEGIN { UnLog = 0                    # was spelt "Unlog", unlike the test below
            OFS = ","
            Ee = 848456353 / 312129649   # approximately 2.71828
    }
    NF == 3 { if (UnLog) { print Ee^$2, Ee^$3 }
              else       { print $2, $3 } }
    NF == 4 { print $2, $4 }
  ' -
}
# abcd [options]: read "predicted,actual" pairs on stdin and print one line
#   [prefix,]a,b,c,d,accuracy,pd,pf,precision,balance   (rates as percent)
# where a..d are the confusion-matrix cells for the goal class.
# Options (each takes a value):
#   -g, --goal REGEX     values matching REGEX count as the goal class
#                        (default "true|yes")
#   -p, --prefix TEXT    printed, followed by a comma, before the numbers
#   -b, --before TEXT    printed first; backslash escapes are expanded
#   -d, --decimals N     decimal places for the percentages (default 2)
# Returns 1 on an unknown option or an option without its value.
abcd() {
  local goal="true|yes"
  local before=""
  local prefix=""
  local decimals=2
  while [[ $1 == -* ]]; do
    case $1 in
      -d|--decimals) decimals=$2 ;;
      -b|--before) before=$2 ;;
      -p|--prefix) prefix=$2 ;;
      -g|--goal) goal=$2 ;;
      *) blabln "'"$1"' unknown\n usage abcd [options]"
         return 1 ;;
    esac
    # a failing shift (value missing) used to loop forever
    shift 2 || return 1
  done
  # %b expands \n etc. without treating the text as a printf format
  [ -n "$before" ] && printf '%b' "$before"
  gawk '
    BEGIN {
      Decimals = 3
      Got = 1                 # column holding the prediction
      Want = 2                # column holding the actual class
      Prefix = ""
      True = "true"           ## define symbol 1
      A = B = C = D = 0
      FS = OFS = ","
      GoalPd = 1
      GoalPf = 0
    }
    function yes(s) { return s ~ True }
    function no(s)  { return (yes(s) ? 0 : 1) }
    { sub(/#.*/, "") }
    /^[ \t]*$/ { next }
    NF == 2 {
      N++
      Predicted = $Got
      Actual = $Want
      if (Predicted == Actual) Good++
      if (no(Actual)  && no(Predicted))  A++
      if (yes(Actual) && no(Predicted))  B++
      if (no(Actual)  && yes(Predicted)) C++
      if (yes(Actual) && yes(Predicted)) D++
    }
    END {
      OFMT = "%." Decimals "f"
      Balance = Precision = Accuracy = Pf = NotPf = Pd = 0
      if (C + D > 0)         Precision = D / (C + D)
      if ((A + B + C + D) > 0) Accuracy = (A + D) / (A + B + C + D)
      if (A + C > 0)         Pf = C / (A + C)
      if (B + D > 0)         Pd = D / (B + D)
      if (B + C + D > 0) {   # special case- everything misses
        Balance = 1 - sqrt((GoalPd - Pd)^2 + (GoalPf - Pf)^2) / sqrt(2)
      }
      # print the prefix as data, not as a format string
      if (Prefix) printf "%s", Prefix OFS
      print A, B, C, D,
            sprintf(OFMT, 100 * Accuracy),
            sprintf(OFMT, 100 * Pd),
            sprintf(OFMT, 100 * Pf),
            sprintf(OFMT, 100 * Precision),
            sprintf(OFMT, 100 * Balance)
    }' Prefix="$prefix" Decimals="$decimals" True="$goal" -
}
# quartile2tex: turn comma-separated report lines on stdin into LaTeX
# table rows. 5-field lines are emitted as "rank & f1&f2&..." rows,
# 7-field lines as a \boxplot row; a header line is printed whenever the
# field count changes. Lines containing "#" are skipped.
quartile2tex() {
  gawk '
    BEGIN   { FS = ","; OFS = "&" }
    /===/   { print $1; last = 0; rank = 1; next }
    NF == 0 { print ""; next }
    /#/     { next }
    Last != NF {
      if (NF == 7)
        print "\\scriptsize\\begin{tabular}{rrrr} rank & treatment & median & distribution\\\\"
      else
        print "stats"
      Last = NF
    }
    NF == 5 { print statsprint() }
    NF == 7 { print qprint() }
    # the rank goes up whenever field 4 differs from the previous row
    function statsprint(   glue, k, row) {
      if ($4 != last)
        rank++
      last = $4
      row = rank " & "
      for (k = 1; k <= NF; k++) {
        row = row glue $k
        glue = "&"
      }
      return row "\\\\"
    }
    function qprint(   row) {
      row = "1 & " $1 "&" $4 "&"
      row = row "\\boxplot{" $2 "}{" $3 "}{" $4 "}{" $5 - $3 "}{" $6 "}"
      return row "\\\\"
    }
    function trim(s) {
      gsub(/[ \t]/, "", s)
      return s
    }
  '
}
# malign: right-align the columns of comma-separated text from stdin.
# Every cell is padded on the left to one more than the widest cell of
# its column. Lines with fewer than two fields are printed unchanged.
malign() {
  gawk '
    BEGIN { Width = 1
            Gutter = 1
            OFS = FS = "," }
    { N++
      if (NF > Cols) Cols = NF       # widest row seen so far
      for (I = 1; I <= NF; I++) {
        if ((L = length($I)) > Max[I]) Max[I] = L
        ++Data[N, 0]                 # number of fields in row N
        Data[N, I] = $I
      }
    }
    END {
      for (J = 1; J <= N; J++) {
        Str = Sep1 = ""
        if (Data[J, 0] > 1) {
          # Loop over the widest row, not over NF of the LAST record:
          # a short or empty final line used to truncate or blank every row.
          for (I = 1; I <= Cols; I++) {
            L = length(Data[J, I])
            Str = Str Sep1 \
                  str(most(Width, Max[I] + Gutter + 1) - L, " ") \
                  Data[J, I]
            Sep1 = OFS
          }
        } else {
          Str = Data[J, 1]
        }
        print Str
      }
    }
    # n-1 copies of c
    function str(n, c,   out) { while (--n > 0) out = out c; return out }
    function most(x, y) { return x > y ? x : y }
  '
}
# medians [-s|--start N]: reads comma-separated lines on stdin; the awk
# program echoes every line and collects fields from column Start onwards.
# NOTE(review): the awk program below is damaged. It breaks off in the
# middle of the END block at "for(I=2;I" and continues with what looks
# like the tail of a different routine (a log transform of numeric
# columns). The text in between appears to have been lost, and the shell
# variable "start" is never handed to awk in what remains. Restore this
# function from a clean copy of the file before relying on it.
medians() {
local start="2"
# NOTE: any argument containing "-" is treated as an option here.
while [ `echo $1 | grep "-"` ]; do
case $1 in
-s|--start) start=$2;;
*) blabln "'"$1"' unknown\n usage medians [options]";
return 1;;
esac
shift 2
done
gawk '
BEGIN{FS=","}
{print}
/^[ \t]*$/ {next}
/#/ {next}
{for(I=Start;I<=NF;I++) {
(Data[I,0]++); Data[I,Data[I,0]]=$I }
}
END{ print ""
printf("##");
printf $1
for(I=2;I 2 {
for(I=1;I<=Attr;I++)
if (I in Num)
if ($I !~ /\?/) {
if ( ($I +0) < Min) {Bad=1} else {Bad=0}
if (Bad) $I= Min;
$I=log($I)
}
print $0
}
' -
}
# winLossTie [options]: run mwu.awk over a results file and print a
# "#key,ties,win,loss,win-loss" table, sorted on the last column
# (descending) and aligned with malign.
# Options:
#   -f, --fields N     number of fields (default 10)
#   -k, --key N        column holding the treatment name (default 1)
#   -p, --perform N    column holding the score (default: same as --fields)
#   --95 | --99        confidence level (default 95)
#   --high | --low     whether larger or smaller scores are better
#   -i, --input FILE   results file (default "-", i.e. stdin)
winLossTie() {
  local fields=10
  local key=1
  local performance=""
  local high=1
  local confidence=95
  local input="-"
  local n
  while [[ $1 == -* ]]; do
    case $1 in
      -f|--fields) fields=$2; n=2 ;;
      --99) confidence=99; n=1 ;;
      --95) confidence=95; n=1 ;;
      -k|--key) key=$2; n=2 ;;
      -p|--perform) performance=$2; n=2 ;;
      --high) high=1; n=1 ;;
      --low) high=0; n=1 ;;
      -i|--input) input=$2; n=2 ;;
      *) blabln "'"$1"' unknown\n. usage: winLossTie [options]"
         return 1 ;;
    esac
    # a failing shift (value missing) used to loop forever
    shift "$n" || return 1
  done
  # The default used to be frozen at 10 before --fields was parsed.
  [ -n "$performance" ] || performance=$fields
  (
    echo "#key,ties,win,loss,win-loss @ ${confidence}%"
    gawk -f mwu.awk Fields="$fields" Key="$key" Performance="$performance" \
      High="$high" Confidence="$confidence" "$input" |
      sort -t, -r -n -k 5,5
  ) | malign
}
# someArff [options]: read an ARFF file on stdin, shuffle its data rows
# and write bin number BIN of BINS to test.arff and all other rows to
# train.arff (both in the current directory, both with the full header).
# Options: -B/--bins NUM (default 3), -b/--bin NUM (default 1),
#          -s/--seed NUM (default $RANDOM), -h/--help.
# Returns 1 after --help, on an unknown option, or on a missing value.
someArff() {
  local bins=3
  local bin=1
  local seed=$RANDOM
  while [[ $1 == -* ]]; do
    case $1 in
      -B|--bins) bins=$2 ;;
      -b|--bin) bin=$2 ;;
      -s|--seed) seed=$2 ;;
      -h|--help)
        cat <<'EOF'
someArff : divide an arrf file into Bins, create train/test files
usage: someArff [flags] arffFile
Flags
-B, --bins NUM Randomly divide the data into NUM bins
-b, --bin NUM Store bin NUM into test.arff and rest into train.arff
-s, --seed NUM Set the random number seed to NUM
-h, --help Print this text
EOF
        return 1 ;;
      *)
        blabln "'"$1"' unknown\n usage cat file | someArff [options]"
        return 1 ;;
    esac
    # a failing shift (value missing) used to loop forever
    shift 2 || return 1
  done
  gawk '
    BEGIN {
      IGNORECASE = 1
      Trainf = "train.arff"; Testf = "test.arff"
      Bins = 3
      Bin = 1
      Seed = 1
    }
    { sub(/#.*/, "") }
    { sub(/%.*/, "") }
    /^[ \t]*$/ { next }
    /@relation/ { Seed ? srand(Seed) : srand(1) }
    /@relation/ { printf "" > Trainf; printf "" > Testf }   # truncate both
    /@relation/,/@data/ { print $0 >> Trainf; print $0 >> Testf; next }
    # Key each row by a random number so that "for (I in Line)" visits the
    # rows in a scrambled order. The row counter is appended because a bare
    # rand() is turned into a subscript with only six significant digits,
    # so two rows could get the same key and one of them would be lost.
    { Line[sprintf("%.15f", rand()) ":" (++Lines)] = $0 }
    END {
      # Multiply before dividing so whole-number bounds stay exact.
      Start = Lines * (Bin - 1) / Bins
      Stop  = Lines * Bin / Bins
      for (I in Line) {
        N++
        # N runs 1..Lines, so bin b owns (Start, Stop]. The former test
        # [Start, Stop) never selected row Lines and shorted bin 1.
        What = (N > Start && N <= Stop) ? Testf : Trainf
        print Line[I] >> What
      }
    }
  ' Seed="$seed" Bins="$bins" Bin="$bin" -
}
#### Weka stuff
## pruning columns
# removeAttributes FROM TO FILE: run Weka's Remove filter over attribute
# range FROM-TO of FILE and print the result (via tmp.arff in the cwd).
removeAttributes() {
  blab "/"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.filters.unsupervised.attribute.Remove \
    -R "${1}-${2}" -i "$3" -o tmp.arff
  # (a stray "set +x" here used to switch off the caller's tracing)
  cat tmp.arff
}
## discretization
# discretizeViaFayyadIrani FILE: run Weka's supervised Discretize filter
# over all attributes of FILE (class = last attribute) and print the
# result (via tmp.arff in the cwd).
discretizeViaFayyadIrani() {
  blab "x"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.filters.supervised.attribute.Discretize \
    -c last -R first-last -i "$1" -o tmp.arff
  cat tmp.arff
}
## feature subset selection
# rankViaInfoGain FILE: run Weka's AttributeSelection filter (Ranker search
# with the InfoGain evaluator) over FILE and print the result
# (via tmp.arff in the cwd).
rankViaInfoGain() {
  blab "<"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.filters.supervised.attribute.AttributeSelection \
    -S "weka.attributeSelection.Ranker -T -2.7976931348623157E308 -N -1" \
    -E "weka.attributeSelection.InfoGainAttributeEval" \
    -i "$1" -o tmp.arff
  cat tmp.arff
}
### learners
## classifiers
# rule-based classifiers
# oner TRAIN TEST: Weka OneR (-B 6); passes TRAIN as -t and TEST as -T,
# with -p 0.
oner() {
  blab "1"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.rules.OneR \
    -B 6 \
    -p 0 -t "$1" -T "$2"
}
# jrip TRAIN TEST: Weka JRip; passes TRAIN as -t and TEST as -T, with -p 0.
jrip() {
  blab "j"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.rules.JRip \
    -F 3 -N 2.0 -O 2 -S 1 \
    -p 0 -t "$1" -T "$2"
}
# jrip10 FILE: Weka JRip given only a training file (-t FILE).
jrip10() {
  blab "j"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.rules.JRip \
    -F 3 -N 2.0 -O 2 -S 1 \
    -t "$1"
}
# bayesian classifiers
# aode TRAIN TEST: Weka AODE (-F 0); passes TRAIN as -t and TEST as -T,
# with -p 0.
aode() {
  blab "a"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.bayes.AODE \
    "-F" 0 \
    -p 0 -t "$1" -T "$2"
}
# aode10 TRAIN TEST: Weka AODE (-F 0) with -t TRAIN -T TEST and no -p.
aode10() {
  blab "a"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.bayes.AODE \
    "-F" 0 \
    -t "$1" -T "$2"
}
# nbd TRAIN TEST: run nbd.awk with TRAIN as pass 1 and TEST as pass 2, then
# reshape its comma-separated output into "- f1 - f2" lines (four fields,
# the layout gotwant reads for 4-field lines).
nbd() {
  blab "N"
  gawk -f nbd.awk Brief=1 Pass=1 "$1" Pass=2 "$2" |
    gawk 'BEGIN {FS=","} {print "-",$1,"-",$2}'
}
# nba TRAIN TEST: run nba.awk with TRAIN as passes 1 and 2 and TEST as
# pass 3 (Debug=1).
nba() {
  blab "A"
  gawk -f nba.awk Debug=1 Pass=1 "$1" Pass=2 "$1" Pass=3 "$2"
}
# nb TRAIN TEST: Weka NaiveBayes; passes TRAIN as -t and TEST as -T,
# with -p 0.
nb() {
  blab "n"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.bayes.NaiveBayes \
    -p 0 -t "$1" -T "$2"
}
# nb10 FILE: Weka NaiveBayes given only a training file (-i -t FILE).
nb10() {
  blab "n"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.bayes.NaiveBayes \
    -i -t "$1"
}
# nbk TRAIN TEST: Weka NaiveBayes with -K; passes TRAIN as -t and TEST
# as -T, with -p 0.
nbk() {
  blab "k"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.bayes.NaiveBayes \
    -K \
    -p 0 -t "$1" -T "$2"
}
# decision tree learners
# j48 TRAIN TEST: Weka J48 (-C 0.25 -M 2); passes TRAIN as -t and TEST
# as -T, with -p 0.
j48() {
  blab "c"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.trees.J48 \
    -C 0.25 -M 2 \
    -p 0 -t "$1" -T "$2"
}
# j4810 FILE: Weka J48 (-C 0.25 -M 2) given only a training file
# (-i -t FILE).
j4810() {
  blab "c"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.trees.J48 \
    -C 0.25 -M 2 \
    -i -t "$1"
}
# j4810c FILE C: like j4810, but with the J48 -C value taken from $2.
j4810c() {
  blab "c$2"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.trees.J48 \
    -C "$2" -M 2 \
    -i -t "$1"
}
## linear-model learners
# lsr TRAIN TEST: Weka LinearRegression (-S 0 -R 1.0E-8); passes TRAIN as
# -t and TEST as -T, with -p 0.
lsr() {
  blab "L"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.functions.LinearRegression \
    -S 0 -R 1.0E-8 \
    -p 0 -t "$1" -T "$2"
}
# m5p TRAIN TEST: Weka M5P; passes TRAIN as -t and TEST as -T, with -p 0.
m5p() {
  blab "P"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.trees.M5P \
    -p 0 -t "$1" -T "$2"
}
## nearest neighbor
# 1Bkx TRAIN TEST: Weka IBk with -K 1 -W 0 -X -E; passes TRAIN as -t and
# TEST as -T, with -p 0.
1Bkx() {
  blab "N"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.lazy.IBk \
    -K 1 -W 0 -X -E \
    -p 0 -t "$1" -T "$2"
}
# 1Bk TRAIN TEST: Weka IBk with -K -1 -W 0 -E; passes TRAIN as -t and TEST
# as -T, with -p 0.
1Bk() {
  blab "n"
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.classifiers.lazy.IBk \
    -K -1 -W 0 -E \
    -p 0 -t "$1" -T "$2"
}
## association rule learners
# apriori TRAIN TEST: run Weka's Apriori association-rule learner with
# TRAIN as -t and TEST as -T.
apriori() {
  blab "A"
  # was "$Weke" (typo): the class name itself was run as the command.
  # $Weka is expanded unquoted on purpose: it holds a command plus options.
  $Weka weka.associations.Apriori \
    -N 10 -T 0 -C 0.9 -D 0.05 -U 1.0 -M 0.1 -S -1.0 \
    -p 0 -t "$1" -T "$2"
}
#### teaching demos
# weather.nominal: print the 14-row nominal "weather" teaching data set
# as an ARFF file on stdout.
weather.nominal() {
  cat <<'EOF'
@relation weather.nominal
@attribute outlook {sunny, overcast, rainy}
@attribute temperature {hot, mild, cool}
@attribute humidity {high, normal}
@attribute windy {TRUE, FALSE}
@attribute play {yes, no}
@data
sunny,hot,high,FALSE,no
sunny,hot,high,TRUE,no
overcast,hot,high,FALSE,yes
rainy,mild,high,FALSE,yes
rainy,cool,normal,FALSE,yes
rainy,cool,normal,TRUE,no
overcast,cool,normal,TRUE,yes
sunny,mild,high,FALSE,no
sunny,cool,normal,FALSE,yes
rainy,mild,normal,FALSE,yes
sunny,mild,normal,TRUE,yes
overcast,mild,high,TRUE,yes
overcast,hot,normal,FALSE,yes
rainy,mild,high,TRUE,no
EOF
}
# auto93: print the 93-row "auto93" car data set (numeric class in the
# last column) as an ARFF file on stdout.
auto93() {
  cat <<'EOF'
@relation 'auto93.names'
@attribute Manufacturer { Acura, Audi, BMW, Buick, Cadillac, Chevrolet, Chrysler, Dodge, Eagle, Ford, Geo, Honda, Hyundai, Infiniti, Lexus, Lincoln, Mazda, Mercedes-Benz, Mercury, Mitsubishi, Nissan, Oldsmobile, Plymouth, Pontiac, Saab, Saturn, Subaru, Suzuki, Toyota, Volkswagen, Volvo}
@attribute Type { Small, Midsize, Compact, Large, Sporty, Van}
@attribute City_MPG real
@attribute Highway_MPG real
@attribute Air_Bags_standard { 0, 2, 1}
@attribute Drive_train_type { 1, 0, 2}
@attribute Number_of_cylinders real
@attribute Engine_size real
@attribute Horsepower real
@attribute RPM real
@attribute Engine_revolutions_per_mile real
@attribute Manual_transmission_available { 1, 0}
@attribute Fuel_tank_capacity real
@attribute Passenger_capacity real
@attribute Length real
@attribute Wheelbase real
@attribute Width real
@attribute U-turn_space real
@attribute Rear_seat_room real
@attribute Luggage_capacity real
@attribute Weight real
@attribute Domestic { 0, 1}
@attribute class real
@data
Acura,Small,25,31,0,1,4,1.8,140,6300,2890,1,13.2,5,177,102,68,37,26.5,11,2705,0,15.9
Acura,Midsize,18,25,2,1,6,3.2,200,5500,2335,1,18,5,195,115,71,38,30,15,3560,0,33.9
Audi,Compact,20,26,1,1,6,2.8,172,5500,2280,1,16.9,5,180,102,67,37,28,14,3375,0,29.1
Audi,Midsize,19,26,2,1,6,2.8,172,5500,2535,1,21.1,6,193,106,70,37,31,17,3405,0,37.7
BMW,Midsize,22,30,1,0,4,3.5,208,5700,2545,1,21.1,4,186,109,69,39,27,13,3640,0,30
Buick,Midsize,22,31,1,1,4,2.2,110,5200,2565,0,16.4,6,189,105,69,41,28,16,2880,1,15.7
Buick,Large,19,28,1,1,6,3.8,170,4800,1570,0,18,6,200,111,74,42,30.5,17,3470,1,20.8
Buick,Large,16,25,1,0,6,5.7,180,4000,1320,0,23,6,216,116,78,45,30.5,21,4105,1,23.7
Buick,Midsize,19,27,1,1,6,3.8,170,4800,1690,0,18.8,5,198,108,73,41,26.5,14,3495,1,26.3
Cadillac,Large,16,25,1,1,8,4.9,200,4100,1510,0,18,6,206,114,73,43,35,18,3620,1,34.7
Cadillac,Midsize,16,25,2,1,8,4.6,295,6000,1985,0,20,5,204,111,74,44,31,14,3935,1,40.1
Chevrolet,Compact,25,36,0,1,4,2.2,110,5200,2380,1,15.2,5,182,101,66,38,25,13,2490,1,13.4
Chevrolet,Compact,25,34,1,1,4,2.2,110,5200,2665,1,15.6,5,184,103,68,39,26,14,2785,1,11.4
Chevrolet,Sporty,19,28,2,0,6,3.4,160,4600,1805,1,15.5,4,193,101,74,43,25,13,3240,1,15.1
Chevrolet,Midsize,21,29,0,1,4,2.2,110,5200,2595,0,16.5,6,198,108,71,40,28.5,16,3195,1,15.9
Chevrolet,Van,18,23,0,1,6,3.8,170,4800,1690,0,20,7,178,110,74,44,30.5,?,3715,1,16.3
Chevrolet,Van,15,20,0,2,6,4.3,165,4000,1790,0,27,8,194,111,78,42,33.5,?,4025,1,16.6
Chevrolet,Large,17,26,1,0,8,5,170,4200,1350,0,23,6,214,116,77,42,29.5,20,3910,1,18.8
Chevrolet,Sporty,17,25,1,0,8,5.7,300,5000,1450,1,20,2,179,96,74,43,?,?,3380,1,38
Chrysler,Large,20,28,2,1,6,3.3,153,5300,1990,0,18,6,203,113,74,40,31,15,3515,1,18.4
Chrysler,Compact,23,28,2,1,4,3,141,5000,2090,0,16,6,183,104,68,41,30.5,14,3085,1,15.8
Chrysler,Large,20,26,1,1,6,3.3,147,4800,1785,0,16,6,203,110,69,44,36,17,3570,1,29.5
Dodge,Small,29,33,0,1,4,1.5,92,6000,3285,1,13.2,5,174,98,66,32,26.5,11,2270,1,9.2
Dodge,Small,23,29,1,1,4,2.2,93,4800,2595,1,14,5,172,97,67,38,26.5,13,2670,1,11.3
Dodge,Compact,22,27,1,1,4,2.5,100,4800,2535,1,16,6,181,104,68,39,30.5,14,2970,1,13.3
Dodge,Van,17,21,1,2,6,3,142,5000,1970,0,20,7,175,112,72,42,26.5,?,3705,1,19
Dodge,Midsize,21,27,1,1,4,2.5,100,4800,2465,0,16,6,192,105,69,42,30.5,16,3080,1,15.6
Dodge,Sporty,18,24,1,2,6,3,300,6000,2120,1,19.8,4,180,97,72,40,20,11,3805,1,25.8
Eagle,Small,29,33,0,1,4,1.5,92,6000,2505,1,13.2,5,174,98,66,36,26.5,11,2295,1,12.2
Eagle,Large,20,28,2,1,6,3.5,214,5800,1980,0,18,6,202,113,74,40,30,15,3490,1,19.3
Ford,Small,31,33,0,1,4,1.3,63,5000,3150,1,10,4,141,90,63,33,26,12,1845,1,7.4
Ford,Small,23,30,0,1,4,1.8,127,6500,2410,1,13.2,5,171,98,67,36,28,12,2530,1,10.1
Ford,Compact,22,27,0,1,4,2.3,96,4200,2805,1,15.9,5,177,100,68,39,27.5,13,2690,1,11.3
Ford,Sporty,22,29,1,0,4,2.3,105,4600,2285,1,15.4,4,180,101,68,40,24,12,2850,1,15.9
Ford,Sporty,24,30,1,1,4,2,115,5500,2340,1,15.5,4,179,103,70,38,23,18,2710,1,14
Ford,Van,15,20,1,2,6,3,145,4800,2080,1,21,7,176,119,72,45,30,?,3735,1,19.9
Ford,Midsize,21,30,1,1,6,3,140,4800,1885,0,16,5,192,106,71,40,27.5,18,3325,1,20.2
Ford,Large,18,26,1,0,8,4.6,190,4200,1415,0,20,6,212,114,78,43,30,21,3950,1,20.9
Geo,Small,46,50,0,1,3,1,55,5700,3755,1,10.6,4,151,93,63,34,27.5,10,1695,0,8.4
Geo,Sporty,30,36,1,1,4,1.6,90,5400,3250,1,12.4,4,164,97,67,37,24.5,11,2475,0,12.5
Honda,Sporty,24,31,2,1,4,2.3,160,5800,2855,1,15.9,4,175,100,70,39,23.5,8,2865,0,19.8
Honda,Small,42,46,1,1,4,1.5,102,5900,2650,1,11.9,4,173,103,67,36,28,12,2350,0,12.1
Honda,Compact,24,31,2,1,4,2.2,140,5600,2610,1,17,4,185,107,67,41,28,14,3040,0,17.5
Hyundai,Small,29,33,0,1,4,1.5,81,5500,2710,1,11.9,5,168,94,63,35,26,11,2345,0,8
Hyundai,Small,22,29,0,1,4,1.8,124,6000,2745,1,13.7,5,172,98,66,36,28,12,2620,0,10
Hyundai,Sporty,26,34,0,1,4,1.5,92,5550,2540,1,11.9,4,166,94,64,34,23.5,9,2285,0,10
Hyundai,Midsize,20,27,0,1,4,2,128,6000,2335,1,17.2,5,184,104,69,41,31,14,2885,0,13.9
Infiniti,Midsize,17,22,1,0,8,4.5,278,6000,1955,0,22.5,5,200,113,72,42,29,15,4000,0,47.9
Lexus,Midsize,18,24,1,1,6,3,185,5200,2325,1,18.5,5,188,103,70,40,27.5,14,3510,0,28
Lexus,Midsize,18,23,2,0,6,3,225,6000,2510,1,20.6,4,191,106,71,39,25,9,3515,0,35.2
Lincoln,Midsize,17,26,2,1,6,3.8,160,4400,1835,0,18.4,6,205,109,73,42,30,19,3695,1,34.3
Lincoln,Large,18,26,2,0,8,4.6,210,4600,1840,0,20,6,219,117,77,45,31.5,22,4055,1,36.1
Mazda,Small,29,37,0,1,4,1.6,82,5000,2370,1,13.2,4,164,97,66,34,27,16,2325,0,8.3
Mazda,Small,28,36,0,1,4,1.8,103,5500,2220,1,14.5,5,172,98,66,36,26.5,13,2440,0,11.6
Mazda,Compact,26,34,1,1,4,2.5,164,5600,2505,1,15.5,5,184,103,69,40,29.5,14,2970,0,16.5
Mazda,Van,18,24,0,2,6,3,155,5000,2240,0,19.6,7,190,110,72,39,27.5,?,3735,0,19.1
Mazda,Sporty,17,25,1,0,?,1.3,255,6500,2325,1,20,2,169,96,69,37,?,?,2895,0,32.5
Mercedes-Benz,Compact,20,29,1,0,4,2.3,130,5100,2425,1,14.5,5,175,105,67,34,26,12,2920,0,31.9
Mercedes-Benz,Midsize,19,25,2,0,6,3.2,217,5500,2220,0,18.5,5,187,110,69,37,27,15,3525,0,61.9
Mercury,Sporty,23,26,1,1,4,1.6,100,5750,2475,1,11.1,4,166,95,65,36,19,6,2450,1,14.1
Mercury,Midsize,19,26,0,0,6,3.8,140,3800,1730,0,18,5,199,113,73,38,28,15,3610,1,14.9
Mitsubishi,Small,29,33,0,1,4,1.5,92,6000,2505,1,13.2,5,172,98,67,36,26,11,2295,0,10.3
Mitsubishi,Midsize,18,24,1,1,6,3,202,6000,2210,0,19,5,190,107,70,43,27.5,14,3730,0,26.1
Nissan,Small,29,33,1,1,4,1.6,110,6000,2435,1,13.2,5,170,96,66,33,26,12,2545,0,11.8
Nissan,Compact,24,30,1,1,4,2.4,150,5600,2130,1,15.9,5,181,103,67,40,28.5,14,3050,0,15.7
Nissan,Van,17,23,0,1,6,3,151,4800,2065,0,20,7,190,112,74,41,27,?,4100,0,19.1
Nissan,Midsize,21,26,1,1,6,3,160,5200,2045,0,18.5,5,188,104,69,41,28.5,14,3200,0,21.5
Oldsmobile,Compact,24,31,0,1,4,2.3,155,6000,2380,0,15.2,5,188,103,67,39,28,14,2910,1,13.5
Oldsmobile,Midsize,23,31,1,1,4,2.2,110,5200,2565,0,16.5,5,190,105,70,42,28,16,2890,1,16.3
Oldsmobile,Van,18,23,0,1,6,3.8,170,4800,1690,0,20,7,194,110,74,44,30.5,?,3715,1,19.5
Oldsmobile,Large,19,28,1,1,6,3.8,170,4800,1570,0,18,6,201,111,74,42,31.5,17,3470,1,20.7
Plymouth,Sporty,23,30,0,2,4,1.8,92,5000,2360,1,15.9,4,173,97,67,39,24.5,8,2640,1,14.4
Pontiac,Small,31,41,0,1,4,1.6,74,5600,3130,1,13.2,4,177,99,66,35,25.5,17,2350,1,9
Pontiac,Compact,23,31,0,1,4,2,110,5200,2665,1,15.2,5,181,101,66,39,25,13,2575,1,11.1
Pontiac,Sporty,19,28,2,0,6,3.4,160,4600,1805,1,15.5,4,196,101,75,43,25,13,3240,1,17.7
Pontiac,Midsize,19,27,0,1,6,3.4,200,5000,1890,1,16.5,5,195,108,72,41,28.5,16,3450,1,18.5
Pontiac,Large,19,28,2,1,6,3.8,170,4800,1565,0,18,6,177,111,74,43,30.5,18,3495,1,24.4
Saab,Compact,20,26,1,1,4,2.1,140,6000,2910,1,18,5,184,99,67,37,26.5,14,2775,0,28.7
Saturn,Small,28,38,1,1,4,1.9,85,5000,2145,1,12.8,5,176,102,68,40,26.5,12,2495,1,11.1
Subaru,Small,33,37,0,2,3,1.2,73,5600,2875,1,9.2,4,146,90,60,32,23.5,10,2045,0,8.4
Subaru,Small,25,30,0,2,4,1.8,90,5200,3375,1,15.9,5,175,97,65,35,27.5,15,2490,0,10.9
Subaru,Compact,23,30,1,2,4,2.2,130,5600,2330,1,15.9,5,179,102,67,37,27,14,3085,0,19.5
Suzuki,Small,39,43,0,1,3,1.3,70,6000,3360,1,10.6,4,161,93,63,34,27.5,10,1965,0,8.6
Toyota,Small,32,37,1,1,4,1.5,82,5200,3505,1,11.9,5,162,94,65,36,24,11,2055,0,9.8
Toyota,Sporty,25,32,1,1,4,2.2,135,5400,2405,1,15.9,4,174,99,69,39,23,13,2950,0,18.4
Toyota,Midsize,22,29,1,1,4,2.2,130,5400,2340,1,18.5,5,188,103,70,38,28.5,15,3030,0,18.2
Toyota,Van,18,22,1,2,4,2.4,138,5000,2515,1,19.8,7,187,113,71,41,35,?,3785,0,22.7
Volkswagen,Small,25,33,0,1,4,1.8,81,5500,2550,1,12.4,4,163,93,63,34,26,10,2240,0,9.1
Volkswagen,Van,17,21,0,1,5,2.5,109,4500,2915,1,21.1,7,187,115,72,38,34,?,3960,0,19.7
Volkswagen,Compact,21,30,0,1,4,2,134,5800,2685,1,18.5,5,180,103,67,35,31.5,14,2985,0,20
Volkswagen,Sporty,18,25,0,1,6,2.8,178,5800,2385,1,18.5,4,159,97,66,36,26,15,2810,0,23.3
Volvo,Compact,21,28,1,0,4,2.3,114,5400,2215,1,15.8,5,190,104,67,37,29.5,14,2985,0,22.7
Volvo,Midsize,20,28,2,1,5,2.4,168,6200,2310,1,19.3,5,184,105,69,38,30,15,3245,0,26.7
EOF
}
# auto93discreteClass: auto93 with its numeric class replaced by a symbol.
# Some learners cannot handle a numeric class, so each class value is
# rounded to the nearest multiple of 20 and written as _0, _20, ...
# (a deliberately simple discretizer).
auto93discreteClass() {
  auto93 |
    gawk '
      BEGIN         { IGNORECASE = 1; OFS = ","; Round = 20 }
      In && NF > 1  { $NF = "_" int($NF / Round + 0.5) * Round }
      $2 == "class" { $3 = "{_0,_20,_40,_60}" }
      /@data/       { In = 1; FS = "," }
                    { print }
    '
}
#### some workers
# worker1001: for every data set in `datas`, build raw / discretized /
# logged / logged+discretized variants, cut each down to several attribute
# counts, and run repeated cross-validation with the learners; the abcd
# rows of each configuration are summarised by medians and everything is
# logged to $Safe/worker1001.log.
# Fixed here: the outer loop iterated over the unset "$data"; the abcd
# prefix used "$Bin"/"$Learner" although the loops set "bin"/"learner";
# the prefix was passed as a bare argument, which abcd ignores; and the
# split file was passed as an argument to makeTrainTest, which read stdin.
# NOTE(review): the entries of `datas` are bare stems, so the cp only works
# from a directory that holds files with exactly those names.
# NOTE(review): ranked.arff is produced but removeAttributes is applied to
# $x.arff; confirm which file was meant.
worker1001() {
  local learners1="j48 oner aode nb nbk jrip"
  local learners2="aode"
  local repeats=10
  local bins=10
  local datas="cm1 kc1 kc2 kc3_mod mc1_mod mc2_mod mw1_mod
pc1 pc2_mod pc3_mod pc4_mod pc5_mod"
  local one stem x attrs r seed bin learner
  for one in $datas; do
    cp "$one" raw.arff
    stem=$(basename "$one")
    stem=${stem/.*/}
    logNumbers raw.arff > logged.arff
    discretizeViaFayyadIrani raw.arff > discrete.arff
    discretizeViaFayyadIrani logged.arff > loggedDiscrete.arff
    for x in raw discrete logged loggedDiscrete; do
      rankViaInfoGain "$x.arff" > ranked.arff
      for attrs in 4 7 13 16; do
        removeAttributes "$attrs" 16 "$x.arff" > "ranked${attrs}.arff"
        blab "$stem $x $attrs "
        echo "#file,x,attrs,bin,learner,a,b,c,d,acc,pd,pf,prec,g"
        for ((r = 1; r <= repeats; r++)); do
          seed=$RANDOM
          for ((bin = 1; bin <= bins; bin++)); do
            blab "$bin"
            makeTrainTest "$seed" "$bins" "$bin" < "ranked${attrs}.arff"
            for learner in $learners1; do
              "$learner" train.arff test.arff |
                gotwant |
                abcd --prefix "$stem,$x,$attrs,$bin,$learner"
            done
            if [ "$x" != "raw" ]; then
              # Presumably meant for learners2; the old code re-ran whatever
              # "$learner" was left over from the loop above -- confirm.
              for learner in $learners2; do
                "$learner" train.arff test.arff |
                  gotwant |
                  abcd --prefix "$stem,$x,$attrs,$bin,$learner"
              done
            fi
          done
        done | medians
        blabln
      done
    done
  done | tee "$Safe/worker1001.log"
}
# worker1002: like worker1001 but driven by the globals $Data, $Repeats,
# $Bins and $Learners, and only for the discretized variants.
# Fixed here: the abcd prefix was passed as a bare argument (abcd only
# reads it from --prefix), and the split file was passed as an argument to
# makeTrainTest, which read stdin.
# NOTE(review): ranked.arff is produced but removeAttributes is applied to
# $x.arff; confirm which file was meant.
worker1002() {
  local one stem x Attrs Seed R Bin Learner
  for one in $Data; do
    cp "$one" raw.arff
    stem=$(basename "$one")
    stem=${stem/.*/}
    logNumbers raw.arff > logged.arff
    discretizeViaFayyadIrani raw.arff > discrete.arff
    discretizeViaFayyadIrani logged.arff > loggedDiscrete.arff
    for x in discrete loggedDiscrete; do
      rankViaInfoGain "$x.arff" > ranked.arff
      for Attrs in 4 7 13 16; do
        removeAttributes "$Attrs" 16 "$x.arff" > "ranked${Attrs}.arff"
        blab "$stem $x $Attrs "
        Seed=$RANDOM
        echo "#file,x,attrs,bin,learner,a,b,c,d,acc,pd,pf,prec,g"
        for ((R = 1; R <= Repeats; R++)); do
          for ((Bin = 1; Bin <= Bins; Bin++)); do
            blab "."
            makeTrainTest "$Seed" "$Bins" "$Bin" < "ranked${Attrs}.arff"
            for Learner in $Learners; do
              "$Learner" train.arff test.arff |
                gotwant |
                abcd --prefix "$stem,$x,$Attrs,$Bin,$Learner"
            done
          done
        done | medians
        blabln
      done
    done
  done
}
#### some demos
# demo3: run j4810 on the weather data inside $Tmp, then return to $Here.
# Stops with a non-zero status if $Tmp cannot be entered.
demo3() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  j4810 data.arff
  cd "$Here"
}
# demo4: as demo3, but show only paragraph 3 of the j4810 output
# (indented by 4). Stops if $Tmp cannot be entered.
demo4() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  j4810 data.arff | report 4 3
  cd "$Here"
}
# demo5: as demo4, but show paragraphs 3, 18 and 16 of the j4810 output.
# Stops if $Tmp cannot be entered.
demo5() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  j4810 data.arff | report 4 3,18,16
  cd "$Here"
}
# demo5a: run j4810 on the soybean data shipped under $Ourmine.
demo5a() {
  j4810 "$Ourmine/lib/arffs/uci/discrete/soybean.arff"
}
# demo6: run nb10 on the weather data inside $Tmp, then return to $Here.
# Stops if $Tmp cannot be entered.
demo6() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  nb10 data.arff
  cd "$Here"
}
# demo7: as demo6, but show paragraphs 2-5 of the nb10 output.
# Stops if $Tmp cannot be entered.
demo7() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  nb10 data.arff | report 4 2,3,4,5
  cd "$Here"
}
# demo8: as demo6, but show paragraphs 18 and 16 of the nb10 output.
# Stops if $Tmp cannot be entered.
demo8() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  nb10 data.arff | report 4 18,16
  cd "$Here"
}
# demo9: train and test j48 on the same weather data inside $Tmp, then
# return to $Here. Stops if $Tmp cannot be entered.
demo9() {
  cd "$Tmp" || return
  weather.nominal > data.arff
  j48 data.arff data.arff
  # was "cd $Home": that variable is never set, so a bare cd ran instead
  cd "$Here"
}
# demo10: feed the output of demo9 through gotwant.
demo10() {
  demo9 |
    gotwant
}
# demo11: summarise demo10 with abcd (one decimal, header line first)
# and align the result with malign.
demo11() {
  demo10 \
    | abcd --before "\na,b,c,d,acc,pd,pf,prec,bal\n" --decimals 1 \
    | malign
}
# demo12: train and test j48 on the discretized auto93 data inside $Tmp,
# print the gotwant pairs, then return to $Here.
# Stops if $Tmp cannot be entered.
demo12() {
  cd "$Tmp" || return
  auto93discreteClass > data.arff
  j48 data.arff data.arff | gotwant
  cd "$Here"
}
# demo13: show only those demo12 pairs whose two fields differ.
demo13() {
  demo12 |
    gawk 'BEGIN { FS = "," } $1 != $2 { print }'
}
# demo14: score the demo12 predictions once per goal class (_0, _20, _40)
# with abcd and align the result.
# demo12 is run once and its output replayed for each goal; piping it
# straight into the loop let the first abcd swallow all of stdin, so the
# other goals were scored on empty input.
demo14() {
  local results goal
  results=$(demo12)
  for goal in _0 _20 _40; do
    printf '%s\n' "$results" |
      abcd --goal "$goal" \
        --prefix "auto93d,$goal" \
        --before "\n#data,goal,a,b,c,d,acc,pd,pf,prec,bal\n" \
        --decimals 1
  done | malign
}
# demo15: one cross-validation of j48 on the discretized auto93 data, goal
# class _20; one abcd row per bin, sorted on column 11, saved as
# demo15.csv in $Tmp and in $Safe.
# The number of bins comes from $Bins (10 if unset) and is used both for
# the loop and for the split; the loop bound used to be a fixed 10.
demo15() {
  local bins=${Bins:-10}
  local bin seed
  cd "$Tmp" || return
  (
    echo "#data,bin, a,b,c,d,acc,pd,pf,prec,bal"
    seed=$RANDOM
    for ((bin = 1; bin <= bins; bin++)); do
      blab "$bin"
      auto93discreteClass | someArff --seed "$seed" --bins "$bins" --bin "$bin"
      j48 train.arff test.arff | gotwant |
        abcd --goal "_20" --prefix "auto93,$bin" --decimals 1
    done | sort -t, -n -k 11,11
  ) | malign > demo15.csv #each bin
  blabln " "
  echo ""
  cat demo15.csv
  cp demo15.csv "$Safe/demo15.csv"
  cd "$Here"
}
# demo16: two repeats of a 5-bin cross-validation of j48 on the
# discretized auto93 data, goal class _20; rows sorted on column 12.
# Fixed here: the loop ran 5 bins but split with --bins $Bins, and the
# results were written over demo15.csv (in $Tmp and $Safe); they now go
# to demo16.csv.
demo16() {
  local me=demo16
  local bins=5
  local r bin seed
  cd "$Tmp" || return
  (
    echo "#data,repeat,bin,a,b,c,d,acc,pd,pf,prec,bal"
    for ((r = 1; r <= 2; r++)); do
      blab "repeat=$r "
      seed=$RANDOM
      for ((bin = 1; bin <= bins; bin++)); do
        blab "$bin"
        auto93discreteClass | someArff --seed "$seed" --bins "$bins" --bin "$bin"
        j48 train.arff test.arff | gotwant |
          abcd --goal "_20" --prefix "auto93,$r,$bin" --decimals 1
      done
      blabln
    done | sort -t, -n -k 12,12
  ) | malign > "$me.csv" #each bin
  blabln " "
  echo ""
  cat "$me.csv"
  cp "$me.csv" "$Safe/$me.csv"
  cd "$Here"
}
# demo17: repeated 10-bin cross-validation of several learners on several
# UCI data sets; one abcd row per (data, repeat, bin, learner, goal
# class), sorted on column 14 and saved as demo17.csv in $Tmp and $Safe.
# Stops if $Tmp cannot be entered.
demo17() {
  local me=demo17
  local bins=10
  local repeats=2
  local learners="oner nb j48"
  local datas="diabetes autos"
  local data arff goals r seed bin learner goal
  cd "$Tmp" || return
  (
    echo "#data,repeat,bin,learner,goal,a,b,c,d,acc,pd,pf,prec,bal"
    for data in $datas; do
      arff=$Ourmine/lib/arffs/uci/discrete/$data.arff
      # the class labels depend only on the data set, so read them once
      goals=$(classes --brief < "$arff")
      for ((r = 1; r <= repeats; r++)); do
        blab "data=$data repeat=$r "
        seed=$RANDOM
        for ((bin = 1; bin <= bins; bin++)); do
          blab "$bin"
          someArff --seed "$seed" --bins "$bins" --bin "$bin" < "$arff"
          for learner in $learners; do
            "$learner" train.arff test.arff | gotwant > results.dat
            for goal in $goals; do
              abcd --goal "$goal" \
                --prefix "$data,$r,$bin,$learner,$goal" \
                --decimals 1 < results.dat
            done
          done
        done
        blabln
      done
    done
  ) | sort -t, -n -k 14,14 | malign > "$me.csv" #each bin
  blabln " "
  echo ""
  cat "$me.csv"
  cp "$me.csv" "$Safe/$me.csv"
  cd "$Here"
}
# demo18: summarise $Safe/demo17.csv (generated via demo17 if missing):
# for every data set / learner pair print the "##" line that medians
# produces for the matching rows, aligned and sorted on column 12.
demo18() {
  local stats="$Safe/demo17.csv"
  local learners="nb j48 oner"
  local datas="diabetes autos"
  if [ ! -f "$stats" ]; then
    demo17
  fi
  {
    echo "#data,learner,goal,a,b,c,d,acc,pd,pf,prec,bal"
    for data in $datas; do
      for learner in $learners; do
        printf '%s' "$data,$learner, "
        grep $data $stats |
          grep $learner |
          medians --start 6 |
          grep "##"
      done
    done
  } | malign | sort -t, -n -k 12,12
}
# demo19: win/loss/tie table over $Safe/demo17.csv (generated via demo17
# if missing), keyed on column 4 and scored on column 14.
demo19() {
  local stats="$Safe/demo17.csv"
  if [ ! -f "$stats" ]; then
    demo17
  fi
  winLossTie --input $stats \
    --fields 14 --perform 14 --key 4 \
    --95 --high
}
# demo21: 10 x 10-bin cross-validation of nb on the log-transformed
# function_data.arff, once per attribute subset listed in `attrs`; every
# abcd row is logged to $Safe/demo21.log, which is then shown sorted on
# column 13.
# Changes: the `ls`-based assignment to arffs was dead code (overwritten on
# the very next line) and is gone; an unreachable $Tmp now aborts the run;
# the function returns to $Here at the end like the other demos.
demo21() {
  local me=demo21
  local rs=10
  local bins=10
  local secrets=$HOME/svns/nextgen/trunk/doc/arffs/raw
  local attrs="classic_metrics inter_metrics intra_metrics"
  local arffs="$secrets/function_data.arff"
  local arff r b attr seed want
  cd "$Tmp" || return
  for arff in $arffs; do
    logNumbers < "$arff" > logged.arff
    for ((r = 1; r <= rs; r++)); do
      seed=$RANDOM
      for ((b = 1; b <= bins; b++)); do
        someArff --bins "$bins" --bin "$b" --seed "$seed" < logged.arff
        for attr in $attrs; do
          # attribute names listed in the file $secrets/$attr, plus "defects"
          want=$(cat "$secrets/$attr"; echo defects)
          some "$want" train.arff > trainSome.arff
          some "$want" test.arff > testSome.arff
          nb trainSome.arff testSome.arff |
            gotwant |
            abcd --goal "true" --decimals 2 \
              --prefix "$(basename "$arff"),$attr,$r,$b"
        done
      done
    done
  done | tee "$Safe/$me.log"
  sort -t, -n -k 13,13 "$Safe/$me.log" | malign
  cd "$Here"
}
# demo22: report on $Safe/demo21.log (generated via demo21 if missing):
# quartiles of column 13 per attribute subset, followed by win/loss/tie
# tables at 95% and 99% confidence.
demo22() {
  local attrs="classic_metrics inter_metrics intra_metrics"
  local log=$Safe/demo21.log
  if [ ! -f $log ]; then
    demo21
  fi
  echo ""
  {
    echo "#treatment,min,q1,median,q3,max,"
    for attr in $attrs; do
      printf '%s' "$attr,"
      grep $attr $log |
        cut -d, -f 13 |
        quartile |
        malign
    done | sort -t, -r -k 4,4
  } | malign
  echo ""
  winLossTie --input $log --fields 13 --perform 13 --key 2 --95 --high
  echo ""
  winLossTie --input $log --fields 13 --perform 13 --key 2 --99 --high
}
# funs FILE: list the distinct first-column values of those data rows of
# an ARFF file that have more than 20 comma-separated fields.
funs() {
  gawk '
    In && NF>20 { print $1 }
    /@data/     { In = 1 }
  ' FS="," IGNORECASE=1 $1 |
    sort |
    uniq
}
# logNumbers miss row 1
# only work on the dynamic metrics
# parts: for every function_<digit>*.arff under $secrets, train nb on the
# rows of the full data set whose first field is NOT in that part and test
# it twice -- on the part's rows taken from the full data set ("ensemble")
# and on the part's own file ("isolated"). One abcd row per test is
# written to $Safe/parts.out.
# Changes: iterates with a glob instead of parsing `ls`; the unused local
# `parts` is gone; an unreachable $Tmp aborts the run.
parts() {
  local me=parts
  local secrets=$HOME/svns/nextgen/trunk/doc/arffs/raw
  local all="$secrets/function_data.arff"
  local inter inter1 part
  inter=$(echo function_name; cat "$secrets/interClassic_metrics"; echo defects)
  inter1=$(cat "$secrets/interClassic_metrics"; echo defects)
  cd "$Tmp" || return
  some "$inter" "$all" | logNumbers > all_interLogged.arff
  for part in "$secrets"/function_[0-9]*.arff; do
    [ -e "$part" ] || continue # glob matched nothing
    funs "$part" > funs.out
    some "$inter" "$part" | logNumbers > isolated0.arff
    some "$inter1" isolated0.arff > isolated.arff
    # rows of the full data whose first field is listed in funs.out
    gawk 'BEGIN { FS=","; while (getline want < "funs.out" ) Wants[want]=1; close("funs.out")}
      /@/ { print; next }
      $1 in Wants { print }' all_interLogged.arff > ensembled0.arff
    some "$inter1" ensembled0.arff > ensembled.arff
    # ... and all the other rows
    gawk 'BEGIN { FS=","; while (getline want < "funs.out" ) Wants[want]=1; close("funs.out")}
      /@/ { print; next; }
      (! ($1 in Wants)) { print }' all_interLogged.arff > otherEnsembled0.arff
    some "$inter1" otherEnsembled0.arff > otherEnsembled.arff
    nb otherEnsembled.arff ensembled.arff | gotwant |
      abcd --goal "true" --prefix "$(basename "$part"),ensemble"
    nb otherEnsembled.arff isolated.arff | gotwant |
      abcd --goal "true" --prefix "$(basename "$part"),isolated"
  done > "$Safe/$me.out"
  cd "$Here"
}
# demo23: show $Safe/parts.out (generated via parts if missing) under a
# header line, aligned with malign. Sets the global `log`.
demo23() {
  log=$Safe/parts.out
  if [ ! -f "$log" ]; then
    parts
  fi
  {
    echo "#data,type,a,b,c,d,acc,pd,pf,precision,balance"
    cat $log
  } | malign
}
# demo24: for three UCI data sets print a banner, paragraph 3 of the j4810
# output, and the output of `gains` sorted on column 3 (descending).
# Changes: an unreachable $Tmp aborts the run; the function returns to
# $Here at the end like the other demos; the file name is no longer part
# of the printf format.
demo24() {
  local me=demo24
  local datas="weather.nominal diabetes hypothyroid"
  local data arff
  cd "$Tmp" || return
  for data in $datas; do
    arff=$Ourmine/lib/arffs/uci/discrete/$data.arff
    printf '\n---| %s |-----------\n\n' "$(basename "$arff")"
    j4810 "$arff" | report 4 3
    gains < "$arff" | sort -n -t, -r -k 3,3
  done
  cd "$Here"
}
# demo25: 10 x 10-bin cross-validation of j48, nb and nbd on ten discrete
# UCI data sets; one abcd row per (data, repeat, bin, learner, goal
# class), sorted on column 14 and saved as demo25.csv in $Tmp and $Safe.
# Stops if $Tmp cannot be entered.
demo25() {
  local me=demo25
  local datas="anneal audiology breast-cancer kr-vs-kp mushroom
primary-tumor soybean splice vote weather.nominal"
  local repeats=10
  local bins=10
  local learners="j48 nb nbd"
  local data arff goals r seed bin learner goal
  cd "$Tmp" || return
  (
    echo "#data,repeat,bin,learner,goal,a,b,c,d,acc,pd,pf,prec,bal"
    for data in $datas; do
      arff=$Ourmine/lib/arffs/uci/discrete/$data.arff
      # the class labels depend only on the data set, so read them once
      goals=$(classes --brief < "$arff")
      for ((r = 1; r <= repeats; r++)); do
        blab "data=$data repeat=$r "
        seed=$RANDOM
        for ((bin = 1; bin <= bins; bin++)); do
          blab "$bin"
          someArff --seed "$seed" --bins "$bins" --bin "$bin" < "$arff"
          for learner in $learners; do
            "$learner" train.arff test.arff | gotwant > results.dat
            for goal in $goals; do
              abcd --goal "$goal" \
                --prefix "$data,$r,$bin,$learner,$goal" \
                --decimals 1 < results.dat
            done
          done
        done
        blabln
      done
    done
  ) | sort -t, -n -k 14,14 | malign > "$me.csv"
  blabln " "
  echo ""
  cat "$me.csv"
  cp "$me.csv" "$Safe/$me.csv"
  cd "$Here"
}
# demo26: report on $Safe/demo25.csv (generated via demo25 if missing):
# an overall win/loss/tie table, then per data set the quartiles of
# column 14 for each learner and a win/loss/tie table.
# Stops if $Tmp cannot be entered; the data-set name is no longer part of
# the printf format.
demo26() {
  local log=$Safe/demo25.csv
  [ -f "$log" ] || demo25
  local learners="nbd nb j48"
  local datas="anneal audiology breast-cancer kr-vs-kp mushroom
primary-tumor soybean splice vote weather.nominal"
  local data learner
  cd "$Tmp" || return
  winLossTie --input "$log" --fields 14 --perform 14 --key 4 --95 --high
  for data in $datas; do
    printf '\n---| %s |-----------------\n\n' "$data"
    grep -- "$data" "$log" > some.dat
    (
      echo "#learner,min,q1,median,q3,max,"
      for learner in $learners; do
        echo -n "$learner,"
        grep -- "$learner" some.dat | cut -d, -f 14 | quartile
      done
    ) | malign
    echo ""
    winLossTie --input some.dat --fields 14 --perform 14 --key 4 --95 --high
  done
  cd "$Here"
}
# demo27: for each data set run nba on one train/test split, separate the
# output rows into "good" (field 2 equals the last field) and "bad", and
# draw a text histogram of the integer part of field 1 for each group.
# Everything is also written to $Safe/demo27.csv.
# Stops if $Tmp cannot be entered; names are no longer part of the printf
# format.
demo27() {
  local me=demo27
  local datas="anneal audiology breast-cancer kr-vs-kp mushroom
primary-tumor soybean splice vote "
  local data arff i
  cd "$Tmp" || return
  for data in $datas; do
    arff=$Ourmine/lib/arffs/uci/discrete/$data.arff
    someArff --seed $RANDOM --bins 10 --bin 1 < "$arff"
    nba train.arff test.arff > out
    gawk 'BEGIN {FS=OFS=","} $2 == $NF {print $0}' out | cut -d, -f 1 > good
    gawk 'BEGIN {FS=OFS=","} $2 != $NF {print $0}' out | cut -d, -f 1 > bad
    echo ""
    for i in good bad; do
      printf '\n ---| %s %s |-----------------------\n\n' "$data" "$i"
      gawk '
        function str(n, chr,   out) {
          chr = chr ? chr : " "
          while (n-- > 0) out = out chr
          return out
        }
        { n[int($0)]++; N++ }
        END {
          for (i in n) {
            m = int(100 * n[i] / N / 3)    # one X per three percent
            if (m) print i "," m*3, "," str(m, "X") str(33 - m, ".")
          }
        }
      ' "$i" | sort -r -n | malign
    done
  done | tee "$Safe/$me.csv"
  cd "$Here"
}
# demo28: reads nick/3x10all.csv (relative to the current directory), appends
# a column computed as 2*$11*$13/($11+$13) and stores the result in /tmp/f.
# For each treatment letter a..m, columns 11, 13 and 15 of the matching rows
# (minus the first match) go to /tmp/<letter>.pd, .prec and .f.
# It then prints, per measure, the quartiles per treatment plus a winLossTie
# summary; that report is also saved in $Safe/demo28.log.
# NOTE(review): in the header rule, f is an unset awk variable, so the header
# only gains a trailing comma -- possibly "f" (a column name) was meant.
demo28() { # nick's stuff
  local ids="a b c d e f g h i j k l m"
  local spec field tag
  cat nick/3x10all.csv |
    gawk -F, 'NR==1 {print $0 "," f;next} {print $0 "," 2*$11*$13/($11 + $13 + 0.0000001)}' > /tmp/f
  # each spec is <column>:<file suffix>
  for spec in 11:pd 13:prec 15:f; do
    field=${spec%%:*}
    tag=${spec##*:}
    for i in $ids; do
      grep ",$i," /tmp/f | cut -d, -f $field | gawk 'NR > 1' > /tmp/$i.$tag
    done
  done
  echo
  (
    for spec in 11:pd 13:prec 15:f; do
      field=${spec%%:*}
      tag=${spec##*:}
      # blank line between sections, but not before the first one
      [ "$spec" = 11:pd ] || echo
      for i in $ids; do
        echo -n "$tag,$i,"
        cat /tmp/$i.$tag | quartile
      done
      winLossTie --input /tmp/f --fields 15 --key 4 --perform $field --95 --high
    done
  ) | tee $Safe/demo28.log
}
### stop reading. broken after this.
# flip: pivot comma-separated rows read from stdin into a table with one row
# per data set and, per key, a value column followed by a "max?" column that
# holds X when that value is the largest one seen for the data set.
# Options (each takes a value):
#   -d|--data N[,N...]   field number(s) that name the data set (table row)
#   -k|--key N[,N...]    field number(s) that name the treatment (table column)
#   -p|--performance N   field number holding the score
# Input lines starting with a single "#" are skipped; "##" lines are kept.
# Rows and columns appear in order of first appearance in the input.
# Returns 1 (message on stderr via blabln) on an unknown or incomplete option.
flip() {
  local data
  local key
  local performance
  while [[ "$1" == *-* ]]; do
    case $1 in
      -d|--data) data="$2";;
      -k|--key) key="$2";;
      -p|--performance) performance="$2";;
      *) blabln "'"$1"' unknown\n usage cat file | flip [options]"
         return 1;;
    esac
    # every option takes a value; without this guard a trailing flag would
    # make "shift 2" fail forever and the loop would never end
    if [ $# -lt 2 ]; then
      blabln "'$1' needs a value\n usage cat file | flip [options]"
      return 1
    fi
    shift 2
  done
  if [ -z "$data" ] || [ -z "$key" ] || [ -z "$performance" ]; then
    blabln "--data, --key and --performance are all required\n usage cat file | flip [options]"
    return 1
  fi
  gawk -v DataStr="$data" -v KeyStr="$key" -v Performance="$performance" '
    BEGIN {
      FS = OFS = ","
      NData = split(DataStr, TheData, /,/)
      NKeys = split(KeyStr, TheKeys, /,/)
    }
    /^[ \t]*#[^[#]/ { next }
    {
      key = data = ""
      # the split arrays hold field NUMBERS; dereference them to get values
      for (d = 1; d <= NData; d++) data = data "." $(TheData[d])
      for (k = 1; k <= NKeys; k++) key  = key  "." $(TheKeys[k])
      value = $Performance
      Result[key, data] = value
      if (!(data in Max) || value + 0 > Max[data] + 0) Max[data] = value
      # remember first-seen order so the table layout is deterministic
      if (!(key in Keys))   { Keys[key] = key;    KeyOrder[++NK] = key }
      if (!(data in Datas)) { Datas[data] = data; DataOrder[++ND] = data }
    }
    END {
      printf "#data"
      for (i = 1; i <= NK; i++) printf ",%s,max?", KeyOrder[i]
      print ""
      for (j = 1; j <= ND; j++) {
        D = DataOrder[j]
        printf "%s", D
        for (i = 1; i <= NK; i++) {
          K = KeyOrder[i]
          if ((K, D) in Result) {
            printf ",%s", Result[K, D]
            printf "%s", (Result[K, D] + 0 == Max[D] + 0) ? ",X" : ","
          } else
            printf ",,"
        }
        print ""
      }
    }
  ' -
  #| medians | malign
}
# summary: works in $Tmp. Ensures $Safe/demo17.csv exists (running demo17 if
# it does not), pipes demo18 through flip, then prints a winLossTie report for
# the whole log ("all") and for the rows mentioning diabetes and autos; the
# per-data-set rows are kept in <name>.stats.
summary() {
  cd $Tmp
  local stats="$Safe/demo17.csv"
  local src
  if [ ! -f "$stats" ]; then
    demo17
  fi
  demo18 | flip --data 1 --key 2 --performance 12
  for d in all diabetes autos; do
    printf "\n---| $d |------\n\n"
    if [ "$d" = all ]; then
      src=$stats
    else
      src=$d.stats
      grep $d $stats > $src
    fi
    winLossTie --input $src --fields 14 --perform 14 --key 4 --95 --high
  done
}
# demo101: condense $HOME/tmp/safe/demo2.log. For every data set / learner /
# prep combination, the non-comment log rows mentioning all three are sorted
# numerically on column 14 and passed to "medians --start 6".
# The result, under a header line, is written to $Safe/demo101.log and echoed.
demo101() {
  local me=demo101
  local stats="$HOME/tmp/safe/demo2.log"
  local learners="aode j48 jrip nb oner"
  local preps="loggedDiscrete discrete"
  local datas="cm1 kc1 kc2 kc3_mod mc1_mod mc2_mod mw1_mod
pc1 pc2_mod pc3_mod pc4_mod pc5_mod"
  local data learner prep
  {
    echo "#data,prep,attrs,bin,learner,a,b,c,d,acc,pd,pf,prec,bal"
    for data in $datas; do
      for learner in $learners; do
        for prep in $preps; do
          cat $stats |
            grep $data |
            grep $learner |
            grep $prep |
            grep -v '#' |
            sort -t, -n -k 14,14 |
            medians --start 6
        done
      done
    done
  } > $Safe/$me.log
  cat $Safe/$me.log
}
# demo102: works in $Tmp. Builds $Safe/demo101.log with demo101 when it is
# missing, then feeds its "##" lines to flip (data column 1, key columns 2
# and 5, performance column 14).
demo102() {
  cd $Tmp
  local stats="$Safe/demo101.log"
  if [ ! -f "$stats" ]; then
    demo101
  fi
  cat $stats |
    grep "##" |
    flip --data 1 --key 2,5 --performance 14
}
# demo103: copies $HOME/wisp/var/Zach/var/whichOut/AUCwithRocky.csv to
# $Tmp/ready.csv and, for every value of column 1 (data) and column 6
# (learner), prints the quartiles of column 7, sorted numerically on the
# fourth comma-separated field of each output line.
# The column-7 values of each pair are also kept in $Tmp/<data><learner>.done.
# The report is shown and saved in $Safe/demo103.out, whose path is echoed.
# Leaves the shell in $Tmp.
demo103() { #zach's stuff
  local me=demo103
  local in=$HOME/wisp/var/Zach/var/whichOut/AUCwithRocky.csv
  cd "$Tmp"
  cat "$in" > ready.csv
  local data=$(cut -d, -f 1 ready.csv | sort | uniq)
  local learners=$(cut -d, -f 6 ready.csv | sort | uniq)
  local d l
  for d in $data; do
    printf "\n\n===| %s |=======\n" "$d"
    for l in $learners; do
      echo -n "$l ,"
      grep "$l" ready.csv | grep "$d" | cut -d, -f 7 | tee "$d$l.done" | quartile
    # "-k 4" is the portable spelling of the obsolete "+3" key, which current
    # sort implementations take for a file name
    done | malign | sort -n -t, -k 4
  done | tee "$Safe/$me.out"
  echo "$Safe/$me.out"
}
# manual > anything else
# standard learners worse
# which wins
#
# demo104: merges AUCwithMicro.csv with AUCwithTurkey.csv (in the latter,
# "manual" is renamed manualDown and "launam" manualUp), drops the rows that
# mention rocky, micro10, micro5, micro30, micro75 or loc, and keeps the rest
# in $Tmp/ready.csv. For all rows together (the "," pass) and then per data
# set (pc3 excluded), it reports the quartiles of column 7 per learner
# (column 6) followed by a winLossTie summary.
# The report goes to $Safe/demo104.out; that path is echoed on stdout.
demo104() { #zach's stuff
  cd "$Tmp"
  local me=demo104
  local in1=$HOME/svns/wisp/var/Zach/var/whichOut/AUCwithMicro.csv
  local in2=$HOME/svns/wisp/var/Zach/var/whichOut/AUCwithTurkey.csv
  # local, so that a caller's own "in" (demo104a has one) is not overwritten
  local in=$Tmp/in
  local d l
  cat "$in1" > "$in"
  cat "$in2" | sed 's/manual/manualDown/' | sed 's/launam/manualUp/' >> "$in"
  #local in=/srv/bronze/zach/wisp/var/Zach/var/whichOut/AUCfinitelist.csv
  #cd $Tmp
  cat "$in" |
    grep -v -e rocky -e micro10 -e micro5 -e micro30 -e micro75 -e loc > ready.csv
  local data=$(cut -d, -f 1 ready.csv | grep -v data |
    grep -v pc3 | sort | uniq)
  local learners=$(cut -d, -f 6 ready.csv | grep -v learner | sort | uniq)
  # the leading "," matches every row, giving an all-data section first
  for d in "," $data; do
    printf "\n\n===| %s |=======\n" "$d"
    grep "$d" ready.csv > ready1.csv
    for l in $learners; do
      echo -n "$l,"
      grep "$l," ready1.csv | cut -d, -f 7 | sort -n | quartile
      #gawk 'BEGIN {OFS=","}
      # {N++; F[N] =$1}
      # END {N1=int(N/2);
      # if (N % 2) { print N,int(F[N1])
      # } else {
      # N2 = N1+1;
      # print N,int((F[N1] + F[N2])/2)
      # }}'
    done | sort -r -t, -n -k 4 | malign
    echo ""
    winLossTie --input ready1.csv --fields 7 --perform 7 --key 6 --99 --high
  done > "$Safe/$me.out"
  echo "$Safe/$me.out"
  cd "$Here"
}
# demo104a: converts the demo104 report ($Safe/demo104.out, generated with
# demo104 when it is missing) with quartile2tex and stores the result in
# $Safe/demo104a.out. Runs quartile2tex from $Tmp and returns to $Here.
demo104a() {
  # Deliberately not called "in": demo104 assigns a variable of that name and,
  # with dynamic scoping, would overwrite this function's local, so the raw
  # input would be formatted instead of the report.
  local report=$Safe/demo104.out
  [ -f "$report" ] || demo104
  cd "$Tmp"
  cat "$report" | quartile2tex > "$Safe/demo104a.out"
  cd "$Here"
}
# number of controllables
# demo105: reports built from $HOME/wisp/var/timm/08/data.csv (header line
# dropped, copy kept in $Tmp/data.csv). Projected columns come from
# demo105selector; per project (plus "overall") the last column is
# summarised with quartile and winLossTie.
# Files written (all named $Safe/demo105_*; old ones are removed first):
#   demo105_policyAbsoluteE.txt  MinMedE (x100) per policy
#   demo105_reduceMonths.txt     100*MinMedMonths/FirstMedMonths per policy
#   demo105_mutValue.txt         numPol per scoring/mutation (also on stdout)
# The work runs in a subshell, so the caller's directory is untouched; the
# "exit" half-way down ends that subshell, which leaves the later sections
# unreachable (they are kept as parked code).
demo105() {
  local me=demo105
  local src="$HOME/wisp/var/timm/08/data.csv"
  ls -lsa "$src"
  # ${me}_ needs the braces: "$me_*" expands the unset variable "me_" and
  # would turn this into rm -f $Safe/*
  rm -f "$Safe/${me}"_*
  gawk 'NR==1 {next} {print}' "$src" > $Tmp/data.csv
  (cd $Tmp
  local projects=$(cut -d, -f 1 data.csv | sort | uniq)
  local policies=$(cut -d, -f 2 data.csv | sort | uniq)
  local scorings=$(cut -d, -f 3 data.csv | sort | uniq)
  local mutations=$(cut -d, -f 4 data.csv | sort | uniq)
  demo105selector project,policy,MinMedE data.csv |
    gawk -F, '{OFS=","; $NF=100*$NF; print $0}'> testa
  (printf "POLICIES\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for pol in $policies; do
      echo -n "$pol,"
      grep ",$pol," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign
    echo ""
    winLossTie --input relevant --fields 4 --perform 4 --key "3" --99 --low
  done ) > $Safe/${me}_policyAbsoluteE.txt
  # changing policies rarely effects anything. the final numbers are so low. but Bug?
  # demo105selector project,policy,FirstMedThreat,MinMedThreat data.csv |
  # gawk -F, '{OFS=","; print $0,100*($NF/(0.0000000001 + $(NF-1)));}' > testa
  # printf "HOW MUCH THREAT CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  # for p in overall $projects
  # do grep "$p," testa > relevant
  # printf "\n\n----| $p |----------\n\n"
  # for pol in $policies; do
  # echo -n "$pol,"
  # grep ",$pol," relevant | gawk -F, '{print $NF}' | quartile
  # done | malign
  # echo ""
  # winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  # done > $Safe/${me}_reduceTHREATS.txt
  # # usually, healthy reducions in defects
  #
  demo105selector project,policy,FirstMedMonths,MinMedMonths data.csv |
    gawk -F, '{OFS=","; print $0,100*($NF/$(NF-1));}' > testa
  printf "HOW MUCH MONTHS CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for pol in $policies; do
      echo -n "$pol,"
      grep ",$pol," relevant | gawk -F, '{print $NF}' | quartile
    done | malign | sort -n -t, -k 4
    echo ""
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --99 --low
  done > $Safe/${me}_reduceMonths.txt
  # usually, healthy reducions in defects
  demo105selector project,scoring,mutation,numPol data.csv > testa
  printf "HOW MUCH important ins mutation? ?\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for score in $scorings; do
      for mut in $mutations; do
        grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
      done
    done | malign | sort -n -t, -k 4
    echo ""
    winLossTie --input relevant --fields 5 --perform 5 --key "3,4" --99 --low
  done | tee $Safe/${me}_mutValue.txt
  # usually, healthy reducions in defects
  # ends the subshell here: nothing below this line is executed
  exit
  demo105selector project,policy,FirstMedDefects,MinMedDefects data.csv |
    gawk -F, '{OFS=","; print $0,100*($NF/$(NF-1));}' > testa
  printf "HOW MUCH DEFECTS CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  (printf "HOW MUCH DEFECTS CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for pol in $policies; do
      echo -n "$pol,"
      grep ",$pol," relevant | gawk -F, '{print $NF}' | quartile
    done | malign
    echo ""
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done ) > $Safe/${me}_reduceEffort.txt
  # usually, healthy reducions in defects
  demo105selector project,policy,FirstMedEffort,MinMedEffort data.csv |
    gawk -F, '{OFS=","; print $0,100*($NF/$(NF-1));}' > testa
  printf "HOW MUCH EFFORT CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  (printf "HOW MUCH EFFORT CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for pol in $policies; do
      echo -n "$pol,"
      grep ",$pol," relevant | gawk -F, '{print $NF}' | quartile
    done | malign
    echo ""
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done ) > $Safe/${me}_reduceEffort.txt
  # usually, healthy reducions in effort
  #
  # demo105selector project,policy,FirstMedE,MinMedE data.csv |
  # gawk -F, '{OFS=","; print $0,100*($NF/$(NF-1));}' > testa
  # (printf "HOW MUCH ENERGY CONTROL DO WE ACHIEVE?\n\n`date`\n\n"
  # for p in overall $projects
  # do grep "$p," testa > relevant
  # printf "\n\n----| $p |----------\n\n"
  # for pol in $policies; do
  # echo -n "$pol,"
  # grep ",$pol," relevant | gawk -F, '{print $NF}' | quartile
  # done | malign
  # echo ""
  # winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  # done ) > tee $Safe/${me}_reduceEnergy.txt
  #
  # # strange- often the energy reduction is close to 0
  #
  # demo105selector project,scoring,mutation,MinMedE,MinSpE data.csv |
  # gawk -F, '{OFS=",";print $1,$2, $3,$4,100*$5/($6+0.00000000001)}' > testa
  # (printf "MUTATION SCORING VS RATIO MIN MEDIAN/SPREAD \n\n`date`\n\n"
  # for p in overall $projects
  # do grep "$p," testa > relevant
  # printf "\n\n----| $p |----------\n\n"
  # for score in $scorings; do
  # for mut in $mutations; do
  # echo -n "$score,$mut,"
  # grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
  # done
  # done | malign
  # echo ""
  # winLossTie --input relevant --fields 5 --perform 5 --key "3,4" --95 --high
  # done ) > $Safe/${me}_mutationScoringRatio.txt
  #
  # # mutation and scoring policies do not effect variance reduction
  #
  # demo105selector project,scoring,mutation,numPol data.csv > testa
  # (printf "MUTATION SCORING VS NUMBER OF pOLICIES\n\n`date`\n\n"
  # for p in overall $projects
  # do grep "$p," testa > relevant
  # printf "\n\n----| $p |----------\n\n"
  # for score in $scorings; do
  # for mut in $mutations; do
  # echo -n "$score,$mut,"
  # grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
  # done
  # done | malign
  # echo ""
  # winLossTie --input relevant --fields 5 --perform 5 --key "3,4" --95 --low
  # done) > $Safe/${me}_mutationScoringPolicies.txt
  #
  # # mutation and scoring policies do not effect # of policies
  #
  demo105selector project,scoring,mutation,FirstMedThreat,MinMedThreat data.csv |
    gawk -F, '{print $0,",",100*$NF/$(NF-1)}' > testa
  (printf "MUTATION SCORING VS THREAT REDUCTION\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for score in $scorings; do
      for mut in $mutations; do
        echo -n "$score,$mut,"
        grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
      done
    done | malign
    echo ""
    winLossTie --input relevant --fields 7 --perform 7 --key "3,4" --95 --low
  done ) > $Safe/${me}_mutationScoringThreat.txt
  # bore|extreme best at defect reduction
  demo105selector project,scoring,mutation,FirstMedMonths,MinMedMonths data.csv |
    gawk -F, '{print $0,",",100*$NF/$(NF-1)}' > testa
  printf "MUTATION SCORING VS MONTHS REDUCTION\n\n`date`\n\n"
  (printf "MUTATION SCORING VS MONTHS REDUCTION\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for score in $scorings; do
      for mut in $mutations; do
        echo -n "$score,$mut,"
        grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
      done
    done | malign
    echo ""
    winLossTie --input relevant --fields 7 --perform 7 --key "3,4" --95 --low
  done ) > $Safe/${me}_mutationScoringMonths.txt
  # bore|extreme best at defect reduction
  demo105selector project,scoring,mutation,FirstMedDefects,MinMedDefects data.csv |
    gawk -F, '{print $0,",",100*$NF/$(NF-1)}' > testa
  printf "MUTATION SCORING VS DEFECT REDUCTION\n\n`date`\n\n"
  (printf "MUTATION SCORING VS DEFECT REDUCTION\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for score in $scorings; do
      for mut in $mutations; do
        echo -n "$score,$mut,"
        grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
      done
    done | malign
    echo ""
    winLossTie --input relevant --fields 7 --perform 7 --key "3,4" --95 --low
  done ) > $Safe/${me}_mutationScoringDefects.txt
  # bore|extreme best at defect reduction
  demo105selector project,scoring,mutation,FirstMedEffort,MinMedEffort data.csv |
    gawk -F, '{print $0,",",100*$NF/$(NF-1)}' > testa
  printf "MUTATION SCORING VS EFFORT REDUCTION\n\n`date`\n\n"
  (printf "MUTATION SCORING VS EFFORT REDUCTION\n\n`date`\n\n"
  for p in overall $projects
  do grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for score in $scorings; do
      for mut in $mutations; do
        echo -n "$score,$mut,"
        grep ",$score,$mut," relevant | gawk -F, '{print $NF}' | quartile
      done
    done | malign
    echo ""
    winLossTie --input relevant --fields 7 --perform 7 --key "3,4" --95 --low
  done ) > $Safe/${me}_mutationScoringEffort.txt
  # mutation and scoring policies do not effect effort reduction
  )
}
# demo105selector: project named columns out of a comma-separated file.
#   $1  comma-separated column names (see the list inside the awk program)
#   $2  input file
# Every input line yields "overall" followed by the requested fields, in the
# order they were asked for.
demo105selector() {
  cat $2 |
    gawk -F, -v Want=$1 '
      # column names in file order: name k lives in field k
      function dd(   names, count, c) {
        count = split("project,policy,scoring,mutation,alpha,beta,gamma," \
                      "delta,relydefect,run,N,SANum,SATime,TotalTime," \
                      "minEnergy,numPol,FirstMedE,FirstSpE,FirstMedEffort," \
                      "FirstSpEffort,FirstMedDefects,FirstSpDefects," \
                      "FirstMedThreat,FirstSpThreat,FirstMedMonths," \
                      "FirstSpMonths,MinMedE,MinSpE,MinMedEffort,MinSpEffort," \
                      "MinMedDefects,MinSpDefects,MinMedThreat,MinSpThreat," \
                      "MinMedMonths,MinSpMonths", names, ",")
        for (c = 1; c <= count; c++)
          Dd[names[c]] = c
      }
      # translate the wanted names into field numbers (Goals[1..N])
      function args(   asked, g) {
        N = split(Want, asked, /,/)
        for (g = 1; g <= N; g++)
          Goals[g] = Dd[asked[g]]
      }
      BEGIN { dd(); args() }
      {
        row = "overall"
        for (I = 1; I <= N; I++)
          row = row "," $Goals[I]
        print row
      }
    '
}
# lasttimes100: filter; multiplies the last comma-separated field of every
# line read from stdin by 100.
lasttimes100() {
  gawk '
    BEGIN { FS = OFS = "," }
    { $NF = $NF * 100; print }
  ' -
}
# demo106: reports built from $HOME/wisp/var/timm/08/data-sampling.csv
# (header line dropped, copy kept in $Tmp/data.csv). For Effort, Defects,
# Months and Threat, 100*MinMed<X>/FirstMed<X> is summarised per SANum value
# (column 12) for every project plus "overall", using quartile and
# winLossTie. Each report is shown and saved as $Safe/demo106_sa_<X>.txt;
# old $Safe/demo106_* files are removed first.
# The function returns after those four reports; the per-N sections below
# the "return 0" are parked and never run. Leaves the shell in $Tmp.
demo106() {
  local me=demo106
  local src="$HOME/wisp/var/timm/08/data-sampling.csv"
  local p a n
  ls -lsa "$src"
  # ${me}_ needs the braces: "$me_*" expands the unset variable "me_" and
  # would turn this into rm -f $Safe/*
  rm -f "$Safe/${me}"_*
  gawk 'NR==1 {next} {print}' "$src" > $Tmp/data.csv
  cd $Tmp
  local projects=$(cut -d, -f 1 data.csv | sort | uniq)
  local policies=$(cut -d, -f 2 data.csv | sort | uniq)
  local scorings=$(cut -d, -f 3 data.csv | sort | uniq)
  local mutations=$(cut -d, -f 4 data.csv | sort | uniq)
  local ns=$(cut -d, -f 11 data.csv | sort | uniq)
  local sanums=$(cut -d, -f 12 data.csv | sort | uniq)
  echo "ns $ns"
  echo "sanums $sanums"
  #demo106selector project,SANum,N,MinSpE,MinMedE,MinSpEffort,MinMedEffort data.csv > testa
  demo106selector project,SANum,FirstMedEffort,MinMedEffort data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/($(NF-1)+ 0.00000001)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for a in $sanums; do
      echo -n "$a,"
      grep ",$a," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done |tee $Safe/${me}_sa_Effort.txt
  demo106selector project,SANum,FirstMedDefects,MinMedDefects data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/($(NF-1)+ 0.00000001)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for a in $sanums; do
      echo -n "$a,"
      grep ",$a," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done |tee $Safe/${me}_sa_Defects.txt
  demo106selector project,SANum,FirstMedMonths,MinMedMonths data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/($(NF-1)+ 0.00000001)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for a in $sanums; do
      echo -n "$a,"
      grep ",$a," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done |tee $Safe/${me}_sa_Months.txt
  demo106selector project,SANum,FirstMedThreat,MinMedThreat data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/($(NF-1)+ 0.00000001)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for a in $sanums; do
      echo -n "$a,"
      grep ",$a," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done |tee $Safe/${me}_sa_Threat.txt
  # stops here: the per-N reports below are never executed
  return 0
  demo106selector project,N,MinMedThreat,MinSpThreat data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/$(NF-1)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for n in $ns; do
      echo -n "$n,"
      grep ",$n," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done > $Safe/${me}_n_Threat.txt
  demo106selector project,N,MinMedEffort,MinSpEffort data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/$(NF-1)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for n in $ns; do
      echo -n "$n,"
      grep ",$n," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done > $Safe/${me}_n_effort.txt
  demo106selector project,N,MinMedDefects,MinSpDefects data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/$(NF-1)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for n in $ns; do
      echo -n "$n,"
      grep ",$n," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done > $Safe/${me}_n_Defects.txt
  demo106selector project,N,MinMedMonths,MinSpMonths data.csv |
    gawk -F, '{OFS=","; print $0, 100*$(NF)/$(NF-1)}'> testa
  for p in overall $projects
  do
    grep "$p," testa > relevant
    printf "\n\n----| $p |----------\n\n"
    for n in $ns; do
      echo -n "$n,"
      grep ",$n," relevant | gawk -F, '{print $NF}' |
        quartile |
        sort -t, -n -k 3
    done | malign | sort -t, -n -k 4
    echo ""
    echo "n"
    winLossTie --input relevant --fields 6 --perform 6 --key "3" --95 --low
  done > $Safe/${me}_n_Months.txt
}
# demo106selector: project named columns out of a comma-separated file.
#   $1  comma-separated column names (see the list inside the awk program)
#   $2  input file
# Every input line yields "overall" followed by the requested fields, in the
# order they were asked for. The column list has attNumber in field 17, so
# the fields from FirstMedE onwards sit one position later than in
# demo105selector.
demo106selector() {
  cat $2 |
    gawk -F, -v Want=$1 '
      # column names in file order: name k lives in field k
      function dd(   names, count, c) {
        count = split("project,policy,scoring,mutation,alpha,beta,gamma," \
                      "delta,relydefect,run,N,SANum,SATime,TotalTime," \
                      "minEnergy,numPol,attNumber,FirstMedE,FirstSpE," \
                      "FirstMedEffort,FirstSpEffort,FirstMedDefects," \
                      "FirstSpDefects,FirstMedThreat,FirstSpThreat," \
                      "FirstMedMonths,FirstSpMonths,MinMedE,MinSpE," \
                      "MinMedEffort,MinSpEffort,MinMedDefects,MinSpDefects," \
                      "MinMedThreat,MinSpThreat,MinMedMonths,MinSpMonths",
                      names, ",")
        for (c = 1; c <= count; c++)
          Dd[names[c]] = c
      }
      # translate the wanted names into field numbers (Goals[1..N])
      function args(   asked, g) {
        N = split(Want, asked, /,/)
        for (g = 1; g <= N; g++)
          Goals[g] = Dd[asked[g]]
      }
      BEGIN { dd(); args() }
      {
        row = "overall"
        for (I = 1; I <= N; I++)
          row = row "," $Goals[I]
        print row
      }
    '
}
#please add columns for:
#a1) the #of decisions required to reach minimum point
#a2) the number of variables in "a1". e.g. if "a1"= acap=hi and
#acap=lo then "a2" is only "1".
#b) the max possible energy (which is always, 1, right?)
#c) the min energy reached during SA (so we can see how well our
#policies do)
#d) bore vs energy scoring
#e) number of runs in the SA
#f) number of runs for each point in the back select
#g) sa runtime
#h) total run time (only the "C" not the shell scripts)
#i) policy: all, strategic, tactical, hohin's 8 different ideas.
#j) spreadE/medianE
#i) spreadE and medianE see at the FIRST step of the back select
#
#have we got the t-tests going to allow early stopping? that would
#affect (a)
#
#have we got those new threat tables going so we can avoid dumb ass
#mistakes like high tool and low acap?
#
#what else? we want to say
#- that we can do well with a few decision ("a1" and "a2" and "i")
#- that the spread of the final results is very small (j)
#- that sa is an adequate (if "c" is very low);
#- that back select is adequate (medianE/"c")
#- that bore is good (that's "d")
#- that smaller runs are as good as larger runs ("e" and "f")
#- that our tool is fast "g"
#- that our "do anything policies" is as good as anything "hohin"
#offers ("i")
#}
# oner nb j48
#auto 56.6 60.4 85.9*
#diabetes 57.2 68.5 69.3*
#
#demo10() {
# demo9 | gawk -F, '/@/ {next}
# NF>1 {print $NF}' | sort | uniq -c
#}
#demo11() {
# setup; cd $Tmp
# demo9 > data.arff
#
# c=0.1
# printf "confidence limit for pruning = $c (very selective)\n\n"
# j4810c data.arff $c | report 0 3,18,16
#
# c=0.25
# printf "confidence limit for pruning = $c (default, less selective)\n\n"
# j4810c data.arff $c | report 0 3,18,16
# cd $Here
#}
#demo1001() {
# setUpVars
# setUpDirs
# setUpSeds
# prep
# cd $Tmp
# pwd
# makeshare
# worker1001 > log
# cp log $Safe/demo1.log
# winLossTie log | tee $Safe/demo1.winLossTie
#}
#demo1002() {
# setUpVars
# setUpDirs
# setUpSeds
# prep
# cd $Tmp
# pwd
# makeshare
# Learners="j48 jrip oner nb aode"
# worker1002 > log
# cp log $Safe/demo1.log
# winLossTie log | tee $Safe/demo1.winLossTie
#}
#### start up
# Runs as soon as this file is sourced.
# setup (defined near the top of the file) calls setUpVars and setUpDirs.
setup
# build prepends $Here/minerc.lib to AWKPATH and exports it.
build
# Banner; blabln prints to stderr, so stdout stays clean.
blabln "OurMine version v0.2 (alpha) (c)2007 tim@menzies.us under GPLv3"
blabln "Too many doings, not enough learnings.\n"