1 # /* vim: set filetype=sh : */
   2 # usage: bash our minerc
   3 # warning: requires at least 5MB of free disk
   4 ##########################################################################
   5 #    ourmine : a simple learning environment for data mining
   6 #    Copyright (C) 2007, Tim Menzies, tim@menzies.us, http://menzies.us
   7 #
   8 #    This program is free software: you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation, version 3.
  11 #
  12 #    This program is distributed in the hope that it will be useful,
  13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 #    GNU General Public License for more details.
  16 #
  17 #    You should have received a copy of the GNU General Public License
  18 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 ##########################################################################
  20 
  21 # for more info on command-line weka stuff, see
  22 
  23 # http://www.cs.waikato.ac.nz/~remco/weka_bn/node13.html
  24 
  25 Here=`pwd`
  26 
  27 #### generic stuff
  28 reload() {
  29     . $Ourrc
  30 }
  31 show() {
  32     local goal1="^$1"
  33     local com="/^$1 /,/^}/{print}"
  34     if   (set | grep $goal1 | grep "=" > /tmp/debug)
  35     then set  | grep $goal1
  36     else set  | gawk "$com"
  37     fi
  38 }
  39 blab()   { printf "$*"   >&2; }
  40 blabln() { printf "$*\n" >&2; }
  41 #### initialization stuff
  42 setup() {
  43     setUpVars
  44     setUpDirs
  45 }
  46 lcsee() {
  47     alias ls="ls --color"
  48 }
  49 setUpVars() {
  50     alias ls="ls -G"
  51     PROMPT_COMMAND='echo -ne "\033]0;${HOSTNAME}: `pwd`\007"'
  52     PS1="Our MINE!: \!$ "
  53     Ourmine="$HOME/opt/ourmine"
  54     PATH="$Ourmine/bin:$HOME/bin:$PATH"
  55     Safe=$Ourmine/var/safe
  56     Dirs="$Our/lib/arffs"
  57     LibUrl="http://unbox.org/wisp/trunk/our/minerc.lib/lib.zip"
  58     export AWKPATH="$Ourmine/lib:$AWKPATH"
  59     Weka="nice -19 java -Xmx1024M -cp $Tmp/weka.jar "
  60     Bins=10
  61     Repeats=2;
  62     Learners="nb nbk"
  63     Data="$Ourmine/lib/arff/uci/discrete/a*.arff
  64           $Ourmine/lib/arff/uci/discrete/s*.arff"
  65 }
  66 setUpDirs() {
  67     mkdir -p $HOME/tmp
  68     mkdir -p /tmp/$USER
  69     Tmp=`mktemp -d -p /tmp/$USER`
  70     mkdir -p $Tmp
  71     mkdir -p $Ourmine/lib # for support code
  72     mkdir -p $Ourmine/bin # for our executables
  73     mkdir -p $HOME/bin  # for your executables
  74     mkdir -p $Safe      # for stuff you want to keep around
  75     if [ ! -f "$Ourmine/lib/lib.zip" ]; then
  76         downloads
  77     fi
  78     # I had too much trouble with pathname syntax problems
  79     # on mac, windows, linux, etc. So now I just copy weka.jar
  80     # to the working directory (no need for pathnames)
  81     cp $Ourmine/lib/weka.jar $Tmp
  82 }
  83 downloads() {
  84     set -x
  85     (cd $Ourmine/lib
  86     wget -O lib.zip $LibUrl
  87     unzip -o  lib.zip
  88     )
  89     set +x
  90 }
  91 #### stuff for the turkey experiment
  92 setUpSeds() {
  93 cat<<-EOF > $Tmp/etc/seds
  94     s/loccodeandcomment/loc_code_and_comment/
  95     s/locodeandcomment/loc_code_and_comment/
  96     s/locandcomment/loc_code_and_comment/
  97     s/essential_complexity/ev(g)/
  98     s/cyclomatic_complexity/v(g)/
  99     s/halstead_length/n/
 100     s/halstead_level/l/
 101     s/num_operators/n1/
 102     s/num_operands/n2/
 103     s/unique_operands/uniq_opnd/
 104     s/unique_operators/uniq_op/
 105     s/halstead_content/i/
 106     s/halstead_error_est/b/
 107     s/halstead_prog_time/t/
 108     s/halstead_effort/e/
 109     s/halstead_difficulty/d/
 110     s/halstead_volume/v/
 111     s/loc_comments/loc_comment/
 112     s/design_complexity/iv(g)/
 113     s/locomment/loc_comment/
 114     s/loc_total/loc/
 115     s/locode/loc/
 116     s/[\t ]c[\t ]/ defects /
 117     s/[\t ]problems[\t ]/ defects /
 118     s/branchcout/branch_count/
 119     s/total_op[\t ]/n1 /
 120     s/total_opnd/n2/
 121     s/{no,yes}/{false,true}/
 122 EOF
 123 }
 124 
 125 prep() {
 126     for i in $Dirs/mdp/*.arff ; do
 127     cat $i |
 128     tr A-Z a-z |
 129     sed -f $Tmp/etc/seds \
 130     > $Tmp/arff/`basename $i`
 131 done
 132 }
 133 classes() {
 134     #Q5: This script processes an arff file like the one produced
 135     #    by "weather" (see below). It looks for all the class names
 136     #    after the "@data" line and prints their frequency. Carefully comment and explain
 137     #    each line. 
 138     #     Hints: 
 139     #          1) http://www.delorie.com/gnu/docs/gawk/gawk_116.html
 140     #          2) the pattern of processing in this function is repeated
 141     #             elsewhere in this file
 142     local brief=0
 143     while [ `echo $1 | grep "-"` ]; do
 144         case $1 in
 145             -b|--brief) brief=1;;
 146             *)   blabln "'"$1"' unknown\n usage cat file | classes [options]"
 147                  return 1;;
 148         esac
 149         shift 1
 150     done
 151     gawk '
 152    BEGIN      { OFS=FS=","
 153                 IGNORECASE=1
 154                 Brief=0   }
 155               { gsub(/#.*/,"") }
 156    /^[ \t]*$/ { next          }
 157    Data  && NF > 1     { Freq[$NF]++ }
 158    /@data/    { Data=1 }
 159    END        {
 160                  for(N in Freq)
 161                      if (Brief) { print N } else { print Freq[N],N }}
 162    ' Brief=$brief -
 163 }
 164 intersectAttributes() {
 165     # list the intersection of attributes 
 166     # found in a set of arff files
 167         gawk '
 168     BEGIN        { IGNORECASE=1; OFS=","}
 169     FNR==1       { Files++ }
 170     /@attribute/ { Got[$2]++ }
 171     END          { for(A in Got)
 172                        if (Got[A]>=Files)
 173                            print  A
 174     }' $1
 175 }
 176 shared() {
 177     for i in `intesectAttributes $Tmp/arff/*.arff | 
 178               sort | 
 179               grep -v defects`; do
 180         echo $i
 181     done
 182     echo defects
 183 }
 184 some() {
 185     # generate an arff file that only contains certaina attributes
 186     gawk -f some.awk -v Some="$1" $2
 187 }
 188 makeshare() {
 189     Shared=`shared`
 190     for i in $Tmp/arff/*.arff; do
 191         echo $i
 192         some "$Shared" $i > $Tmp/shared/`basename $i`
 193     done
 194 }
 195 report() {
 196     gawk  'BEGIN {RS=""; FS="\n"}
 197            NR==1 { M=split(Show,Shows,",") }
 198                  {  R[++N]=indent($0)
 199                  }
 200            END   {print " ";
 201                   for(r=1;r<=M;r++) printf("\n%s",R[Shows[r]]);
 202                   print ""; }
 203     
 204            function str(n,chr, out) {
 205                   chr = chr ? chr : " ";
 206                   while(n-- > 0) out= out chr;
 207                 return out
 208                }
 209            function indent(str, i, out) {
 210                 for(i=1;i<=NF;i++)
 211                     out=out str(Indent," ")  $i "\n"
 212                 return out
 213            }
 214            ' Show=$2 Indent=$1 -
 215 }
 216 
 217 #### end inter intra stuff 
 218 
 219 #### misc utils
 220 
 221 makeTrainTest() {
 222     cat - | someArff --seed $1 --bins $2 --bin $3
 223 }
 224 gotwant()    {  gawk '
 225     BEGIN   {Unlog  = 0;
 226              OFS    = ","
 227              Ee     = 848456353 / 312129649;
 228             }
 229     NF == 3 { if (UnLog) { print Ee^$2 , Ee^$3
 230               } else     { print $2,$3 }
 231            }
 232     NF == 4    { print $2 , $4 }
 233 ' -
 234 }
 235 abcd() {
 236     local goal="true|yes"
 237     local before=""
 238     local prefix=""
 239     local decimals=2
 240     while [ `echo $1 | grep "-"` ]; do
 241        case $1 in
 242             -d|--decimals) decimals=$2;;
 243             -b|--before) before=$2;;
 244             -p|--prefix) prefix=$2;;
 245             -g|--goal)   goal=$2;;
 246             *)           blabln "'"$1"' unknown\n usage abcd [options]";
 247                          return 1;;
 248        esac
 249        shift 2
 250     done
 251     [  -n "$before" ] && printf $before
 252     gawk '
 253     BEGIN {
 254          Decimals    = 3
 255          Got         = 1
 256          Want        = 2;
 257          Prefix      = "";
 258          True        = "true";  ## define symbol 1
 259          A=B=C=D=0 ;
 260          FS=OFS=","
 261          GoalPd = 1;
 262          GoalPf = 0;
 263        }
 264     function yes(s) {return s ~ True   }
 265     function no(s)  {return ( yes(s) ? 0 : 1 ) }
 266                { sub(/#.*/,"") }
 267     /^[ \t]*$/ { next }
 268     NF==2      { N++;
 269                  Predicted=$Got;
 270                  Actual=$Want;
 271                  if (Predicted == Actual) Good++;
 272                  if (no( Actual) && no( Predicted)) A++;
 273                  if (yes(Actual) && no( Predicted)) B++;
 274                  if (no( Actual) && yes(Predicted)) C++;
 275                  if (yes(Actual) && yes(Predicted)) D++;
 276                 #print N,$0,A,B,C,D
 277                }
 278     END  {
 279         OFMT        = "%." Decimals "f";
 280         Balance=Precision=Accuracy=Pf=NotPf=Pd=0;
 281         if (C+D > 0 )      Precision = D/(C+D);
 282         if ((A+B+C+D) > 0)  Accuracy  = (A+D)/(A+B+C+D);
 283         if (A+C > 0 )      Pf       = C/(A+C)
 284         if (B+D > 0 )      Pd        = D/(B+D);
 285         if (B+C+D > 0)     { # special case- everything misses
 286              Balance = 1 - sqrt((GoalPd - Pd)^2 + (GoalPf - Pf)^2)/sqrt(2)
 287         }
 288         if(Prefix) printf Txt=Prefix OFS;
 289         print A,B,C,D,
 290               sprintf(OFMT,100*Accuracy),
 291               sprintf(OFMT,100*Pd),
 292               sprintf(OFMT,100*Pf),
 293               sprintf(OFMT,100*Precision),
 294               sprintf(OFMT,100*Balance);
 295     }' Prefix="$prefix" Decimals="$decimals" True="$goal" -
 296 }
 297 malign() {
 298     cat - | gawk '
 299     BEGIN { Width=1;
 300             Gutter=1;
 301             OFS=FS=",";
 302     }      
 303     { N++;
 304       for(I=1;I<=NF;I++) {
 305             if( (L=length($I)) > Max[I]) Max[I]=L;
 306             ++Data[N,0];
 307             Data[N,I]=$I; }
 308     }
 309     END {for(J=1;J<=N;J++) {
 310             Str=Sep1="";
 311             if (Data[J,0]>1) {
 312                 for(I=1;I<=NF;I++) {
 313                     L=length(Data[J,I]);
 314                     Str = Str Sep1 \
 315                           str(most(Width,Max[I]+Gutter+1)-L," ") \
 316                           Data[J,I];
 317                     Sep1= OFS;
 318                 }}
 319             else {Str=Data[J,1]}
 320           print Str;}
 321     }
 322     function str(n,c,  out) { while(--n > 0) out = out c; return out; }    
 323     function most(x,y)      { return x > y ? x : y; }
 324     '
 325 }
 326 medians()    {
 327     local start="2"
 328     while [ `echo $1 | grep "-"` ]; do
 329        case $1 in
 330             -s|--start) start=$2;;
 331             *)           blabln "'"$1"' unknown\n usage medians [options]";
 332                          return 1;;
 333        esac
 334        shift 2
 335     done
 336     gawk '
 337     BEGIN{FS=","}
 338          {print}
 339     /^[ \t]*$/ {next}  
 340     /#/  {next}
 341          {for(I=Start;I<=NF;I++) {
 342             (Data[I,0]++); Data[I,Data[I,0]]=$I }
 343          }
 344     END{ #printf("#---")
 345          #for(I=Start;I<=NF;I++)
 346          #   printf(",-----")
 347          #print ""
 348          printf("##");
 349          printf $1
 350          for(I=2;I<Start;I++)
 351              printf ","$I
 352          for(I=Start;I<=NF;I++) {
 353                 Max=Data[I,0];
 354                 delete Val
 355                 N=0;
 356                 for(J=1;J<=Max;J++)
 357                      Val[J]  = Data[I,J]
 358                 asort(Val);
 359                 if(Max % 2 ) { printf(",%s",Val[int(Max/2)]) }
 360                 else        { below=Val[int(Max/2)];
 361                               above=Val[int(Max/2) + 1];
 362                               printf(",%s",(below+above)/2)
 363                             }
 364             }
 365            print ""
 366         }' Start=$start -
 367 }
 368 logNumbers() { blab "l";  gawk -f $Here/asLogs $1 ; }
 369 
 370 winLossTie() {
 371     local fields=10
 372     local key=1
 373     local performance=$fields
 374     local high=1
 375     local confidence=95
 376     local input="-"
 377     while [ `echo $1 | grep "-"` ]; do
 378         case $1 in
 379             -f|--fields)  fields=$2;      shift 2;;
 380             --99)         confidence=99;  shift 1;;
 381             --95)         confidence=95;  shift 1;;
 382             -k|--key)     key=$2;         shift 2;;
 383             -p|--perform) performance=$2; shift 2;;
 384             --high)       high=1;         shift 1;;
 385             --low)        high=0;         shift 1;;
 386             -i|--input)   input=$2;       shift 2;;
 387             *)   blabln "'"$1"' unknown\n. usage: winLossTie [options]"
 388                  return 1;;
 389         esac
 390     done
 391     (echo "#key,ties,win,loss,win-loss"
 392     gawk -f mwu.awk Fields=$fields Key=$key Performance=$performance \
 393                     High=$high Confidence=$confidence $input |
 394     sort -t, -r -n -k 5,5
 395     ) | malign
 396 
 397 }
 398 someArff() {
 399     #Q7: add command-line options to someArff to control the
 400     #    names of the generated test/train files (currently
 401     #    train.arff and test.arff). Remember to define default
 402     #    values for these variables and to update the help
 403     #    text. Hand in your new definition of "someArff"
 404     local bins=3
 405     local bin=1
 406     local seed=$RANDOM
 407     while [ `echo $1 | grep "-"` ]; do
 408         case $1 in
 409             -B|--bins) bins=$2;;
 410             -b|--bin) bin=$2;;
 411             -s|--seed) seed=$2;;
 412             -h|--help) cat <<-EOF
 413             someArff : divide an arrf file into Bins, create train/test files
 414             usage: someArff [flags] arffFile
 415 
 416             Flags
 417             -B, --bins NUM   Randomly divide the data into NUM bins
 418             -b, --bin  NUM   Store bin NUM into test.arff and rest into train.arff
 419             -s, --seed NUM   Set the random number seed to NUM
 420             -h, --help       Print this text
 421             EOF
 422             return 1;;
 423             *)   blabln "'"$1"' unknown\n usage cat file | someArff [options]"
 424                  return 1;;
 425         esac
 426         shift 2
 427     done
 428     gawk '
 429     BEGIN  {
 430       IGNORECASE=1;
 431       Trainf="train.arff"; Testf="test.arff";
 432       Bins=3;
 433       Bin=1;
 434       Seed=1;
 435    }
 436    /^[ \t]*$/          { next }
 437    /@relation/         { Seed ? srand(Seed) : srand(1)      }
 438    /@relation/         { printf "">Trainf;  printf "">Testf }
 439    /@relation/,/@data/ { print $0 >> Trainf;  print $0 >> Testf; next }
 440                        { Line[rand()] = $0; Lines++ }
 441   END {
 442     Start = Lines/Bins * (Bin - 1) ;
 443     Stop  = Lines/Bins * Bin;
 444     for(I in Line) {
 445        N++;
 446        What = (N>= Start && N < Stop) ? Testf : Trainf
 447        print Line[I]>>What; }
 448    }
 449    ' Seed=$seed Bins=$bins Bin=$bin -
 450 }
 451 
 452 #### Weka stuff
 453 ## pruning columns
 454 removeAttributes() {
 455     blab "/"
 456     $Weka weka.filters.unsupervised.attribute.Remove \
 457         -R "${1}-${2}" -i $3 -o tmp.arff
 458         set +x
 459     cat tmp.arff
 460 }
 461 ## discretization
 462 discretizeViaFayyadIrani() {
 463     blab "x"
 464     $Weka weka.filters.supervised.attribute.Discretize \
 465         -c last -R first-last  -i $1 -o tmp.arff
 466      cat tmp.arff
 467 }
 468 ## feature subset selection
 469 rankViaInfoGain() {
 470     blab "<"
 471     $Weka  weka.filters.supervised.attribute.AttributeSelection \
 472         -S  "weka.attributeSelection.Ranker -T -2.7976931348623157E308 -N -1"  \
 473         -E  "weka.attributeSelection.InfoGainAttributeEval"                    \
 474         -i  $1 -o tmp.arff
 475     cat tmp.arff
 476 }
 477 ### learners
 478 ## classifiers
 479 # rule-based classifiers
 480 oner() {
 481   blab "1"
 482   $Weka weka.classifiers.rules.OneR \
 483     -B 6 \
 484     -p 0 -t $1 -T $2
 485 }
 486 jrip() {
 487    blab "j"
 488    $Weka weka.classifiers.rules.JRip \
 489         -F 3 -N 2.0 -O 2 -S 1 \
 490         -p 0 -t $1 -T $2
 491 }
 492 jrip10() {
 493    blab "j"
 494    $Weka weka.classifiers.rules.JRip \
 495         -F 3 -N 2.0 -O 2 -S 1 \
 496         -t $1
 497 }
 498 # bayesian classifiers
 499 aode() {
 500     blab "a"
 501     $Weka weka.classifiers.bayes.AODE \
 502         "-F" 0 \
 503         -p 0 -t  $1 -T $2
 504 }
 505 aode10() {
 506     blab "a"
 507     $Weka weka.classifiers.bayes.AODE \
 508         "-F" 0  \
 509         -t  $1 -T $2
 510 }
 511 nb() {
 512     blab "n"
 513     $Weka weka.classifiers.bayes.NaiveBayes \
 514         -p 0 -t  $1 -T $2
 515 }
 516 nb10() {
 517     blab "n"
 518     $Weka weka.classifiers.bayes.NaiveBayes \
 519         -i -t  $1
 520 }
 521 nbk() {
 522     blab "k"
 523     $Weka weka.classifiers.bayes.NaiveBayes \
 524         -K \
 525         -p 0 -t  $1 -T $2
 526 }
 527 # decision tree learners
 528 j48() {
 529     blab "c"
 530     $Weka weka.classifiers.trees.J48 \
 531         -C 0.25 -M 2 \
 532         -p 0 -t $1 -T $2
 533 }
 534 j4810() {
 535     blab "c"
 536     $Weka weka.classifiers.trees.J48 \
 537         -C 0.25 -M 2 \
 538         -i -t $1
 539 }
 540 j4810c() {
 541     blab "c$2"
 542     $Weka weka.classifiers.trees.J48 \
 543         -C $2 -M 2 \
 544         -i -t $1
 545 }
 546 ## linear-model learners
 547 lsr() {
 548     blab "L"
 549     $Weka weka.classifiers.functions.LinearRegression \
 550          -S 0 -R 1.0E-8  \
 551          -p 0 -t $1 -T $2
 552 }
 553 m5p() {
 554     blab "P"
 555     $Weka weka.classifiers.trees.M5P \
 556          -p 0 -t $1 -T $2
 557 }
 558 ## nearest neighbor
 559 1Bkx() {
 560     blab "N"
 561     $Weka weka.classifiers.lazy.IBk \
 562         -K 1 -W 0 -X -E \
 563         -p 0 -t $1 -T $2
 564 }
 565 1Bk() {
 566     blab "n"
 567     $Weka weka.classifiers.lazy.IBk \
 568         -K -1 -W 0 -E \
 569         -p 0 -t $1 -T $2
 570 }
 571 ## association rule learners
 572 apriori() {
 573     blab "A"
 574     $Weke weka.associations.Apriori \
 575         -N 10 -T 0 -C 0.9 -D 0.05 -U 1.0 -M 0.1 -S -1.0 \
 576         -p 0 -t $1 -T $2
 577 }
 578 #### teaching demos
 579 weather.nominal() {
 580     cat<<-EOF
 581     @relation weather.nominal
 582 
 583     @attribute outlook {sunny, overcast, rainy}
 584     @attribute temperature {hot, mild, cool}
 585     @attribute humidity {high, normal}
 586     @attribute windy {TRUE, FALSE}
 587     @attribute play {yes, no}
 588 
 589     @data
 590     sunny,hot,high,FALSE,no
 591     sunny,hot,high,TRUE,no
 592     overcast,hot,high,FALSE,yes
 593     rainy,mild,high,FALSE,yes
 594     rainy,cool,normal,FALSE,yes
 595     rainy,cool,normal,TRUE,no
 596     overcast,cool,normal,TRUE,yes
 597     sunny,mild,high,FALSE,no
 598     sunny,cool,normal,FALSE,yes
 599     rainy,mild,normal,FALSE,yes
 600     sunny,mild,normal,TRUE,yes
 601     overcast,mild,high,TRUE,yes
 602     overcast,hot,normal,FALSE,yes
 603     rainy,mild,high,TRUE,no
 604 EOF
 605 }
 606 auto93() { cat<<-EOF
 607     @relation 'auto93.names'
 608     @attribute Manufacturer { Acura, Audi, BMW, Buick, Cadillac, Chevrolet, Chrysler, Dodge, Eagle, Ford, Geo, Honda, Hyundai, Infiniti, Lexus, Lincoln, Mazda, Mercedes-Benz, Mercury, Mitsubishi, Nissan, Oldsmobile, Plymouth, Pontiac, Saab, Saturn, Subaru, Suzuki, Toyota, Volkswagen, Volvo}
 609     @attribute Type { Small, Midsize, Compact, Large, Sporty, Van}
 610     @attribute City_MPG real
 611     @attribute Highway_MPG real
 612     @attribute Air_Bags_standard { 0, 2, 1}
 613     @attribute Drive_train_type { 1, 0, 2}
 614     @attribute Number_of_cylinders real
 615     @attribute Engine_size real
 616     @attribute Horsepower real
 617     @attribute RPM real
 618     @attribute Engine_revolutions_per_mile real
 619     @attribute Manual_transmission_available { 1, 0}
 620     @attribute Fuel_tank_capacity real
 621     @attribute Passenger_capacity real
 622     @attribute Length real
 623     @attribute Wheelbase real
 624     @attribute Width real
 625     @attribute U-turn_space real
 626     @attribute Rear_seat_room real
 627     @attribute Luggage_capacity real
 628     @attribute Weight real
 629     @attribute Domestic { 0, 1}
 630     @attribute class real
 631     @data
 632     Acura,Small,25,31,0,1,4,1.8,140,6300,2890,1,13.2,5,177,102,68,37,26.5,11,2705,0,15.9
 633     Acura,Midsize,18,25,2,1,6,3.2,200,5500,2335,1,18,5,195,115,71,38,30,15,3560,0,33.9
 634     Audi,Compact,20,26,1,1,6,2.8,172,5500,2280,1,16.9,5,180,102,67,37,28,14,3375,0,29.1
 635     Audi,Midsize,19,26,2,1,6,2.8,172,5500,2535,1,21.1,6,193,106,70,37,31,17,3405,0,37.7
 636     BMW,Midsize,22,30,1,0,4,3.5,208,5700,2545,1,21.1,4,186,109,69,39,27,13,3640,0,30
 637     Buick,Midsize,22,31,1,1,4,2.2,110,5200,2565,0,16.4,6,189,105,69,41,28,16,2880,1,15.7
 638     Buick,Large,19,28,1,1,6,3.8,170,4800,1570,0,18,6,200,111,74,42,30.5,17,3470,1,20.8
 639     Buick,Large,16,25,1,0,6,5.7,180,4000,1320,0,23,6,216,116,78,45,30.5,21,4105,1,23.7
 640     Buick,Midsize,19,27,1,1,6,3.8,170,4800,1690,0,18.8,5,198,108,73,41,26.5,14,3495,1,26.3
 641     Cadillac,Large,16,25,1,1,8,4.9,200,4100,1510,0,18,6,206,114,73,43,35,18,3620,1,34.7
 642     Cadillac,Midsize,16,25,2,1,8,4.6,295,6000,1985,0,20,5,204,111,74,44,31,14,3935,1,40.1
 643     Chevrolet,Compact,25,36,0,1,4,2.2,110,5200,2380,1,15.2,5,182,101,66,38,25,13,2490,1,13.4
 644     Chevrolet,Compact,25,34,1,1,4,2.2,110,5200,2665,1,15.6,5,184,103,68,39,26,14,2785,1,11.4
 645     Chevrolet,Sporty,19,28,2,0,6,3.4,160,4600,1805,1,15.5,4,193,101,74,43,25,13,3240,1,15.1
 646     Chevrolet,Midsize,21,29,0,1,4,2.2,110,5200,2595,0,16.5,6,198,108,71,40,28.5,16,3195,1,15.9
 647     Chevrolet,Van,18,23,0,1,6,3.8,170,4800,1690,0,20,7,178,110,74,44,30.5,?,3715,1,16.3
 648     Chevrolet,Van,15,20,0,2,6,4.3,165,4000,1790,0,27,8,194,111,78,42,33.5,?,4025,1,16.6
 649     Chevrolet,Large,17,26,1,0,8,5,170,4200,1350,0,23,6,214,116,77,42,29.5,20,3910,1,18.8
 650     Chevrolet,Sporty,17,25,1,0,8,5.7,300,5000,1450,1,20,2,179,96,74,43,?,?,3380,1,38
 651     Chrysler,Large,20,28,2,1,6,3.3,153,5300,1990,0,18,6,203,113,74,40,31,15,3515,1,18.4
 652     Chrysler,Compact,23,28,2,1,4,3,141,5000,2090,0,16,6,183,104,68,41,30.5,14,3085,1,15.8
 653     Chrysler,Large,20,26,1,1,6,3.3,147,4800,1785,0,16,6,203,110,69,44,36,17,3570,1,29.5
 654     Dodge,Small,29,33,0,1,4,1.5,92,6000,3285,1,13.2,5,174,98,66,32,26.5,11,2270,1,9.2
 655     Dodge,Small,23,29,1,1,4,2.2,93,4800,2595,1,14,5,172,97,67,38,26.5,13,2670,1,11.3
 656     Dodge,Compact,22,27,1,1,4,2.5,100,4800,2535,1,16,6,181,104,68,39,30.5,14,2970,1,13.3
 657     Dodge,Van,17,21,1,2,6,3,142,5000,1970,0,20,7,175,112,72,42,26.5,?,3705,1,19
 658     Dodge,Midsize,21,27,1,1,4,2.5,100,4800,2465,0,16,6,192,105,69,42,30.5,16,3080,1,15.6
 659     Dodge,Sporty,18,24,1,2,6,3,300,6000,2120,1,19.8,4,180,97,72,40,20,11,3805,1,25.8
 660     Eagle,Small,29,33,0,1,4,1.5,92,6000,2505,1,13.2,5,174,98,66,36,26.5,11,2295,1,12.2
 661     Eagle,Large,20,28,2,1,6,3.5,214,5800,1980,0,18,6,202,113,74,40,30,15,3490,1,19.3
 662     Ford,Small,31,33,0,1,4,1.3,63,5000,3150,1,10,4,141,90,63,33,26,12,1845,1,7.4
 663     Ford,Small,23,30,0,1,4,1.8,127,6500,2410,1,13.2,5,171,98,67,36,28,12,2530,1,10.1
 664     Ford,Compact,22,27,0,1,4,2.3,96,4200,2805,1,15.9,5,177,100,68,39,27.5,13,2690,1,11.3
 665     Ford,Sporty,22,29,1,0,4,2.3,105,4600,2285,1,15.4,4,180,101,68,40,24,12,2850,1,15.9
 666     Ford,Sporty,24,30,1,1,4,2,115,5500,2340,1,15.5,4,179,103,70,38,23,18,2710,1,14
 667     Ford,Van,15,20,1,2,6,3,145,4800,2080,1,21,7,176,119,72,45,30,?,3735,1,19.9
 668     Ford,Midsize,21,30,1,1,6,3,140,4800,1885,0,16,5,192,106,71,40,27.5,18,3325,1,20.2
 669     Ford,Large,18,26,1,0,8,4.6,190,4200,1415,0,20,6,212,114,78,43,30,21,3950,1,20.9
 670     Geo,Small,46,50,0,1,3,1,55,5700,3755,1,10.6,4,151,93,63,34,27.5,10,1695,0,8.4
 671     Geo,Sporty,30,36,1,1,4,1.6,90,5400,3250,1,12.4,4,164,97,67,37,24.5,11,2475,0,12.5
 672     Honda,Sporty,24,31,2,1,4,2.3,160,5800,2855,1,15.9,4,175,100,70,39,23.5,8,2865,0,19.8
 673     Honda,Small,42,46,1,1,4,1.5,102,5900,2650,1,11.9,4,173,103,67,36,28,12,2350,0,12.1
 674     Honda,Compact,24,31,2,1,4,2.2,140,5600,2610,1,17,4,185,107,67,41,28,14,3040,0,17.5
 675     Hyundai,Small,29,33,0,1,4,1.5,81,5500,2710,1,11.9,5,168,94,63,35,26,11,2345,0,8
 676     Hyundai,Small,22,29,0,1,4,1.8,124,6000,2745,1,13.7,5,172,98,66,36,28,12,2620,0,10
 677     Hyundai,Sporty,26,34,0,1,4,1.5,92,5550,2540,1,11.9,4,166,94,64,34,23.5,9,2285,0,10
 678     Hyundai,Midsize,20,27,0,1,4,2,128,6000,2335,1,17.2,5,184,104,69,41,31,14,2885,0,13.9
 679     Infiniti,Midsize,17,22,1,0,8,4.5,278,6000,1955,0,22.5,5,200,113,72,42,29,15,4000,0,47.9
 680     Lexus,Midsize,18,24,1,1,6,3,185,5200,2325,1,18.5,5,188,103,70,40,27.5,14,3510,0,28
 681     Lexus,Midsize,18,23,2,0,6,3,225,6000,2510,1,20.6,4,191,106,71,39,25,9,3515,0,35.2
 682     Lincoln,Midsize,17,26,2,1,6,3.8,160,4400,1835,0,18.4,6,205,109,73,42,30,19,3695,1,34.3
 683     Lincoln,Large,18,26,2,0,8,4.6,210,4600,1840,0,20,6,219,117,77,45,31.5,22,4055,1,36.1
 684     Mazda,Small,29,37,0,1,4,1.6,82,5000,2370,1,13.2,4,164,97,66,34,27,16,2325,0,8.3
 685     Mazda,Small,28,36,0,1,4,1.8,103,5500,2220,1,14.5,5,172,98,66,36,26.5,13,2440,0,11.6
 686     Mazda,Compact,26,34,1,1,4,2.5,164,5600,2505,1,15.5,5,184,103,69,40,29.5,14,2970,0,16.5
 687     Mazda,Van,18,24,0,2,6,3,155,5000,2240,0,19.6,7,190,110,72,39,27.5,?,3735,0,19.1
 688     Mazda,Sporty,17,25,1,0,?,1.3,255,6500,2325,1,20,2,169,96,69,37,?,?,2895,0,32.5
 689     Mercedes-Benz,Compact,20,29,1,0,4,2.3,130,5100,2425,1,14.5,5,175,105,67,34,26,12,2920,0,31.9
 690     Mercedes-Benz,Midsize,19,25,2,0,6,3.2,217,5500,2220,0,18.5,5,187,110,69,37,27,15,3525,0,61.9
 691     Mercury,Sporty,23,26,1,1,4,1.6,100,5750,2475,1,11.1,4,166,95,65,36,19,6,2450,1,14.1
 692     Mercury,Midsize,19,26,0,0,6,3.8,140,3800,1730,0,18,5,199,113,73,38,28,15,3610,1,14.9
 693     Mitsubishi,Small,29,33,0,1,4,1.5,92,6000,2505,1,13.2,5,172,98,67,36,26,11,2295,0,10.3
 694     Mitsubishi,Midsize,18,24,1,1,6,3,202,6000,2210,0,19,5,190,107,70,43,27.5,14,3730,0,26.1
 695     Nissan,Small,29,33,1,1,4,1.6,110,6000,2435,1,13.2,5,170,96,66,33,26,12,2545,0,11.8
 696     Nissan,Compact,24,30,1,1,4,2.4,150,5600,2130,1,15.9,5,181,103,67,40,28.5,14,3050,0,15.7
 697     Nissan,Van,17,23,0,1,6,3,151,4800,2065,0,20,7,190,112,74,41,27,?,4100,0,19.1
 698     Nissan,Midsize,21,26,1,1,6,3,160,5200,2045,0,18.5,5,188,104,69,41,28.5,14,3200,0,21.5
 699     Oldsmobile,Compact,24,31,0,1,4,2.3,155,6000,2380,0,15.2,5,188,103,67,39,28,14,2910,1,13.5
 700     Oldsmobile,Midsize,23,31,1,1,4,2.2,110,5200,2565,0,16.5,5,190,105,70,42,28,16,2890,1,16.3
 701     Oldsmobile,Van,18,23,0,1,6,3.8,170,4800,1690,0,20,7,194,110,74,44,30.5,?,3715,1,19.5
 702     Oldsmobile,Large,19,28,1,1,6,3.8,170,4800,1570,0,18,6,201,111,74,42,31.5,17,3470,1,20.7
 703     Plymouth,Sporty,23,30,0,2,4,1.8,92,5000,2360,1,15.9,4,173,97,67,39,24.5,8,2640,1,14.4
 704     Pontiac,Small,31,41,0,1,4,1.6,74,5600,3130,1,13.2,4,177,99,66,35,25.5,17,2350,1,9
 705     Pontiac,Compact,23,31,0,1,4,2,110,5200,2665,1,15.2,5,181,101,66,39,25,13,2575,1,11.1
 706     Pontiac,Sporty,19,28,2,0,6,3.4,160,4600,1805,1,15.5,4,196,101,75,43,25,13,3240,1,17.7
 707     Pontiac,Midsize,19,27,0,1,6,3.4,200,5000,1890,1,16.5,5,195,108,72,41,28.5,16,3450,1,18.5
 708     Pontiac,Large,19,28,2,1,6,3.8,170,4800,1565,0,18,6,177,111,74,43,30.5,18,3495,1,24.4
 709     Saab,Compact,20,26,1,1,4,2.1,140,6000,2910,1,18,5,184,99,67,37,26.5,14,2775,0,28.7
 710     Saturn,Small,28,38,1,1,4,1.9,85,5000,2145,1,12.8,5,176,102,68,40,26.5,12,2495,1,11.1
 711     Subaru,Small,33,37,0,2,3,1.2,73,5600,2875,1,9.2,4,146,90,60,32,23.5,10,2045,0,8.4
 712     Subaru,Small,25,30,0,2,4,1.8,90,5200,3375,1,15.9,5,175,97,65,35,27.5,15,2490,0,10.9
 713     Subaru,Compact,23,30,1,2,4,2.2,130,5600,2330,1,15.9,5,179,102,67,37,27,14,3085,0,19.5
 714     Suzuki,Small,39,43,0,1,3,1.3,70,6000,3360,1,10.6,4,161,93,63,34,27.5,10,1965,0,8.6
 715     Toyota,Small,32,37,1,1,4,1.5,82,5200,3505,1,11.9,5,162,94,65,36,24,11,2055,0,9.8
 716     Toyota,Sporty,25,32,1,1,4,2.2,135,5400,2405,1,15.9,4,174,99,69,39,23,13,2950,0,18.4
 717     Toyota,Midsize,22,29,1,1,4,2.2,130,5400,2340,1,18.5,5,188,103,70,38,28.5,15,3030,0,18.2
 718     Toyota,Van,18,22,1,2,4,2.4,138,5000,2515,1,19.8,7,187,113,71,41,35,?,3785,0,22.7
 719     Volkswagen,Small,25,33,0,1,4,1.8,81,5500,2550,1,12.4,4,163,93,63,34,26,10,2240,0,9.1
 720     Volkswagen,Van,17,21,0,1,5,2.5,109,4500,2915,1,21.1,7,187,115,72,38,34,?,3960,0,19.7
 721     Volkswagen,Compact,21,30,0,1,4,2,134,5800,2685,1,18.5,5,180,103,67,35,31.5,14,2985,0,20
 722     Volkswagen,Sporty,18,25,0,1,6,2.8,178,5800,2385,1,18.5,4,159,97,66,36,26,15,2810,0,23.3
 723     Volvo,Compact,21,28,1,0,4,2.3,114,5400,2215,1,15.8,5,190,104,67,37,29.5,14,2985,0,22.7
 724     Volvo,Midsize,20,28,2,1,5,2.4,168,6200,2310,1,19.3,5,184,105,69,38,30,15,3245,0,26.7
 725 EOF
 726 }
 727 auto93discreteClass() {
 728     #some learners can't handle auto93's numeric class 
 729     #so we discretize the class. Note that this is a pretty
 730     # dumb discretizer.
 731     auto93 | gawk  'BEGIN         {IGNORECASE=1; OFS=","; Round=20}
 732                     In && NF > 1  {$NF= "_"int($NF/Round+0.5)*Round}
 733                     $2 =="class"  {$3 = "{_0,_20,_40,_60}"}
 734                     /@data/       {In=1; FS=","}
 735                                   { print}'
 736 }
 737 #### some workers
 738 worker1001() {
 739     for one in $Data; do
 740         cp $one raw.arff
 741         stem=`basename $one`
 742         stem=${stem/.*/}
 743 
 744         logNumbers               raw.arff            > logged.arff
 745         discretizeViaFayyadIrani raw.arff            > discrete.arff
 746         discretizeViaFayyadIrani logged.arff         > loggedDiscrete.arff
 747 
 748         for x in raw discrete logged loggedDiscrete; do
 749             rankViaInfoGain $x.arff > ranked.arff
 750             for Attrs in 4 7 13  16; do
 751                 removeAttributes $Attrs 16 $x.arff > ranked${Attrs}.arff
 752                 blab "$stem $x $Attrs "
 753                 echo "#file,x,attrs,bin,learner,a,b,c,d,acc,pd,pf,prec,g"
 754                 for((R=1;R<=$Repeats;R++)); do
 755                     Seed=$RANDOM
 756                     for((Bin=1; Bin <= $Bins ;  Bin++)); do
 757                         blab "$Bin"
 758                         makeTrainTest $Seed $Bins $Bin ranked${Attrs}.arff
 759                         for Learner in $Learners; do
 760                             $Learner train.arff test.arff |
 761                             gotwant |
 762                             abcd "$stem,$x,$Attrs,$Bin,$Learner"
 763                         done
 764                     done
 765                 done    | medians
 766                 blabln
 767             done
 768         done
 769     done
 770 }
 #######################################
 # worker1002: like worker1001 but only for the two discretized variants
 # (discrete, loggedDiscrete). One seed is drawn per attribute count, so
 # every repeat passes the same seed to makeTrainTest.
 # Globals:  Data, Repeats, Bins, Learners (read)
 # Outputs:  a "#file,x,attrs,..." header plus whatever `medians` prints,
 #           on stdout; progress marks on stderr (blab/blabln)
 # Files:    raw.arff, logged.arff, discrete.arff, loggedDiscrete.arff,
 #           ranked.arff and ranked<N>.arff are written in the current dir
 #######################################
 worker1002() {
     # keep the loop variables out of the shell that sourced this file;
     # callees still see them (bash locals are dynamically scoped)
     local one stem x Attrs R Seed Bin Learner
     for one in $Data; do
         cp "$one" raw.arff
         stem=$(basename "$one")
         stem=${stem/.*/}        # drop everything from the first "." on

         logNumbers               raw.arff    > logged.arff
         discretizeViaFayyadIrani raw.arff    > discrete.arff
         discretizeViaFayyadIrani logged.arff > loggedDiscrete.arff

         for x in discrete loggedDiscrete; do
             rankViaInfoGain "$x.arff" > ranked.arff
             for Attrs in 4 7 13 16; do
                 # Select from the *ranked* file. Reading $x.arff here would
                 # leave ranked.arff unused and make the ranking step a no-op.
                 # NOTE(review): assumes rankViaInfoGain emits a normal arff
                 # that removeAttributes accepts -- confirm.
                 removeAttributes "$Attrs" 16 ranked.arff > "ranked${Attrs}.arff"
                 blab "$stem $x $Attrs "
                 # NOTE(review): seed is drawn outside the repeat loop, so all
                 # repeats reuse it (worker1001 reseeds per repeat) -- confirm
                 # this is intended.
                 Seed=$RANDOM
                 echo "#file,x,attrs,bin,learner,a,b,c,d,acc,pd,pf,prec,g"
                 for ((R = 1; R <= Repeats; R++)); do
                     for ((Bin = 1; Bin <= Bins; Bin++)); do
                         blab "."
                         makeTrainTest "$Seed" "$Bins" "$Bin" "ranked${Attrs}.arff"
                         for Learner in $Learners; do
                             $Learner train.arff test.arff |
                             gotwant |
                             abcd "$stem,$x,$Attrs,$Bin,$Learner"
                         done
                     done
                 done | medians
                 blabln
             done
         done
     done
 }
 804 #### some demos
 # demo3: write the weather.nominal data to $Tmp/data.arff and run j4810
 # on it; j4810's output goes to stdout. Ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered
 # (a bare `cd $Tmp` would otherwise drop data.arff into $HOME or the cwd).
 demo3() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     j4810 data.arff
     cd "$Here"
 }
 # demo4: as demo3, but j4810's output is piped through `report 4 3`.
 # Works in $Tmp (data.arff is written there) and ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo4() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     j4810 data.arff | report 4 3
     cd "$Here"
 }
 # demo5: as demo4, but asks report for columns 3,18,16 (`report 4 3,18,16`).
 # Works in $Tmp (data.arff is written there) and ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo5() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     j4810 data.arff | report 4 3,18,16
     cd "$Here"
 }
 # demo5a: run j4810 on the discrete soybean arff shipped under
 # $Ourmine/lib/arffs; whatever j4810 prints goes to stdout.
 demo5a() {
     local soybean=$Ourmine/lib/arffs/uci/discrete/soybean.arff
     j4810 $soybean
 }
 # demo6: write the weather.nominal data to $Tmp/data.arff and run nb10
 # on it; nb10's output goes to stdout. Ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo6() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     nb10 data.arff
     cd "$Here"
 }
 # demo7: as demo6, but nb10's output is piped through `report 4 2,3,4,5`.
 # Works in $Tmp (data.arff is written there) and ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo7() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     nb10 data.arff | report 4 2,3,4,5
     cd "$Here"
 }
 # demo8: as demo7, but asks report for columns 18,16 (`report 4 18,16`).
 # Works in $Tmp (data.arff is written there) and ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo8() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     nb10 data.arff | report 4 18,16
     cd "$Here"
 }
 # demo9: write the weather.nominal data to $Tmp/data.arff and call j48 with
 # that file as both arguments (elsewhere in this file j48 is called as
 # `j48 train.arff test.arff`). j48's output goes to stdout.
 # Ends in $Here, like the other demos. The previous `cd $Home` expanded to
 # a bare `cd`, which silently moved the shell to $HOME instead.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo9() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     weather.nominal > data.arff
     j48 data.arff data.arff
     cd "$Here"
 }
 850 #Q1: please write a generalization of demo5,demo7
 851 #    that can be called like this:
 852 #    demo57 --source weather.nominal --learner nb10
 # demo10: feed everything demo9 prints into gotwant.
 demo10() {
     demo9 \
         | gotwant
 }
 # demo11: pass demo10's output to abcd (one decimal place, with a
 # "a,b,c,d,acc,pd,pf,prec,bal" line supplied via --before), then to malign.
 demo11() {
     local -a abcdArgs=(--before "\na,b,c,d,acc,pd,pf,prec,bal\n" --decimals 1)
     demo10 \
         | abcd "${abcdArgs[@]}" \
         | malign
 }
 # demo12: write auto93discreteClass's output to $Tmp/data.arff, call j48
 # with that file as both arguments and pipe the result through gotwant.
 # Ends in $Here.
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo12() {
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     auto93discreteClass > data.arff
     j48 data.arff data.arff | gotwant
     cd "$Here"
 }
 # demo13: from demo12's comma-separated output, print only the lines whose
 # first and second fields differ.
 demo13() {
     demo12 \
         | gawk 'BEGIN { FS = "," }  $1 != $2 { print }'
 }
 # demo14: run abcd once per goal class (_0, _20, _40) over demo12's output
 # and pass the combined result to malign.
 # demo12 is run once and its output replayed to each abcd call. Piping
 # demo12 straight into the loop let the first abcd drain the shared stdin,
 # so the later goals saw no data (demo17 replays results.dat the same way).
 demo14() {
     local goal results
     results=$(demo12)
     for goal in _0 _20 _40 ; do
         printf '%s\n' "$results" |
         abcd --goal "$goal" \
              --prefix "auto93d,$goal" \
              --before "\n#data,goal,a,b,c,d,acc,pd,pf,prec,bal\n" \
              --decimals 1
     done | malign
 }
 # demo15: for bins 1..10 of one random split of the auto93discreteClass
 # data, run j48 on train.arff/test.arff and score goal "_20" with abcd.
 # Rows are sorted numerically on field 11 and passed through malign into
 # $Tmp/demo15.csv, which is printed and copied to $Safe. Ends in $Here.
 # Globals: Tmp, Here, Safe, Bins (read)
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo15() {
     #Q: what classes are we better at predicting?
     #Q: what would happen if the line "seed=$RANDOM" was moved down 2 lines?
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     (  echo "#data,bin, a,b,c,d,acc,pd,pf,prec,bal"
         seed=$RANDOM      # one seed, so every bin comes from the same split
         # NOTE(review): the loop is fixed at 10 bins while someArff is given
         # $Bins; these only agree when Bins=10 -- confirm.
         for ((bin = 1; bin <= 10; bin++)); do
             blab "$bin"
             auto93discreteClass | someArff --seed "$seed" --bins "$Bins" --bin "$bin"
             j48 train.arff test.arff | gotwant |
             abcd --goal "_20"  --prefix "auto93,$bin"  --decimals 1
         done | sort -t, -n -k 11,11
     ) | malign > demo15.csv #each bin
     blabln " "
     echo ""; cat demo15.csv
     cp demo15.csv "$Safe/demo15.csv"
     cd "$Here"
 }
 # demo16: two repeats (fresh seed each) of bins 1..5 over the
 # auto93discreteClass data; j48 is scored on goal "_20" with abcd. Rows are
 # sorted numerically on field 12 and passed through malign.
 # Results go to $Tmp/demo16.csv and $Safe/demo16.csv. This used to write
 # demo15.csv, which overwrote demo15's saved results with a table that has
 # a different column layout.
 # Globals: Tmp, Here, Safe, Bins (read)
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo16() {
     local me=demo16
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     (  echo "#data,repeat,bin,a,b,c,d,acc,pd,pf,prec,bal"
         for ((r = 1; r <= 2; r++)); do
             blab "repeat=$r "
             seed=$RANDOM
             # NOTE(review): the loop is fixed at 5 bins while someArff is
             # given $Bins -- confirm the two are meant to differ.
             for ((bin = 1; bin <= 5; bin++)); do
                 blab "$bin"
                 auto93discreteClass | someArff --seed "$seed" --bins "$Bins" --bin "$bin"
                 j48 train.arff test.arff | gotwant |
                 abcd --goal "_20"  --prefix "auto93,$r,$bin" --decimals 1
             done
             blabln
         done | sort -t, -n -k 12,12
     ) | malign > "$me.csv" #each bin
     blabln " "
     echo ""; cat "$me.csv"
     cp "$me.csv" "$Safe/$me.csv"
     cd "$Here"
 }
 # demo17: for each data set in $datas, do $repeats repeats (fresh seed each)
 # of $bins bins; in every bin run each learner on train.arff/test.arff and
 # score every class reported by `classes --brief` with abcd. Rows are sorted
 # numerically on field 14, passed through malign into $Tmp/demo17.csv,
 # printed and copied to $Safe. Ends in $Here.
 # The local settings now drive the run: someArff used to get the global
 # $Bins and the repeat loop was hard-coded to 2, so editing `bins` or
 # `repeats` had no (or an inconsistent) effect.
 # Globals: Tmp, Here, Safe, Ourmine (read)
 # Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
 demo17() {
     local me=demo17
     local bins=10
     local repeats=2
     local learners="oner nb j48"
     local datas="diabetes autos"
     { [ -n "$Tmp" ] && cd "$Tmp"; } || return
     (echo "#data,repeat,bin,learner,goal,a,b,c,d,acc,pd,pf,prec,bal"
     for data in $datas; do
         arff=$Ourmine/lib/arffs/uci/discrete/$data.arff
         # the class list depends only on the data set: compute it once
         goals=$(classes --brief < "$arff")
         for ((r = 1; r <= repeats; r++)); do
             blab "data=$data repeat=$r "
             seed=$RANDOM
             for ((bin = 1; bin <= bins; bin++)); do
                 blab "$bin"
                 someArff --seed "$seed" --bins "$bins" --bin "$bin" < "$arff"
                 for learner in $learners; do
                     # keep the predictions so each goal can re-read them
                     $learner train.arff test.arff | gotwant > results.dat
                     for goal in $goals; do
                         abcd --goal "$goal" \
                              --prefix "$data,$r,$bin,$learner,$goal" \
                              --decimals 1 < results.dat
                     done
                 done
             done
             blabln
         done
     done
     ) | sort -t, -n -k 14,14 | malign > "$me.csv" #each bin
     blabln " "
     echo ""; cat "$me.csv"
     cp "$me.csv" "$Safe/$me.csv"
     cd "$Here"
 }
 # demo18: one row per (data set, learner) pair. Each row is the
 # "$data,$learner, " label followed by the `median` line that
 # `medians --start 6` prints for the matching rows of $Safe/demo17.csv.
 # The table goes through malign and a numeric sort on field 12.
 # demo17 is run first when its saved results are missing.
 demo18() {
     local stats="$Safe/demo17.csv"
     local learners="nb j48 oner"
     local datas="diabetes autos"
     if [ ! -f "$stats" ]; then
         demo17
     fi
     (
         echo "#data,learner,goal,a,b,c,d,acc,pd,pf,prec,bal"
         for data in $datas; do
             for learner in $learners; do
                 printf '%s' "$data,$learner, "
                 grep $data $stats \
                     | grep $learner \
                     | medians --start 6 \
                     | grep median
             done
         done
     ) | malign | sort -t, -n -k 12,12
 }
 # demo19: run winLossTie over the saved demo17 results (performance in
 # field 14, key in field 4), generating them with demo17 first if the
 # file does not exist yet.
 demo19() {
     local stats="$Safe/demo17.csv"
     [ -f "$stats" ] || demo17
     winLossTie --input $stats \
                --fields 14 --perform 14 --key 4 \
                --95 --high
 }
 971 ### stop reading. broken after this/
 #######################################
 # flip: pivot comma-separated rows read from stdin into one row per data
 # value and one column pair per key value.
 # Usage:   cat file | flip --data COLS --key COLS --performance COL
 #   -d|--data COLS        comma-separated field numbers naming the row
 #   -k|--key COLS         comma-separated field numbers naming the column
 #   -p|--performance COL  field number holding the value to show
 # Input:   lines starting with "#" are skipped unless the next character
 #          is "#" or "[".
 # Output:  "#data,<key>,max?,..." header, then one line per data value;
 #          after each value comes "X" when it is the largest seen for that
 #          row. Labels are the selected fields, each prefixed with ".".
 #          Rows and columns appear in first-seen order.
 # Returns: 1 (message on stderr via blabln) on an unknown flag, a flag
 #          without a value, or a missing option.
 # Fixes over the earlier version: --performance read $3 instead of $2;
 # fields were indexed by split position rather than the requested column
 # number; a stray `exit` in END suppressed every data row; a trailing flag
 # without a value looped forever on `shift 2`; data was used as a printf
 # format.
 #######################################
 flip() {
     local data key performance
     # take "--flag value" pairs for as long as the next word contains a "-"
     while [[ "$1" == *-* ]]; do
         if (( $# < 2 )); then
             blabln "'$1' needs a value\n usage cat file | flip [options]"
             return 1
         fi
         case $1 in
             -d|--data)        data="$2";;
             -k|--key)         key="$2";;
             -p|--performance) performance="$2";;
             *)   blabln "'"$1"' unknown\n usage cat file | flip [options]"
                  return 1;;
         esac
         shift 2
     done
     if [[ -z $data || -z $key || -z $performance ]]; then
         blabln "flip needs --data, --key and --performance\n usage cat file | flip [options]"
         return 1
     fi
     gawk -v DataStr="$data" -v KeyStr="$key" -v Performance="$performance" '
     BEGIN {
         FS = OFS = ","
         NData = split(DataStr, TheData, /,/)
         NKeys = split(KeyStr,  TheKeys, /,/)
     }
     /^[ \t]*#[^[#]/   { next }
     {
         key = data = ""
         # TheData/TheKeys hold field numbers: look the fields up through them
         for (i = 1; i <= NData; i++) data = data "." $(TheData[i])
         for (i = 1; i <= NKeys; i++) key  = key  "." $(TheKeys[i])
         value = $Performance
         Result[key, data] = value
         if (!(data in Max) || value + 0 > Max[data] + 0) Max[data] = value
         # remember first-seen order so the output is deterministic
         if (!(key  in SeenKey))  { SeenKey[key]   = 1; Keys[++KeyCount]   = key  }
         if (!(data in SeenData)) { SeenData[data] = 1; Datas[++DataCount] = data }
     }
     END {
         printf "#data"
         for (k = 1; k <= KeyCount; k++) printf ",%s,max?", Keys[k]
         print ""
         for (d = 1; d <= DataCount; d++) {
             D = Datas[d]
             printf "%s", D
             for (k = 1; k <= KeyCount; k++) {
                 K = Keys[k]
                 if ((K, D) in Result) {
                     mark = (Result[K, D] + 0 == Max[D] + 0) ? "X" : ""
                     printf ",%s,%s", Result[K, D], mark
                 } else {
                     printf ",,"      # no row for this key/data pair
                 }
             }
             print ""
         }
     }
     '
     #| medians | malign
 }
# summary: print demo18's table pivoted by flip, then winLossTie over all
# saved demo17 results and over the diabetes and autos rows separately.
# The per-data-set extracts (<name>.stats) are written in $Tmp.
# demo17 is run first if $Safe/demo17.csv is missing. That happens before
# entering $Tmp because demo17 finishes with `cd $Here`, which used to send
# the .stats files to $Here instead. Always ends in $Here, like the demos.
# Returns non-zero, touching nothing, if $Tmp is unset or cannot be entered.
summary() {
    local stats="$Safe/demo17.csv"
    local d
    [ -f "$stats" ] || demo17
    { [ -n "$Tmp" ] && cd "$Tmp"; } || return
    demo18 | flip --data 1 --key 2 --performance 12
    printf "\n---| all |------\n\n"
    winLossTie --input "$stats" --fields 14 --perform 14 --key 4  --95 --high
    for d in diabetes autos; do
        printf "\n---| %s |------\n\n" "$d"
        grep "$d" "$stats"  > "$d.stats"
        winLossTie --input "$d.stats" --fields 14 --perform 14 --key 4  --95 --high
    done
    cd "$Here"
}
# demo101: for every (data set, learner, prep) combination, pull the
# matching non-"#" rows out of $HOME/tmp/safe/demo2.log, sort them
# numerically on field 14 and hand them to `medians --start 6`.
# Everything, after a header line, is saved to $Safe/demo101.log and then
# printed.
demo101() {
    local me=demo101
    local stats="$HOME/tmp/safe/demo2.log"
    local learners="aode j48 jrip nb oner"
    local preps="loggedDiscrete discrete"
    local datas="cm1 kc1 kc2 kc3_mod mc1_mod mc2_mod mw1_mod
                 pc1 pc2_mod pc3_mod pc4_mod pc5_mod"
    (
        echo "#data,prep,attrs,bin,learner,a,b,c,d,acc,pd,pf,prec,bal"
        for data in $datas; do
            for learner in $learners; do
                for prep in $preps; do
                    cat $stats \
                        | grep -v '#' \
                        | grep $data \
                        | grep $prep \
                        | grep $learner \
                        | sort -t, -n -k 14,14 \
                        | medians --start 6
                done
            done
        done
    ) > $Safe/$me.log
    cat $Safe/$me.log
}
# demo102: pivot the "##" lines of $Safe/demo101.log with flip (row = field
# 1, column = fields 2 and 5, value = field 14). demo101 is run first if
# that log is missing. Leaves the shell in $Tmp.
# Returns non-zero, running nothing, if $Tmp is unset or cannot be entered.
demo102() {
    local stats="$Safe/demo101.log"
    { [ -n "$Tmp" ] && cd "$Tmp"; } || return
    [ -f "$stats" ] || demo101
    grep "##" "$stats" | flip --data 1 --key 2,5 --performance 14
}
1058 #          oner   nb     j48
1059 #auto       56.6   60.4   85.9*
1060 #diabetes   57.2   68.5   69.3*
1061 # 
1062 #demo10() {
1063 #   demo9 | gawk -F, '/@/   {next} 
1064 #                     NF>1  {print $NF}' | sort | uniq -c
1065 #}
1066 #demo11() {
1067 #   setup; cd $Tmp
1068 #       demo9 > data.arff
1069 #
1070 #   c=0.1
1071 #   printf "confidence limit for pruning = $c (very selective)\n\n"
1072 #   j4810c data.arff $c  | report 0 3,18,16
1073 #
1074 #   c=0.25
1075 #   printf "confidence limit for pruning = $c (default, less selective)\n\n"
1076 #   j4810c data.arff $c  | report 0 3,18,16
1077 #   cd $Here
1078 #}
1079 #demo1001() {
1080 #   setUpVars
1081 #   setUpDirs
1082 #   setUpSeds
1083 #   prep
1084 #   cd $Tmp
1085 #   pwd
1086 #   makeshare
1087 #   worker1001 > log
1088 #   cp log $Safe/demo1.log
1089 #   winLossTie log | tee $Safe/demo1.winLossTie
1090 #}
1091 #demo1002() {
1092 #   setUpVars
1093 #   setUpDirs
1094 #   setUpSeds
1095 #   prep
1096 #   cd $Tmp
1097 #   pwd
1098 #   makeshare
1099 #   Learners="j48 jrip oner nb aode"
1100 #   worker1002 > log
1101 #   cp log $Safe/demo1.log
1102 #   winLossTie log | tee $Safe/demo1.winLossTie
1103 #}
1104 #### start up
# Runs whenever this file is sourced: initialise variables and directories
# via setup, then print the two-line banner (plus a blank line) on stderr.
setup
blabln "OurMine version v0.1 (c)2007 tim@menzies.us under GPLv3\nToo many doings, not enough learnings.\n"