# Find the deltas in pd (column 11) with the learners (column 4) running on
# the data (column 1).
#
# NOTE(review): $Sandbox and $Safe are not set in this fragment; they are
# presumably output directories defined by the caller -- confirm before running.
#
# WARNING: slow - takes up to a minute, so be nice to others when running
# large jobs (hence the `nice -n 19`).
printf "finding deltas (takes <60 secs) ...\n"
nice -n 19 deltas Data=1 Rx=4 Effect=11 Fields=14 Include='logNums' ../pstats.out > "$Sandbox/logNums.deltas"

# Save the first 100 lines of this program's input and output.
head -n 100 ../pstats.out > "$Safe/pstats100.deltas"
head -n 100 "$Sandbox/logNums.deltas" > "$Safe/logNums100.deltas"

# Generate box plots from the deltas.
# WARNING: slow - takes up to a minute, so be nice to others when running
# large jobs.
printf "summarizing (takes <60 secs) ...\n\n"
nice -n 19 deltaSummary "$Sandbox/logNums.deltas" | malign | tee "$Safe/logNums.box"

# This should generate something like this (column alignment of the sample
# is approximate):
#
#rx, mean, sd, n, min, q1, median, q3, max, ------------------------0-------------------------
# nb, 42.9, 41.6, 187132, -100.0, 0.0, 50.0, 82.4, 100.0, ************************* | *****
# j48, -5.8, 43.7, 187108, -100.0, -28.5, 0.0, 11.8, 100.0, ****************** | ***********************
# oneR, -37.1, 39.5, 187108, -100.0, -76.9, -23.1, 0.0, 0.0, ****** |

# Now apply the statistical difference tests:
printf "\n\n"
changes "$Safe/logNums.box" | malign | tee "$Safe/logNums.changes"

# This should generate something like this:
#
# ID, deltas(95%), #rx, mean, sd, n, min, q1, median, q3, max, ------------------------0-------------------------
# 1, (= 1), nb, 42.9, 41.6, 187132, -100.0, 0.0, 50.0, 82.4, 100.0, ************************* | *****
# 2, (< 1), j48, -5.8, 43.7, 187108, -100.0, -28.5, 0.0, 11.8, 100.0, ****************** | ***********************
# 3, (< 2), oneR, -37.1, 39.5, 187108, -100.0, -76.9, -23.1, 0.0, 0.0, ****** |