#!/bin/bash
#
# someArff: shuffle the data rows of an ARFF file and split them into a
# train/test pair.
#
# Usage:   someArff [flags] [arffFile]
#          cat file.arff | someArff [flags]
#
# Input:   the ARFF text, read from arffFile, or from stdin when no file
#          (or "-") is given.
# Output:  train.arff and test.arff in the current directory. Both files
#          receive the header (the lines from @relation through @data);
#          the shuffled data rows are cut into --bins equal slices, slice
#          --bin is written to test.arff and the rest to train.arff.
# Requires: gawk (the header patterns rely on IGNORECASE).
# Returns: 0 on success, 1 after printing help or on any usage/input error.
#
# All logic lives in functions that use "return", so the script behaves the
# same whether it is executed or sourced.

# Print the help text on stdout.
some_arff_usage() {
  cat <<'EOF'
someArff : divide an arrf file into Bins, create train/test files
usage: someArff [flags] arffFile
Flags
  -B, --bins NUM   Randomly divide the data into NUM bins
  -b, --bin NUM    Store bin NUM into test.arff and rest into train.arff
  -s, --seed NUM   Set the random number seed to NUM
  -h, --help       Print this text
EOF
}

# Print an error message on stderr.
# Arguments: $* - message text
some_arff_err() {
  printf 'someArff: %s\n' "$*" >&2
}

# Split the ARFF text on stdin into train.arff / test.arff.
# Arguments: $1 - random seed, $2 - number of bins, $3 - bin used for testing
# Returns:   the exit status of gawk
some_arff_split() {
  gawk -v Seed="$1" -v Bins="$2" -v Bin="$3" '
    BEGIN {
      IGNORECASE = 1
      Trainf = "train.arff"
      Testf  = "test.arff"
      # A zero or empty seed falls back to 1, as before.
      srand(Seed ? Seed : 1)
      # Start from empty files even when the input has no @relation line,
      # so rows are never appended to leftovers of a previous run.
      printf "" > Trainf
      printf "" > Testf
    }

    # Blank lines are dropped everywhere.
    /^[ \t]*$/ { next }

    # Header: copied verbatim to both output files.
    /@relation/, /@data/ {
      print > Trainf
      print > Testf
      next
    }

    # Data rows are kept under consecutive integer keys. Using rand() as
    # the subscript would squeeze it through CONVFMT and silently drop
    # every row whose key collides with an earlier one.
    { Data[++Lines] = $0 }

    END {
      # Fisher-Yates shuffle driven by rand(), so the row order depends
      # only on Seed and not on the array traversal order.
      for (I = Lines; I > 1; I--) {
        J = int(rand() * I) + 1
        Tmp = Data[I]; Data[I] = Data[J]; Data[J] = Tmp
      }

      # Bin k owns the rows N with Start < N <= Stop. Multiplying before
      # dividing makes the last boundary exactly Lines, so the bins
      # partition 1..Lines with no row left out of every test set.
      Start = Lines * (Bin - 1) / Bins
      Stop  = Lines * Bin / Bins
      for (N = 1; N <= Lines; N++) {
        What = (N > Start && N <= Stop) ? Testf : Trainf
        print Data[N] > What
      }
    }
  '
}

# Parse the command line, validate it and run the split.
# Arguments: the script arguments ("$@")
# Returns:   0 on success, 1 on help or error
some_arff_main() {
  local bins=3 bin=1 seed=$RANDOM input

  # Only arguments that START with a dash are flags; a lone "-" means stdin.
  while [[ $# -gt 0 && $1 == -?* ]]; do
    case $1 in
      -h|--help)
        some_arff_usage
        return 1
        ;;
      --)
        shift
        break
        ;;
      -B|--bins|-b|--bin|-s|--seed)
        # Without this guard "shift 2" fails on a trailing flag, shifts
        # nothing, and the loop never terminates.
        if (( $# < 2 )); then
          some_arff_err "option '$1' requires a value"
          return 1
        fi
        case $1 in
          -B|--bins) bins=$2 ;;
          -b|--bin)  bin=$2 ;;
          -s|--seed) seed=$2 ;;
        esac
        shift 2
        ;;
      *)
        printf "'%s' unknown\n usage cat file | someArff [options]\n" "$1" >&2
        return 1
        ;;
    esac
  done

  # Bins is a divisor in the split arithmetic, so it must be >= 1.
  if [[ ! $bins =~ ^[0-9]+$ ]] || (( 10#$bins < 1 )); then
    some_arff_err "--bins must be a positive integer, got '$bins'"
    return 1
  fi
  if [[ ! $bin =~ ^[0-9]+$ ]] || (( 10#$bin < 1 || 10#$bin > 10#$bins )); then
    some_arff_err "--bin must be an integer between 1 and $bins, got '$bin'"
    return 1
  fi

  if ! command -v gawk >/dev/null 2>&1; then
    some_arff_err "gawk is required but was not found in PATH"
    return 1
  fi

  input=${1:--}
  if [[ $input == - ]]; then
    some_arff_split "$seed" "$((10#$bins))" "$((10#$bin))"
  elif [[ -r $input ]]; then
    # Feed the file through stdin so its name can never be mistaken by
    # gawk for an option or a var=value assignment.
    some_arff_split "$seed" "$((10#$bins))" "$((10#$bin))" < "$input"
  else
    some_arff_err "cannot read '$input'"
    return 1
  fi
}

some_arff_main "$@"