#!/usr/bin/gawk -f
#
# Split an ARFF file into a test file and a training file.
#
# The header (everything from the @relation line through the @data line)
# is copied to both outputs. The remaining non-blank lines are shuffled;
# the first `Tests` of them go to `Testf`, the rest go to `Trainf`.
#
# Variables (capitalised names are globals):
#   Trainf  output file for training instances   (default "train.arff")
#   Testf   output file for test instances       (default "test.arff")
#   Tests   number of instances sent to Testf    (default 5)
#   Seed    random seed; unset or 0 means 1
#
# Any of them may be given with `-v Name=value` or as a `Name=value`
# operand placed before the input file name.

BEGIN {
    # Only fill in defaults, so values supplied with -v are not clobbered.
    if (Trainf == "") Trainf = "train.arff"
    if (Testf  == "") Testf  = "test.arff"
    if (Tests  == "") Tests  = 5
    N = 0                       # number of data lines collected so far
}

# Blank lines are dropped everywhere, header included.
/^[ \t]*$/ { next }

# Start of the header. ARFF declarations are case-insensitive, hence the
# tolower(). Seeding happens here rather than in BEGIN because a
# `Seed=...` operand on the command line is not yet assigned in BEGIN.
tolower($0) ~ /@relation/ {
    srand(Seed ? Seed : 1)
    printf "" > Trainf          # create/truncate both output files
    printf "" > Testf
}

# Copy every header line, @relation and @data inclusive, to both outputs.
tolower($0) ~ /@relation/, tolower($0) ~ /@data/ {
    print $0 >> Trainf
    print $0 >> Testf
    next
}

# Everything else is an instance. Store sequentially so that no line can
# be lost (keying the array by rand() would overwrite on a collision).
# NOTE(review): '%' comment lines inside the data section are collected
# here as if they were instances -- confirm whether they should be skipped.
{ Line[++N] = $0 }

END {
    # Fisher-Yates shuffle: gives a well-defined random permutation
    # instead of depending on the unspecified order of `for (I in Line)`.
    for (I = N; I > 1; I--) {
        J = int(rand() * I) + 1
        Tmp = Line[I]; Line[I] = Line[J]; Line[J] = Tmp
    }

    # The first `Tests` shuffled lines form the test set.
    for (I = 1; I <= N; I++) {
        What = (I <= Tests) ? Testf : Trainf
        print Line[I] >> What
    }

    close(Testf)
    close(Trainf)
}