#!/usr/bin/gawk -f
#
# Split the input records into `sample` cross-validation folds.
#
# For every fold i in [0, sample) two files are written:
#   "test"  i file  - the records held out for fold i
#   "train" i file  - all remaining records
#
# Tunables (defaults are set in BEGIN; override them with command-line
# assignments placed before the input file name, e.g.
#   kfold.awk seed=7 sample=5 file=.txt input.dat):
#   seed    seed passed to srand() for the shuffle (default 0)
#   file    suffix appended to every output file name (default "file")
#   sample  number of folds (default 10)

BEGIN {
    seed    = 0
    file    = "file"
    sample  = 10
    first   = 1
    notest  = 1      # not referenced anywhere in this script; kept as-is
    noDatum = 0
}

# Runs once, on the first record. Command-line assignments such as
# `seed=7` are applied after BEGIN, so the generator is seeded here
# rather than in BEGIN.
first {
    srand(seed)
    first = 0
}

# Buffer every input record; the split needs the total count.
{
    data[noDatum++] = $0
}

END {
    folds = int(sample)
    if (folds < 1) {
        # Guard against division by zero / a meaningless fold count.
        print "error: sample must be >= 1 (got " sample ")" > "/dev/stderr"
        exit 1
    }

    # Fisher-Yates shuffle so that fold membership is driven by `seed`
    # instead of by the unspecified order of `for (d in data)`.
    for (i = noDatum - 1; i > 0; i--) {
        j = int(rand() * (i + 1))
        tmp = data[i]
        data[i] = data[j]
        data[j] = tmp
    }

    for (i = 0; i < folds; i++) {
        train = "train" i file
        test  = "test" i file

        # Truncate both outputs so a re-run does not append to old results.
        printf "" > test
        printf "" > train

        for (d = 0; d < noDatum; d++) {
            # Record d belongs to test fold i when
            #   i * noDatum / folds <= d < (i + 1) * noDatum / folds.
            # Multiplying through by `folds` keeps the comparison in exact
            # integer arithmetic.
            if (d * folds >= i * noDatum && d * folds < (i + 1) * noDatum)
                print data[d] > test
            else
                print data[d] > train
        }

        # Release the descriptors before moving on to the next fold.
        close(test)
        close(train)
    }
}