#!/usr/bin/gawk -f
#
# Split an ARFF file into per-fold train/test ARFF files.
#
# Usage:
#   gawk -f THIS_SCRIPT [-v bins=N] [-v repeat=R] [-v f=PREFIX] input.arff
#
# Variables (each may be preset with -v; the defaults match the old
# hard-coded values):
#   bins    number of folds                          (default 10)
#   repeat  repetition number embedded in file names (default 1)
#   f       output file name prefix                  (default "file")
#
# Output, for every fold k in 1..bins:
#   <f>-<repeat>-<k>-train.arff   header + every instance NOT in fold k
#   <f>-<repeat>-<k>-test.arff    header + the instances in fold k
#
# Instances are assigned to folds in input order, as contiguous slices:
# instance d (0-based) of n belongs to fold int(d * bins / n) + 1.
# No shuffling or stratification is performed.

BEGIN {
    IGNORECASE = 1              # ARFF keywords are matched case-insensitively

    # Only fill in defaults, so that values supplied with -v are respected.
    if (bins == "")   bins = 10
    if (repeat == "") repeat = 1
    if (f == "")      f = "file"

    isData = 0                  # becomes 1 once the @data line has been seen
    att    = 0                  # number of @attribute lines collected
    datum  = 0                  # number of data instances collected
}

# ---- header section -------------------------------------------------------
# Keywords are anchored to the start of the line and only recognised before
# @data, so a data row that merely contains "@attribute" or "@data" is not
# mistaken for a declaration.
!isData && /^[ \t]*@relation/  { rel = $0; next }
!isData && /^[ \t]*@attribute/ { attrs[att++] = $0; next }
!isData && /^[ \t]*@data/      { isData = 1; next }

# ---- data section ---------------------------------------------------------
# Blank lines and '%' comment lines are not instances; counting them would
# distort the fold sizes.
isData && !/^[ \t\r]*(%|$)/ { datas[datum++] = $0 }

END {
    for (fold = 1; fold <= bins; fold++) {
        base  = f "-" repeat "-" fold
        train = base "-train.arff"
        test  = base "-test.arff"

        # '>' truncates on the first write and appends afterwards, so a
        # re-run replaces stale output instead of appending a second header.
        print rel > train
        print rel > test
        for (a = 0; a < att; a++) {
            print attrs[a] > train
            print attrs[a] > test
        }
        print "@data" > train
        print "@data" > test

        for (d = 0; d < datum; d++) {
            # Multiply before dividing: d * bins is an exact integer, which
            # avoids rounding artefacts such as 57/100*100 == 56.99...
            if (int(d * bins / datum) + 1 == fold)
                print datas[d] > test
            else
                print datas[d] > train
        }

        # Both files are complete; release the descriptors before the next
        # fold so that a large 'bins' cannot exhaust open files.
        close(train)
        close(test)
    }
}