#!/usr/bin/gawk -f
#
# Pass-through filter for comma-separated records.
#
# Every input record is echoed unchanged. While reading, the most recent
# record that contains the text "tested_negative" and has more than one
# comma-separated field is remembered; once input is exhausted that record
# is printed one additional time.
#
# NOTE(review): the extra copy looks like a first step towards the
# oversampling described in the notes at the bottom - confirm intent.
#
# Usage: gawk -f <this-file> data.csv   (or make it executable and run it)

BEGIN {
    # Fields are comma-separated.
    FS = ","
}

# Echo every record as-is.
{
    print $0
}

# Remember the latest multi-field record mentioning "tested_negative".
/tested_negative/ && NF > 1 {
    Last = $0
    have_last = 1
}

END {
    # Emit the remembered record once more. Skip this when nothing matched,
    # so no spurious blank line is appended to the output.
    if (have_last)
        print Last
}

# Notes / TODO:
#   - first filter the data
#   - try to figure out the unique classes
#   - oversample the minority class
#   - undersample the majority class
#   - first start with oversampling and undersampling