# Writes a subset of a CSV file to standard output, keeping only the records
# whose column named by 'Col' holds the value given by 'Want'.
#
# A column named 'year' is handled specially: a record is kept when its year
# equals 'Want' or falls within the following 'Span' years (Span defaults to
# 4, i.e. a five-year window starting at Want).
#
# Variables set on the command line (after the program, before the data file):
#   Col   name of the header column to compare (required)
#   Want  value a record must hold in that column to be kept (required)
#   Span  number of extra years accepted after Want for the 'year' column
#         (optional, default 4)
#
#Example use: $ gawk -f CSVsubset.awk Col="mode" Want="embedded" data/nasa93.csv > data/subsets/nasa93_embedded.csv
#
# Requires gawk: IGNORECASE and the "/dev/stderr" special file are gawk
# features.

# Returns Text without leading/trailing blanks, tabs and carriage returns.
# Used only for comparisons, so the records that are printed stay untouched
# (a CRLF file keeps its line endings, padded fields keep their padding).
function trim(Text) {
    gsub(/^[ \t\r]+|[ \t\r]+$/, "", Text)
    return Text
}

BEGIN {
    IGNORECASE = 1                  # Ignore case in string comparisons.
    FS = ","                        # Initialize the field separator.
    OFS = ","                       # Initialize the output field separator.

    # Sentinel meaning "the user never assigned this variable". Command-line
    # assignments are applied after BEGIN has run, so they overwrite these
    # defaults before the first record is read.
    Unset = "Invalid, Initialize"
    Col = Unset                     # Name of the column used to make the subset.
    Want = Unset                    # Value needed in that column to keep a record.
    Span = 4                        # Extra years accepted after Want for 'year'.
    Target = -1                     # Internal: number of the column to compare.
}

# Identify the relevant column from the header line. NR (not FNR) is used, so
# only the first line of the first input file is inspected. The loop does not
# stop at the first hit: if several header cells match, the last one wins.
NR == 1 {
    for (I = 1; I <= NF; I++) {
        # The trimmed comparison lets a header cell match even when it is
        # padded or, being the last cell of a CRLF line, ends in "\r".
        if ($I == Col || trim($I) == Col)
            Target = I
    }
}

# The first two lines of every input file are copied through unfiltered.
# NOTE(review): presumably line 2 is a second header row -- confirm against
# the data files.
FNR <= 2 {
    print $0
    next
}

# Only keep the records that meet the Want condition for the selected column.
{
    # Without a usable column or a wanted value nothing can be filtered, so
    # the record is passed through; the END rule reports the problem.
    if (Target == -1 || Want == Unset) {
        print $0
        next
    }

    if (Col == "year") {
        # int() reads the leading number and ignores anything after it, so a
        # trailing "\r" does not matter here. Keep Want .. Want+Span inclusive.
        Year = int($Target)
        First = int(Want)
        if (Year >= First && Year <= First + int(Span))
            print $0
    } else {
        # The plain comparison comes first to keep awk's numeric comparison of
        # number-like fields; the trimmed fallback only adds matches for
        # fields carrying surrounding whitespace or a trailing "\r".
        if ($Target == Want || trim($Target) == Want)
            print $0
    }
}

# Report configuration problems on standard error.
# NOTE(review): the exit status stays 0 even when a problem is reported.
END {
    if (Want == Unset) {
        print "Please set the 'Want' variable and run again." > "/dev/stderr"
    }
    if (Col == Unset) {
        print "Please set the 'Col' variable to the name of the column to compare and run again." > "/dev/stderr"
    } else if (Target == -1) {
        # Only meaningful when Col was actually given; otherwise it would just
        # echo the sentinel back at the user.
        print "Couldn't find column: '" Col "'" > "/dev/stderr"
    }
}