#!/usr/bin/gawk -f
# vim: set filetype=awk :   -*- awk -*-
#
# Created by DJ Boland 11/28/06
#
# Reads an ARFF file and writes three files named after the input file:
#   <stem>.names  class list followed by one line per non-class attribute
#   <stem>.data   the rows that follow the @data marker
#   <stem>.cfg    learner settings taken from the defaults set in BEGIN
#
# The last @attribute line seen before @data is treated as the class: its
# brace-delimited values become the class list and it is left out of the
# attribute section of the .names file.
#
# Requires gawk (IGNORECASE is a gawk extension).

BEGIN {
    IGNORECASE = 1      # make program case insensitive

    # Parser state.  Ignore and Names are assigned below but never
    # consulted; only Data gates any output.
    Ignore   = 1        # if true, at a point in the file to ignore (not output)
    Names    = 0        # if true, should be outputting to the .names file
    Data     = 0        # if true, should be outputting to the .data file
    Attrs    = 0        # number of @attribute lines seen so far
    LastLine = ""       # the most recent @attribute line read

    Stem      = ""      # the stem of the input filename
    dataFile  = ""      # the .data file
    namesFile = ""      # the .names file
    cfgFile   = ""      # the .cfg file

    # CFG file settings
    Granularity = 5     # granularity (number of bins for data)
    MaxNum      = 10    # maximum number of treatments to generate
    MaxSize     = 10    # maximum number of attributes per treatment
                        # (not emitted: END writes Attrs-1 as maxSize)
    RandomTrys  = 50    # number of runs to compose a treatment
    FutileTrys  = 5     # number of trys to accept with no new treatments
    Support     = "20%" # percentage of best class required for a treatment
}

# Return PATH with everything from the first "." of its final component
# removed.  Any leading directory part is kept untouched, so "./foo.arff"
# yields "./foo" instead of the empty string a plain split on "." gives.
function stem_of(path,    dir, base, dot) {
    dir  = ""
    base = path
    if (match(path, /^.*\//)) {
        dir  = substr(path, 1, RLENGTH)
        base = substr(path, RLENGTH + 1)
    }
    dot = index(base, ".")
    if (dot > 0)
        base = substr(base, 1, dot - 1)
    return dir base
}

# Split LIST on commas and re-join the pieces with SEP, in their original
# order.  Anything from a "}" onwards is dropped from each piece.  Empty
# leading pieces are skipped because the accumulator is still empty when
# they are seen.
function join_values(list, sep,    parts, n, i, out) {
    n = split(list, parts, /,/)
    out = ""
    # Index loop rather than for-in: for-in order is unspecified in awk.
    for (i = 1; i <= n; i++) {
        sub(/\}.*/, "", parts[i])
        sub(/\n/, "", parts[i])
        if (out == "")
            out = parts[i]
        else
            out = out sep parts[i]
    }
    return out
}

# Strip everything from the first "%" to end of line.
{ sub(/%.*/, "") }

# Drop all single and double quote characters.
{ gsub(/['"]/, "", $0) }

# Skip lines that are now blank.
/^[ \t\n]*$/ { next }

# @relation: start of the header; derive the output stem from the input name.
/@relation/ {
    Ignore = 0
    Names  = 1
    Data   = 0
    Stem   = stem_of(FILENAME)
}

# @attribute: record one line for the .names file.
/@attribute/ {
    Attrs++
    LastLine = $0
    if (match($0, / integer/) || match($0, / numeric/) || match($0, / real/)) {
        # Numeric types: the second whitespace-separated word is the name.
        split($0, names, " ")
        NamesOut[Attrs] = names[2] ": continuous"
    } else {
        # Otherwise: "name {v1, v2, ...}" -> "name: v1, v2, ..."
        sub(/@attribute /, "", $0)
        split($0, names, / *\{/)
        gsub(/ +/, "", names[2])
        sub(/\}[ \t]*$/, "", names[2])
        NamesOut[Attrs] = names[1] ": " join_values(names[2], ", ")
    }
}

# @data: the header is complete.  Write the .names file, then switch to
# copying rows into the .data file.
/@data/ {
    namesFile = Stem ".names"

    # Reduce the last @attribute line to its bare comma-separated values.
    sub(/^.*\{/, "", LastLine)
    sub(/\}[ \t]*$/, "", LastLine)
    gsub(/[ \t]+/, "", LastLine)
    gsub(/\}.*$/, "", LastLine)
    classlist = join_values(LastLine, ",")

    # ">" truncates only when the file is first opened; later prints to the
    # same name go to the same open stream.
    print "|Class List" > namesFile
    print classlist     > namesFile
    print ""            > namesFile
    print "|Attributes" > namesFile
    # NOTE(review): the loop header was truncated in the source this was
    # recovered from.  "I < Attrs" (every attribute except the last, which
    # supplies the class list) is a reconstruction consistent with the
    # "maxSize: Attrs-1" line written in END -- confirm against upstream.
    for (I = 1; I < Attrs; I++)
        print NamesOut[I] > namesFile
    fflush(namesFile)
    close(namesFile)

    Names  = 0
    Ignore = 0
    Data   = 1

    dataFile = Stem ".data"
    # print "|Data Entries" > dataFile
    print "" > dataFile
    next
}

# Every non-blank line after @data is copied to the .data file.
Data == 1 {
    # sub(/.$/, "", $0)
    print $0 > dataFile
}

END {
    # dataFile is still empty if no @data line was ever seen.
    if (dataFile != "") {
        fflush(dataFile)
        close(dataFile)
    }

    # write .cfg file
    cfgFile = Stem ".cfg"
    # print "|Configuration Settings" > cfgFile
    print ""                            > cfgFile
    print "granularity: " Granularity   > cfgFile
    print "maxNumber: " MaxNum          > cfgFile
    print "maxSize: " (Attrs - 1)       > cfgFile
    print "randomTrials: " RandomTrys   > cfgFile
    print "futileTrials: " FutileTrys   > cfgFile
    print "bestClass: " Support         > cfgFile
    fflush(cfgFile)
    close(cfgFile)
}