/*
 * Process.java
 *
 * This Java class is responsible for parsing the input file, gathering
 * information about it, and then using that information to write a results
 * file in a predefined format.
 *
 */

package gratesttask;

import java.io.*;

/**
 * Reads an input file containing "@attribute" declarations and an "@data"
 * section, gathers per-attribute statistics (value usage and missing values),
 * and writes a summary report to an output file.
 *
 * The parser only understands attribute declarations of the form
 * {@code @attribute 'name' {'v1','v2',...}} (space separated, values quoted
 * and enclosed in braces) and data rows whose values are comma separated.
 * Every data row is expected to carry at least as many values as there are
 * declared attributes.
 *
 * @author Oussama El-Rawas
 */
public class Process {
    
    private int numInstance = 0; // Number of data instances read
    private int numAtt = 0; // Number of attribute declarations read
    private boolean classAtt = false; // Set once an attribute named "class" has been seen
    private String attNamesVal [][]; // Per attribute: index 0 is the name, indices 1.. are its possible values
    private int totalMissing = 0; // Total number of missing ("?") values over all instances
    private int missCount[]; // Per attribute: number of instances in which its value is missing
    private int attValUse [][]; // Per attribute: usage counter for each of its possible values
    
    /** Creates a new instance of Process and automatically initiates processing
     * of the specified input file. When the input file is missing or
     * unreadable a message is printed to standard output and nothing is
     * written.
     *
     * @param pathTFile The input file path and name
     * @param pathOFile The output file path and name
     * @throws IOException if reading the input file or writing the output file fails
     */
    public Process(String pathTFile, String pathOFile) throws IOException{
        
        File fTFile = new File(pathTFile);
        
        // Guard clause: nothing to do when the input cannot be read
        if (!fTFile.exists() || !fTFile.canRead()) {
            System.out.println("Source file does not exist or is unreadable.");
            return;
        }
        
        BufferedReader brTFile = new BufferedReader(new FileReader(fTFile));
        try {
            String sBuff; // Buffer String for reading from the file
            
            // Reading the file one line at a time until the end of the file is reached
            while ((sBuff = brTFile.readLine()) != null) {
                if (sBuff.indexOf("@attribute") != -1) {
                    this.parseAttribute(sBuff);
                } else if (sBuff.indexOf("@data") != -1) {
                    this.parseData(brTFile);
                }
            }
        } finally {
            // Always release the input file, even when parsing fails
            brTFile.close();
        }
        
        this.output(pathOFile);
    }
    
    /** Parses one attribute declaration line and appends the attribute name
     * and its possible values to the attribute table.
     *
     * @param sBuff The line containing the "@attribute" declaration
     */
    private void parseAttribute(String sBuff) {
        
        this.numAtt++; // Incrementing the number of attributes found
        
        // Growing the attribute table by one row, keeping the rows read so far
        String grown [][] = new String [this.numAtt][];
        if (this.numAtt > 1)
            System.arraycopy(this.attNamesVal, 0, grown, 0, this.numAtt - 1);
        this.attNamesVal = grown;
        
        String sAttParse [] = sBuff.split(" "); // Tokenizing the declaration
        
        // Walking the tokens backwards so that the values token (even index)
        // allocates the row before the name token (odd index) fills slot 0
        for (int i = sAttParse.length - 1; i > 0; i--) {
            
            if (i % 2 == 1) { // Case of attribute name
                
                int nameStart = sAttParse[i].indexOf("'") + 1;
                if (nameStart != 0) { // If the attribute name is within single quotes
                    int nameEnd = sAttParse[i].lastIndexOf("'");
                    sAttParse[i] = sAttParse[i].substring(nameStart, nameEnd); // Getting rid of the single quotes
                }
                
                this.attNamesVal[this.numAtt - 1][0] = sAttParse[i]; // Saving the attribute name
                if (sAttParse[i].equalsIgnoreCase("class"))
                    this.classAtt = true;
                
            } else { // Case of attribute values
                
                // Isolating the text between "{'" and "'}" and splitting it into the individual values
                int valStart = sAttParse[i].indexOf("{'") + 2;
                int valEnd = sAttParse[i].indexOf("'}");
                String values [] = sAttParse[i].substring(valStart, valEnd).split("','");
                
                // Allocating space for the attribute name (slot 0) and the values (slots 1..)
                this.attNamesVal[this.numAtt - 1] = new String [values.length + 1];
                System.arraycopy(values, 0, this.attNamesVal[this.numAtt - 1], 1, values.length);
            }
        }
    }
    
    /** Allocates the value-use and missing-value counters, sized from the
     * attributes that have been declared so far.
     */
    private void initCounters() {
        
        this.attValUse = new int[this.numAtt][];
        this.missCount = new int[this.numAtt];
        
        // Each row gets one counter per possible value of that attribute
        for (int att = 0; att < this.numAtt; att++)
            this.attValUse[att] = new int[this.attNamesVal[att].length - 1];
    }
    
    /** Reads the data instances that follow an "@data" line and updates the
     * instance, value-use and missing-value statistics. Reading stops at the
     * first line containing "%" or at the end of the file, whichever comes
     * first.
     *
     * @param brTFile The reader positioned just after the "@data" line
     * @throws IOException if reading from the input file fails
     */
    private void parseData(BufferedReader brTFile) throws IOException {
        
        this.initCounters();
        
        String sBuff;
        // The null check makes a missing "%" terminator end the section cleanly at end of file
        while ((sBuff = brTFile.readLine()) != null && sBuff.indexOf("%") == -1) {
            
            int rowStart = sBuff.indexOf("'");
            if (rowStart == -1)
                continue; // Lines without any single quote carry no data instance
            
            int rowEnd = sBuff.lastIndexOf("'") + 1;
            sBuff = sBuff.substring(rowStart, rowEnd); // Isolating the data instance for processing
            
            this.numInstance++; // Incrementing the number of instances read
            
            String sInsParse[] = sBuff.split(","); // Tokenizing the attribute values of the instance
            for (int att = 0; att < this.numAtt; att++) {
                
                int valStart = sInsParse[att].indexOf("'") + 1;
                if (valStart != 0) { // If the attribute value is enclosed with single quotes
                    int valEnd = sInsParse[att].lastIndexOf("'");
                    sInsParse[att] = sInsParse[att].substring(valStart, valEnd); // Getting rid of the single quotes
                }
                
                if (sInsParse[att].equalsIgnoreCase("?")) { // Missing value: update missing stats
                    this.missCount[att]++;
                    this.totalMissing++;
                } else { // Otherwise update the usage counter of every matching declared value
                    for (int val = 0; val < this.attValUse[att].length; val++) {
                        if (sInsParse[att].equalsIgnoreCase(this.attNamesVal[att][val + 1]))
                            this.attValUse[att][val]++;
                    }
                }
            }
        }
    }
    
    /** Appends spaces to the given text until it is at least the given width.
     *
     * @param text The text to pad
     * @param width The minimum length of the returned text
     * @return The text, right-padded with spaces
     */
    private static String padTo(String text, int width) {
        String padded = text;
        while (padded.length() < width)
            padded = padded.concat(" ");
        return padded;
    }
    
    /** Writes the statistics gathered from the input file to the output file
     * in the report format.
     *
     * @param pathOFile The output file path and name
     * @throws IOException if the output file cannot be created or written
     */
    public void output(String pathOFile) throws IOException {
        
        // The counters only exist once an "@data" line has been seen; create
        // empty ones otherwise so the report can still be written
        if (this.missCount == null || this.attValUse == null)
            this.initCounters();
        
        BufferedWriter brOFile = new BufferedWriter(new FileWriter(new File(pathOFile)));
        try {
            // Part 1
            brOFile.write("%\tNumber of Instances: "+this.numInstance);
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            
            // Part 2
            if (this.classAtt)
                brOFile.write("%\tNumber of Attributes: "+(this.numAtt-1)+" + the class attribute");
            else
                brOFile.write("%\tNumber of Attributes: "+this.numAtt);
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            
            // Part 3
            brOFile.write("%\tAttribute Information: ");
            for (int att = 0; att < this.numAtt; att++) {
                String row [] = this.attNamesVal[att];
                brOFile.newLine();
                brOFile.write("%\t\t"+(att+1)+". "+row[0]+": ");
                
                for (int val = 1; val < row.length; val++) {
                    brOFile.write(row[val]);
                    // The last value is closed with a period, the others are comma separated
                    brOFile.write(val == row.length - 1 ? "." : ", ");
                }
            }
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            
            // Part 4
            brOFile.write("%\tMissing Attribute Values: (denoted by \"?\") ");
            brOFile.newLine();
            brOFile.write("%\t\tAttribute #:\tNumber of instances with missing values:");
            for (int miss = 0; miss < this.numAtt; miss++) {
                if (this.missCount[miss] != 0) {
                    brOFile.newLine();
                    // NOTE(review): rows here are numbered miss+2 while Part 3 numbers
                    // attributes att+1 -- presumably an intentional offset; confirm
                    brOFile.write("%\t\t"+(miss+2)+"\t\t"+this.missCount[miss]);
                }
            }
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            
            // Part 5
            brOFile.write("%\tClass Distribution:");
            for (int att = 0; att < this.numAtt; att++) {
                if (this.attNamesVal[att][0].equalsIgnoreCase("class")) {
                    for (int val = 1; val < this.attNamesVal[att].length; val++) {
                        brOFile.newLine();
                        brOFile.write("%\t\t"+val+". "+this.attNamesVal[att][val]+": "+this.attValUse[att][val-1]+" instances");
                    }
                }
            }
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            brOFile.write("%\tNum Instances:\t"+this.numInstance);
            brOFile.newLine();
            brOFile.write("%\tNum Attributes:\t"+this.numAtt);
            brOFile.newLine();
            brOFile.write("%\tNum Continuous:\t0 (Int 0 / Real 0)");
            brOFile.newLine();
            brOFile.write("%\tNum Discrete:\t"+this.numAtt);
            brOFile.newLine();
            brOFile.write("%\tMissing Values:\t"+this.totalMissing+" / "+((float)this.totalMissing*100/(this.numAtt*this.numInstance))+"%");
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            
            // The stats table: one fixed-column row per attribute
            brOFile.write("%\tname                    type     enum        ints  real  missing          distinct");
            for (int att = 0; att < this.numAtt; att++) {
                float missPercent = this.missCount[att]*(float)100/this.numInstance;
                
                // Counting the declared values that were used at least once
                int distinct = 0;
                for (int val = 0; val < this.attValUse[att].length; val++)
                    if (this.attValUse[att][val] != 0)
                        distinct++;
                
                String line = padTo((att+1)+" '"+this.attNamesVal[att][0]+"'", 24);
                line = padTo(line.concat("Enum"), 33);
                line = padTo(line.concat((100-missPercent)+"%"), 45);
                line = padTo(line.concat("0%"), 51);
                line = padTo(line.concat("0%"), 57);
                line = padTo(line.concat(this.missCount[att]+" / "+missPercent+"%"), 74);
                line = line.concat(Integer.toString(distinct));
                
                brOFile.newLine();
                brOFile.write("%\t");
                brOFile.write(line);
            }
            
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            brOFile.write("%");
            brOFile.newLine();
            brOFile.write("%");
            
            brOFile.flush();
        } finally {
            // Always release the output file, even when a write fails
            brOFile.close();
        }
    }
}



