package Experiment; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Random; import java.util.Scanner; import Exceptions.UnsupportedFileTypeException; public class Dataset { private String filename; private String name; private ArrayList rows; private ArrayList headers; public Dataset(String filename) { this.filename = filename; } public Dataset(String filename, String name) { this.filename = filename; this.name = name; } public void load() throws UnsupportedFileTypeException, FileNotFoundException { File temp = new File(filename); Scanner reader = new Scanner(temp); rows = new ArrayList(); headers = new ArrayList(); if (isARFF()) { String tempString; while (reader.hasNext()) { tempString = reader.next(); if (tempString.startsWith("%")) { //do nothing } else if (tempString.startsWith("@")) { headers.add(tempString); } else if (tempString.length() > 1) { rows.add(tempString); } } } else if (isCSV()) { while (reader.hasNext()) { rows.add(reader.next()); } } else { throw new UnsupportedFileTypeException(); } reader.close(); } public void splitThreeWay(long splitSeed, String trainingData, String testingData) throws UnsupportedFileTypeException, IOException { this.load(); FileWriter writeTraining = new FileWriter(trainingData); FileWriter writeTesting = new FileWriter(testingData); Random randomSeed = new Random(splitSeed); ArrayList copyData = new ArrayList(); copyData.addAll(0, rows); Collections.shuffle(copyData, randomSeed); for (String header : headers) { writeTraining.write(header+"\n"); writeTesting.write(header+"\n"); } for (int i = 0; i < 2 *(copyData.size() / 3); i++) { if (i < copyData.size() / 3) writeTraining.write(copyData.get(i)+"\n"); else writeTesting.write(copyData.get(i)+"\n"); } writeTraining.close(); writeTesting.close(); } public boolean isARFF() { if(filename.toLowerCase().contains(".arf")) return true; return false; } public boolean isCSV() { if(filename.toLowerCase().contains(".csv")) return true; return false; } public String getFilename() { return filename; } public void writeTest(String testData, String location) throws IOException, UnsupportedFileTypeException { this.load(); File f = new File(location); FileWriter fw = new FileWriter(f); for (String header : headers) fw.write(header+"\n"); fw.write(testData); fw.close(); } public ArrayList effortValues() throws FileNotFoundException, UnsupportedFileTypeException { this.load(); ArrayList efforts = new ArrayList(); String[] temp; for (String row : rows) { temp = row.split(","); if (temp.length >= 1) efforts.add(temp[temp.length - 1]); } return efforts; } public void setName(String name) { this.name = name; } public String getName() { return name; } }