% Leave-one-out experiment driver.
%
% Loads one dataset and then, numRepeats times, shuffles its rows and treats
% every row in turn as the test instance with all remaining rows as the
% training set.  For each (repetition, row) pair the outputs of treeK,
% secondGac, nnk (k = 1, 2, 4, 8, 16 and myBestK) and bestFloatingK are stored
% in numRepeats-by-numRows matrices.  Every result matrix is pre-filled with
% -1, so a -1 entry means "not computed".
%
% Below is the load command for the database to be processed.  To process a
% different database, point dataFile at the related file.
% fullfile builds the path with the platform's own separator; on Windows this
% yields the same 'MSR Data and Results\coc81_1_1.csv' path as before.
dataFile = fullfile('MSR Data and Results', 'coc81_1_1.csv');

% Function-form load with an output argument returns the numeric matrix
% directly, so no intermediate workspace variable has to be copied/cleared.
% assumes the csv is purely numeric (no header row) -- TODO confirm
dataset = load(dataFile, '-ascii');

% number of shuffle-and-evaluate repetitions
numRepeats = 20;

% define the variables to keep track of.. well pretty much everything
% (one row per repetition, one column per dataset row, -1 = not computed)
sentinelGrid = -1 * ones(numRepeats, size(dataset,1));

selectedBestLevels = sentinelGrid;
varianceOfSelectedBestPlaces = sentinelGrid;
varianceOfSelectedBestPlacesGAC2 = sentinelGrid;
%selectedBestKValues = sentinelGrid;
actuals = sentinelGrid;

% One row per processed instance, in (repetition, row) order.  Preallocated
% and written by index (instead of being appended to) so that a retried row
% overwrites its own slot rather than adding a duplicate, which keeps this
% matrix aligned with actuals.
actualsWithFeatures = -1 * ones(numRepeats * size(dataset,1), size(dataset,2));

pred0 = sentinelGrid;
mre0 = sentinelGrid;
predGac2 = sentinelGrid;
mreGac2 = sentinelGrid;
instanceSizeGac2 = sentinelGrid;
pred1 = sentinelGrid;
mre1 = sentinelGrid;
pred2 = sentinelGrid;
mre2 = sentinelGrid;
pred4 = sentinelGrid;
mre4 = sentinelGrid;
pred8 = sentinelGrid;
mre8 = sentinelGrid;
pred16 = sentinelGrid;
mre16 = sentinelGrid;
predx = sentinelGrid;
mrex = sentinelGrid;
predFloatingK = sentinelGrid;
mreFloatingK = sentinelGrid;
floatingKValues = sentinelGrid;

% a dummy to keep track of whether the best k value was found
bestKFound = 0;

% find the best k value for the whole dataset
% (bestKFound is set to 0 just above, so this branch is always taken)
if bestKFound == 0
    myBestK = bestk(dataset);
end

% index of the last actualsWithFeatures row written by earlier repetitions
featureRowOffset = 0;

% repeat numRepeats times
for counter = 1:numRepeats

    % randomize dataset
    dataset = randomizeDataset(dataset);
    numRows = size(dataset,1);

    % for each row in the randomized dataset do the following
    % (a while loop is used because the retry branch below steps i back)
    i = 0;
    while i < numRows

        % increment i by 1
        i = i + 1;

        % pick up the row
        myRow = dataset(i,:);

        % record its last column into actuals
        actuals(counter,i) = myRow(1,size(myRow,2));

        % record also the features of that instance (since a lot of
        % instances have the same effort value, the features are needed
        % to tell them apart)
        actualsWithFeatures(featureRowOffset + i, :) = myRow(1,:);

        % train is the dataset minus the selected row
        train = dataset;
        train(i,:) = [];

        % now start predictions
        % the one below -treeK- is our guy to defend
        [pred0(counter,i), mre0(counter,i), selectedBestLevels(counter,i), ...
            varianceOfSelectedBestPlaces(counter,i), gac2Tree, gac2Root] = treeK(myRow,train);

        % at this point treeK has handed back gac2Tree/gac2Root and we can
        % check the instance with the second tree
        [predGac2(counter,i), mreGac2(counter,i), instanceSizeGac2(counter,i), ...
            varianceOfSelectedBestPlacesGAC2(counter,i)] = secondGac(gac2Tree, gac2Root, myRow);

        if predGac2(counter,i) == -1
            % meaning there were not enough instances for gac2 tree:
            % step back so the same row is processed again.
            % NOTE(review): the retry uses the same myRow and train; if treeK
            % and secondGac are deterministic this never terminates -- confirm
            % that they are randomized, or add a retry limit.
            i = i - 1;
            continue;
        end

        % below are the ones for various k values
        [pred1(counter,i), mre1(counter,i)] = nnk(myRow,train,1);
        [pred2(counter,i), mre2(counter,i)] = nnk(myRow,train,2);
        [pred4(counter,i), mre4(counter,i)] = nnk(myRow,train,4);
        [pred8(counter,i), mre8(counter,i)] = nnk(myRow,train,8);
        [pred16(counter,i), mre16(counter,i)] = nnk(myRow,train,16);
        floatingKValues(counter,i) = bestFloatingK(myRow,train);
        %[predFloatingK(counter,i), mreFloatingK(counter,i)] = nnk(myRow,train,floatingKValues(counter,i));
        [predx(counter,i), mrex(counter,i)] = nnk(myRow,train,myBestK);
    end

    featureRowOffset = featureRowOffset + numRows;

    % intentionally left without a semicolon: echoes the repetition number
    % to the command window as a progress indicator
    counter
end