% Leave-one-out comparison of treeK, the second GAC tree and nnk (k = 1, 3, 5,
% 7, 9 and the k returned by bestk) on several datasets.
%
% For every name in allMyDatasets the script
%   1. loads <name>.csv (searched on the MATLAB path) into `dataset`,
%   2. repeats numRuns times: shuffle the rows with randomizeDataset, then use
%      each row in turn as the test instance and all other rows as `train`,
%   3. stores the outputs of every estimator in numRuns-by-numInstances
%      matrices (row = repetition, column = position in the shuffled dataset),
%   4. saves the complete workspace to
%      teak_odd_numbered_k_experiment_workspaces/<name>.mat
%
% To process only some datasets, edit the list below.
% Requires the project functions bestk, randomizeDataset, treeK, secondGac
% and nnk, which are defined elsewhere.

% Dataset base names WITHOUT the .csv extension (it is appended when loading).
allMyDatasets = {'coc81_1_1', 'coc81_1_1_organic', 'coc81_1_1_embedded', ...
                 'ISBSG_Banking_Applications', 'SDR_Banking', 'COCOMO_NASA93', ...
                 'nasa93_center_5', 'nasa93_center_2', 'albrecht', ...
                 'COCOMOII_SDR', 'desharnais'};

% number of randomized repetitions per dataset
numRuns = 20;

% folder that receives one saved workspace per dataset
outputFolder = 'teak_odd_numbered_k_experiment_workspaces';
if ~exist(outputFolder, 'dir')
    mkdir(outputFolder);
end

for datasetCounter = 1:numel(allMyDatasets)

    datasetName = allMyDatasets{datasetCounter};

    % functional form of load returns the numeric matrix directly, so no
    % eval / temporary variable / clear is needed
    dataset = load([datasetName, '.csv'], '-ascii');

    numInstances = size(dataset, 1);

    % define the variables to keep track of.. well pretty much everything;
    % -1 marks a cell that has not been filled in yet
    unsetMatrix = -1 * ones(numRuns, numInstances);

    selectedBestLevels               = unsetMatrix;
    varianceOfSelectedBestPlaces     = unsetMatrix;
    varianceOfSelectedBestPlacesGAC2 = unsetMatrix;
    %selectedBestKValues             = unsetMatrix;
    actuals                          = unsetMatrix;

    pred0 = unsetMatrix;            mre0 = unsetMatrix;
    predGac2 = unsetMatrix;         mreGac2 = unsetMatrix;
    instanceSizeGac2 = unsetMatrix;
    pred1 = unsetMatrix;            mre1 = unsetMatrix;
    pred3 = unsetMatrix;            mre3 = unsetMatrix;
    pred5 = unsetMatrix;            mre5 = unsetMatrix;
    pred7 = unsetMatrix;            mre7 = unsetMatrix;
    pred9 = unsetMatrix;            mre9 = unsetMatrix;
    predx = unsetMatrix;            mrex = unsetMatrix;
    predFloatingK = unsetMatrix;    mreFloatingK = unsetMatrix;
    floatingKValues = unsetMatrix;

    % one full row (features + last column) per (repetition, instance) pair,
    % stored in processing order; preallocated so that a retried instance
    % overwrites its own row instead of appending a duplicate
    actualsWithFeatures = -1 * ones(numRuns * numInstances, size(dataset, 2));

    % a dummy to keep track of whether the best k value was found
    bestKFound = 0;

    % find the best k value for the train data
    if bestKFound == 0
        myBestK = bestk(dataset);
    end

    % repeat numRuns times
    for counter = 1:numRuns

        % randomize dataset
        dataset = randomizeDataset(dataset);

        % for each row in the randomized dataset do the following
        % (a while loop is used because a row may have to be retried, see below)
        i = 0;
        while i < numInstances
            % increment i by 1
            i = i + 1;

            % pick up the row
            myRow = dataset(i, :);

            % record its last column into actuals
            actuals(counter, i) = myRow(1, size(myRow, 2));

            % record also the features of that instance (since a lot of
            % instances have the same effort value, the features are needed
            % to tell them apart)
            actualsWithFeatures((counter - 1) * numInstances + i, :) = myRow(1, :);

            % train is the dataset minus the selected row
            train = dataset;
            train(i, :) = [];

            % now start predictions
            % the one below -treeK- is our guy to defend
            [pred0(counter, i), mre0(counter, i), ...
             selectedBestLevels(counter, i), ...
             varianceOfSelectedBestPlaces(counter, i), ...
             gac2Tree, gac2Root] = treeK(myRow, train);

            % at this point the GAC tree has been built by treeK and the
            % instance can be checked against the second tree
            [predGac2(counter, i), mreGac2(counter, i), ...
             instanceSizeGac2(counter, i), ...
             varianceOfSelectedBestPlacesGAC2(counter, i)] = ...
                secondGac(gac2Tree, gac2Root, myRow);

            if predGac2(counter, i) == -1
                % meaning there were not enough instances for gac2 tree:
                % step back so the same row is processed again.
                % NOTE(review): row and train are unchanged on the retry, so
                % this only terminates if treeK/secondGac are non-deterministic
                % - confirm, otherwise this loops forever.
                i = i - 1;
                continue;
            end

            % below are the ones for various k values
            [pred1(counter, i), mre1(counter, i)] = nnk(myRow, train, 1);
            [pred3(counter, i), mre3(counter, i)] = nnk(myRow, train, 3);
            [pred5(counter, i), mre5(counter, i)] = nnk(myRow, train, 5);
            [pred7(counter, i), mre7(counter, i)] = nnk(myRow, train, 7);
            [pred9(counter, i), mre9(counter, i)] = nnk(myRow, train, 9);

            % floatingKValues(counter,i) = bestFloatingK(myRow,train);
            %[predFloatingK(counter,i), mreFloatingK(counter,i)] = nnk(myRow,train,floatingKValues(counter,i));

            % k chosen once per dataset by bestk
            [predx(counter, i), mrex(counter, i)] = nnk(myRow, train, myBestK);
        end

        % progress report
        fprintf('%s: finished repetition %d of %d\n', datasetName, counter, numRuns);
    end

    % save workspace (all variables) as <outputFolder>/<datasetName>.mat
    save(fullfile(outputFolder, datasetName));
end