% Leave-one-out comparison of the GAC-tree estimator (treeK / secondGac)
% against k-nearest-neighbour baselines (nnk) on one effort dataset.
%
% For each of numRepeats randomized orderings of the dataset, every row is
% held out in turn, the remaining rows form the training set, and each
% estimator's prediction and MRE for the held-out row are stored at
% (repeat, row) in the result matrices defined below.

% below are the load commands for different databases
% depending on which database you would like to be processed
% just uncomment the related lines
load coc81_1_1.csv;
dataset = coc81_1_1;
clear coc81_1_1;

% load coc81_1_1_organic.csv;
% dataset = coc81_1_1_organic;
% clear coc81_1_1_organic;

% load coc81_1_1_embedded.csv;
% dataset = coc81_1_1_embedded;
% clear coc81_1_1_embedded;

% load ISBSG_Banking_Applications.csv;
% dataset = ISBSG_Banking_Applications;
% clear ISBSG_Banking_Applications;

% load SDR_Banking.csv;
% dataset = SDR_Banking;
% clear SDR_Banking;

% load COCOMO_NASA93.csv;
% dataset = COCOMO_NASA93;
% clear COCOMO_NASA93;

% load nasa93_center_5.csv;
% dataset = nasa93_center_5;
% clear nasa93_center_5;

% load nasa93_center_2.csv;
% dataset = nasa93_center_2;
% clear nasa93_center_2;

% load nasa93_between70_80.csv;
% dataset = nasa93_between70_80;
% clear nasa93_between70_80;

% load nasa93_between81_90.csv;
% dataset = nasa93_between81_90;
% clear nasa93_between81_90;

% load albrecht.csv;
% dataset = albrecht;
% clear albrecht;

% load COCOMOII_SDR.csv;
% dataset = COCOMOII_SDR;
% clear COCOMOII_SDR;

% load desharnais.csv;
% dataset = desharnais;
% clear desharnais;

% experiment parameters
numRepeats    = 20;    % number of randomized passes over the dataset
maxGacRetries = 1000;  % upper bound on re-running treeK/secondGac for one row
numRows       = size(dataset,1);
numCols       = size(dataset,2);

% define the variables to keep track of.. well pretty much everything
% every result matrix is numRepeats-by-numRows and starts out filled with
% the sentinel -1 (meaning "not computed yet")
unsetResults = -1 * ones(numRepeats,numRows);

selectedBestLevels               = unsetResults;
varianceOfSelectedBestPlaces     = unsetResults;
varianceOfSelectedBestPlacesGAC2 = unsetResults;
%selectedBestKValues = unsetResults;
actuals                          = unsetResults;

% one row per processed (repeat,row) pair, in processing order; preallocated
% instead of being grown by concatenation inside the loops
actualsWithFeatures = zeros(numRepeats*numRows,numCols);

pred0            = unsetResults;
mre0             = unsetResults;
predGac2         = unsetResults;
mreGac2          = unsetResults;
instanceSizeGac2 = unsetResults;
pred1            = unsetResults;
mre1             = unsetResults;
pred2            = unsetResults;
mre2             = unsetResults;
pred4            = unsetResults;
mre4             = unsetResults;
pred8            = unsetResults;
mre8             = unsetResults;
pred16           = unsetResults;
mre16            = unsetResults;
predx            = unsetResults;
mrex             = unsetResults;
predFloatingK    = unsetResults;   % stays at -1: its nnk call is commented out below
mreFloatingK     = unsetResults;   % stays at -1: its nnk call is commented out below
floatingKValues  = unsetResults;

% a dummy to keep track of whether the best k value was found
bestKFound = 0;

% find the best k value; note it is computed once, on the full dataset,
% before any row is held out
if bestKFound == 0
    myBestK = bestk(dataset);
end

% repeat numRepeats times
for counter = 1:numRepeats
    % randomize dataset
    dataset = randomizeDataset(dataset);

    % for each row in the randomized dataset do the following
    for i = 1:numRows
        % pick up the row
        myRow = dataset(i,:);

        % record the value in its last column into actuals
        actuals(counter,i) = myRow(1,size(myRow,2));

        % train is the dataset minus the selected row
        train = dataset;
        train(i,:) = [];

        % now start predictions
        % the one below -treeK- is our guy to defend.
        % secondGac signals "not enough instances for the gac2 tree" by
        % returning -1 as its prediction; in that case both calls are
        % repeated for the same row. The repetition is bounded so that a
        % row which can never succeed stops the run with a clear error
        % instead of looping forever.
        gac2Succeeded = false;
        for attempt = 1:maxGacRetries
            [pred0(counter,i), mre0(counter,i), selectedBestLevels(counter,i), ...
                varianceOfSelectedBestPlaces(counter,i), gac2Tree, gac2Root] = treeK(myRow,train);

            % at this point we have built our GAC tree and we can check
            % the instance with the second tree
            [predGac2(counter,i), mreGac2(counter,i), instanceSizeGac2(counter,i), ...
                varianceOfSelectedBestPlacesGAC2(counter,i)] = secondGac(gac2Tree, gac2Root, myRow);

            if predGac2(counter,i) ~= -1
                gac2Succeeded = true;
                break;
            end
        end

        if ~gac2Succeeded
            error('gacExperiment:gac2RetryLimit', ...
                'secondGac returned -1 for repeat %d, row %d on all %d attempts.', ...
                counter, i, maxGacRetries);
        end

        % record also the features of that instance (since a lot of
        % instances have the same effort value, the features are needed
        % too). Written once per row, after the GAC step succeeded, so the
        % rows stay aligned with the entries of actuals.
        actualsWithFeatures((counter-1)*numRows + i,:) = myRow(1,:);

        % below are the ones for various k values
        [pred1(counter,i),  mre1(counter,i)]  = nnk(myRow,train,1);
        [pred2(counter,i),  mre2(counter,i)]  = nnk(myRow,train,2);
        [pred4(counter,i),  mre4(counter,i)]  = nnk(myRow,train,4);
        [pred8(counter,i),  mre8(counter,i)]  = nnk(myRow,train,8);
        [pred16(counter,i), mre16(counter,i)] = nnk(myRow,train,16);

        floatingKValues(counter,i) = bestFloatingK(myRow,train);
        %[predFloatingK(counter,i), mreFloatingK(counter,i)] = nnk(myRow,train,floatingKValues(counter,i));

        [predx(counter,i), mrex(counter,i)] = nnk(myRow,train,myBestK);
    end

    % intentionally left without a semicolon: echoes the repeat number as
    % a progress indicator
    counter
end