% Experiment driver: repeats a leave-one-out evaluation numRuns times over
% `dataset`.  In every repetition the rows are re-shuffled, each row in turn
% is held out as the test instance, and the remaining rows are handed to
% treeK, secondGac and nnk (for several k).  All results are stored in
% numRuns-by-numInstances matrices indexed as (repetition, row position in
% the shuffled dataset).  Cells that are never written keep the value -1.

% ------------------------------------------------------------------------
% Reference data for the three centers.  These loads must NOT be commented
% out: center1/center2/center5 are passed to findMyClassStatistics below to
% decide on the class labels.
% ------------------------------------------------------------------------
load nasa93_center_1.csv;
center1 = nasa93_center_1;
clear nasa93_center_1;

load nasa93_center_2.csv;
center2 = nasa93_center_2;
clear nasa93_center_2;

load nasa93_center_5.csv;
center5 = nasa93_center_5;
clear nasa93_center_5;
%%%%%%%%%%%%%%%%%%%% DO NOT ALTER THE ABOVE SEGMENT %%%%%%%%%%%%%%%%%%%%%%%

% ------------------------------------------------------------------------
% Dataset selection: depending on which dataset we want to experiment on,
% uncomment exactly ONE of the load groups below.
% ------------------------------------------------------------------------
% load COCOMO_NASA93.csv;
% dataset = COCOMO_NASA93;
% clear COCOMO_NASA93;

% load nasa93_center_1.csv;
% dataset = nasa93_center_1;
% clear nasa93_center_1;

% load nasa93_center_2.csv;
% dataset = nasa93_center_2;
% clear nasa93_center_2;

% load nasa93_center_5.csv;
% dataset = nasa93_center_5;
% clear nasa93_center_5;

% load nasa93_all_but_center_1.csv;
% dataset = nasa93_all_but_center_1;
% clear nasa93_all_but_center_1;

% load nasa93_all_but_center_2.csv;
% dataset = nasa93_all_but_center_2;
% clear nasa93_all_but_center_2;

% load nasa93_all_but_center_5.csv;
% dataset = nasa93_all_but_center_5;
% clear nasa93_all_but_center_5;

% Fail early, with an actionable message, when no dataset was selected.
% Without this check the first use of `dataset` fails with an unrelated
% looking error -- or, on installations where `dataset` also resolves to a
% function/class, the script may run on an empty object instead of failing.
if ~exist('dataset', 'var')
    error('experiment:datasetMissing', ...
          'No dataset selected: uncomment one of the dataset load groups (or define the variable "dataset") before running this script.');
end

% ------------------------------------------------------------------------
% Define the variables to keep track of... well, pretty much everything.
% ------------------------------------------------------------------------
numRuns      = 20;                              % number of repetitions
numInstances = size(dataset,1);                 % one column per dataset row
unfilled     = -1 * ones(numRuns,numInstances); % template: -1 == "not set"

% outputs of treeK / secondGac
selectedBestLevels               = unfilled;
varianceOfSelectedBestPlaces     = unfilled;
varianceOfSelectedBestPlacesGAC2 = unfilled;
%selectedBestKValues = unfilled;

% last column of every held-out row, plus the full rows themselves
actuals            = unfilled;
actualsWithFeatures = [];

% prediction / mre pairs, one pair per estimator
pred0    = unfilled;   mre0    = unfilled;   % treeK
predGac2 = unfilled;   mreGac2 = unfilled;   % secondGac
instanceSizeGac2 = unfilled;
pred1    = unfilled;   mre1    = unfilled;   % nnk, k = 1
pred2    = unfilled;   mre2    = unfilled;   % nnk, k = 2
pred4    = unfilled;   mre4    = unfilled;   % nnk, k = 4
pred8    = unfilled;   mre8    = unfilled;   % nnk, k = 8
pred16   = unfilled;   mre16   = unfilled;   % stays -1: the k = 16 call is commented out below
predx    = unfilled;   mrex    = unfilled;   % nnk, k = myBestK
predFloatingK   = unfilled;                  % stays -1: the floating-k nnk call is commented out below
mreFloatingK    = unfilled;
floatingKValues = unfilled;                  % bestFloatingK result per instance

% variables for the new chinese data related experiments
predictionZoneSize        = unfilled;
testInstanceType          = unfilled;
type1SizeInPredictionZone = unfilled;
type2SizeInPredictionZone = unfilled;
type5SizeInPredictionZone = unfilled;

clear unfilled;

% a dummy to keep track of whether the best k value was found
bestKFound = 0;

% Find the best k value once, up front.
% NOTE(review): bestk receives the full dataset (including every row that is
% later held out), although the original comment said "train data" --
% confirm that this is intended.
if bestKFound == 0
    myBestK = bestk(dataset);
end

% ------------------------------------------------------------------------
% Repeat the leave-one-out pass numRuns times.
% ------------------------------------------------------------------------
for counter = 1:numRuns
    % randomize dataset
    dataset = randomizeDataset(dataset);

    % A while loop (not a for loop) is required here: the retry branch
    % below steps i back by one so that the same row is processed again.
    i = 0;
    while i < size(dataset,1)
        i = i + 1;

        % pick up the row and record its last column into actuals
        myRow = dataset(i,:);
        actuals(counter,i) = myRow(1,end);

        % train is the dataset minus the selected row
        train = dataset;
        train(i,:) = [];

        % now start predictions
        % the one below -treeK- is our guy to defend
        [pred0(counter,i), mre0(counter,i), ...
         selectedBestLevels(counter,i), ...
         varianceOfSelectedBestPlaces(counter,i), ...
         gac2Tree, gac2Root] = treeK(myRow,train);

        % at this point we have built our GAC tree -above executable line-
        % and we can check the instance with the second tree
        [predGac2(counter,i), mreGac2(counter,i), ...
         instanceSizeGac2(counter,i), ...
         varianceOfSelectedBestPlacesGAC2(counter,i), ...
         predictionZone] = secondGac(gac2Tree, gac2Root, myRow);

        if predGac2(counter,i) == -1
            % meaning there were not enough instances for gac2 tree:
            % step back and redo the same row.
            % NOTE(review): this only terminates if treeK/secondGac can
            % give a different result on a retry with identical inputs --
            % confirm they are non-deterministic.
            i = i - 1;
            continue;
        end

        % Record the features of the accepted instance too (a lot of
        % instances have the same effort value, so the value alone does not
        % identify the row).  This is done AFTER the retry check so that a
        % retried row is appended exactly once and the rows stay aligned
        % with the (counter,i) entries of the result matrices.
        actualsWithFeatures = [actualsWithFeatures; myRow(1,:)]; %#ok<AGROW>

        % now get statistics from the prediction zone
        [predictionZoneSize(counter,i), testInstanceType(counter,i), ...
         type1SizeInPredictionZone(counter,i), ...
         type2SizeInPredictionZone(counter,i), ...
         type5SizeInPredictionZone(counter,i)] = ...
            findMyClassStatistics(myRow, predictionZone, center1, center2, center5);

        % below are the ones for various k values
        [pred1(counter,i), mre1(counter,i)] = nnk(myRow,train,1);
        [pred2(counter,i), mre2(counter,i)] = nnk(myRow,train,2);
        [pred4(counter,i), mre4(counter,i)] = nnk(myRow,train,4);
        [pred8(counter,i), mre8(counter,i)] = nnk(myRow,train,8);
        % [pred16(counter,i), mre16(counter,i)] = nnk(myRow,train,16);
        floatingKValues(counter,i) = bestFloatingK(myRow,train);
        %[predFloatingK(counter,i), mreFloatingK(counter,i)] = nnk(myRow,train,floatingKValues(counter,i));
        [predx(counter,i), mrex(counter,i)] = nnk(myRow,train,myBestK);
    end

    % deliberately left without a semicolon: echoes the repetition number
    % to the command window as a progress indicator
    counter
end