% Experiment driver: for every dataset, every (pre-processor, algorithm)
% pair is evaluated with a 10-times-repeated 3-way cross-validation.
% Predictions of all pairs are collected in algorithmResults, handed to
% orderAlgorithms once per error measure, and the per-dataset workspace is
% saved under workspaces/3Way/<dataset>.mat.
%
% External functions used (defined outside this script): divideInto3,
% applyPreProcess, algorithmRunner10By3, orderAlgorithms.

% define datasets of experiment (each one is read from '<name>.csv')
myDatasets = {'cocomo81o','cocomo81s','cocomo81','cocomo81e', ...
              'desharnaisL1', ...
              'desharnais','desharnaisL2', ...
              'desharnaisL3', ...
              'nasa93','nasa93_center_1','nasa93_center_2','nasa93_center_5', ...
              'sdr', ...
              'albrecht', ...
              'finnish','kemerer','maxwell','miyazaki94','telecom1', ...
              'china'};

% define pre-processors of experiment
myPreProcessors = {'none','log', ...
                   'freq3bin','freq5bin','width3bin','width5bin', ...
                   'norm', ...
                   'SWReg','PCA','SFS'};

% define algorithms of experiment ('TEAK' is intentionally left out)
myAlgorithms = {'SWReg','SLReg', ...
                'CART-On','CART-Off', ...
                'PLSR','PCR', ...
                '1NN','ABE0', ...
                'NNet'};

% define error-measures of experiment
myErrorMeasures = {'MAR','MMRE','MMER','MBRE','MIBRE','MDMRE','Pred25'};

% define the mixture-of-expert (MoE) methods: every MoE algorithm is paired
% with every "TopN" selection, so there are 3 * 4 = 12 MoE methods
moePreProcessors = {'Top2','Top4','Top8','Top16'};
moeAlgorithms    = {'MoEMean','MoEMedian','MoEIrwm'};
MoESize = numel(moeAlgorithms) * numel(moePreProcessors);

% store size of each of the above variables
datasetSize      = size(myDatasets,2);
preProcessorSize = size(myPreProcessors,2);
errorMeasureSize = size(myErrorMeasures,2);
algorithmSize    = size(myAlgorithms,2);

% number of solo runs per dataset: one per (pre-processor, algorithm) pair
randomRunSize = algorithmSize * preProcessorSize;

% cross-validation layout: numRepeats random 3-way splits per dataset
numRepeats = 10;
numFolds   = 3;   % fixed by divideInto3, which returns exactly three folds

% delete previous result files by opening them with write permission and
% closing them again (this truncates them to zero length)
filesToReset = {'separateLossValues.txt','separateWinValues.txt', ...
                'sumLossValues.txt','sumWinValues.txt', ...
                'mdmreValues.txt','pred25Values.txt'};
for i = 1:errorMeasureSize
    filesToReset{end+1} = sprintf('myErrorMeasure%dLossValues.txt', i); %#ok<SAGROW>
    filesToReset{end+1} = sprintf('myErrorMeasure%dWinValues.txt', i);  %#ok<SAGROW>
end
for i = 1:numel(filesToReset)
    [fileId, openMsg] = fopen(filesToReset{i}, 'w');
    if fileId == -1
        error('experiment:cannotResetFile', ...
              'Could not open %s for writing: %s', filesToReset{i}, openMsg);
    end
    fclose(fileId);
end

% define the method table: rows 1..randomRunSize are the solo methods,
% rows randomRunSize+1..randomRunSize+MoESize are the MoE methods
fixedPreProcessors = cell(randomRunSize+MoESize,1);
fixedAlgorithms    = cell(randomRunSize+MoESize,1);

% deterministic mapping: pre-processors in the outer loop, algorithms in
% the inner loop, so consecutive rows share the same pre-processor
for p = 1:preProcessorSize
    for a = 1:algorithmSize
        slot = (p-1)*algorithmSize + a;
        fixedPreProcessors(slot) = myPreProcessors(p);
        fixedAlgorithms(slot)    = myAlgorithms(a);
    end
end

% MoE rows: for each MoE algorithm, Top2, Top4, Top8, Top16 in that order
slot = randomRunSize;
for a = 1:numel(moeAlgorithms)
    for p = 1:numel(moePreProcessors)
        slot = slot + 1;
        fixedPreProcessors(slot) = moePreProcessors(p);
        fixedAlgorithms(slot)    = moeAlgorithms(a);
    end
end

% make sure the output directory exists before any long computation starts
outputDir = fullfile('workspaces','3Way');
if ~exist(outputDir,'dir')
    mkdir(outputDir);
end

% for each dataset in the study
for datasetCounter = 1:datasetSize
    datasetName = char(myDatasets(datasetCounter));

    % read the numeric matrix from '<name>.csv'
    dataset = load([datasetName '.csv']);

    clc;
    fprintf('Dataset: %s\n', datasetName);   % print out the dataset name

    % placeholders for loss / win values (initialised to -1)
    % NOTE(review): nothing in this script updates these two matrices before
    % they are saved below; the per-measure results are accumulated in
    % lossValuesSum / winValuesSum / tieValuesSum instead - confirm intent.
    lossValues = -1 * ones(errorMeasureSize,randomRunSize+MoESize);
    winValues  = -1 * ones(errorMeasureSize,randomRunSize+MoESize);

    % one row of predictions per method, one column per test instance over
    % all repeats (numRepeats * number of instances)
    algorithmResults = -1 * ones(randomRunSize+MoESize, numRepeats*size(dataset,1));

    %%%%%%%%%%% START 10 BY 3
    % randomize the dataset and divide it into 3 folds, numRepeats times.
    % The splits are created once per dataset and reused by every method so
    % that all methods are evaluated on identical test instances.
    folds = cell(numRepeats, numFolds);
    actual10by3values = [];
    for timesCounter = 1:numRepeats
        [folds{timesCounter,1}, folds{timesCounter,2}, folds{timesCounter,3}] = ...
            divideInto3(dataset);
        for foldCol = 1:numFolds
            currentFold = folds{timesCounter,foldCol};
            % last column of each fold, in test order
            % (presumably the actual value being predicted - confirm)
            actual10by3values = [actual10by3values, currentFold(:,end)']; %#ok<AGROW>
        end
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%% ALGORITHMS AND PRE-PROCESSORS RUN BELOW %%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    for runIndex = 1:randomRunSize
        clc;
        fprintf('Dataset: %s - run %d of %d\n', datasetName, runIndex, randomRunSize);

        % pick up the pre-processor and the algorithm on the queue
        currentPreProcessor = char(fixedPreProcessors(runIndex));
        currentAlgorithm    = char(fixedAlgorithms(runIndex));

        % store algorithm results temporarily
        myTempResults = [];

        for timesCounter = 1:numRepeats
            fold1 = folds{timesCounter,1};
            fold2 = folds{timesCounter,2};
            fold3 = folds{timesCounter,3};
            n1 = size(fold1,1);
            n2 = size(fold2,1);
            n3 = size(fold3,1);

            % apply the pre-processor to all folds at once, then cut the
            % result back into the three folds by row count
            processedFolds = applyPreProcess([fold1;fold2;fold3], currentPreProcessor);
            processed = { processedFolds(1:n1,:), ...
                          processedFolds(n1+1:n1+n2,:), ...
                          processedFolds(n1+n2+1:n1+n2+n3,:) };

            % each fold is the test set once; the other two form the
            % training set (kept in ascending fold order)
            for foldCounter = 1:numFolds
                trainFolds    = setdiff(1:numFolds, foldCounter);
                testInstances = processed{foldCounter};
                trainSet10by3 = vertcat(processed{trainFolds});

                % trailing 0 means without GAC pruning
                myTempResults = [myTempResults, ...
                    algorithmRunner10By3(testInstances, trainSet10by3, ...
                                         currentAlgorithm, currentPreProcessor, 0)]; %#ok<AGROW>
            end
        end

        % now record the temp results
        algorithmResults(runIndex,:) = myTempResults;
    end

    % The MoE rows (top 2,4,8,16 solo-methods combined by mean, median and
    % IRWM) are reserved in the tables above but are not computed in this
    % script; only the solo methods are compared below.

    % for each error measure
    winValuesSum  = [];
    lossValuesSum = [];
    tieValuesSum  = [];
    for errorCounter = 1:errorMeasureSize
        tmpErrorMeasure = char(myErrorMeasures(errorCounter));

        % at this point we have the predictions of all the algorithms
        % now we can compare these results and store their loss values
        [tmpLoss, tmpWin, tmpTie] = orderAlgorithms( ...
            algorithmResults(1:randomRunSize,:), actual10by3values', ...
            tmpErrorMeasure, fixedAlgorithms(1:randomRunSize,:), ...
            fixedPreProcessors(1:randomRunSize,:), myDatasets(datasetCounter));

        winValuesSum  = [winValuesSum,  tmpWin];  %#ok<AGROW>
        lossValuesSum = [lossValuesSum, tmpLoss]; %#ok<AGROW>
        tieValuesSum  = [tieValuesSum,  tmpTie];  %#ok<AGROW>
    end

    % now save the workspace for this particular dataset.
    % bestMethodIndices is not assigned anywhere in this script, and save()
    % raises an error for a missing variable (which would discard the whole
    % dataset run), so it is only included when it actually exists.
    varsToSave = {'algorithmResults','actual10by3values','winValues','lossValues'};
    if exist('bestMethodIndices','var')
        varsToSave{end+1} = 'bestMethodIndices';
    end
    save(fullfile(outputDir, [datasetName '.mat']), varsToSave{:});
end