% Experiment driver script.
%
% For every dataset it runs each (pre-processor, algorithm) pair, builds a set
% of multi-methods (mean / median / IRWM over the top 2, 4, 8 and 16 solo
% methods), ranks all methods with orderAlgorithms for every error measure and
% writes the win / loss counts to text files in the current folder. The
% predictions of each dataset are also saved under workspaces/LOO.
%
% External helpers called here but defined elsewhere:
%   applyPreProcess, algorithmRunner, orderAlgorithms, irwm
% Each dataset is expected as a numeric '<name>.csv' file readable by load().

%% ------------------------------------------------------------------------
%  Experiment definition
%  ------------------------------------------------------------------------

% define datasets of experiment
% ('china' was commented out of the original list and stays disabled)
myDatasets = { 'cocomo81o','cocomo81s','cocomo81','cocomo81e',...
    'desharnaisL1','desharnais','desharnaisL2',...
    'desharnaisL3',...
    'nasa93','nasa93_center_1','nasa93_center_2','nasa93_center_5',...
    'sdr',...
    'albrecht',...
    'finnish','kemerer','maxwell','miyazaki94','telecom1'};

% define pre-processors of experiment
myPreProcessors = {'none','log',...
    'freq3bin','freq5bin','width3bin','width5bin',...
    'norm',...
    'SWReg','PCA','SFS'};

% define algorithms of experiment
myAlgorithms = {'SWReg','SLReg',...
    'CART-On','CART-Off',...
    'PLSR','PCR',...
    '1NN','ABE0',...
    'NNet','TEAK'};

% define error-measures of experiment
myErrorMeasures = {'MAR','MMRE','MMER','MBRE','MIBRE','MDMRE','Pred25'};

% multi-method (mixture of experts) definition: every combiner is applied to
% the top-N solo methods for every N in moeTopCounts
moePreProcessors = {'Top2','Top4','Top8','Top16'};
moeAlgorithms = {'MoEMean','MoEMedian','MoEIrwm'};
moeTopCounts = [2 4 8 16];   % N for 'Top2','Top4','Top8','Top16' respectively

% define how many mixture of expert methods there will be
MoESize = 12;
assert(MoESize == numel(moePreProcessors) * numel(moeAlgorithms), ...
    'MoESize must equal numel(moePreProcessors) * numel(moeAlgorithms).');

% one run per (pre-processor, algorithm) pair
randomRunSize = size(myAlgorithms,2) * size(myPreProcessors,2);

% store size of each of the above variables
datasetSize = size(myDatasets,2);
preProcessorSize = size(myPreProcessors,2);
errorMeasureSize = size(myErrorMeasures,2);
algorithmSize = size(myAlgorithms,2);
totalMethodSize = randomRunSize + MoESize;   % solo methods + multi-methods

%% ------------------------------------------------------------------------
%  Truncate the output files of any previous run
%  ------------------------------------------------------------------------

% opening with write permission and closing immediately empties the file
filesToReset = {'separateLossValues.txt','separateWinValues.txt',...
    'sumLossValues.txt','sumWinValues.txt','mdmreValues.txt'};
for i = 1:errorMeasureSize
    filesToReset{end+1} = sprintf('myErrorMeasure%dLossValues.txt', i); %#ok<SAGROW>
    filesToReset{end+1} = sprintf('myErrorMeasure%dWinValues.txt', i);  %#ok<SAGROW>
end
for i = 1:numel(filesToReset)
    resetFid = fopen(filesToReset{i}, 'w');
    assert(resetFid ~= -1, 'Cannot open %s for writing.', filesToReset{i});
    fclose(resetFid);
end

%% ------------------------------------------------------------------------
%  Build the fixed list of methods
%  ------------------------------------------------------------------------

fixedPreProcessors = cell(totalMethodSize,1);
fixedAlgorithms = cell(totalMethodSize,1);

% Despite the historical "rand" names, the mapping is deterministic: the
% algorithm index varies fastest, i.e. run (p-1)*algorithmSize + a uses
% pre-processor p with algorithm a.
[algorithmGrid, preProcessorGrid] = ndgrid(1:algorithmSize, 1:preProcessorSize);
randNum1 = preProcessorGrid(:)';   % pre-processor index of every run
randNum2 = algorithmGrid(:)';      % algorithm index of every run
fixedPreProcessors(1:randomRunSize) = myPreProcessors(randNum1);
fixedAlgorithms(1:randomRunSize) = myAlgorithms(randNum2);

% multi-methods occupy the slots after the solo methods, grouped by combiner:
% MoEMean x Top2..Top16, then MoEMedian x Top2..Top16, then MoEIrwm x Top2..Top16
for combinerIndex = 1:numel(moeAlgorithms)
    for topIndex = 1:numel(moePreProcessors)
        slot = randomRunSize + (combinerIndex-1)*numel(moePreProcessors) + topIndex;
        fixedPreProcessors(slot) = moePreProcessors(topIndex);
        fixedAlgorithms(slot) = moeAlgorithms(combinerIndex);
    end
end

%% ------------------------------------------------------------------------
%  Header rows of the summary files
%  ------------------------------------------------------------------------

sumLossValues = fopen('sumLossValues.txt','a');
assert(sumLossValues ~= -1, 'Cannot open sumLossValues.txt.');
sumWinValues = fopen('sumWinValues.txt','a');
assert(sumWinValues ~= -1, 'Cannot open sumWinValues.txt.');
mdmreValues = fopen('mdmreValues.txt','a');
assert(mdmreValues ~= -1, 'Cannot open mdmreValues.txt.');

fprintf(sumLossValues, '%s', 'SUM OF LOSSES');
fprintf(sumWinValues, '%s', 'SUM OF WINS');
fprintf(mdmreValues, '%s', 'MdMRE Values');

% write the selected methods as ",<pre-processor>-<algorithm>"
% (the original wrote through the already-closed separate*Values handles and
% only reached these files because MATLAB reused the numeric file ids)
for runIndex = 1:totalMethodSize
    methodLabel = [char(fixedPreProcessors(runIndex)) '-' char(fixedAlgorithms(runIndex))];
    fprintf(sumLossValues, ',%s', methodLabel);
    fprintf(sumWinValues, ',%s', methodLabel);
    fprintf(mdmreValues, ',%s', methodLabel);
end

fprintf(sumLossValues, '\n');
fprintf(sumWinValues, '\n');
fprintf(mdmreValues, '\n');
fclose(sumLossValues);
fclose(sumWinValues);
fclose(mdmreValues);

%% ------------------------------------------------------------------------
%  Main loop: one iteration per dataset
%  ------------------------------------------------------------------------

% per error measure / dataset / method win and loss counts
errorMeasureLossValues = zeros(errorMeasureSize, datasetSize, totalMethodSize);
errorMeasureWinValues = zeros(errorMeasureSize, datasetSize, totalMethodSize);

% directory that receives one .mat file per dataset
outputDir = fullfile('workspaces','LOO');
if ~exist(outputDir,'dir')
    mkdir(outputDir);
end

for datasetCounter = 1:datasetSize
    datasetName = char(myDatasets(datasetCounter));
    datasetFile = [datasetName '.csv'];

    % load the raw dataset (numeric text file -> matrix)
    dataset = load(datasetFile);

    clc;
    disp(datasetName);   % print out the dataset name

    % loss / win counts of this dataset, one row per error measure
    lossValues = -1 * ones(errorMeasureSize, totalMethodSize);
    winValues = -1 * ones(errorMeasureSize, totalMethodSize);

    % header row of this dataset in the separate files: dataset name followed
    % by one label per method. All methods are listed (the original stopped at
    % the solo methods although every value row has totalMethodSize columns).
    separateLossValues = fopen('separateLossValues.txt','a');
    assert(separateLossValues ~= -1, 'Cannot open separateLossValues.txt.');
    separateWinValues = fopen('separateWinValues.txt','a');
    assert(separateWinValues ~= -1, 'Cannot open separateWinValues.txt.');
    fprintf(separateLossValues, '%s', datasetName);
    fprintf(separateWinValues, '%s', datasetName);
    for runIndex = 1:totalMethodSize
        methodLabel = [char(fixedPreProcessors(runIndex)) '-' char(fixedAlgorithms(runIndex))];
        fprintf(separateLossValues, ',%s', methodLabel);
        fprintf(separateWinValues, ',%s', methodLabel);
    end
    fprintf(separateLossValues, '\n');
    fprintf(separateWinValues, '\n');
    fclose(separateLossValues);
    fclose(separateWinValues);

    % predictions of every method: one row per method, one column per instance
    algorithmResults = -1 * ones(totalMethodSize, size(dataset,1));

    % ---- solo methods ----------------------------------------------------
    for runIndex = 1:randomRunSize
        % reload dataset to get rid of previous pre-processor effect
        dataset = load(datasetFile);

        % apply the pre-processor on the queue and get the new dataset
        currentPreProcessor = char(fixedPreProcessors(runIndex));
        dataset = applyPreProcess(dataset, currentPreProcessor);

        % run the next algorithm; last argument 0 means without GAC pruning
        % NOTE(review): the original kept a commented-out alternative call
        % passing 1, also labelled "without GAC pruning" - presumably 1 turns
        % pruning on; confirm against algorithmRunner.
        currentAlgorithm = char(fixedAlgorithms(runIndex));
        algorithmResults(runIndex,:) = algorithmRunner(dataset, currentAlgorithm, currentPreProcessor, 0);
    end

    % NOTE(review): from here on 'dataset' is the output of the LAST
    % pre-processor in the queue; its last column is used as the actual
    % values below. Confirm that pre-processing leaves that column usable.
    actualValues = dataset(:,size(dataset,2));

    % ---- rank solo methods to pick the members of the multi-methods -------
    % before going to win-tie-loss calculation over all methods, find the
    % best solo methods; their top 2,4,8,16 are combined by mean, median and
    % IRWM (by Mendes)
    winValuesSum = [];
    lossValuesSum = [];
    tieValuesSum = [];
    for errorCounter = 1:errorMeasureSize
        tmpErrorMeasure = char(myErrorMeasures(errorCounter));
        [tmpLoss, tmpWin, tmpTie] = orderAlgorithms(algorithmResults(1:randomRunSize,:), ...
            actualValues, tmpErrorMeasure, ...
            fixedAlgorithms(1:randomRunSize,:), fixedPreProcessors(1:randomRunSize,:), ...
            myDatasets(datasetCounter));
        winValuesSum = [winValuesSum,tmpWin];    %#ok<AGROW>
        lossValuesSum = [lossValuesSum,tmpLoss]; %#ok<AGROW>
        tieValuesSum = [tieValuesSum,tmpTie];    %#ok<AGROW>
    end

    % (win - loss) per method, summed over the error measures
    tmpWinMinusLossValues = sum((winValuesSum - lossValuesSum)');

    % sort win-loss values and keep the indices of the best methods
    [sortedStuff, bestMethodIndices] = sort(tmpWinMinusLossValues,'descend'); %#ok<ASGLU>
    bestMethodIndices = bestMethodIndices(1:max(moeTopCounts));

    % predictions of the best solo methods, best first
    bestMethodResults = algorithmResults(bestMethodIndices,:);

    % ---- multi-methods ----------------------------------------------------
    % slot layout matches fixedAlgorithms: mean block, median block, IRWM block
    topCountSize = numel(moeTopCounts);
    for topIndex = 1:topCountSize
        topResults = bestMethodResults(1:moeTopCounts(topIndex),:);
        algorithmResults(randomRunSize + topIndex,:) = mean(topResults,1);
        algorithmResults(randomRunSize + topCountSize + topIndex,:) = median(topResults,1);
        algorithmResults(randomRunSize + 2*topCountSize + topIndex,:) = irwm(topResults);
    end

    % ---- rank all methods for each error measure --------------------------
    for errorCounter = 1:errorMeasureSize
        currentErrorMeasure = char(myErrorMeasures(errorCounter));

        % write the error measure name as the row label, then close the files
        % before calling orderAlgorithms (same order as the original script)
        separateLossValues = fopen('separateLossValues.txt','a');
        assert(separateLossValues ~= -1, 'Cannot open separateLossValues.txt.');
        separateWinValues = fopen('separateWinValues.txt','a');
        assert(separateWinValues ~= -1, 'Cannot open separateWinValues.txt.');
        fprintf(separateLossValues, '%s', currentErrorMeasure);
        fprintf(separateWinValues, '%s', currentErrorMeasure);
        fclose(separateLossValues);
        fclose(separateWinValues);

        % compare the predictions of all methods and store loss / win counts
        [lossValues(errorCounter,:), winValues(errorCounter,:)] = orderAlgorithms(algorithmResults, ...
            actualValues, currentErrorMeasure, ...
            fixedAlgorithms, fixedPreProcessors, myDatasets(datasetCounter));

        % append the loss and win values for the current error measure
        separateLossValues = fopen('separateLossValues.txt','a');
        assert(separateLossValues ~= -1, 'Cannot open separateLossValues.txt.');
        separateWinValues = fopen('separateWinValues.txt','a');
        assert(separateWinValues ~= -1, 'Cannot open separateWinValues.txt.');
        for printCounter = 1:size(lossValues,2)
            fprintf(separateLossValues, ', %s', num2str(lossValues(errorCounter,printCounter)));
            fprintf(separateWinValues, ', %s', num2str(winValues(errorCounter,printCounter)));
        end
        fprintf(separateLossValues, '\n');
        fprintf(separateWinValues, '\n');
        fclose(separateLossValues);
        fclose(separateWinValues);

        errorMeasureLossValues(errorCounter,datasetCounter,:) = lossValues(errorCounter,:);
        errorMeasureWinValues(errorCounter,datasetCounter,:) = winValues(errorCounter,:);
    end

    % keep the predictions in a workspace variable named '<dataset>Results'.
    % eval is retained on purpose so that those variables still exist after
    % the script ends; the names come from the fixed myDatasets list above.
    eval([datasetName 'Results = algorithmResults;']);

    % blank lines separate this dataset from the next one
    separateLossValues = fopen('separateLossValues.txt','a');
    assert(separateLossValues ~= -1, 'Cannot open separateLossValues.txt.');
    separateWinValues = fopen('separateWinValues.txt','a');
    assert(separateWinValues ~= -1, 'Cannot open separateWinValues.txt.');
    fprintf(separateLossValues, '\n\n');
    fprintf(separateWinValues, '\n\n');
    fclose(separateLossValues);
    fclose(separateWinValues);

    % ---- per-dataset sums over all error measures -------------------------
    sumLossValues = fopen('sumLossValues.txt','a');
    assert(sumLossValues ~= -1, 'Cannot open sumLossValues.txt.');
    sumWinValues = fopen('sumWinValues.txt','a');
    assert(sumWinValues ~= -1, 'Cannot open sumWinValues.txt.');
    fprintf(sumLossValues, '%s', datasetName);
    fprintf(sumWinValues, '%s', datasetName);

    sums = sum(lossValues,1);
    for tempCounter = 1:totalMethodSize
        fprintf(sumLossValues, ',%s', num2str(sums(tempCounter)));
    end
    fprintf(sumLossValues, '\n');

    sums = sum(winValues,1);
    for tempCounter = 1:totalMethodSize
        fprintf(sumWinValues, ',%s', num2str(sums(tempCounter)));
    end
    fprintf(sumWinValues, '\n');

    fclose(sumLossValues);
    fclose(sumWinValues);

    % ---- save the results of this particular dataset ----------------------
    % (the original listed the misspelled variable 'actualsToSav', which makes
    % save fail, and hard-coded a Windows-only path separator)
    actualsToSave = actualValues;
    save(fullfile(outputDir, [datasetName '.mat']), ...
        'algorithmResults', 'actualsToSave', 'bestMethodIndices');
end

%% ------------------------------------------------------------------------
%  One loss file and one win file per error measure
%  ------------------------------------------------------------------------

% each file gets one row per dataset: "<dataset>,<v1>,<v2>,..."
% (no error measure label is written; the original had that step disabled)
for errorCounter = 1:errorMeasureSize
    lossFileName = sprintf('myErrorMeasure%dLossValues.txt', errorCounter);
    winFileName = sprintf('myErrorMeasure%dWinValues.txt', errorCounter);
    lossFid = fopen(lossFileName, 'a');
    assert(lossFid ~= -1, 'Cannot open %s.', lossFileName);
    winFid = fopen(winFileName, 'a');
    assert(winFid ~= -1, 'Cannot open %s.', winFileName);

    for datasetCounter = 1:datasetSize
        fprintf(lossFid, '%s', char(myDatasets(datasetCounter)));
        fprintf(winFid, '%s', char(myDatasets(datasetCounter)));

        for instanceCounter = 1:size(errorMeasureLossValues,3)
            fprintf(lossFid, ',%s', num2str(errorMeasureLossValues(errorCounter,datasetCounter,instanceCounter)));
            fprintf(winFid, ',%s', num2str(errorMeasureWinValues(errorCounter,datasetCounter,instanceCounter)));
        end

        % add new line at the end of every dataset
        fprintf(lossFid, '\n');
        fprintf(winFid, '\n');
    end

    % lastly close the opened files
    fclose(lossFid);
    fclose(winFid);
end