function [discreteDataset] = discretize(dataset, N)
% DISCRETIZE  Equal-frequency discretization of the feature columns.
%
%   discreteDataset = discretize(dataset) replaces every value in each
%   column of DATASET, except the last one, with the index (1..N) of the
%   equal-frequency bin it falls into, using N = 5 bins.
%
%   discreteDataset = discretize(dataset, N) uses N bins instead of 5.
%
%   Inputs:
%     dataset - numeric matrix, one instance per row. The last column is
%               treated as the effort value and is copied through
%               unchanged.
%     N       - (optional) number of bins, default 5.
%
%   Output:
%     discreteDataset - matrix of the same size as DATASET. Feature
%                       columns hold bin indices; the last column holds
%                       the original effort values.
%
%   Each bin receives ceil(numRows / N) instances (the last bin may get
%   fewer). Bins are assigned by sorted rank, so tied values that straddle
%   a bin boundary can end up in adjacent bins.

    if nargin < 2 || isempty(N)
        N = 5;                          % default number of bins
    end

    numRows = size(dataset, 1);
    numCols = size(dataset, 2);

    % Same-size output, pre-filled with -1 as a "not yet assigned" marker.
    discreteDataset = -1 * ones(numRows, numCols);

    if numRows == 0 || numCols == 0
        return;                         % nothing to discretize
    end

    % Number of instances per bin; at least 1 so the rank division below
    % is always well defined.
    binSize = max(1, ceil(numRows / N));

    % Bin index for the 1st, 2nd, ... smallest instance of any column.
    binOfRank = ceil((1:numRows)' / binSize);

    % Discretize every column except the last (the effort value).
    for col = 1:(numCols - 1)
        [~, order] = sort(dataset(:, col));     % order(k) = row of k-th smallest value
        discreteDataset(order, col) = binOfRank;
    end

    % The effort column is not discretized; pass it through unchanged.
    discreteDataset(:, numCols) = dataset(:, numCols);
end