Created
June 5, 2015 05:29
-
-
Save xypaul/e6cf7abd00d2e0739196 to your computer and use it in GitHub Desktop.
Recreating the results from the MFS paper by Bay
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function main()
% MAIN  Run the kNN and MFS experiments on the Iris and Sonar data.
%
% Loads 'iris.csv' and 'sonar.csv', then prints one line per configuration.
% Every printed number is the value returned by averageTestKNN or
% averageTestMFS for an 80/20 train/test split (100 repetitions for the
% kNN runs, 10 repetitions for the MFS runs).

    % Iris: the class names in the file were replaced by numbers beforehand
    iris = load('iris.csv', ',');
    sonar = load('sonar.csv', ',');

    % One row per dataset: display name, data matrix, MFS subset sizes to try
    experiments = {'Iris',  iris,  1:4; ...
                   'Sonar', sonar, 6:6:60};
    variants = {'mfs1', 'mfs2'};

    % Plain nearest neighbour (k = 1) followed by kNN for k = 2..5
    for e = 1:size(experiments, 1)
        name = experiments{e, 1};
        data = experiments{e, 2};
        for k = 1:5
            if k == 1
                label = [name ' NN'];
            else
                label = [name ' kNN ' num2str(k)];
            end
            display([label ' ==> ' num2str(averageTestKNN(100, data, 80, k))]);
        end
    end

    % MFS: for each dataset, first every subset size with mfs1, then with mfs2
    for e = 1:size(experiments, 1)
        name = experiments{e, 1};
        data = experiments{e, 2};
        for v = 1:numel(variants)
            variant = variants{v};
            for features = experiments{e, 3}
                label = [name ' ' variant ' ' num2str(features)];
                display([label ' ==> ' ...
                    num2str(averageTestMFS(10, data, 80, features, variant))]);
            end
        end
    end
end
function [train, test] = randomizeDataset(data, percentage)
% RANDOMIZEDATASET  Randomly split the rows of DATA into two matrices.
%
%   data        matrix whose rows are shuffled and split
%   percentage  share of the rows (0..100) that goes into TRAIN
%
%   train       the first round(rows * percentage / 100) shuffled rows
%   test        all remaining shuffled rows

    rowCount = size(data, 1);
    order = randperm(rowCount);                     % random row order
    cut = round(rowCount * (percentage / 100));     % last row index of the training part
    train = data(order(1:cut), :);
    test = data(order(cut+1:rowCount), :);
end
function average = averageTestKNN(numberOfTests, data, percentage, n)
% AVERAGETESTKNN  Mean of (100 - knn score) over repeated random splits.
%
%   numberOfTests  how many random train/test splits to evaluate
%   data           full data matrix handed to randomizeDataset
%   percentage     share of rows used for training in every split
%   n              number of neighbours passed to knn
%
% knn returns the percentage of correctly classified test rows, so the
% averaged quantity 100 - knn(...) is the percentage of misclassified rows.

    missRates = zeros(numberOfTests, 1);
    for trial = 1:numberOfTests
        [fitRows, heldOutRows] = randomizeDataset(data, percentage);
        hitRate = knn(heldOutRows, fitRows, n);
        missRates(trial) = 100 - hitRate;
    end
    average = mean(missRates);
end
function average = averageTestMFS(numberOfTests, data, percentage, features, type)
% AVERAGETESTMFS  Mean of (100 - mfs score) over repeated random splits.
%
%   numberOfTests  how many random train/test splits to evaluate
%   data           full data matrix handed to randomizeDataset
%   percentage     share of rows used for training in every split
%   features       number of features passed to mfs
%   type           'mfs1' or 'mfs2', forwarded to mfs
%
% mfs returns the percentage of correctly classified test rows, so the
% averaged quantity 100 - mfs(...) is the percentage of misclassified rows.

    missRates = zeros(numberOfTests, 1);
    for trial = 1:numberOfTests
        [fitRows, heldOutRows] = randomizeDataset(data, percentage);
        hitRate = mfs(heldOutRows, fitRows, features, type);
        missRates(trial) = 100 - hitRate;
    end
    average = mean(missRates);
end
function accuracy = knn(testingData, trainData, n)
% KNN  Percentage of test rows classified correctly by n-nearest-neighbour.
%
%   testingData  matrix of rows to classify; the last column is the true class
%   trainData    matrix of labelled rows handed to identify
%   n            number of neighbours handed to identify
%
%   accuracy     mean over all test rows of 100 (correct) or 0 (wrong),
%                i.e. the percentage of correct predictions
%
% The output used to be called "error", which shadowed the built-in error
% function and mislabelled the value: callers compute 100 - knn(...) to get
% the error rate.

    labelColumn = size(testingData, 2);
    testCount = size(testingData, 1);
    hits = zeros(testCount, 1);     % stays 0 for every misclassified row
    for i = 1:testCount
        predicted = identify(trainData, testingData(i, :), n);
        if testingData(i, labelColumn) == predicted
            hits(i) = 100;
        end
    end
    accuracy = mean(hits);
end
function accuracy = mfs(testingData, trainData, features, type)
% MFS  Percentage of test rows classified correctly by mfs1 or mfs2.
%
%   testingData  matrix of rows to classify; the last column is the true class
%   trainData    matrix of labelled rows handed to the classifier
%   features     number of features handed to identifymfs1 / identifymfs2
%   type         'mfs1' (identifymfs1) or 'mfs2' (identifymfs2)
%
%   accuracy     mean over all test rows of 100 (correct) or 0 (wrong),
%                i.e. the percentage of correct predictions
%
% Raises 'mfs:invalidType' when TYPE is neither 'mfs1' nor 'mfs2'.
% Previously that case only printed a message and then failed on an
% undefined variable. The output is no longer called "error" so the
% built-in error function stays usable inside this function.

    % Validate once, before any work is done
    useMfs1 = strcmp(type, 'mfs1');
    if ~useMfs1 && ~strcmp(type, 'mfs2')
        error('mfs:invalidType', 'Type has to be either mfs1 or mfs2');
    end

    labelColumn = size(testingData, 2);
    testCount = size(testingData, 1);
    hits = zeros(testCount, 1);     % stays 0 for every misclassified row
    for i = 1:testCount
        if useMfs1
            identification = identifymfs1(trainData, testingData(i, :), features);
        else
            identification = identifymfs2(trainData, testingData(i, :), features);
        end
        if testingData(i, labelColumn) == identification
            hits(i) = 100;
        end
    end
    accuracy = mean(hits);
end
function class = identifymfs1(training, item, features)
% IDENTIFYMFS1  Wrapper function for mfs1: identifymfs with replacement.
%
% Forwards all arguments to identifymfs with the replacement flag set to true.

    withReplacement = true;
    class = identifymfs(training, item, withReplacement, features);
end
function class = identifymfs2(training, item, features)
% IDENTIFYMFS2  Wrapper function for mfs2: identifymfs without replacement.
%
% Forwards all arguments to identifymfs with the replacement flag set to false.

    withReplacement = false;
    class = identifymfs(training, item, withReplacement, features);
end
function label = identifymfs(training, item, replacement, features, runthroughs)
% IDENTIFYMFS  Classify ITEM by majority vote over random feature subsets.
%
%   training     matrix of labelled rows; the last column is the class
%   item         row to classify, with the same column layout as TRAINING
%   replacement  true to draw features with replacement, false without
%   features     number of features drawn for every vote
%   runthroughs  (optional) number of votes to collect; defaults to 10
%
%   label        most frequent class among the votes (mode)
%
% Every vote draws a fresh feature subset with pickFeature and classifies
% the reduced item with identify using a single nearest neighbour (k = 1).

    if nargin < 5 || isempty(runthroughs)
        runthroughs = 10;   % value that used to be hard-coded
    end

    votes = zeros(runthroughs, 1);
    for i = 1:runthroughs
        [pickedFeatures, itemFeatures] = pickFeature(training, item, features, replacement);
        votes(i) = identify(pickedFeatures, itemFeatures, 1);   % k = 1
    end
    label = mode(votes);
end
function [dataPicked, itemPicked] = pickFeature(data, item, n, replacement)
% PICKFEATURE  Keep N randomly chosen feature columns plus the class column.
%
%   data         matrix whose last column is the class and whose other
%                columns are the features
%   item         row with the same column layout as DATA
%   n            number of feature columns to draw
%   replacement  true:  draw with replacement (a column may appear several
%                       times in the output)
%                false: draw without replacement (every column at most once)
%
%   dataPicked   DATA restricted to the drawn columns (ascending order)
%                followed by the class column
%   itemPicked   ITEM restricted to the same columns
%
% Raises 'pickFeature:noFeatures' when features are requested but DATA has
% no feature column, and 'pickFeature:tooManyFeatures' when more distinct
% features are requested than exist. The latter used to fail with an
% index-out-of-bounds error.

    numberOfColumns = size(data, 2);
    featureCount = numberOfColumns - 1;     % the last column is the class

    if replacement
        if n > 0 && featureCount < 1
            error('pickFeature:noFeatures', ...
                'Cannot pick %d feature(s): data has no feature columns.', n);
        end
        chosen = randi(featureCount, 1, n);
    else
        if n > featureCount
            error('pickFeature:tooManyFeatures', ...
                'Cannot pick %d distinct features from only %d.', n, featureCount);
        end
        chosen = randperm(featureCount, n);
    end

    % Features in ascending order; the class column stays at the end
    pickedColumns = [sort(chosen), numberOfColumns];

    dataPicked = data(:, pickedColumns);
    itemPicked = item(:, pickedColumns);
end
function class = identify(training, item, k)
% IDENTIFY  Class of ITEM by vote among its K nearest training rows.
%
%   training  matrix of labelled rows handed to dist
%   item      row to classify
%   k         number of nearest rows that vote
%
%   class     most frequent label (mode) among the K closest rows

    % column 1: distance of every training row to the item, column 2: its label
    scored = dist(training, item);

    % ascending distance; rows with equal distance keep their original order
    [~, order] = sort(scored(:, 1));

    nearestLabels = scored(order(1:k), 2);
    class = mode(nearestLabels);
end
function d = dist(data, item)
% DIST  Euclidean distance from ITEM to every row of DATA, paired with labels.
%
%   data  matrix whose last column is the class; all other columns are used
%         for the distance
%   item  row vector; only its first size(data,2)-1 entries of row 1 are read
%
%   d     size(data,1)-by-2 matrix: column 1 is the distance of the row to
%         ITEM, column 2 is the row's class (last column of DATA)

    dims = size(data);
    rowCount = dims(1);
    labelColumn = dims(2);

    % nothing to measure: return an empty two-column result
    if rowCount == 0
        d = zeros(0, 2);
        return;
    end

    % add up the squared differences one feature column at a time
    squaredSum = zeros(rowCount, 1);
    for j = 1:labelColumn-1
        squaredSum = squaredSum + (data(:, j) - item(1, j)).^2;
    end

    d = zeros(rowCount, 2);
    d(:, 1) = sqrt(squaredSum);
    d(:, 2) = data(:, labelColumn);
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment