function ExampleMusicSpeechClassification(cDatasetPath)
%ExampleMusicSpeechClassification  Two-feature music/speech classification demo.
%
%   ExampleMusicSpeechClassification(cDatasetPath) reads every '*.au' file
%   in the 'music' and 'speech' subfolders of cDatasetPath, extracts two
%   features per file (see ExtractFeaturesFromFile_I), z-score normalizes
%   each feature over all files, and evaluates the result with
%   ToolLooCrossVal. It prints the confusion matrix, the micro and macro
%   accuracy for both features together, and the accuracy of each feature
%   on its own.
%
%   Input:
%     cDatasetPath  dataset root folder, with or without a trailing file
%                   separator (default: 'd:\dataset\music_speech\')
%
%   Errors are raised if ComputeFeature is not on the path, if the
%   music/speech folders are missing, or if either folder has no '*.au'
%   files.
%
%   NOTE(review): ToolLooCrossVal is not defined in this file; it is
%   presumably part of the same ACA script collection as ComputeFeature.

    if (nargin < 1)
        cDatasetPath = 'd:\dataset\music_speech\';
    end

    % the feature extraction relies on the external ACA scripts
    if (exist('ComputeFeature', 'file') ~= 2)
        error('Please add the ACA scripts (https://github.com/alexanderlerch/ACA-Code) to your path!');
    end

    % fullfile inserts the separator only when needed, so the dataset path
    % works with and without a trailing slash
    cMusicDir = fullfile(cDatasetPath, 'music');
    cSpeechDir = fullfile(cDatasetPath, 'speech');
    if ((exist(cMusicDir, 'dir') ~= 7) || (exist(cSpeechDir, 'dir') ~= 7))
        error('Dataset path wrong or does not contain music/speech folders!')
    end

    iNumFeatures = 2;

    % collect the audio files of both classes
    music_files = dir(fullfile(cMusicDir, '*.au'));
    speech_files = dir(fullfile(cSpeechDir, '*.au'));
    iNumMusic = numel(music_files);
    iNumSpeech = numel(speech_files);
    if ((iNumMusic == 0) || (iNumSpeech == 0))
        % without this guard the statistics below silently become NaN
        error('No *.au files found in the music and/or speech folder!');
    end

    % one column of features per file
    v_music = zeros(iNumFeatures, iNumMusic);
    v_speech = zeros(iNumFeatures, iNumSpeech);

    for i = 1:iNumMusic
        v_music(:, i) = ExtractFeaturesFromFile_I(...
            fullfile(cMusicDir, music_files(i).name));
    end
    for i = 1:iNumSpeech
        v_speech(:, i) = ExtractFeaturesFromFile_I(...
            fullfile(cSpeechDir, speech_files(i).name));
    end

    % class labels: 0 for music files, 1 for speech files
    C = [zeros(1, iNumMusic), ones(1, iNumSpeech)];

    % z-score normalization of each feature (row) over all files
    v = [v_music, v_speech];
    iNumObs = iNumMusic + iNumSpeech;
    m = mean(v, 2);
    s = std(v, 0, 2);
    s(s == 0) = 1; % a constant feature maps to 0 instead of NaN (0/0)
    v = (v - repmat(m, 1, iNumObs)) ./ repmat(s, 1, iNumObs);

    % evaluation with both features
    [~, mat] = ToolLooCrossVal(v, C);

    disp('confusion matrix:');
    disp(mat);

    % micro accuracy: trace of the confusion matrix over its total sum
    disp('micro accuracy:');
    disp(sum(diag(mat)) / sum(mat(:)));

    % macro accuracy: mean over rows of (diagonal entry / row sum)
    disp('macro accuracy:');
    disp(mean(diag(mat) ./ sum(mat, 2)));

    % evaluation with each feature on its own
    acc1 = ToolLooCrossVal(v(1, :), C);
    fprintf('centroid accuracy: %f\n', acc1);
    acc2 = ToolLooCrossVal(v(2, :), C);
    fprintf('rms accuracy: %f\n', acc2);
end
0066
function [v] = ExtractFeaturesFromFile_I(cFilePath)
%ExtractFeaturesFromFile_I  Compute the two-element feature vector of a file.
%
%   v = ExtractFeaturesFromFile_I(cFilePath) reads the audio file at
%   cFilePath, peak-normalizes it, and returns a 2x1 column vector:
%     v(1)  mean of the 'SpectralCentroid' feature
%     v(2)  standard deviation of the first row of the 'TimeRms' feature
%
%   NOTE(review): the shape of the ComputeFeature output is not visible
%   here; v(1) assumes mean(feature) yields a scalar (i.e. the feature is
%   a vector) - confirm against ComputeFeature.

    cFeatureNames = {'SpectralCentroid', 'TimeRms'};

    % read audio
    [x, fs] = audioread(cFilePath);

    % Average the channels first: for multichannel input max(abs(x)) is a
    % row vector and a plain '/' would perform a matrix right-division
    % instead of a scaling. Mono input is left unchanged by the mean.
    x = mean(x, 2);

    % peak normalization; skipped for silent/empty input to avoid 0/0 = NaN
    fPeak = max(abs(x));
    if (fPeak > 0)
        x = x / fPeak;
    end

    v = zeros(numel(cFeatureNames), 1);

    % first feature: mean over the feature series
    feature = ComputeFeature(cFeatureNames{1}, x, fs);
    v(1, 1) = mean(feature);

    % second feature: standard deviation over the first feature row
    feature = ComputeFeature(cFeatureNames{2}, x, fs);
    v(2, 1) = std(feature(1, :));
end