Home > ACA-Code > ExampleMusicSpeechClassification.m

ExampleMusicSpeechClassification

PURPOSE ^

SYNOPSIS ^

function ExampleMusicSpeechClassification(cDatasetPath)

DESCRIPTION ^

CROSS-REFERENCE INFORMATION ^

This function calls:
This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function ExampleMusicSpeechClassification(cDatasetPath)
%ExampleMusicSpeechClassification  Music/speech classification example.
%
%   ExampleMusicSpeechClassification(cDatasetPath) reads all *.au files from
%   the 'music' and 'speech' subfolders of cDatasetPath, extracts two
%   features per file (see ExtractFeaturesFromFile_I), z-score normalizes
%   them over all files, and runs ToolLooCrossVal on the result. It prints
%   the confusion matrix, the micro and macro accuracy for both features
%   combined, and the accuracy for each feature on its own.
%
%   Input:
%     cDatasetPath  root folder of the dataset, with or without a trailing
%                   file separator (default: 'd:\dataset\music_speech\').
%                   The script is written for the GTZAN music/speech
%                   dataset.
%
%   Errors are raised if ComputeFeature is not on the path, if the
%   music/speech folders are missing, or if either folder holds no *.au
%   files.

    if (nargin < 1)
        % this script is written for the GTZAN music/speech dataset
        % modify this path or use the function parameter to specify your
        % dataset path
        cDatasetPath = 'd:\dataset\music_speech\';
    end
    if (exist('ComputeFeature', 'file') ~= 2)
        error('Please add the ACA scripts (https://github.com/alexanderlerch/ACA-Code) to your path!');
    end

    % build the folder names with fullfile so that the dataset path works
    % with or without a trailing separator
    cMusicDir = fullfile(cDatasetPath, 'music');
    cSpeechDir = fullfile(cDatasetPath, 'speech');
    if ((exist(cMusicDir, 'dir') ~= 7) || (exist(cSpeechDir, 'dir') ~= 7))
        error('Dataset path wrong or does not contain music/speech folders!')
    end

    iNumFeatures = 2;

    % read directory contents
    music_files = dir(fullfile(cMusicDir, '*.au'));
    speech_files = dir(fullfile(cSpeechDir, '*.au'));
    iNumMusic = numel(music_files);
    iNumSpeech = numel(speech_files);

    % without files in both classes there is nothing to normalize or classify
    if ((iNumMusic == 0) || (iNumSpeech == 0))
        error('No *.au files found in the music and/or speech folder!');
    end

    v_music = zeros(iNumFeatures, iNumMusic);
    v_speech = zeros(iNumFeatures, iNumSpeech);

    % extract features, this may take a while...
    for i = 1:iNumMusic
        v_music(:, i) = ExtractFeaturesFromFile_I(...
            fullfile(cMusicDir, music_files(i).name));
    end
    for i = 1:iNumSpeech
        v_speech(:, i) = ExtractFeaturesFromFile_I(...
            fullfile(cSpeechDir, speech_files(i).name));
    end

    % assign class labels for training and eval (music: 0, speech: 1)
    C = [zeros(1, iNumMusic) ones(1, iNumSpeech)];

    % normalize features (zero mean, unit standard deviation per feature)
    v = [v_music, v_speech];
    iNumObs = size(v, 2);
    m = mean(v, 2);
    s = std(v, 0, 2);
    s(s == 0) = 1; % constant feature: avoid 0/0 = NaN, result stays 0
    v = (v - repmat(m, 1, iNumObs)) ./ repmat(s, 1, iNumObs);

    % compute the overall accuracy with cross validation
    [~, mat] = ToolLooCrossVal(v, C);

    disp('confusion matrix:'),
    disp(mat);

    % micro accuracy: correctly classified files over all files
    disp('micro accuracy:'),
    disp(sum(diag(mat)) / sum(sum(mat)))

    % macro accuracy: mean of the per-row (diagonal / row sum) accuracies
    tmp = diag(mat) ./ sum(mat, 2);
    disp('macro accuracy:'),
    disp(mean(tmp))

    % compute the individual feature performance
    acc1 = ToolLooCrossVal(v(1, :), C);
    fprintf('centroid accuracy: %f\n', acc1);
    acc2 = ToolLooCrossVal(v(2, :), C);
    fprintf('rms accuracy: %f\n', acc2);
end
0066 
function [v] = ExtractFeaturesFromFile_I(cFilePath)
%ExtractFeaturesFromFile_I  Compute the feature vector of one audio file.
%
%   v = ExtractFeaturesFromFile_I(cFilePath) reads the audio file at
%   cFilePath, scales it to a peak amplitude of 1, and returns a 2x1 vector:
%     v(1)  mean of the 'SpectralCentroid' feature series
%     v(2)  standard deviation of the first row of the 'TimeRms' feature
%           series
%   Both feature series are computed with ComputeFeature.

    cFeatureNames = {'SpectralCentroid', 'TimeRms'};

    % read audio
    [x, fs] = audioread(cFilePath);

    % downmix multi-channel audio: max(abs(x)) would otherwise be a row
    % vector and x / rowvector is a matrix right division, not a scaling
    if (size(x, 2) > 1)
        x = mean(x, 2);
    end

    % peak normalization; skip it for all-zero (or empty) signals to avoid
    % filling x with NaN
    fPeak = max(abs(x));
    if (fPeak > 0)
        x = x / fPeak;
    end

    v = zeros(numel(cFeatureNames), 1);

    % compute first feature
    feature = ComputeFeature (cFeatureNames{1}, x, fs);
    v(1, 1) = mean(feature);

    % compute second feature
    feature = ComputeFeature (cFeatureNames{2}, x, fs);
    v(2, 1) = std(feature(1, :));
end

Generated on Fri 22-Apr-2022 20:59:51 by m2html © 2005