Home > ACA-Code > ComputeSpectrogram.m

ComputeSpectrogram

PURPOSE ^

computes a magnitude (or complex) spectrogram from the audio data

SYNOPSIS ^

function [X, f, t] = ComputeSpectrogram (x, f_s, afWindow, iBlockLength, iHopLength, bNormalize, bMagnitude)

DESCRIPTION ^

computes a magnitude (or complex) spectrogram from the audio data
>
> @param x: time domain sample data, dimension channels X samples
> @param f_s: sample rate of audio data
> @param afWindow: FFT window of length iBlockLength (default: hann), can be [] empty
> @param iBlockLength: internal block length (default: 4096 samples)
> @param iHopLength: internal hop length (default: 2048 samples)
> @param bNormalize: normalize input audio (default: True)
> @param bMagnitude: return magnitude instead of complex spectrum (default: True)
>
> @retval X spectrogram
> @retval f frequency bands
> @retval t time stamps
 ======================================================================

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

%computes a magnitude (or complex) spectrogram from the audio data
%>
%> The input is down-mixed, optionally normalized, split into blocks, and
%> each block is multiplied with the window and transformed with an FFT.
%> Only the first iBlockLength/2+1 bins (0 Hz ... f_s/2) are returned. No
%> mel (or other perceptual) frequency warping is applied.
%>
%> @param x: time domain sample data, dimension channels X samples
%> @param f_s: sample rate of audio data
%> @param afWindow: FFT window of length iBlockLength (default: hann), can be [] empty
%> @param iBlockLength: internal block length, must be even (default: 4096 samples), can be [] empty
%> @param iHopLength: internal hop length (default: 2048 samples), can be [] empty
%> @param bNormalize: normalize input audio (default: True)
%> @param bMagnitude: return magnitude instead of complex spectrum (default: True)
%>
%> @retval X spectrogram, dimension frequency bins X blocks
%> @retval f frequency of each bin, linearly spaced from 0 to f_s/2
%> @retval t time stamps as returned by ToolBlockAudio
% ======================================================================
function [X, f, t] = ComputeSpectrogram (x, f_s, afWindow, iBlockLength, iHopLength, bNormalize, bMagnitude)

    % set default parameters if necessary
    if (nargin < 7)
        bMagnitude = true;
    end
    if (nargin < 6)
        bNormalize = true;
    end
    % [] selects the default for the numeric parameters (same convention as
    % afWindow), so that later arguments can be set without repeating them
    if (nargin < 5 || isempty(iHopLength))
        iHopLength = 2048;
    end
    if (nargin < 4 || isempty(iBlockLength))
        iBlockLength = 4096;
    end
    if (nargin < 3 || isempty(afWindow))
        afWindow = hann(iBlockLength,'periodic');
    end

    % the spectrum is stored in iBlockLength/2+1 rows, which is only a valid
    % matrix dimension for an even integer block length
    if (mod(iBlockLength, 2) ~= 0)
        error('block length must be an even integer');
    end

    if (length(afWindow) ~= iBlockLength)
        error('window length mismatch');
    end

    % force the window into a column and the audio into samples X channels;
    % use the plain transpose (.') as only the orientation should change
    if (size(afWindow, 1) < size(afWindow, 2))
        afWindow = afWindow.';
    end
    if (size(x, 1) < size(x, 2))
        x = x.';
    end

    % pre-processing: down-mixing
    x = ToolDownmix(x);

    % pre-processing: normalization
    if bNormalize
        x = ToolNormalizeAudio(x);
    end

    % x_b holds one block per row
    [x_b, t] = ToolBlockAudio (x, iBlockLength, iHopLength, f_s);

    % allocate one column per block and one row per bin from 0 to f_s/2
    iNumBins = size(x_b, 2)/2+1;
    iNumBlocks = size(x_b, 1);
    X = zeros(iNumBins, iNumBlocks);
    f = linspace(0, f_s/2, iNumBins);

    for n = 1:iNumBlocks
        % windowed FFT of the current block
        tmp = fft(x_b(n, :).' .* afWindow);

        % keep the bins up to f_s/2 and scale by 2/iBlockLength
        if bMagnitude
            X(:, n) = abs(tmp(1:iNumBins)) * 2 / iBlockLength;
        else
            X(:, n) = (tmp(1:iNumBins)) * 2 / iBlockLength;
        end
    end

    % normalization: the first (0 Hz) and last (f_s/2) bin are scaled down
    % by sqrt(2) relative to the other bins
    X([1 end],:) = X([1 end],:) / sqrt(2);
end

Generated on Fri 22-Apr-2022 20:59:51 by m2html © 2005