"No one is harder on a talented person than the person themselves" - Linda Wilkinson ; "Trust your guts and don't follow the herd" ; "Validate direction not destination" ;

April 04, 2019

Day #235 - Audio Analysis

#pip install librosa
#pip install python_speech_features
#librosa compared with python_speech_features
#Credits - https://github.com/librosa/librosa/issues/573
import librosa
import python_speech_features
from scipy.signal.windows import hann
# Analysis settings fed to both the librosa and python_speech_features calls.
n_mfcc, n_mels = 13, 40  # coefficients to return / filters in the filterbank
# FFT size in samples; in librosa, win_length is implicitly taken to equal n_fft.
n_fft = 512
hop_length = 160  # step between successive frames, in samples
fmin, fmax = 0, None  # frequency bounds handed to each library's filterbank
# Reference: https://librosa.github.io/librosa/generated/librosa.feature.mfcc.html
# librosa.load gives back the audio time series (y) and its sampling rate (sr).
y, sr = librosa.load(r'E:\Audio_Analytics\test_data\1_street_music.wav')
# sr = 16000  # fake sample rate just to make the point

# MFCCs computed by librosa.
# n_mfcc (int > 0, scalar) is the number of MFCCs to return.
mfcc_librosa = librosa.feature.mfcc(
    y=y,
    sr=sr,
    n_mfcc=n_mfcc,
    n_mels=n_mels,
    n_fft=n_fft,
    hop_length=hop_length,
    fmin=fmin,
    fmax=fmax,
)
# Reference: https://python-speech-features.readthedocs.io/en/latest/
# MFCCs computed by python_speech_features, configured to mirror the librosa call:
#   winlen / winstep    - analysis window length and step, in seconds (library
#                         defaults: 0.025 s and 0.01 s); derived here from the
#                         sample counts n_fft and hop_length
#   nfilt               - number of filters in the filterbank (library default 26)
#   preemph / ceplifter - librosa has neither, so both are set to zero
#   winfunc             - hann, because librosa's default STFT window is hann
# Returns a numpy array of size (NUMFRAMES by numcep); each row holds one
# feature vector.
mfcc_speech = python_speech_features.mfcc(
    signal=y,
    samplerate=sr,
    winlen=n_fft / sr,
    winstep=hop_length / sr,
    numcep=n_mfcc,
    nfilt=n_mels,
    nfft=n_fft,
    lowfreq=fmin,
    highfreq=fmax,
    preemph=0,
    ceplifter=0,
    appendEnergy=False,
    winfunc=hann,
)
# Print the first frame's coefficients from each library for a side-by-side look.
# mfcc_librosa is indexed [coefficient, frame] here, so frame 0 is column 0;
# mfcc_speech holds one feature vector per row, so frame 0 is row 0.
print(list(mfcc_librosa[:, 0]))
print(list(mfcc_speech[0]))

Happy Mastering DL!!!

No comments: