import numpy as np
import pandas as pd
import os
import librosa
from scipy.io import wavfile as wav
from tqdm import tqdm
import matplotlib.pyplot as plt
import librosa.display
import tensorflow
import numpy as np
# Target number of columns (second axis) for every MFCC matrix produced by
# extract_features(); that function computes its padding width from this value.
max_pad_len = 237
# NOTE(review): not referenced anywhere in the visible part of this file;
# confirm whether it is still needed.
test_num = 0
def extract_features(file_name):
    """Load an audio file and return its MFCCs as a fixed-width matrix.

    The file is decoded with ``librosa.load`` (``res_type='kaiser_fast'``)
    and 40 MFCC coefficients are computed per frame. The frame axis
    (axis 1) is then zero-padded on the right, or truncated, so that it has
    exactly ``max_pad_len`` columns.

    Side effect: the module-level ``max`` is raised to the largest
    *unpadded* frame count seen so far. Measuring before padding is what
    makes the value useful: after padding every matrix has ``max_pad_len``
    columns, so a value larger than ``max_pad_len`` signals that clips were
    truncated and the constant should be increased.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        A NumPy array of MFCCs whose second axis has length ``max_pad_len``.

    Raises:
        NameError: If the module-level ``max`` has not been initialised
            before the first call.
    """
    # `max` is a module-level running maximum that shadows the builtin; the
    # name is kept because the rest of the script initialises and prints it.
    global max

    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

    # Record the natural width before it is normalised away below.
    n_frames = mfccs.shape[1]
    if n_frames > max:
        max = n_frames

    pad_width = max_pad_len - n_frames
    if pad_width < 0:
        # np.pad rejects negative widths with a ValueError, so clips longer
        # than max_pad_len are cut down to the target width instead.
        return mfccs[:, :max_pad_len]
    return np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
import pandas as pd
import os
import librosa
# Running maximum that extract_features() updates through `global max`.
# NOTE(review): this name shadows the builtin max() for the rest of the module.
max = 0

fulldatasetpath = 'data/'
metadata = pd.read_csv("data/sound3.csv")

# The absolute dataset root is the same for every row, so resolve it once.
dataset_root = os.path.abspath(fulldatasetpath)

# Build one [mfcc_matrix, label] pair per metadata row.
features = []
for index, row in metadata.iterrows():
    fold_dir = f"fold{row['fold']!s}/"
    clip_name = str(row["slice_file_name"])
    file_name = os.path.join(dataset_root, fold_dir, clip_name)
    class_label = row['class_name']
    data = extract_features(file_name)
    features.append([data, class_label])

featuresdf = pd.DataFrame(features, columns=['feature', 'class_label'])

print("Finished feature extraction from ", len(featuresdf), ' files')
print('Max :', max)
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
# Stack the per-file feature matrices and the labels into NumPy arrays.
x = np.array(featuresdf["feature"].tolist())
y = np.array(featuresdf["class_label"].tolist())

# Map each class name to an integer id, then one-hot encode the ids.
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)