
(Deep Learning) Python EDA - Audio Classification Project

from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime

num_epochs = 1000
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='save_models/audio_classification.hdf5', verbose=1, save_best_only=True)
start = datetime.now()  # datetime.now is a method, so it must be called

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer])
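To see how long training took and to get back the checkpointed weights, a short follow-up along these lines is common (the timing print and the reload step are additions for illustration, not from the original post):

from tensorflow.keras.models import load_model

duration = datetime.now() - start
print('Training completed in:', duration)

# save_best_only=True means the .hdf5 file holds the epoch with the lowest
# validation loss, so reloading it gives the best model, not the last one
best_model = load_model('save_models/audio_classification.hdf5')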
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
!pip install librosa

import matplotlib.pyplot as plt
%matplotlib inline
filename = './UrbanSound8K/dog_bark.wav'
import IPython.display as ipd
import librosa
import librosa.display

### Dog bark sound
plt.figure(figsize=(14,5))
data, sample_rate = librosa.load(filename)
# librosa.display.waveplot was removed in newer librosa versions; waveshow is the replacement
librosa.display.waveshow(data, sr=sample_rate)
ipd.Audio(filename)

from scipy.io import wavfile as wav
wave_sample_rate, scipy_audio = wav.read(filename)
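Note that `librosa.load` resamples to 22050 Hz mono by default, while `scipy.io.wavfile.read` returns the file's native sample rate and raw PCM values. Printing both makes the difference visible (a minimal check added here for illustration):

print('librosa sample rate :', sample_rate)        # 22050 by default
print('scipy sample rate   :', wave_sample_rate)   # native rate of the wav file
print('librosa dtype :', data.dtype)               # float32, normalized to [-1, 1]
print('scipy dtype   :', scipy_audio.dtype)        # integer PCM as stored on disk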

import pandas as pd

metadata = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')
metadata.head(10)

### Check whether the dataset is imbalanced
metadata['class'].value_counts()

engine_idling       1000
dog_bark            1000
street_music        1000
drilling            1000
jackhammer          1000
air_conditioner     1000
children_playing    1000
siren                929
car_horn             429
gun_shot             374
Name: class, dtype: int64
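Most classes have 1,000 clips each, while siren, car_horn, and gun_shot have fewer, so the dataset is moderately imbalanced. A quick bar chart (an added illustration, not in the original post) makes this easier to see:

metadata['class'].value_counts().plot(kind='bar', figsize=(10,4), title='Clips per class')
plt.ylabel('count')
plt.show()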
## Sound
filename = './UrbanSound8K/13577-3-0-2.wav'
plt.figure(figsize=(14,5))
data, sample_rate = librosa.load(filename)
librosa.display.waveshow(data, sr=sample_rate)
ipd.Audio(filename)

# the variables librosa_audio_data / librosa_sample_rate were never defined;
# reuse the data and sample_rate loaded just above
mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
print(mfccs.shape)
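The MFCCs come out as an (n_mfcc, frames) matrix, here 40 coefficients per time frame. A heatmap via `librosa.display.specshow` (an extra visualization, not in the original) shows how the coefficients evolve over time:

plt.figure(figsize=(14,5))
img = librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
plt.colorbar(img)
plt.title('MFCC')
plt.show()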
import pandas as pd
import os
import librosa
import numpy as np  # needed inside features_extractor below

audio_dataset_path = 'UrbanSound8K/audio/'
metadata = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')
metadata.head()

def features_extractor(file_name):  # the parameter was named 'file' but the body used 'file_name'
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # average over the time axis to get one fixed-length 40-dim vector per clip
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

    return mfccs_scaled_features
import numpy as np
from tqdm import tqdm

extracted_features = []
for index_num, row in tqdm(metadata.iterrows(), total=len(metadata)):
    file_name = os.path.join(os.path.abspath(audio_dataset_path), 'fold' + str(row["fold"]), str(row["slice_file_name"]))
    final_class_labels = row["class"]
    data = features_extractor(file_name)
    extracted_features.append([data,final_class_labels])
extracted_features_df = pd.DataFrame(extracted_features, columns=['feature', 'class'])
extracted_features_df.head()
X = np.array(extracted_features_df['feature'].tolist())
y = np.array(extracted_features_df['class'].tolist())
y = np.array(pd.get_dummies(y))
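`pd.get_dummies` one-hot encodes the labels but discards the mapping from column index back to class name. A common alternative (sketched here, not what the post used) is `LabelEncoder` plus `to_categorical`, which keeps an encoder around for decoding predictions later:

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

labelencoder = LabelEncoder()
y = to_categorical(labelencoder.fit_transform(extracted_features_df['class']))
# labelencoder.inverse_transform([i]) recovers the class name for index i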

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size= 0.2, random_state=0)
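A quick sanity check on the split (added for illustration) confirms the feature and label dimensions:

print(X_train.shape, y_train.shape)  # features (n, 40), one-hot labels (n, 10)
print(X_test.shape, y_test.shape)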
num_labels = y.shape[1]  # 10 classes; num_labels was used below but never defined

model = Sequential()
model.add(Dense(100, input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
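Before running the `model.fit` call shown at the top of the post, `model.summary()` is a cheap way to verify the layer stack and parameter count (added here, not in the original):

model.summary()  # four Dense layers: 40 -> 100 -> 200 -> 100 -> 10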

test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(test_accuracy[1])  # evaluate returns [loss, accuracy]; index 1 is the accuracy
filename = "UrbanSound8K/dog_bark.wav"
prediction_feature = features_extractor(filename)  # was passed file_name, a leftover from the loop above
prediction_feature = prediction_feature.reshape(1, -1)
# Sequential.predict_classes was removed in TF 2.6; argmax over predict is the replacement
predicted_class_index = np.argmax(model.predict(prediction_feature), axis=1)

metadata['class'].unique()
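`metadata['class'].unique()` lists the label names, but the model outputs a column index into the one-hot matrix, whose columns `pd.get_dummies` orders deterministically. Re-deriving those columns maps the index back to a name (a sketch, assuming the `get_dummies` encoding above was kept):

classes = pd.get_dummies(extracted_features_df['class']).columns
print(classes[predicted_class_index[0]])  # e.g. 'dog_bark'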