
(Hackathon) Designing a CNN Deep-Learning Model on Seoul City Big Data for Elderly Scream Detection and Emergency Alerts

import numpy as np
import pandas as pd
import os
import librosa 
from scipy.io import wavfile as wav
from tqdm import tqdm
import matplotlib.pyplot as plt
import librosa.display
import tensorflow
filename_scream = 'data/fold1/scream_1.wav'
filename_laugh = 'data/fold2/laugh_1.wav'
filename_noise = 'data/fold3/noise_1.wav'
filename_silence = 'data/fold4/silence_1.wav'
filename_fall = 'data/fold5/fall_1.wav'

# 'scream' is the screaming-sound class
# 'fall' is the falling-down-sound class

example_list = [filename_scream, filename_laugh, filename_noise, filename_silence, filename_fall]


def draw_sound(filename):
    plt.figure(figsize=(12, 4))
    data, sample_rate = librosa.load(filename)
    plt.title(filename)
    # waveshow replaced the deprecated waveplot in recent librosa versions
    librosa.display.waveshow(data, sr=sample_rate)

for filename in example_list:
    draw_sound(filename)
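
Alongside the raw waveforms, it can help to eyeball the MFCCs the model will actually consume. A minimal sketch (reusing the example clip above; not part of the original pipeline):

def draw_mfcc(filename):
    data, sample_rate = librosa.load(filename)
    mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
    plt.figure(figsize=(12, 4))
    librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
    plt.colorbar()
    plt.title('MFCC: ' + filename)
    plt.show()

draw_mfcc(filename_scream)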


filename = 'data/fold1/cry_1.wav' 

librosa_audio, librosa_sample_rate = librosa.load(filename)
scipy_sample_rate, scipy_audio = wav.read(filename)

# librosa resamples to 22,050 Hz by default, while scipy reads the file's
# native rate, so the two values below usually differ.
print('Original sample rate:', scipy_sample_rate)
print('Librosa sample rate:', librosa_sample_rate)

# 40 MFCC coefficients per frame; the frame count (second dimension) varies
# with clip length, which is why the features are padded to a fixed width below.
mfccs = librosa.feature.mfcc(y=librosa_audio, sr=librosa_sample_rate, n_mfcc=40)
print(mfccs.shape)
max_pad_len = 237    # fixed number of MFCC frames per clip (CNN input width)
max_frame_len = 0    # longest frame count observed before padding

def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

    # Track the longest clip seen; this must be measured *before* padding,
    # otherwise the shape is always max_pad_len. (Named max_frame_len to
    # avoid shadowing the builtin max.)
    global max_frame_len
    if mfccs.shape[1] > max_frame_len:
        max_frame_len = mfccs.shape[1]

    # Zero-pad each clip's MFCC matrix to a fixed width so the CNN gets a
    # uniform input shape of (40, max_pad_len).
    pad_width = max_pad_len - mfccs.shape[1]
    mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    return mfccs
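
A quick single-file check of the extractor (a sketch; the path reuses the example clip from above):

sample_mfccs = extract_features('data/fold1/scream_1.wav')
print(sample_mfccs.shape)  # expected: (40, 237) after padding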
fulldatasetpath = 'data/'
metadata = pd.read_csv("data/sound3.csv")
features = []

for index, row in metadata.iterrows():
    file_name = os.path.join(os.path.abspath(fulldatasetpath),
                             'fold' + str(row['fold']),
                             str(row['slice_file_name']))
    class_label = row['class_name']
    data = extract_features(file_name)
    features.append([data, class_label])

featuresdf = pd.DataFrame(features, columns = ['feature', 'class_label'])

print("Finished feature extraction from ", len(featuresdf), ' files') 
print('Max :',max)
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

x = np.array(featuresdf.feature.tolist())
y = np.array(featuresdf.class_label.tolist())

# Encode the string labels as integers, then one-hot encode them for the softmax output
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))
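
For later use, the fitted encoder can map predicted indices back to class names (a sketch):

print(le.classes_)                  # the class names in index order
print(le.inverse_transform([0]))    # index -> original label string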
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, yy, test_size = 0.2, random_state = 42)
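
Optionally, stratifying the split on the original labels keeps the five classes in proportion across the train and test sets (a sketch, not the original call):

x_train, x_test, y_train, y_test = train_test_split(
    x, yy, test_size=0.2, random_state=42, stratify=y)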
num_rows = 40
num_columns = 237
num_channels = 1

print("train data shape")
print(x_train.shape)
print(x_test.shape)

x_train = x_train.reshape(x_train.shape[0], num_rows, num_columns, num_channels)
x_test = x_test.reshape(x_test.shape[0], num_rows, num_columns, num_channels)

print("\ntrain data reshape 결과")
print(x_train.shape)
print(x_test.shape)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

num_labels = yy.shape[1]   # one output unit per class

# Build the CNN model
model = Sequential()
model.add(Conv2D(filters = 16, kernel_size = 2, input_shape = (num_rows, num_columns, num_channels), activation = 'relu'))
model.add(MaxPooling2D(pool_size = 2))
model.add(Dropout(0.2))

model.add(Conv2D(filters = 32, kernel_size = 2, activation = 'relu'))
model.add(MaxPooling2D(pool_size = 2))
model.add(Dropout(0.2))

model.add(Conv2D(filters = 64, kernel_size = 2, activation = 'relu'))
model.add(MaxPooling2D(pool_size = 2))
model.add(Dropout(0.2))

model.add(Conv2D(filters = 128, kernel_size = 2, activation = 'relu'))
model.add(MaxPooling2D(pool_size = 2))
model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation = 'softmax'))


# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()
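
With 100 epochs on a small dataset, early stopping is a common guard against overfitting. An optional sketch (not part of the original run; pass callbacks=[early_stop] to model.fit to enable it):

from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10,
                           restore_best_weights=True)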

hist = model.fit(x_train, y_train,
                 epochs=100,
                 batch_size=128,
                 verbose=1,
                 validation_data=(x_test, y_test))

loss_and_metrics = model.evaluate(x_test, y_test, batch_size=32)
print('loss_and_metrics : ' + str(loss_and_metrics))
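
Beyond overall accuracy, the sklearn metrics import above can give a per-class breakdown on the test set, which is useful for an emergency-alert use case (a sketch):

y_pred = np.argmax(model.predict(x_test), axis=1)
y_true = np.argmax(y_test, axis=1)
print(metrics.classification_report(y_true, y_pred, target_names=le.classes_))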

fig, loss_ax = plt.subplots()

loss_ax.plot(hist.history['loss'], 'b', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label= 'test loss')

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')

loss_ax.legend(loc = 'upper right')
plt.show()

fig, acc_ax = plt.subplots()

acc_ax.plot(hist.history['accuracy'], 'b', label='train accuracy')
acc_ax.plot(hist.history['val_accuracy'], 'r', label='test accuracy')

acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('accuracy')

acc_ax.legend(loc='upper right')

plt.show()

def predict_features(file_name):
    # Mirror the training-time preprocessing: MFCC -> pad -> reshape to 4-D
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    pad_width = max_pad_len - mfccs.shape[1]
    mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    mfccs = mfccs.reshape(1, num_rows, num_columns, num_channels)
    return mfccs
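
Example usage of the helper with the trained model (a sketch; the clip path is hypothetical):

features = predict_features('data/fold1/scream_1.wav')
probs = model.predict(features)                    # shape: (1, num_labels)
print(le.inverse_transform([np.argmax(probs)]))    # predicted class name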