Urban Sound Classifier using CNN v2

In [1]:
# draw
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# basic handling
import os
import glob
import pickle
import numpy as np
# audio
import librosa
import librosa.display
import IPython.display
# normalization
import sklearn
# nn
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import TensorBoard
keras.__version__
Using TensorFlow backend.
Out[1]:
'2.2.0'
In [2]:
def show_history(history):
    # plot training and validation accuracy/loss curves from a Keras History object
    print(history.history.keys())
    fig = plt.figure(figsize=(20,5))
    plt.subplot(121)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.subplot(122)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='lower left')
    plt.show()
In [3]:
# small helper so each pickle file handle is closed after loading
def load_pickle(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

train_x = load_pickle('./train_x.dat')
train_y = load_pickle('./train_y.dat')
val_x = load_pickle('./val_x.dat')
val_y = load_pickle('./val_y.dat')
test_x = load_pickle('./test_x.dat')
test_y = load_pickle('./test_y.dat')

# add a channel dimension so Conv2D receives (samples, bands, frames, 1)
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], train_x.shape[2], 1)
val_x = val_x.reshape(val_x.shape[0], val_x.shape[1], val_x.shape[2], 1)
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], test_x.shape[2], 1)

# one-hot encode the 10 class labels
train_y = to_categorical(train_y)
val_y = to_categorical(val_y)
test_y = to_categorical(test_y)
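The pickled feature arrays are produced outside this notebook. Judging by the librosa imports and the model summary below (the first stride-(1,2) convolution outputs 40×87, so the input is 40 bands by roughly 170 frames), they look like 40-band log-mel or MFCC patches covering about four seconds of audio at librosa's default 22050 Hz sample rate and 512-sample hop. A minimal sketch of how such features could be extracted is shown here; the function name, n_mels value, clip duration, and padding strategy are assumptions, not the notebook's actual preprocessing.

# Hypothetical feature extraction, assuming 40 log-mel bands over ~4 s clips;
# the code that actually produced train_x.dat etc. is not shown in this notebook.
import numpy as np
import librosa

def extract_logmel(path, sr=22050, duration=4.0, n_mels=40):
    y, sr = librosa.load(path, sr=sr, duration=duration)
    # zero-pad short clips so every example ends up with the same number of frames
    y = np.pad(y, (0, max(0, int(sr * duration) - len(y))), mode='constant')
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    return librosa.power_to_db(mel, ref=np.max)

# example usage (hypothetical path):
# feat = extract_logmel('path/to/clip.wav')   # shape (40, n_frames)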
In [4]:
model = Sequential()

# block 1: three 3x3 convs (the first with stride (1,2)), then 2x2 max pooling
model.add(Conv2D(32, (3, 3), strides=(1, 2), padding='same', activation='relu', kernel_initializer='uniform', input_shape=train_x.shape[1:]))
model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# block 2: two 3x3 convs (the first with stride (1,2)), then 2x2 max pooling
model.add(Conv2D(48, (3, 3), strides=(1, 2), padding='same', activation='relu', kernel_initializer='uniform'))
model.add(Conv2D(48, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# block 3: two 3x3 convs, then 2x2 max pooling
model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# classifier head: flatten, one 512-unit dense layer, softmax over the 10 classes
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary(line_length=80)
________________________________________________________________________________
Layer (type)                        Output Shape                    Param #     
================================================================================
conv2d_1 (Conv2D)                   (None, 40, 87, 32)              320         
________________________________________________________________________________
conv2d_2 (Conv2D)                   (None, 40, 87, 32)              9248        
________________________________________________________________________________
conv2d_3 (Conv2D)                   (None, 40, 87, 32)              9248        
________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)      (None, 20, 43, 32)              0           
________________________________________________________________________________
dropout_1 (Dropout)                 (None, 20, 43, 32)              0           
________________________________________________________________________________
conv2d_4 (Conv2D)                   (None, 20, 22, 48)              13872       
________________________________________________________________________________
conv2d_5 (Conv2D)                   (None, 20, 22, 48)              20784       
________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D)      (None, 10, 11, 48)              0           
________________________________________________________________________________
dropout_2 (Dropout)                 (None, 10, 11, 48)              0           
________________________________________________________________________________
conv2d_6 (Conv2D)                   (None, 10, 11, 64)              27712       
________________________________________________________________________________
conv2d_7 (Conv2D)                   (None, 10, 11, 64)              36928       
________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D)      (None, 5, 5, 64)                0           
________________________________________________________________________________
dropout_3 (Dropout)                 (None, 5, 5, 64)                0           
________________________________________________________________________________
flatten_1 (Flatten)                 (None, 1600)                    0           
________________________________________________________________________________
dense_1 (Dense)                     (None, 512)                     819712      
________________________________________________________________________________
dropout_4 (Dropout)                 (None, 512)                     0           
________________________________________________________________________________
dense_2 (Dense)                     (None, 10)                      5130        
================================================================================
Total params: 942,954
Trainable params: 942,954
Non-trainable params: 0
________________________________________________________________________________
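A quick sanity check on the summary: the first convolution has (3×3×1)×32 weights + 32 biases = 320 parameters, the three 2×2 poolings and two stride-(1,2) convolutions reduce the feature map to 5×5×64 = 1600 values before flattening, and the first dense layer (1600×512 + 512 = 819,712 parameters) accounts for most of the 942,954 total.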
In [5]:
# callbacks=[TensorBoard(log_dir='./log')]
history = model.fit(train_x, train_y, 
                    epochs=20, batch_size=32, 
                    validation_data=(test_x, test_y))
show_history(history)
Train on 2686 samples, validate on 837 samples
Epoch 1/20
2686/2686 [==============================] - 11s 4ms/step - loss: 2.1792 - acc: 0.1489 - val_loss: 1.7582 - val_acc: 0.3238
Epoch 2/20
2686/2686 [==============================] - 10s 4ms/step - loss: 1.8199 - acc: 0.2669 - val_loss: 1.5907 - val_acc: 0.3441
Epoch 3/20
2686/2686 [==============================] - 14s 5ms/step - loss: 1.6164 - acc: 0.3570 - val_loss: 1.5220 - val_acc: 0.4313
Epoch 4/20
2686/2686 [==============================] - 14s 5ms/step - loss: 1.4913 - acc: 0.4099 - val_loss: 1.5972 - val_acc: 0.3763
Epoch 5/20
2686/2686 [==============================] - 15s 6ms/step - loss: 1.4049 - acc: 0.4646 - val_loss: 1.4690 - val_acc: 0.4026
Epoch 6/20
2686/2686 [==============================] - 15s 6ms/step - loss: 1.3350 - acc: 0.4829 - val_loss: 1.4243 - val_acc: 0.4707
Epoch 7/20
2686/2686 [==============================] - 15s 6ms/step - loss: 1.2170 - acc: 0.5343 - val_loss: 1.4735 - val_acc: 0.4863
Epoch 8/20
2686/2686 [==============================] - 15s 6ms/step - loss: 1.2066 - acc: 0.5402 - val_loss: 1.3760 - val_acc: 0.4839
Epoch 9/20
2686/2686 [==============================] - 15s 6ms/step - loss: 1.1188 - acc: 0.5611 - val_loss: 1.3651 - val_acc: 0.4743
Epoch 10/20
2686/2686 [==============================] - 16s 6ms/step - loss: 1.0607 - acc: 0.5983 - val_loss: 1.3697 - val_acc: 0.5269
Epoch 11/20
2686/2686 [==============================] - 15s 6ms/step - loss: 1.0086 - acc: 0.6039 - val_loss: 1.3516 - val_acc: 0.5269
Epoch 12/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.9337 - acc: 0.6448 - val_loss: 1.3869 - val_acc: 0.5532
Epoch 13/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.9101 - acc: 0.6564 - val_loss: 1.4543 - val_acc: 0.5161
Epoch 14/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.8409 - acc: 0.6802 - val_loss: 1.4127 - val_acc: 0.5329
Epoch 15/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.8137 - acc: 0.6869 - val_loss: 1.4134 - val_acc: 0.5376
Epoch 16/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.7572 - acc: 0.7163 - val_loss: 1.4846 - val_acc: 0.5329
Epoch 17/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.7197 - acc: 0.7238 - val_loss: 1.5415 - val_acc: 0.5257
Epoch 18/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.6359 - acc: 0.7532 - val_loss: 1.6522 - val_acc: 0.5400
Epoch 19/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.5783 - acc: 0.7833 - val_loss: 1.5846 - val_acc: 0.5603
Epoch 20/20
2686/2686 [==============================] - 15s 6ms/step - loss: 0.5564 - acc: 0.7926 - val_loss: 1.8774 - val_acc: 0.5412
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
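The TensorBoard callback imported in cell [1] is left commented out above. If logging were wanted, one way it could be wired in is sketched below (the ./log directory mirrors the commented-out line; this run did not use it).

# Optional sketch, not executed here: enable TensorBoard logging for the same fit call.
from keras.callbacks import TensorBoard

tb = TensorBoard(log_dir='./log')
history = model.fit(train_x, train_y,
                    epochs=20, batch_size=32,
                    validation_data=(test_x, test_y),
                    callbacks=[tb])
# inspect with: tensorboard --logdir ./log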
In [6]:
model.evaluate(test_x,test_y)
837/837 [==============================] - 1s 2ms/step
Out[6]:
[1.8774392947002694, 0.5412186380640438]
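Because the test set was also passed as validation_data during training, this evaluate call simply reproduces the final epoch's validation loss and accuracy (about 54%); the val_x/val_y arrays loaded earlier go unused. For a per-class view of where the remaining errors sit, a confusion matrix over the argmax predictions is one option; the sketch below uses sklearn.metrics, which is an addition, not part of the original notebook.

# Hypothetical follow-up: per-class breakdown of the test predictions.
import numpy as np
from sklearn.metrics import confusion_matrix

pred = model.predict(test_x)          # (837, 10) class probabilities
true_labels = test_y.argmax(axis=1)   # undo the one-hot encoding
pred_labels = pred.argmax(axis=1)
print(confusion_matrix(true_labels, pred_labels))  # rows: true class, columns: predicted class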