Sound Classifier Neural Network

In [1]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

plt.style.use('ggplot')

import keras
from keras import models
from keras.layers import Dense, Dropout, Activation
from keras.utils import to_categorical
keras.__version__
Using TensorFlow backend.
Out[1]:
'2.2.0'
In [2]:
spl = 0  # set to 1 to use the small 'samples/' subset instead of the full training set

parent_dir = '../data/UrbanSound8K/audio/'
if spl:
    sub_dir = 'samples/'
else:
    sub_dir = 'train/'    
file_name = '*.wav'
files = glob.glob(os.path.join(parent_dir, sub_dir, file_name))
test_files = glob.glob(os.path.join(parent_dir, 'fold10/', file_name))
files[1]
Out[2]:
'../data/UrbanSound8K/audio/train\\100652-3-0-1.wav'
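The path itself carries the label: UrbanSound8K files are named fsID-classID-occurrenceID-sliceID.wav, so the second dash-separated field of 100652-3-0-1.wav is class 3 (dog_bark). A small lookup table makes the numeric IDs readable later; this is a minimal sketch following the class list documented with the dataset:

In [ ]:
# UrbanSound8K class IDs, per the dataset's documentation
class_names = {
    0: 'air_conditioner', 1: 'car_horn', 2: 'children_playing',
    3: 'dog_bark', 4: 'drilling', 5: 'engine_idling',
    6: 'gun_shot', 7: 'jackhammer', 8: 'siren', 9: 'street_music',
}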
In [3]:
def parse_audio(files):
    # 40 mean MFCCs + 12 mean chroma bins = 52 features per clip
    features, labels = np.empty((0, 52)), np.empty(0)
    for file in files:
        # extract the class ID (second dash-separated field) from the filename;
        # os.path.basename works on both Windows and POSIX paths
        labels = np.append(labels, os.path.basename(file).split('-')[1])

        # extract features from the audio file
        x, sr = librosa.load(file)
        stft = np.abs(librosa.stft(x))
        mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sr, n_mfcc=40).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
        feature = np.hstack((mfccs, chroma))
        features = np.vstack((features, feature))
    return np.array(features), np.array(labels, dtype=int)
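Each clip is summarized as 40 mean MFCCs plus 12 mean chroma bins, which is where the 52 in np.empty((0, 52)) comes from. A quick sanity check on a single file (a minimal sketch, assuming the glob above matched at least one file):

In [ ]:
# verify the 40 + 12 = 52 feature layout on one clip
f, l = parse_audio(files[:1])
print(f.shape, l)  # expected: (1, 52) and a single integer label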
In [4]:
features, labels = parse_audio(files)
train_labels = to_categorical(labels)
print(features.shape)
print(labels.shape)

test_features, test_labels = parse_audio(test_files)
test_labels = to_categorical(test_labels)
In [22]:
# cache the extracted features so the slow librosa pass can be skipped later
np.savetxt('train_features', features)
np.savetxt('train_labels', train_labels)
np.savetxt('test_features', test_features)
np.savetxt('test_labels', test_labels)
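On later runs the cached arrays can be reloaded instead of repeating the slow librosa pass; a minimal sketch using the same filenames as the np.savetxt calls above:

In [ ]:
# reload cached features and one-hot labels
features = np.loadtxt('train_features')
train_labels = np.loadtxt('train_labels')
test_features = np.loadtxt('test_features')
test_labels = np.loadtxt('test_labels')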

Build and compile model

In [41]:

model = models.Sequential()

model.add(Dense(256, input_shape=(52,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
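Before training it is worth confirming the layer shapes; four 256-unit ReLU blocks on a 52-dimensional input come to roughly 214k trainable parameters (13,568 + 3 x 65,792 + 2,570):

In [ ]:
model.summary()  # layer shapes and parameter counts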

Train model

In [42]:
# Optional: log training to TensorBoard
# from keras.callbacks import TensorBoard

# tb = TensorBoard(log_dir='./logs',  # log directory
#                  histogram_freq=1,  # how often (in epochs) to compute activation histograms; 0 disables them
#                  batch_size=32,     # how much data to use when computing the histograms
#                  write_graph=True,  # whether to write the network graph
#                  write_grads=False, # whether to visualize gradient histograms
#                  write_images=False,# whether to visualize weights as images
#                  embeddings_freq=0,
#                  embeddings_layer_names=None,
#                  embeddings_metadata=None)
# callbacks = [tb]
# history = model.fit(features, train_labels, epochs=50, batch_size=32, validation_data=(test_features, test_labels),callbacks=callbacks)

history = model.fit(features, train_labels, epochs=50, batch_size=32, validation_data=(test_features, test_labels))
Train on 2686 samples, validate on 837 samples
Epoch 1/50
2686/2686 [==============================] - 2s 775us/step - loss: 13.1498 - acc: 0.1322 - val_loss: 8.5091 - val_acc: 0.2593
Epoch 2/50
2686/2686 [==============================] - 1s 242us/step - loss: 12.8031 - acc: 0.1608 - val_loss: 11.7348 - val_acc: 0.1589
Epoch 3/50
2686/2686 [==============================] - 1s 242us/step - loss: 11.8480 - acc: 0.1951 - val_loss: 9.7277 - val_acc: 0.2091
Epoch 4/50
2686/2686 [==============================] - 1s 240us/step - loss: 9.6323 - acc: 0.2074 - val_loss: 2.9280 - val_acc: 0.2151
Epoch 5/50
2686/2686 [==============================] - 1s 294us/step - loss: 4.0753 - acc: 0.1966 - val_loss: 2.2166 - val_acc: 0.2521
Epoch 6/50
2686/2686 [==============================] - 1s 268us/step - loss: 2.4666 - acc: 0.2211 - val_loss: 2.1852 - val_acc: 0.1995
Epoch 7/50
2686/2686 [==============================] - 1s 278us/step - loss: 2.2166 - acc: 0.2405 - val_loss: 2.1598 - val_acc: 0.2330
Epoch 8/50
2686/2686 [==============================] - 1s 247us/step - loss: 2.1291 - acc: 0.2580 - val_loss: 2.1237 - val_acc: 0.2748
Epoch 9/50
2686/2686 [==============================] - 1s 302us/step - loss: 2.0336 - acc: 0.2856 - val_loss: 2.0454 - val_acc: 0.3178
Epoch 10/50
2686/2686 [==============================] - 1s 251us/step - loss: 2.0170 - acc: 0.2967 - val_loss: 2.0451 - val_acc: 0.2915
Epoch 11/50
2686/2686 [==============================] - 1s 281us/step - loss: 1.9170 - acc: 0.3362 - val_loss: 1.9840 - val_acc: 0.3297
Epoch 12/50
2686/2686 [==============================] - 1s 298us/step - loss: 1.8561 - acc: 0.3574 - val_loss: 1.9121 - val_acc: 0.3501
Epoch 13/50
2686/2686 [==============================] - 1s 271us/step - loss: 1.7711 - acc: 0.3909 - val_loss: 1.8688 - val_acc: 0.3584
Epoch 14/50
2686/2686 [==============================] - 1s 286us/step - loss: 1.7446 - acc: 0.3943 - val_loss: 1.8162 - val_acc: 0.4098
Epoch 15/50
2686/2686 [==============================] - 1s 390us/step - loss: 1.6430 - acc: 0.4267 - val_loss: 1.7411 - val_acc: 0.4731
Epoch 16/50
2686/2686 [==============================] - 1s 413us/step - loss: 1.6218 - acc: 0.4401 - val_loss: 1.7364 - val_acc: 0.4421
Epoch 17/50
2686/2686 [==============================] - 1s 388us/step - loss: 1.5416 - acc: 0.4751 - val_loss: 1.7072 - val_acc: 0.4409
Epoch 18/50
2686/2686 [==============================] - 1s 407us/step - loss: 1.4907 - acc: 0.4829 - val_loss: 1.6668 - val_acc: 0.4217
Epoch 19/50
2686/2686 [==============================] - 1s 428us/step - loss: 1.4308 - acc: 0.5112 - val_loss: 1.6261 - val_acc: 0.4492
Epoch 20/50
2686/2686 [==============================] - 1s 391us/step - loss: 1.3739 - acc: 0.5261 - val_loss: 1.6281 - val_acc: 0.4922
Epoch 21/50
2686/2686 [==============================] - 1s 378us/step - loss: 1.3403 - acc: 0.5514 - val_loss: 1.6537 - val_acc: 0.4444
Epoch 22/50
2686/2686 [==============================] - 1s 421us/step - loss: 1.2957 - acc: 0.5506 - val_loss: 1.6364 - val_acc: 0.4516
Epoch 23/50
2686/2686 [==============================] - 1s 434us/step - loss: 1.2605 - acc: 0.5771 - val_loss: 1.6125 - val_acc: 0.5006
Epoch 24/50
2686/2686 [==============================] - 1s 473us/step - loss: 1.2356 - acc: 0.5663 - val_loss: 1.6059 - val_acc: 0.4839
Epoch 25/50
2686/2686 [==============================] - 1s 444us/step - loss: 1.1729 - acc: 0.5916 - val_loss: 1.6217 - val_acc: 0.4875
Epoch 26/50
2686/2686 [==============================] - 1s 463us/step - loss: 1.1612 - acc: 0.5953 - val_loss: 1.6144 - val_acc: 0.4456
Epoch 27/50
2686/2686 [==============================] - 1s 441us/step - loss: 1.1055 - acc: 0.6206 - val_loss: 1.6383 - val_acc: 0.4373
Epoch 28/50
2686/2686 [==============================] - 1s 414us/step - loss: 1.0957 - acc: 0.6210 - val_loss: 1.6738 - val_acc: 0.4851
Epoch 29/50
2686/2686 [==============================] - 1s 376us/step - loss: 1.0592 - acc: 0.6296 - val_loss: 1.6904 - val_acc: 0.4659
Epoch 30/50
2686/2686 [==============================] - 1s 382us/step - loss: 1.0491 - acc: 0.6512 - val_loss: 1.6667 - val_acc: 0.4970
Epoch 31/50
2686/2686 [==============================] - 1s 379us/step - loss: 1.0102 - acc: 0.6590 - val_loss: 1.6715 - val_acc: 0.4719
Epoch 32/50
2686/2686 [==============================] - 1s 398us/step - loss: 0.9938 - acc: 0.6660 - val_loss: 1.7188 - val_acc: 0.4910
Epoch 33/50
2686/2686 [==============================] - 1s 384us/step - loss: 0.9480 - acc: 0.6787 - val_loss: 1.6877 - val_acc: 0.4456
Epoch 34/50
2686/2686 [==============================] - 1s 408us/step - loss: 0.9410 - acc: 0.6727 - val_loss: 1.6739 - val_acc: 0.4516
Epoch 35/50
2686/2686 [==============================] - 1s 392us/step - loss: 0.9400 - acc: 0.6821 - val_loss: 1.6848 - val_acc: 0.4970
Epoch 36/50
2686/2686 [==============================] - 1s 405us/step - loss: 0.9268 - acc: 0.6917 - val_loss: 1.6262 - val_acc: 0.5114
Epoch 37/50
2686/2686 [==============================] - 1s 387us/step - loss: 0.8449 - acc: 0.7159 - val_loss: 1.7366 - val_acc: 0.5257
Epoch 38/50
2686/2686 [==============================] - 1s 382us/step - loss: 0.8960 - acc: 0.6999 - val_loss: 1.6705 - val_acc: 0.4815
Epoch 39/50
2686/2686 [==============================] - 1s 386us/step - loss: 0.8650 - acc: 0.7137 - val_loss: 1.6759 - val_acc: 0.5006
Epoch 40/50
2686/2686 [==============================] - 1s 379us/step - loss: 0.8479 - acc: 0.7163 - val_loss: 1.7041 - val_acc: 0.5006
Epoch 41/50
2686/2686 [==============================] - 1s 392us/step - loss: 0.8406 - acc: 0.7241 - val_loss: 1.6608 - val_acc: 0.4946
Epoch 42/50
2686/2686 [==============================] - 1s 396us/step - loss: 0.8141 - acc: 0.7297 - val_loss: 1.6669 - val_acc: 0.4875
Epoch 43/50
2686/2686 [==============================] - 1s 406us/step - loss: 0.7533 - acc: 0.7375 - val_loss: 1.7022 - val_acc: 0.4958
Epoch 44/50
2686/2686 [==============================] - 1s 429us/step - loss: 0.7606 - acc: 0.7427 - val_loss: 1.6468 - val_acc: 0.4910
Epoch 45/50
2686/2686 [==============================] - 1s 416us/step - loss: 0.7576 - acc: 0.7319 - val_loss: 1.7242 - val_acc: 0.5125
Epoch 46/50
2686/2686 [==============================] - 1s 411us/step - loss: 0.7575 - acc: 0.7502 - val_loss: 1.6251 - val_acc: 0.5209
Epoch 47/50
2686/2686 [==============================] - 1s 380us/step - loss: 0.7460 - acc: 0.7509 - val_loss: 1.6690 - val_acc: 0.5018
Epoch 48/50
2686/2686 [==============================] - 1s 384us/step - loss: 0.7294 - acc: 0.7494 - val_loss: 1.6261 - val_acc: 0.5137
Epoch 49/50
2686/2686 [==============================] - 1s 372us/step - loss: 0.7060 - acc: 0.7602 - val_loss: 1.6200 - val_acc: 0.5281
Epoch 50/50
2686/2686 [==============================] - 1s 388us/step - loss: 0.6886 - acc: 0.7535 - val_loss: 1.6549 - val_acc: 0.5257
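Training accuracy climbs to about 0.75 while validation accuracy plateaus around 0.50-0.53 from epoch 20 onwards, so the model is overfitting these 52-dimensional features. Keras's built-in EarlyStopping callback is one way to stop the run automatically; a minimal sketch, not part of the run above:

In [ ]:
# optional: halt training once val_loss stops improving for 5 epochs
from keras.callbacks import EarlyStopping

es = EarlyStopping(monitor='val_loss', patience=5)
history = model.fit(features, train_labels, epochs=50, batch_size=32,
                    validation_data=(test_features, test_labels),
                    callbacks=[es])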
In [48]:
print(history.history.keys())

fig = plt.figure(figsize=(20,5))
plt.subplot(121)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.subplot(122)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower left')
plt.show()
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

Test model

In [49]:
test_loss, test_acc = model.evaluate(test_features, test_labels)
test_acc
837/837 [==============================] - 0s 306us/step
Out[49]:
0.5256869772998806
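The 52.6% held-out accuracy matches the final validation epochs. A confusion matrix over fold10 gives the per-class picture and shows which of the ten classes are conflated; a sketch assuming scikit-learn is installed (class_names from the earlier sketch can label the rows and columns):

In [ ]:
# per-class breakdown on the held-out fold
from sklearn.metrics import confusion_matrix

pred = np.argmax(model.predict(test_features), axis=1)
true = np.argmax(test_labels, axis=1)
print(confusion_matrix(true, pred))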