# Matt Harvey authored 1d5e2c5b
"""
A collection of models we'll use to attempt to classify videos.
"""
from keras.layers import Dense, Flatten, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.layers.wrappers import TimeDistributed
from keras.layers.convolutional import (Convolution2D, MaxPooling3D, Convolution3D,
MaxPooling2D)
from collections import deque
import sys
class ResearchModels():
    """Build (or load) and compile one of several video classifiers.

    After construction the compiled Keras model is available as
    `self.model`.
    """

    def __init__(self, nb_classes, model, seq_length,
                 saved_model=None, features_length=2048):
        """
        `model` = one of:
            lstm
            crnn
            mlp
            conv_3d
        `nb_classes` = the number of classes to predict
        `seq_length` = the length of our video sequences
        `saved_model` = the path to a saved Keras model to load
        `features_length` = the size of each per-frame feature vector;
            only used to size the input of the `lstm` and `mlp` models

        If `saved_model` is given it takes precedence and `model` is
        ignored. Whichever way the model is obtained, it is (re)compiled
        here with categorical cross-entropy and a fresh Adam optimizer.

        Raises SystemExit (status 1) when no saved model is given and
        `model` is not one of the names listed above.
        """
        # Fail fast on an unknown architecture name, before any other work
        # is done. The name only matters when nothing is loaded from disk.
        known_models = ('lstm', 'crnn', 'mlp', 'conv_3d')
        if saved_model is None and model not in known_models:
            print("Unknown network.")
            # Non-zero status: this is an error, not a clean shutdown.
            sys.exit(1)

        # Set defaults.
        self.seq_length = seq_length
        self.load_model = load_model
        self.saved_model = saved_model
        self.nb_classes = nb_classes
        # NOTE(review): not read anywhere in this class; presumably filled
        # by callers -- confirm before removing.
        self.feature_queue = deque()

        # Set the metrics. Only use top k if there's a need.
        metrics = ['accuracy']
        if self.nb_classes >= 10:
            metrics.append('top_k_categorical_accuracy')

        # Get the appropriate model.
        if self.saved_model is not None:
            print("Loading model %s" % self.saved_model)
            self.model = load_model(self.saved_model)
        elif model == 'lstm':
            print("Loading Deep LSTM model.")
            self.input_shape = (seq_length, features_length)
            self.model = self.lstm()
        elif model == 'crnn':
            print("Loading CRNN model.")
            self.input_shape = (seq_length, 80, 80, 3)
            self.model = self.crnn()
        elif model == 'mlp':
            print("Loading simple MLP.")
            # A flat integer, not a tuple: the MLP consumes the whole
            # sequence of features as one vector (see `input_dim` in mlp()).
            self.input_shape = features_length * seq_length
            self.model = self.mlp()
        elif model == 'conv_3d':
            print("Loading Conv3D")
            self.input_shape = (seq_length, 80, 80, 3)
            self.model = self.conv_3d()

        # Now compile the network.
        optimizer = Adam(lr=1e-6)  # aggressively small learning rate
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                           metrics=metrics)

    def lstm(self):
        """Build a simple LSTM network.

        We pass the extracted features from our CNN to this model
        predominantly. Reads `self.input_shape` and `self.nb_classes`
        and returns the uncompiled Sequential model.
        """
        # Model.
        model = Sequential()
        model.add(LSTM(4096, return_sequences=True, input_shape=self.input_shape,
                       dropout_W=0.5, dropout_U=0.5))
        model.add(Flatten())
        model.add(Dense(1024, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def crnn(self):
        """Build a CNN into RNN.

        Starting version from:
        https://github.com/udacity/self-driving-car/blob/master/
        steering-models/community-models/chauffeur/models.py

        Four stages of (conv, conv, max-pool) are applied to every frame
        via TimeDistributed, then an LSTM runs over the per-frame
        features. Reads `self.input_shape` and `self.nb_classes` and
        returns the uncompiled Sequential model.
        """
        model = Sequential()

        # Only the first layer is given `input_shape`; the kwargs dict is
        # emptied once that layer has been added.
        first_layer_kwargs = {'input_shape': self.input_shape}
        for nb_filters in (32, 48, 64, 128):
            for _ in range(2):
                conv = Convolution2D(nb_filters, 3, 3,
                                     init="he_normal",
                                     activation='relu',
                                     border_mode='valid')
                model.add(TimeDistributed(conv, **first_layer_kwargs))
                first_layer_kwargs = {}
            model.add(TimeDistributed(MaxPooling2D()))

        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(256, return_sequences=True))
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def mlp(self):
        """Build a simple MLP.

        Reads `self.input_shape` (an integer input dimension for this
        model) and `self.nb_classes`; returns the uncompiled Sequential
        model.
        """
        # Model.
        model = Sequential()
        model.add(Dense(512, input_dim=self.input_shape))
        model.add(Dropout(0.5))
        model.add(Dense(512))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def conv_3d(self):
        """
        Build a 3D convolutional network, based loosely on C3D.
            https://arxiv.org/pdf/1412.0767.pdf

        Reads `self.input_shape` and `self.nb_classes`; returns the
        uncompiled Sequential model.
        """
        # Model.
        model = Sequential()
        model.add(Convolution3D(
            32, 7, 7, 7, activation='relu', input_shape=self.input_shape
        ))
        # Every pooling layer uses a size and stride of 1 along the first
        # pooled axis, so only the other two axes are downsampled.
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Convolution3D(64, 3, 3, 3, activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Convolution3D(128, 2, 2, 2, activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model