models.py 9.47 KB
Newer Older
1
2
3
"""
A collection of models we'll use to attempt to classify videos.
"""
Matt Harvey's avatar
Matt Harvey committed
4
from keras.layers import Dense, Flatten, Dropout, ZeroPadding3D
Matt Harvey's avatar
Matt Harvey committed
5
from keras.layers.recurrent import LSTM
6
from keras.models import Sequential, load_model
Matt Harvey's avatar
Matt Harvey committed
7
from keras.optimizers import Adam, RMSprop
8
from keras.layers.wrappers import TimeDistributed
9
from keras.layers.convolutional import (Conv2D, MaxPooling3D, Conv3D,
Matt Harvey's avatar
Matt Harvey committed
10
    MaxPooling2D)
11
from collections import deque
Matt Harvey's avatar
Matt Harvey committed
12
import sys
13
14
15
16
17
18
19

class ResearchModels():
    """Build (or load) and compile a Keras model for video classification.

    After construction the instance exposes:
        `model` = the compiled Keras model
        `input_shape` = the input shape of the freshly built network
            (not set when the model is loaded from `saved_model`)
        `seq_length`, `nb_classes`, `saved_model` = the constructor arguments
        `load_model` = the Keras `load_model` function
        `feature_queue` = an empty deque
    """

    def __init__(self, nb_classes, model, seq_length,
                 saved_model=None, features_length=2048):
        """
        `model` = one of:
            lstm
            lrcn
            mlp
            conv_3d
            c3d
        `nb_classes` = the number of classes to predict
        `seq_length` = the length of our video sequences
        `saved_model` = the path to a saved Keras model to load
        `features_length` = the size of the second input dimension used by
            the `lstm` and `mlp` models, whose input shape is
            (seq_length, features_length)

        If `saved_model` is given it is loaded and `model` is ignored.
        An unknown `model` name (with no `saved_model`) prints a message
        and exits the process with status 1.
        """
        feature_shape = (seq_length, features_length)
        frame_shape = (seq_length, 80, 80, 3)

        # Network name -> (log message, input shape, builder method).
        known_models = {
            'lstm': ("Loading LSTM model.", feature_shape, self.lstm),
            'lrcn': ("Loading CNN-LSTM model.", frame_shape, self.lrcn),
            'mlp': ("Loading simple MLP.", feature_shape, self.mlp),
            'conv_3d': ("Loading Conv3D", frame_shape, self.conv_3d),
            'c3d': ("Loading C3D", frame_shape, self.c3d),
        }

        # Fail fast, and with a non-zero status so calling scripts can
        # detect the error.
        if saved_model is None and model not in known_models:
            print("Unknown network.")
            sys.exit(1)

        # Set defaults.
        self.seq_length = seq_length
        self.load_model = load_model
        self.saved_model = saved_model
        self.nb_classes = nb_classes
        self.feature_queue = deque()

        # Set the metrics. Only use top k if there's a need.
        metrics = ['accuracy']
        if self.nb_classes >= 10:
            metrics.append('top_k_categorical_accuracy')

        # Get the appropriate model.
        if self.saved_model is not None:
            print("Loading model %s" % self.saved_model)
            self.model = load_model(self.saved_model)
        else:
            message, self.input_shape, build = known_models[model]
            print(message)
            self.model = build()

        # Now compile the network. A loaded model is compiled again too.
        optimizer = Adam(lr=1e-5, decay=1e-6)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=optimizer, metrics=metrics)

        # summary() prints the table itself; wrapping it in print() would
        # only add a stray "None" line.
        self.model.summary()

    def lstm(self):
        """Build a simple LSTM network. We pass the extracted features from
        our CNN to this model predominantly."""
        # Model.
        model = Sequential()
        model.add(LSTM(2048, return_sequences=False,
                       input_shape=self.input_shape,
                       dropout=0.5))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def lrcn(self):
        """Build a CNN into RNN.
        Starting version from:
            https://github.com/udacity/self-driving-car/blob/master/
                steering-models/community-models/chauffeur/models.py

        Heavily influenced by VGG-16:
            https://arxiv.org/abs/1409.1556

        Also known as an LRCN:
            https://arxiv.org/pdf/1411.4389.pdf
        """
        model = Sequential()

        # Stem: a strided 7x7 convolution followed by an unpadded 3x3 one.
        model.add(TimeDistributed(
            Conv2D(32, (7, 7), strides=(2, 2),
                   activation='relu', padding='same'),
            input_shape=self.input_shape))
        model.add(TimeDistributed(
            Conv2D(32, (3, 3),
                   kernel_initializer="he_normal", activation='relu')))
        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        # Four identical groups (two padded 3x3 convolutions, then a 2x2
        # pool), doubling the filter count each time.
        for filters in (64, 128, 256, 512):
            for _ in range(2):
                model.add(TimeDistributed(
                    Conv2D(filters, (3, 3),
                           padding='same', activation='relu')))
            model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))

        # Flatten each frame's feature maps, then run the LSTM over the
        # sequence of frames.
        model.add(TimeDistributed(Flatten()))

        model.add(Dropout(0.5))
        model.add(LSTM(256, return_sequences=False, dropout=0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def mlp(self):
        """Build a simple MLP. It uses extracted features as the input
        because of the otherwise too-high dimensionality."""
        # Model.
        model = Sequential()
        model.add(Flatten(input_shape=self.input_shape))
        # NOTE(review): the hidden Dense layers have no activation, so they
        # are linear. Left unchanged so previously trained models behave the
        # same -- confirm whether a non-linearity was intended.
        model.add(Dense(512))
        model.add(Dropout(0.5))
        model.add(Dense(512))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def conv_3d(self):
        """
        Build a 3D convolutional network, based loosely on C3D.
            https://arxiv.org/pdf/1412.0767.pdf
        """
        # Model.
        model = Sequential()
        model.add(Conv3D(
            32, (3, 3, 3), activation='relu', input_shape=self.input_shape
        ))
        # Every pool uses size 1 on the first (sequence) axis, so only the
        # two image axes are downsampled.
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Conv3D(64, (3, 3, 3), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Conv3D(128, (3, 3, 3), activation='relu'))
        model.add(Conv3D(128, (3, 3, 3), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
        model.add(Conv3D(256, (2, 2, 2), activation='relu'))
        model.add(Conv3D(256, (2, 2, 2), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))

        model.add(Flatten())
        # NOTE(review): as in mlp(), these hidden Dense layers have no
        # activation; left unchanged -- confirm whether relu was intended.
        model.add(Dense(1024))
        model.add(Dropout(0.5))
        model.add(Dense(1024))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def c3d(self):
        """
        Build a 3D convolutional network, aka C3D.
            https://arxiv.org/pdf/1412.0767.pdf

        With thanks:
            https://gist.github.com/albertomontesg/d8b21a179c1e6cca0480ebdf292c34d2

        The layers use the Keras 2 argument names (`padding`, `strides`,
        tuple kernel size), like the other builders in this class.
        """
        model = Sequential()
        # 1st layer group
        model.add(Conv3D(64, (3, 3, 3), activation='relu',
                         padding='same', name='conv1',
                         strides=(1, 1, 1),
                         input_shape=self.input_shape))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2),
                               padding='valid', name='pool1'))
        # 2nd layer group
        model.add(Conv3D(128, (3, 3, 3), activation='relu',
                         padding='same', name='conv2',
                         strides=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               padding='valid', name='pool2'))
        # 3rd layer group
        model.add(Conv3D(256, (3, 3, 3), activation='relu',
                         padding='same', name='conv3a',
                         strides=(1, 1, 1)))
        model.add(Conv3D(256, (3, 3, 3), activation='relu',
                         padding='same', name='conv3b',
                         strides=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               padding='valid', name='pool3'))
        # 4th layer group
        model.add(Conv3D(512, (3, 3, 3), activation='relu',
                         padding='same', name='conv4a',
                         strides=(1, 1, 1)))
        model.add(Conv3D(512, (3, 3, 3), activation='relu',
                         padding='same', name='conv4b',
                         strides=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               padding='valid', name='pool4'))

        # 5th layer group
        model.add(Conv3D(512, (3, 3, 3), activation='relu',
                         padding='same', name='conv5a',
                         strides=(1, 1, 1)))
        model.add(Conv3D(512, (3, 3, 3), activation='relu',
                         padding='same', name='conv5b',
                         strides=(1, 1, 1)))
        # Pad only the two image axes before the final pool.
        model.add(ZeroPadding3D(padding=(0, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               padding='valid', name='pool5'))
        model.add(Flatten())

        # FC layers group
        model.add(Dense(4096, activation='relu', name='fc6'))
        model.add(Dropout(0.5))
        model.add(Dense(4096, activation='relu', name='fc7'))
        model.add(Dropout(0.5))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model