Commit 41d3f8a5 authored by Matt Harvey's avatar Matt Harvey
Browse files

Initial commit of blog post for video classification methods.

Showing with 1127 additions and 0 deletions
+1127 -0
.gitignore 0 → 100644
*jpg
*rar
*zip
*gz
*avi
*swp
data/logs/*
data/checkpoints/*
data/sequences/*
data/train/*
data/test/*
README.md 0 → 100644
# Five video classification methods
The five video classification methods:
1. Classify one frame at a time with a ConvNet
1. Extract features from each frame with a ConvNet, passing the sequence to an RNN, in a separate network
1. Use a time-distributed ConvNet, passing the features to an RNN, much like #2 but all in one network
1. Extract features from each frame with a ConvNet and pass the sequence to an MLP
1. Use a 3D convolutional network
See the accompanying blog post for full details: TK
## Getting the data
First, download the dataset from UCF into the `data` folder:
`cd data && wget http://crcv.ucf.edu/data/UCF101/UCF101.rar`
Then extract it with `unrar e UCF101.rar`.
Next, create train and test folders with `mkdir train && mkdir test`.
Now you can run the scripts in the data folder to move the videos to the appropriate place, extract their frames and make the CSV file the rest of the code references.
## Extracting features
Before you can run Methods #2 and #4, you need to extract features from the images with the CNN. This is done by running `extract_features.py`. On my Dell with a GeForce 960m GPU, this takes about 8 hours. If you want to limit to just the first N classes, you can set that option in the file.
## Running models
The CNN-only method (method #1 in the blog post) is run from `train_cnn.py`.
The rest of the models are run from `train.py`. There are configuration options you can set in that file to choose which model you want to run.
The models are all defined in `models.py`. Reference that file to see which models you are able to run in `train.py`.
### UCF101 Citation
Khurram Soomro, Amir Roshan Zamir and Mubarak Shah, UCF101: A Dataset of 101 Human Action Classes From Videos in The Wild., CRCV-TR-12-01, November, 2012.
File added
data.py 0 → 100644
"""
Class for managing our data.
"""
import csv
import numpy as np
import random
import glob
import os.path
import pandas as pd
import sys
import operator
from processor import process_image
from keras.utils import np_utils
class DataSet():
    """Index of the extracted videos plus batch generators built on top of it.

    Each row of ``./data/data_file.csv`` is read as
    ``[train|test, class name, filename (no extension), nb frames]``
    (the frame count is kept as a string, exactly as read from the CSV).
    """

    def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3)):
        """Constructor.

        seq_length = (int) the number of frames to consider
        class_limit = (int) number of classes to limit the data to.
            None = no limit.
        image_shape = (height, width, channels) frames are resized to when
            the generator is asked for raw images.
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.sequence_path = './data/sequences/'
        self.max_frames = 300  # max number of frames a video can have for us to use it

        # Get the data.
        self.data = self.get_data()

        # Get the classes. This must happen before cleaning, because
        # clean_data() filters on self.classes.
        self.classes = self.get_classes()

        # Now do some minor data cleaning.
        self.data = self.clean_data()

        self.image_shape = image_shape

    @staticmethod
    def get_data():
        """Load our data from file, skipping blank rows."""
        with open('./data/data_file.csv', 'r') as fin:
            # csv.reader yields [] for empty lines; indexing those rows
            # later would raise IndexError, so drop them here.
            return [row for row in csv.reader(fin) if row]

    def clean_data(self):
        """Limit samples to greater than the sequence length and fewer
        than N frames. Also limit it to classes we want to use."""
        return [
            item for item in self.data
            if self.seq_length <= int(item[3]) <= self.max_frames
            and item[1] in self.classes
        ]

    def get_classes(self):
        """Extract the classes from our data. If we want to limit them,
        only return the classes we need (the first N, alphabetically)."""
        classes = sorted(set(item[1] for item in self.data))

        if self.class_limit is not None:
            return classes[:self.class_limit]
        return classes

    def get_class_one_hot(self, class_str):
        """Given a class as a string, return its one-hot encoded vector.

        Raises ValueError (from list.index) if the class is unknown.
        """
        # Encode it first.
        label_encoded = self.classes.index(class_str)

        # Now one-hot it. Depending on the Keras version, a scalar label
        # comes back either as a (1, n) matrix or as an (n,) vector;
        # flattening yields a single row of length n in both cases.
        label_hot = np_utils.to_categorical(label_encoded, len(self.classes))
        return np.asarray(label_hot).reshape(-1)

    def split_train_test(self):
        """Split the data into train and test groups.

        Any row whose first column is not 'train' goes to the test group.
        """
        train = []
        test = []
        for item in self.data:
            if item[0] == 'train':
                train.append(item)
            else:
                test.append(item)
        return train, test

    def frame_generator(self, batch_size, train_test, data_type, concat=False):
        """Return a generator that we can use to train on.

        batch_size = number of samples per yielded batch
        train_test = 'train' for the train split, anything else for test
        data_type = 'images' to load and resize frames, anything else
            (e.g. 'features') to load saved feature sequences from disk
        concat = flatten each sequence into a single 1-D array (used to
            feed an MLP rather than an RNN)

        Yields (X, y) numpy arrays forever; samples are drawn at random
        with replacement. Raises FileNotFoundError if a saved feature
        sequence is missing.
        """
        # Get the right dataset for the generator.
        train, test = self.split_train_test()
        data = train if train_test == 'train' else test

        print("Creating %s generator with %d samples." % (train_test, len(data)))

        while 1:
            X, y = [], []

            # Generate batch_size samples.
            for _ in range(batch_size):
                # Get a random sample.
                sample = random.choice(data)

                # Compare by value: identity comparison against a string
                # literal only works when the interpreter happens to
                # intern both strings.
                if data_type == "images":
                    # Get and resample frames.
                    frames = self.get_frames_for_sample(sample)
                    frames = self.rescale_list(frames, self.seq_length)

                    # Build the image sequence
                    sequence = self.build_image_sequence(frames)
                else:
                    # Get the sequence from disk.
                    sequence = self.get_extracted_sequence(sample)

                if sequence is None:
                    # Raise rather than sys.exit(): an exit raised from
                    # inside a generator is easy to lose when the
                    # generator is consumed from a worker thread.
                    raise FileNotFoundError(
                        "Can't find sequence. Did you generate them?")

                if concat:
                    # We want to pass the sequence back as a single array. This
                    # is used to pass into an MLP rather than an RNN.
                    sequence = np.concatenate(sequence).ravel()

                X.append(sequence)
                y.append(self.get_class_one_hot(sample[1]))

            yield np.array(X), np.array(y)

    def build_image_sequence(self, frames):
        """Given a set of frames (filenames), build our sequence."""
        return [process_image(x, self.image_shape) for x in frames]

    def get_extracted_sequence(self, sample):
        """Get the saved extracted features, or None if the file is missing."""
        filename = sample[2]
        path = self.sequence_path + filename + '-' + str(self.seq_length) + \
            '-features.txt'
        if os.path.isfile(path):
            # Use a dataframe/read_csv for speed increase over numpy.
            features = pd.read_csv(path, sep=" ", header=None)
            return features.values
        return None

    @staticmethod
    def get_frames_for_sample(sample):
        """Given a sample row from the data file, get all the corresponding frame
        filenames, sorted by name."""
        path = './data/' + sample[0] + '/' + sample[1] + '/'
        filename = sample[2]
        return sorted(glob.glob(path + filename + '*jpg'))

    @staticmethod
    def get_filename_from_image(filename):
        """Return the last '/'-separated component with '.jpg' removed."""
        parts = filename.split('/')
        return parts[-1].replace('.jpg', '')

    @staticmethod
    def rescale_list(input_list, size):
        """Given a list and a size, return a rescaled/sampled list. For example,
        if we want a list of size 5 and we have a list of size 25, return a new
        list of size five which is every 5th element of the original list."""
        assert len(input_list) >= size

        # Get the number to skip between iterations.
        skip = len(input_list) // size

        # Take every skip-th element, then cut off the extras if needed.
        return input_list[::skip][:size]

    def print_class_from_prediction(self, predictions, nb_to_return=5):
        """Given a prediction, print the top classes.

        predictions = one score per entry of self.classes, in that order
        nb_to_return = maximum number of classes to print

        This is an instance method so it reads this dataset's own class
        list instead of depending on a module-level variable named `data`.
        Printing stops early at the first score equal to 0.0.
        """
        # Get the prediction for each label.
        label_predictions = dict(zip(self.classes, predictions))

        # Now sort them, highest score first.
        sorted_lps = sorted(
            label_predictions.items(),
            key=operator.itemgetter(1),
            reverse=True
        )

        # And print the top N.
        for label, score in sorted_lps[:nb_to_return]:
            if score == 0.0:
                break
            print("%s: %.2f" % (label, score))
if __name__ == '__main__':
    # Smoke test: pull one batch of image sequences from the train
    # generator and dump the frames of its first sample to ./tmp.
    from PIL import Image

    data = DataSet(seq_length=20, class_limit=3)
    print(data.classes)

    gen = data.frame_generator(32, 'train', 'images', False)

    # The frames are saved under tmp/, which may not exist yet.
    os.makedirs('tmp', exist_ok=True)

    # x should be 32, with each having seq_length images. Only the first
    # sample of the first batch is inspected.
    x, y = next(gen)
    for i, image in enumerate(x[0]):
        # Scale back by 255 (process_image is expected to have divided
        # by it) before converting to 8-bit for saving.
        image *= 255
        actual = Image.fromarray(image.astype('uint8'))
        actual.save('tmp/whatever-' + str(i) + '.jpg')

    print(y[0])
    data.print_class_from_prediction(y[0])
"""
After extracting the RAR, we run this to move all the files into
the appropriate train/test folders.
Should only run this file once!
"""
import os
import os.path
def get_train_test_lists(version='01'):
    """
    Using one of the train/test files (01, 02, or 03), get the filename
    breakdowns we'll later use to move everything.

    Returns a dict ``{'train': [...], 'test': [...]}`` of entries taken
    from ``./ucfTrainTestlist/``. Blank lines are skipped so that callers
    never see empty entries.
    """
    # Get our files based on version.
    test_file = './ucfTrainTestlist/testlist' + version + '.txt'
    train_file = './ucfTrainTestlist/trainlist' + version + '.txt'

    # Build the test list.
    with open(test_file) as fin:
        test_list = [row.strip() for row in fin if row.strip()]

    # Build the train list. Each row carries a trailing class index after
    # whitespace; keep only the first field.
    with open(train_file) as fin:
        train_list = [row.split()[0] for row in fin if row.strip()]

    # Set the groups in a dictionary.
    return {
        'train': train_list,
        'test': test_list,
    }
def move_files(file_groups):
    """This assumes all of our files are currently in _this_ directory.
    So move them to the appropriate spot. Only needs to happen once.

    file_groups = dict mapping a group name ('train'/'test') to a list of
        'classname/filename' entries.

    Entries that do not have exactly that two-part form are reported and
    skipped, as are files that cannot be found (e.g. already moved).
    """
    # Do each of our groups.
    for group, videos in file_groups.items():

        # Do each of our videos.
        for video in videos:

            # Get the parts.
            parts = video.split('/')
            if len(parts) != 2:
                print("Unexpected entry %r. Skipping." % (video,))
                continue
            classname, filename = parts
            class_dir = group + '/' + classname

            # Check if this class exists.
            if not os.path.exists(class_dir):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(class_dir)

            # Check if we have already moved this file, or at least that it
            # exists to move.
            if not os.path.exists(filename):
                print("Can't find %s to move. Skipping." % (filename))
                continue

            # Move it.
            dest = class_dir + '/' + filename
            print("Moving %s to %s" % (filename, dest))
            os.rename(filename, dest)

    print("Done.")
def main():
    """
    Read the train/test split lists and relocate every listed video into
    its ``<group>/<class>/`` folder.
    """
    # Build the per-group video lists, then move the files accordingly.
    move_files(get_train_test_lists())


if __name__ == '__main__':
    main()
"""
After moving all the files using the 1_ file, we run this one to extract
the images from the videos and also create a data file we can use
for training and testing later.
"""
import csv
import glob
import os
import os.path
from subprocess import call
def extract_files():
    """After we have all of our videos split between train and test, and
    all nested within folders representing their classes, we need to
    make a data file that we can reference when training our RNN(s).
    This will let us keep track of image sequences and other parts
    of the training process.

    We'll first need to extract images from each of the videos. We'll
    need to record the following data in the file:

    [train|test], class, filename, nb frames

    Extracting can be done with ffmpeg:
    `ffmpeg -i video.mpg image-%04d.jpg`

    The rows are written to ``data_file.csv`` in the current directory.
    Videos whose first frame already exists are not re-extracted.
    """
    data_file = []
    folders = ['./train/', './test/']

    for folder in folders:
        class_folders = glob.glob(folder + '*')

        for vid_class in class_folders:
            class_files = glob.glob(vid_class + '/*.avi')

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path)

                train_or_test, classname, filename_no_ext, filename = video_parts

                # Only extract if we haven't done it yet. Otherwise, just get
                # the info.
                if not check_already_extracted(video_parts):
                    # Now extract it.
                    src = train_or_test + '/' + classname + '/' + \
                        filename
                    dest = train_or_test + '/' + classname + '/' + \
                        filename_no_ext + '-%04d.jpg'
                    return_code = call(["ffmpeg", "-i", src, dest])
                    if return_code != 0:
                        # Surface failures instead of silently recording
                        # a video with zero frames.
                        print("ffmpeg exited with code %d for %s"
                              % (return_code, src))

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts)

                data_file.append([train_or_test, classname, filename_no_ext, nb_frames])

                print("Generated %d frames for %s" % (nb_frames, filename_no_ext))

    # newline='' is what the csv module asks for; without it, platforms
    # that translate line endings get an empty row after every record.
    with open('data_file.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))
def get_nb_frames_for_video(video_parts):
    """Count the .jpg files found for a video's filename prefix.

    video_parts = (train_or_test, classname, filename_no_ext, filename)

    Returns the number of files matching
    ``<train_or_test>/<classname>/<filename_no_ext>*.jpg`` relative to the
    current directory.
    """
    group, label, stem, _unused = video_parts
    pattern = '/'.join((group, label, stem)) + '*.jpg'
    return len(glob.glob(pattern))
def get_video_parts(video_path):
    """Given a full path to a video, return its parts.

    video_path = a path ending in ``<train|test>/<classname>/<filename>``,
        e.g. './train/Archery/v_Archery_g01_c01.avi'. A leading './' is
        optional and either path separator is accepted.

    Returns (train_or_test, classname, filename_no_ext, filename).
    Raises ValueError if the path has fewer than three components.
    """
    # Normalise first so './train/..' and 'train/..' (and backslash paths
    # from glob on Windows) are all handled the same way.
    normalized = os.path.normpath(video_path).replace(os.sep, '/')
    parts = normalized.split('/')
    if len(parts) < 3:
        raise ValueError("Unexpected video path: %r" % (video_path,))

    # Take the components from the end rather than at fixed indices.
    train_or_test, classname, filename = parts[-3:]

    # splitext only strips the final extension, so a filename containing
    # extra dots keeps its full stem.
    filename_no_ext = os.path.splitext(filename)[0]

    return train_or_test, classname, filename_no_ext, filename
def check_already_extracted(video_parts):
    """Report whether the first extracted frame of a video exists.

    video_parts = (train_or_test, classname, filename_no_ext, filename)

    Returns True when ``<train_or_test>/<classname>/<filename_no_ext>-0001.jpg``
    exists relative to the current directory, False otherwise.
    """
    group, label, stem, _unused = video_parts
    first_frame = group + '/' + label + '/' + stem + '-0001.jpg'
    return bool(os.path.exists(first_frame))
def main():
    """
    Entry point: extract frames from every video and write the data file
    used as training input. Each row has the format:

    [train|test], class, filename, nb frames
    """
    extract_files()


if __name__ == '__main__':
    main()
"""
This script generates extracted features for each video, which other
models make use of.
You can change your sequence length and limit to a set number of classes
below.
class_limit is an integer that denotes the first N classes you want to
extract features from. This is useful if you don't want to wait to
extract all 101 classes. For instance, set class_limit = 8 to just
extract features for the first 8 (alphabetical) classes in the dataset.
Then set the same number when training models.
"""
import numpy as np
import os.path
from data import DataSet
from extractor import Extractor
from tqdm import tqdm
# Set defaults.
seq_length = 40
class_limit = None  # integer, number of classes to extract

# Get the dataset.
data = DataSet(seq_length=seq_length, class_limit=class_limit)

# get the model.
model = Extractor()

# Write the sequences where DataSet.get_extracted_sequence() looks for
# them (data.sequence_path), and make sure that folder exists before the
# first save.
if not os.path.isdir(data.sequence_path):
    os.makedirs(data.sequence_path)

# Loop through data.
pbar = tqdm(total=len(data.data))
for video in data.data:

    # Get the path to the sequence for this video.
    path = data.sequence_path + video[2] + '-' + str(seq_length) + \
        '-features.txt'

    # Check if we already have it.
    if os.path.isfile(path):
        pbar.update(1)
        continue

    # Get the frames for this video.
    frames = data.get_frames_for_sample(video)

    # Now downsample to just the ones we need.
    frames = data.rescale_list(frames, seq_length)

    # Now loop through and extract features to build the sequence.
    sequence = [model.extract(image) for image in frames]

    # Save the sequence.
    np.savetxt(path, sequence)

    pbar.update(1)

pbar.close()
from keras.preprocessing import image
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model
from keras.layers import Input
import numpy as np
class Extractor():
    """Wraps a Keras model and exposes `extract` to turn one image file
    into a feature array."""

    def __init__(self, weights=None):
        """Either load pretrained from imagenet, or load our saved
        weights from our own training.

        weights = None to use InceptionV3 with ImageNet weights, otherwise
            a path passed to Keras `load_model`.
        """

        self.weights = weights  # so we can check elsewhere which model

        if weights is None:
            # Get model with pretrained weights.
            input_tensor = Input(shape=(299, 299, 3))
            base_model = InceptionV3(
                input_tensor=input_tensor,
                weights='imagenet',
                include_top=True
            )

            # We'll extract features at the final pool layer.
            self.model = Model(
                input=base_model.input,
                output=base_model.get_layer('avg_pool').output
            )

        else:
            # Load the model first.
            self.model = load_model(weights)

            # Then remove the top so we get features not predictions.
            # From: https://github.com/fchollet/keras/issues/2371
            # NOTE(review): this rewires Keras model internals by hand and
            # depends on the exact order of these statements; confirm it
            # still works against the installed Keras version.
            self.model.layers.pop()
            self.model.layers.pop()  # two pops to get to pool layer
            self.model.outputs = [self.model.layers[-1].output]
            self.model.output_layers = [self.model.layers[-1]]
            self.model.layers[-1].outbound_nodes = []

    def extract(self, image_path):
        """Load the image at `image_path`, resize it to 299x299, run it
        through the model and return the resulting features."""
        img = image.load_img(image_path, target_size=(299, 299))
        x = image.img_to_array(img)
        # Add a leading batch axis so a single image can be predicted.
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # Get the prediction.
        features = self.model.predict(x)

        if self.weights is None:
            # For imagenet/default network:
            # presumably the pool output is (1, 1, 1, n) here, so three
            # indexing steps leave a flat vector — TODO confirm
            features = features[0][0][0]
        else:
            # For loaded network:
            features = features[0]

        return features
models.py 0 → 100644
"""
A collection of models we'll use to attempt to classify videos.
"""
from keras.layers import Dense, Flatten, Dropout
from keras.layers.recurrent import LSTM, GRU
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.layers.wrappers import TimeDistributed
from keras.layers.convolutional import Convolution2D, MaxPooling3D, Convolution3D
from collections import deque
class ResearchModels():
    """Builds (or loads) and compiles one of the video classification
    networks; the compiled Keras model is exposed as `self.model`."""

    def __init__(self, nb_classes, model, seq_length,
                 saved_model=None, features_length=2048):
        """
        `model` = one of:
            lstm
            crnn
            mlp
            conv_3d
        `nb_classes` = the number of classes to predict
        `seq_length` = the length of our video sequences
        `saved_model` = the path to a saved Keras model to load
        `features_length` = size of one per-frame feature vector (used by
            the lstm and mlp models)

        Raises ValueError if `model` is not a known network name and no
        `saved_model` is given.
        """

        # Set defaults.
        self.seq_length = seq_length
        self.load_model = load_model
        self.saved_model = saved_model
        self.nb_classes = nb_classes
        self.feature_queue = deque()

        # Set the metrics. Only use top k if there's a need.
        metrics = ['accuracy']
        if self.nb_classes >= 10:
            metrics.append('top_k_categorical_accuracy')

        # Get the appropriate model.
        if self.saved_model is not None:
            print("Loading model %s" % self.saved_model)
            self.model = load_model(self.saved_model)
        elif model == 'lstm':
            print("Loading Deep LSTM model.")
            self.input_shape = (seq_length, features_length)
            self.model = self.lstm()
        elif model == 'crnn':
            print("Loading CRNN model.")
            self.input_shape = (seq_length, 224, 224, 3)
            self.model = self.crnn()
        elif model == 'mlp':
            print("Loading simple MLP.")
            self.input_shape = features_length * seq_length
            self.model = self.mlp()
        elif model == 'conv_3d':
            print("Loading Conv3D")
            self.input_shape = (seq_length, 112, 112, 3)
            self.model = self.conv_3d()
        else:
            # Fail with a clear error; the previous sys.exit() call could
            # not work because this module never imports sys.
            raise ValueError("Unknown network: %s" % model)

        # Now compile the network.
        optimizer = Adam(lr=1e-6)  # aggressively small learning rate
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                           metrics=metrics)

    def lstm(self):
        """Build a simple LSTM network. We pass the extracted features from
        our CNN to this model predominantly."""
        # Model.
        model = Sequential()
        model.add(LSTM(128, return_sequences=True, input_shape=self.input_shape,
                       dropout_W=0.4, dropout_U=0.4))
        model.add(LSTM(128, return_sequences=True, dropout_W=0.4, dropout_U=0.4))
        model.add(LSTM(128, return_sequences=True, dropout_W=0.4, dropout_U=0.4))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def crnn(self):
        """Build a CNN into RNN.
        Starting version from:
        https://github.com/udacity/self-driving-car/blob/master/
        steering-models/community-models/chauffeur/models.py
        """
        model = Sequential()

        # Time-distributed convolutional stack, applied to every frame.
        model.add(TimeDistributed(Convolution2D(24, 5, 5,
                                                init="he_normal",
                                                activation='relu',
                                                subsample=(5, 4),
                                                border_mode='valid'),
                                  input_shape=self.input_shape))
        model.add(TimeDistributed(Convolution2D(32, 5, 5,
                                                init="he_normal",
                                                activation='relu',
                                                subsample=(3, 2),
                                                border_mode='valid')))
        model.add(TimeDistributed(Convolution2D(48, 3, 3,
                                                init="he_normal",
                                                activation='relu',
                                                subsample=(1, 2),
                                                border_mode='valid')))
        model.add(TimeDistributed(Convolution2D(64, 3, 3,
                                                init="he_normal",
                                                activation='relu',
                                                border_mode='valid')))
        model.add(TimeDistributed(Convolution2D(128, 3, 3,
                                                init="he_normal",
                                                activation='relu',
                                                subsample=(1, 2),
                                                border_mode='valid')))
        model.add(TimeDistributed(Flatten()))

        # Recurrent layers over the per-frame features.
        model.add(Dropout(0.2))
        model.add(GRU(128, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(GRU(128, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(GRU(128, return_sequences=True))

        # Classifier head.
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def mlp(self):
        """Build a simple MLP."""
        # Model.
        model = Sequential()
        model.add(Dense(512, input_dim=self.input_shape))
        model.add(Dropout(0.4))
        model.add(Dense(512))
        model.add(Dropout(0.4))
        model.add(Dense(512))
        model.add(Dropout(0.4))
        model.add(Dense(512))
        model.add(Dropout(0.4))
        model.add(Dense(512))
        model.add(Dropout(0.4))
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model

    def conv_3d(self):
        """
        Build a 3D convolutional network, C3D.
        Based on the paper:
            https://arxiv.org/pdf/1412.0767.pdf
        As implemented in Keras by:
            https://gist.github.com/albertomontesg/d8b21a179c1e6cca0480ebdf292c34d2

        Note that this requires a lot of memory to run.
        """
        # ZeroPadding3D is used below but is not among this module's
        # top-level imports, so bring it in locally.
        from keras.layers.convolutional import ZeroPadding3D

        model = Sequential()

        # 1st layer group
        model.add(Convolution3D(64, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv1',
                                subsample=(1, 1, 1),
                                input_shape=self.input_shape))
        model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2),
                               border_mode='valid', name='pool1'))
        # 2nd layer group
        model.add(Convolution3D(128, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv2',
                                subsample=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool2'))
        # 3rd layer group
        model.add(Convolution3D(256, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv3a',
                                subsample=(1, 1, 1)))
        model.add(Convolution3D(256, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv3b',
                                subsample=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool3'))
        # 4th layer group
        model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv4a',
                                subsample=(1, 1, 1)))
        model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv4b',
                                subsample=(1, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool4'))
        # 5th layer group
        model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv5a',
                                subsample=(1, 1, 1)))
        model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                                border_mode='same', name='conv5b',
                                subsample=(1, 1, 1)))
        model.add(ZeroPadding3D(padding=(0, 1, 1)))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                               border_mode='valid', name='pool5'))
        model.add(Flatten())

        # FC layers group
        model.add(Dense(4096, activation='relu', name='fc6'))
        model.add(Dropout(.5))
        model.add(Dense(4096, activation='relu', name='fc7'))
        model.add(Dropout(.5))
        # The output width must match the number of classes being trained;
        # a fixed 487 would not line up with the one-hot labels, whose
        # length is the number of classes.
        model.add(Dense(self.nb_classes, activation='softmax', name='fc8'))

        return model
"""
Given a training log file, plot something.
"""
import csv
import matplotlib.pyplot as plt
def main(training_log):
    """Plot validation accuracy, validation top-k accuracy and a constant
    0.65 benchmark line from a training log.

    training_log = path to a CSV log whose rows (after one header row)
        each have exactly seven columns.
    """
    with open(training_log) as fin:
        reader = csv.reader(fin)
        next(reader, None)  # skip the header
        rows = list(reader)

    accuracies = []
    top_5_accuracies = []
    cnn_benchmark = []  # this is ridiculous
    for row in rows:
        # Columns: epoch, acc, loss, top_k_categorical_accuracy,
        # val_acc, val_loss, val_top_k_categorical_accuracy
        _epoch, _acc, _loss, _top_k, val_acc, _val_loss, val_top_k = row
        accuracies.append(float(val_acc))
        top_5_accuracies.append(float(val_top_k))
        cnn_benchmark.append(0.65)  # ridiculous

    for series in (accuracies, top_5_accuracies, cnn_benchmark):
        plt.plot(series)
    plt.show()


if __name__ == '__main__':
    training_log = 'data/logs/mlp-training-1489455559.7089438.log'
    main(training_log)
"""
Process an image that we can pass to our networks.
"""
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
def process_image(image, target_shape):
    """Load an image file, resize it and return it as a float32 array.

    image = path handed to Keras `load_img`
    target_shape = (height, width, channels); only height and width are used

    Returns the pixel array divided by 255 and cast to float32.
    """
    height, width, _ = target_shape

    # Load at the requested size.
    loaded = load_img(image, target_size=(height, width))

    # Convert to numpy, then normalize.
    pixels = img_to_array(loaded)
    return (pixels / 255.).astype(np.float32)
"""
Try to "classify" samples based on random chance and always guessing
the most popular category.
"""
import random
from data import DataSet
from collections import Counter

data = DataSet()
nb_classes = len(data.classes)
nb_samples = len(data.data)

if nb_samples == 0:
    # Nothing to score; avoid dividing by zero below.
    raise SystemExit("No samples found in the dataset.")

# Work out the most popular class from the data itself, so the baseline
# stays correct whatever class limit or split is in use.
most_pop = Counter(item[1] for item in data.data).most_common(1)[0][0]

# Try a random guess and an always-guess-the-mode strategy.
nb_random_matched = 0
nb_mode_matched = 0
for item in data.data:
    choice = random.choice(data.classes)
    actual = item[1]

    if choice == actual:
        nb_random_matched += 1

    if actual == most_pop:
        nb_mode_matched += 1

random_accuracy = nb_random_matched / nb_samples
mode_accuracy = nb_mode_matched / nb_samples
print("Randomly matched %.2f%%" % (random_accuracy * 100))
print("Mode matched %.2f%%" % (mode_accuracy * 100))
train.py 0 → 100644
"""
Train our RNN on bottleneck or prediction files generated from our CNN.
"""
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger
from models import ResearchModels
from data import DataSet
import time
def train(data_type, seq_length, model, saved_model=None,
          concat=False, class_limit=None, image_shape=None):
    """Train one of the research models with generator-fed batches.

    data_type = 'features' or 'images' (forwarded to the data generator)
    seq_length = number of frames per sequence
    model = model name understood by ResearchModels
    saved_model = optional path to a saved Keras model to continue from
    concat = flatten each sequence into one vector (MLP only)
    class_limit = optional cap on the number of classes
    image_shape = optional frame shape; None keeps DataSet's default

    Checkpoints go to ./data/checkpoints/, logs to ./data/logs/.
    """
    # Set variables.
    nb_epoch = 1000
    batch_size = 32

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type + \
            '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-' + 'training-' + \
        str(timestamp) + '.log')

    # Get the data and process it. Only pass image_shape when one was
    # given, so DataSet's own default applies otherwise.
    dataset_kwargs = {'seq_length': seq_length, 'class_limit': class_limit}
    if image_shape is not None:
        dataset_kwargs['image_shape'] = image_shape
    data = DataSet(**dataset_kwargs)

    # Get samples per epoch from the actual train split rather than
    # guessing its share of the data. Round down to whole batches, keep
    # it an int, and never go below one batch.
    train_samples, _ = data.split_train_test()
    samples_per_epoch = max(
        batch_size, (len(train_samples) // batch_size) * batch_size)

    # Get generators.
    generator = data.frame_generator(batch_size, 'train', data_type, concat)
    val_generator = data.frame_generator(batch_size, 'test', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    rm.model.fit_generator(
        generator=generator,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=nb_epoch,
        verbose=1,
        callbacks=[checkpointer, tb, early_stopper, csv_logger],
        validation_data=val_generator,
        nb_val_samples=256)
def main():
    """Training configuration. Edit the values below, then run this file."""
    settings = dict(
        saved_model=None,   # None or weights file
        class_limit=None,   # int, can be 1-101 or None
        concat=False,       # true for MLP only
        image_shape=None,   # Use None for default or specify a new size
    )
    data_type = 'features'  # can be 'features' or 'images'
    seq_length = 40
    model = 'lstm'  # see `models.py` for more

    train(data_type, seq_length, model, **settings)


if __name__ == '__main__':
    main()
"""
Train on images split into directories. This assumes we've split
our videos into frames and moved them to their respective folders.
Based on:
https://keras.io/preprocessing/image/
and
https://keras.io/applications/
"""
from keras.applications.inception_v3 import InceptionV3
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from data import DataSet
# Module-level state shared by the functions below: the dataset (used for
# its class list) and the Keras callbacks used during the long training run.
data = DataSet()

# Helper: Save the model.
# With save_best_only=True only improving epochs are written.
checkpointer = ModelCheckpoint(
    filepath='./data/checkpoints/inception.{epoch:03d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True)

# Helper: Stop when we stop learning.
early_stopper = EarlyStopping(patience=10)

# Helper: TensorBoard
tensorboard = TensorBoard(log_dir='./data/logs/')
def get_generators():
    """Create the directory-backed image generators.

    Returns (train_generator, validation_generator). The train generator
    reads ./data/train/ with augmentation; the validation generator reads
    ./data/test/ with rescaling only.
    """
    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        horizontal_flip=True,
        rotation_range=10.,
        width_shift_range=0.2,
        height_shift_range=0.2)
    plain_datagen = ImageDataGenerator(rescale=1./255)

    # Both flows share everything except the source directory.
    flow_options = dict(
        target_size=(299, 299),
        batch_size=32,
        classes=data.classes,
        class_mode='categorical')

    train_generator = augmenting_datagen.flow_from_directory(
        './data/train/', **flow_options)
    validation_generator = plain_datagen.flow_from_directory(
        './data/test/', **flow_options)

    return train_generator, validation_generator
def get_model(weights='imagenet'):
    """Build InceptionV3 (without its top) plus a new classification head.

    weights = forwarded to InceptionV3 (default 'imagenet')

    The head is global average pooling, a 1024-unit ReLU layer and a
    softmax layer with one unit per class in `data.classes`.
    """
    # Pre-trained convolutional base.
    base_model = InceptionV3(weights=weights, include_top=False)

    # New head stacked on the base output.
    pooled = GlobalAveragePooling2D()(base_model.output)
    hidden = Dense(1024, activation='relu')(pooled)
    predictions = Dense(len(data.classes), activation='softmax')(hidden)

    # This is the model we will train.
    return Model(input=base_model.input, output=predictions)
def get_top_layer_model(base_model):
    """Used to train just the top layers of the model.

    base_model = the full model (convolutional base plus the new head),
        as passed by this script's main(); it is modified in place and
        returned compiled.

    Every layer except the last two is frozen. Freezing all layers of the
    full model would also freeze the new head and leave nothing to train.
    """
    # first: train only the top layers (which were randomly initialized)
    # i.e. freeze all convolutional InceptionV3 layers
    for layer in base_model.layers[:-2]:
        layer.trainable = False
    for layer in base_model.layers[-2:]:
        layer.trainable = True

    # compile the model (should be done *after* setting layers to non-trainable)
    base_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    return base_model
def get_mid_layer_model(model):
    """After we fine-tune the dense layers, train deeper.

    Freezes the first 172 layers, unfreezes the rest, then recompiles the
    model with low-learning-rate SGD. Returns the same model object.
    """
    # We chose to train the top 2 inception blocks: layers at index 172
    # and above are trainable, everything before is frozen.
    for index, layer in enumerate(model.layers):
        layer.trainable = index >= 172

    # Recompiling is needed for the trainable changes to take effect.
    low_lr_sgd = SGD(lr=0.0001, momentum=0.9)
    model.compile(
        optimizer=low_lr_sgd,
        loss='categorical_crossentropy',
        metrics=['accuracy', 'top_k_categorical_accuracy'])

    return model
def train_model(model, nb_epoch, generators, callbacks=None):
    """Fit `model` from the given generators and return it.

    model = a compiled Keras model
    nb_epoch = number of epochs to run
    generators = (train_generator, validation_generator)
    callbacks = optional list of Keras callbacks; None means no callbacks
    """
    # Avoid a mutable default argument: build a fresh list per call.
    callbacks = [] if callbacks is None else callbacks

    train_generator, validation_generator = generators
    model.fit_generator(
        train_generator,
        samples_per_epoch=128,
        validation_data=validation_generator,
        nb_val_samples=768,
        nb_epoch=nb_epoch,
        callbacks=callbacks)
    return model
def main(weights_file):
    """Run the CNN training pipeline.

    weights_file = None to start from ImageNet weights (the new top layers
        are trained for 10 epochs first), or a path to saved weights to
        resume from. Either way the mid layers are then trained for up to
        1000 epochs with checkpointing, early stopping and TensorBoard.
    """
    model = get_model()
    generators = get_generators()

    if weights_file is not None:
        print("Loading saved model: %s." % weights_file)
        model.load_weights(weights_file)
    else:
        print("Loading network from ImageNet weights.")
        # Get and train the top layers.
        model = train_model(get_top_layer_model(model), 10, generators)

    # Get and train the mid layers.
    long_run_callbacks = [checkpointer, early_stopper, tensorboard]
    model = get_mid_layer_model(model)
    model = train_model(model, 1000, generators, long_run_callbacks)


if __name__ == '__main__':
    weights_file = None
    main(weights_file)
"""
Classify a few images through our CNN.
"""
import numpy as np
import operator
import random
import glob
from data import DataSet
from processor import process_image
from keras.models import load_model
def main(nb_images=5):
    """Spot-check `nb_images` images.

    Picks random .jpg files under ./data/test/<class>/, runs each through
    the saved CNN checkpoint and prints the five highest-scoring classes.
    Returns without loading the model if no test images are found.
    """
    # Get all our test images.
    images = glob.glob('./data/test/**/*.jpg')
    if not images:
        # Picking from an empty list would raise; report and stop instead.
        print("No test images found under ./data/test/.")
        return

    data = DataSet()
    model = load_model('data/checkpoints/inception.057-1.16.hdf5')

    for _ in range(nb_images):
        print('-'*80)
        # Get a random image.
        image = random.choice(images)

        # Turn the image into an array with a leading batch axis.
        print(image)
        image_arr = process_image(image, (299, 299, 3))
        image_arr = np.expand_dims(image_arr, axis=0)

        # Predict.
        predictions = model.predict(image_arr)

        # Show how much we think it's each one.
        label_predictions = dict(zip(data.classes, predictions[0]))

        sorted_lps = sorted(label_predictions.items(), key=operator.itemgetter(1), reverse=True)

        # Just get the top five.
        for label, score in sorted_lps[:5]:
            print("%s: %.2f" % (label, score))


if __name__ == '__main__':
    main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment