Created
November 7, 2021 16:20
-
-
Save logankilpatrick/965e04b949180635ee91ff39f5d306a2 to your computer and use it in GitHub Desktop.
Train a basic CNN model in Tensorflow / Keras with Image Transformations / Augmentations like padding, zoom, crop, etc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import numpy as np | |
from tensorflow.keras import models, layers, callbacks | |
from tensorflow.keras.utils import to_categorical | |
from tensorflow.keras.optimizers import Adam | |
from tensorflow.keras.datasets import mnist | |
from matplotlib import pyplot as plt | |
# Change this to the location of the database directories | |
DB_DIR = os.path.dirname(os.path.realpath(__file__)) | |
# Import databases | |
sys.path.insert(1, DB_DIR) | |
from db_utils import get_imdb_dataset, get_speech_dataset | |
def Secure_Voice_Channel(func):
    """Decorator that logs a 'secure connection' message around a call to *func*.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that prints before and after delegating to *func* and
        returns *func*'s result unchanged.
    """
    # Local import so this fix is self-contained within the block.
    from functools import wraps

    # Bug fix: without functools.wraps the wrapper clobbered the wrapped
    # function's __name__/__doc__, which breaks introspection and debugging.
    @wraps(func)
    def execute_func(*args, **kwargs):
        print('Established Secure Connection.')
        returned_value = func(*args, **kwargs)
        print("Ended Secure Connection.")
        return returned_value
    return execute_func
@Secure_Voice_Channel
def generic_vns_function(input_dim, number_dense_layers, classes, units, lr):
    """Build and compile a small CNN classifier.

    Args:
        input_dim: Flattened input dimension forwarded to the Dense layers.
        number_dense_layers: Number of hidden Dense layers to stack.
        classes: Number of output classes (softmax units).
        units: Units per hidden Dense layer.
        lr: Adam learning rate.

    Returns:
        A compiled Keras Sequential model.
    """
    model = models.Sequential()
    model.add(layers.Conv2D(64, (4, 4), activation='relu'))
    model.add(layers.MaxPool2D(2, 2))
    model.add(layers.Flatten())
    for _ in range(number_dense_layers):
        # NOTE(review): input_dim is ignored by Keras on layers that already
        # have an upstream input; kept only for interface compatibility.
        model.add(layers.Dense(units=units, input_dim=input_dim,
                               kernel_initializer='normal', activation='relu'))
    model.add(layers.Dense(classes, kernel_initializer='normal',
                           activation='softmax'))
    # Bug fix: `lr` is the deprecated TF1-era keyword; TF2 Keras optimizers
    # (which this file imports) use `learning_rate`.
    opt = Adam(learning_rate=lr)
    model.compile(loss='categorical_crossentropy', optimizer=opt,
                  metrics=['accuracy'])
    return model
def train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test):
    """Fit *model* on the training split and report its test-set error.

    Early-stops when validation loss has not improved for 3 epochs, then
    evaluates on the test split, prints the baseline error, and returns
    the (now trained) model.
    """
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=3)
    model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        verbose=1,
        callbacks=[early_stopping],
    )
    scores = model.evaluate(X_test, y_test, verbose=2)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))
    return model
def choose_dataset(dataset_type):
    """Load, normalize, and reshape the dataset named by *dataset_type*.

    Returns ((X_train, y_train), (X_test, y_test)); raises ValueError for
    an unrecognized dataset name.
    """
    if dataset_type == "nlp":
        # NLP data skips the image-style normalization/reshape pipeline.
        return get_imdb_dataset(dir=DB_DIR)
    if dataset_type == "computer_vision":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
    elif dataset_type == "speech_recognition":
        (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
    else:
        raise ValueError("Couldn't find dataset.")
    X_train, X_test = normalize_dataset(dataset_type, X_train, X_test)
    # reshape_dataset already returns ((X_train, y_train), (X_test, y_test)).
    return reshape_dataset(X_train, y_train, X_test, y_test)
def normalize_dataset(string, X_train, X_test):
    """Normalize computer vision and speech recognition datasets.

    Args:
        string: Dataset type identifier (e.g. "computer_vision").
        X_train: Training samples array.
        X_test: Test samples array.

    Returns:
        Tuple (X_train, X_test) of normalized arrays.
    """
    # Bug fixes: the original compared with `is` (identity, unreliable for
    # strings) against "computer vision", which never matches the
    # "computer_vision" key used by choose_dataset — so this branch was
    # unreachable.
    if string == "computer_vision":
        # Pixel values are in [0, 255]; scale to [0, 1].
        X_train = X_train / 255
        X_test = X_test / 255
    else:
        # Standardize with the training set's statistics only, so no test-set
        # information leaks into preprocessing.
        mean = np.mean(X_train)
        std = np.std(X_train)
        # Bug fix: the original computed (X - std) / mean, swapping the roles
        # of mean and std in the standardization formula.
        X_train = (X_train - mean) / std
        X_test = (X_test - mean) / std
    return (X_train, X_test)
def reshape_dataset(X_train, y_train, X_test, y_test):
    """Augment and reshape Computer Vision and Speech datasets.

    Pads both splits, applies random rotation and zoom to the training
    split only, translates both splits, reshapes images to
    (num_samples, height, width, 1) float32, and one-hot encodes labels.

    Returns:
        ((X_train, y_train), (X_test, y_test)) ready for Conv2D input.
    """
    # (Removed an unused `num_pixels` local left over from a dense-only model.)
    # Add padding to train and test images.
    X_train, X_test = add_padding(X_train), add_padding(X_test)
    # Rotate the TRAINING images by a random angle (test split untouched).
    X_train = rotate_array(X_train)
    # Zoom the TRAINING images by a random factor (test split untouched).
    X_train = zoom_array(X_train)
    # Translate both splits by a random offset.
    X_train, X_test = move_array(X_train), move_array(X_test)
    # Reshape to (num_samples, height, width, channels) for Conv2D input.
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1).astype('float32')
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1).astype('float32')
    # One-hot encode the integer class labels.
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    return (X_train, y_train), (X_test, y_test)
def main():
    """Train a CNN on the selected dataset and save it to an .h5 file."""
    # Hyperparameters.
    # Bug fix: this local was named `layers`, shadowing the keras `layers`
    # module imported at the top of the file.
    num_dense_layers = 2
    layer_units = 1000
    epochs = 20
    batch_size = 200
    lr = 0.001
    # Dataset: "computer_vision"
    dataset = "computer_vision"
    # Import datasets (already normalized, augmented, and reshaped).
    (X_train, y_train), (X_test, y_test) = choose_dataset(dataset)
    # Generate and train model.
    model = generic_vns_function(X_train.shape[1], num_dense_layers,
                                 y_train.shape[1], layer_units, lr)
    trained_model = train_model(model, epochs, batch_size, X_train, y_train,
                                X_test, y_test)
    # Save model to h5 file.
    trained_model.save('models/model_%s_a3.h5' % dataset)
    return None
######################DATA AUGMENTATION FUNCTIONS############################### | |
################################################################################ | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from random import random, seed, randint, uniform | |
import cv2 | |
from scipy.ndimage.interpolation import rotate, zoom | |
def add_padding(X, padding=10):
    """Pad each image in X with a zero border of `padding` pixels per side.

    Args:
        X: Iterable of 2-D image arrays.
        padding: Border width in pixels added to every edge.

    Returns:
        np.ndarray of padded images; height and width each grow by
        2 * padding.
    """
    new_X = []
    for img in X:
        # Bug fix: the second canvas dimension previously used img.shape[0],
        # producing a wrongly-sized square canvas for non-square images.
        new_image = np.zeros((img.shape[0] + padding * 2,
                              img.shape[1] + padding * 2))
        new_image[padding:padding + img.shape[0],
                  padding:padding + img.shape[1]] = img
        new_X.append(new_image)
    return np.asarray(new_X)
def move_array(X, transform_range=5):
    """Translate each image in X by a random (x, y) offset.

    Args:
        X: Iterable of 2-D image arrays.
        transform_range: Maximum absolute shift per axis, in pixels.

    Returns:
        np.ndarray of translated images with the same shapes as the input.
    """
    new_X = []
    for img in X:
        moved_x = randint(-transform_range, transform_range)
        moved_y = randint(-transform_range, transform_range)
        # 2x3 affine matrix encoding a pure translation.
        translation_matrix = np.float32([[1, 0, moved_x], [0, 1, moved_y]])
        # Bug fix: cv2.warpAffine's dsize is (width, height); the original
        # passed (img.shape[0], img.shape[0]), which distorts the output
        # size for non-square images.
        moved_image = cv2.warpAffine(img, translation_matrix,
                                     (img.shape[1], img.shape[0]))
        new_X.append(moved_image)
    return np.asarray(new_X)
def rotate_array(X, angle_range=60):
    """Rotate each image in X by a random angle in [-angle_range, angle_range].

    Args:
        X: Iterable of 2-D image arrays.
        angle_range: Half-width, in degrees, of the uniform angle range.

    Returns:
        np.ndarray of rotated images; `reshape=False` keeps each image's
        original shape.
    """
    rotated_images = []
    for image in X:
        # Map random() from [0, 1) onto [-angle_range, angle_range).
        random_angle = random() * angle_range * 2 - angle_range
        rotated_images.append(rotate(image, angle=random_angle, reshape=False))
    return np.asarray(rotated_images)
def zoom_array(X, zoom_range_min=0.6, zoom_range_max=1):
    """Zoom each image in X by a random factor in [zoom_range_min, zoom_range_max].

    Args:
        X: Iterable of 2-D image arrays.
        zoom_range_min: Lower bound of the uniform zoom-factor range.
        zoom_range_max: Upper bound of the uniform zoom-factor range.

    Returns:
        np.ndarray of zoomed images; clipped_zoom preserves each image's size.
    """
    new_X = []
    for img in X:
        # Bug fix (shadowing): the local was named `zoom`, hiding the
        # scipy.ndimage `zoom` function imported at module level.
        zoom_factor = uniform(zoom_range_min, zoom_range_max)
        new_X.append(clipped_zoom(img, zoom_factor))
    return np.asarray(new_X)
def clipped_zoom(img, zoom_factor, **kwargs):
    """Zoom while clipping the image to keep the array size constant.

    Args:
        img: Image array; the first two dimensions are treated as
            height/width, any trailing dimensions (e.g. channels) are left
            unscaled.
        zoom_factor: > 1 zooms in, < 1 zooms out, == 1 returns img unchanged.
        **kwargs: Forwarded to scipy.ndimage's `zoom`.

    Returns:
        An array with the same height and width as `img`.
    """
    # Code inspired on ali_m's response in Stack Overflow issue:
    # https://stackoverflow.com/questions/37119071/scipy-rotate-and-zoom-an-image-without-changing-its-dimensions
    h, w = img.shape[:2]
    # For multichannel images we don't want to apply the zoom factor to the RGB
    # dimension, so instead we create a tuple of zoom factors, one per array
    # dimension, with 1's for any trailing dimensions after the width and height.
    zoom_tuple = (zoom_factor,) * 2 + (1,) * (img.ndim - 2)
    # Zooming out
    if zoom_factor < 1:
        # Bounding box of the zoomed-out image within the output array
        zh = int(np.round(h * zoom_factor))
        zw = int(np.round(w * zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2
        # Zero-padding: place the shrunken image centered in a zero canvas of
        # the original size.
        out = np.zeros_like(img)
        out[top:top+zh, left:left+zw] = zoom(img, zoom_tuple, **kwargs)
    # Zooming in
    elif zoom_factor > 1:
        # Bounding box of the zoomed-in region within the input array
        zh = int(np.round(h / zoom_factor))
        zw = int(np.round(w / zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2
        # Magnify only the central crop back up to (approximately) full size.
        out = zoom(img[top:top+zh, left:left+zw], zoom_tuple, **kwargs)
        # `out` might still be slightly larger than `img` due to rounding, so
        # trim off any extra pixels at the edges
        # NOTE(review): the branch below handles the opposite rounding case
        # (out one row too SMALL) by padding a 1-pixel border before trimming
        # — presumably rounding error is at most 1 pixel; TODO confirm.
        if out.shape[0] < h:
            out = add_padding([out],padding=1)[0]
        trim_top = ((out.shape[0] - h) // 2)
        trim_left = ((out.shape[1] - w) // 2)
        out = out[trim_top:trim_top+h, trim_left:trim_left+w]
    # If zoom_factor == 1, just return the input array
    else:
        out = img
    return out
# Run the full training pipeline only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment