Python implementation of the training process
The following Python code shows an end-to-end implementation of the training process. It consists of all the functional blocks discussed in the preceding sections. Let's start by importing all of the required Python packages, as follows:
import numpy as np
np.random.seed(1000)
import os
import glob
import cv2
import datetime
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.metrics import cohen_kappa_score
from keras.models import Sequential,Model
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import GlobalMaxPooling2D,GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from sklearn.metrics import log_loss
import keras
from keras import __version__ as keras_version
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, Callback
from keras.applications.resnet50 import preprocess_input
import h5py
import argparse
from sklearn.externals import joblib
import json
Once we have imported the required libraries, we can define the TransferLearning class:
class TransferLearning:

    def __init__(self):
        parser = argparse.ArgumentParser(description='Process the inputs')
        parser.add_argument('--path', help='image directory')
        parser.add_argument('--class_folders', help='class images folder names')
        parser.add_argument('--dim', type=int, help='Image dimensions to process')
        parser.add_argument('--lr', type=float, help='learning rate', default=1e-4)
        parser.add_argument('--batch_size', type=int, help='batch size')
        parser.add_argument('--epochs', type=int, help='no of epochs to train')
        parser.add_argument('--initial_layers_to_freeze', type=int, help='the initial layers to freeze')
        parser.add_argument('--model', help='Standard Model to load', default='InceptionV3')
        parser.add_argument('--folds', type=int, help='num of cross validation folds', default=5)
        # main() branches on this flag ('train' or 'validation'), so expose it as an argument
        parser.add_argument('--mode', help='train or validation', default='train')
        parser.add_argument('--outdir', help='output directory')
        args = parser.parse_args()
        self.path = args.path
        self.class_folders = json.loads(args.class_folders)
        self.dim = int(args.dim)
        self.lr = float(args.lr)
        self.batch_size = int(args.batch_size)
        self.epochs = int(args.epochs)
        self.initial_layers_to_freeze = int(args.initial_layers_to_freeze)
        self.model = args.model
        self.folds = int(args.folds)
        self.mode = args.mode
        self.outdir = args.outdir
Next, let's define a function that can read the images and resize them to a suitable dimension, as follows:
    # Read an image from disk and resize it to the required dimensions
    def get_im_cv2(self, path, dim=224):
        img = cv2.imread(path)
        resized = cv2.resize(img, (dim, dim), interpolation=cv2.INTER_LINEAR)
        return resized

    # Pre-process the images by subtracting the per-channel ImageNet means
    def pre_process(self, img):
        # Work in float to avoid unsigned-integer underflow during mean subtraction
        img = img.astype('float32')
        img[:, :, 0] = img[:, :, 0] - 103.939
        img[:, :, 1] = img[:, :, 1] - 116.779
        img[:, :, 2] = img[:, :, 2] - 123.68
        return img
    # Function to build X, y in numpy format based on the train/validation datasets
    def read_data(self, class_folders, path, num_class, dim, train_val='train'):
        print(train_val)
        train_X, train_y = [], []
        for c in class_folders:
            path_class = path + str(train_val) + '/' + str(c)
            file_list = os.listdir(path_class)
            for f in file_list:
                img = self.get_im_cv2(path_class + '/' + f, dim=dim)
                img = self.pre_process(img)
                train_X.append(img)
                # The class label is encoded in the folder name, for example 'class3' -> 3
                train_y.append(int(c.split('class')[1]))
        train_y = keras.utils.np_utils.to_categorical(np.array(train_y), num_class)
        return np.array(train_X), train_y
Next, we will define the three models for transfer learning, starting with InceptionV3:
    def inception_pseudo(self, dim=224, freeze_layers=30, full_freeze='N'):
        model = InceptionV3(weights='imagenet', include_top=False)
        x = model.output
        # Replace the ImageNet classification head with a custom head for the five classes
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        out = Dense(5, activation='softmax')(x)
        model_final = Model(inputs=model.input, outputs=out)
        if full_freeze != 'N':
            for layer in model.layers[0:freeze_layers]:
                layer.trainable = False
        return model_final
Then, we will define the ResNet50 model for transfer learning:
    def resnet_pseudo(self, dim=224, freeze_layers=10, full_freeze='N'):
        model = ResNet50(weights='imagenet', include_top=False)
        x = model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        out = Dense(5, activation='softmax')(x)
        model_final = Model(inputs=model.input, outputs=out)
        if full_freeze != 'N':
            for layer in model.layers[0:freeze_layers]:
                layer.trainable = False
        return model_final
Lastly, we will define the VGG16 model:
    def VGG16_pseudo(self, dim=224, freeze_layers=10, full_freeze='N'):
        model = VGG16(weights='imagenet', include_top=False)
        x = model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.5)(x)
        out = Dense(5, activation='softmax')(x)
        model_final = Model(inputs=model.input, outputs=out)
        if full_freeze != 'N':
            for layer in model.layers[0:freeze_layers]:
                layer.trainable = False
        return model_final
Now, let's define the training function, as follows:
    def train_model(self, train_X, train_y, n_fold=5, batch_size=16, epochs=40,
                    dim=224, lr=1e-5, model='ResNet50'):
        model_save_dest = {}
        k = 0
        kf = KFold(n_splits=n_fold, random_state=0, shuffle=True)

        for train_index, test_index in kf.split(train_X):
            k += 1
            X_train, X_test = train_X[train_index], train_X[test_index]
            y_train, y_test = train_y[train_index], train_y[test_index]

            if model == 'ResNet50':
                model_final = self.resnet_pseudo(dim=dim, freeze_layers=10, full_freeze='N')
            if model == 'VGG16':
                model_final = self.VGG16_pseudo(dim=dim, freeze_layers=10, full_freeze='N')
            if model == 'InceptionV3':
                model_final = self.inception_pseudo(dim=dim, freeze_layers=10, full_freeze='N')

            datagen = ImageDataGenerator(
                horizontal_flip=True,
                vertical_flip=True,
                width_shift_range=0.1,
                height_shift_range=0.1,
                channel_shift_range=0,
                zoom_range=0.2,
                rotation_range=20)

            adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,
                                   epsilon=1e-08, decay=0.0)
            model_final.compile(optimizer=adam,
                                loss='categorical_crossentropy',
                                metrics=['accuracy'])
            reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                          factor=0.50,
                                                          patience=3,
                                                          min_lr=0.000001)
            callbacks = [
                EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1),
                CSVLogger('keras-5fold-run-01-v1-epochs_ib.log',
                          separator=',', append=False),
                reduce_lr,
                ModelCheckpoint(
                    'kera1-5fold-run-01-v1-fold-' + str('%02d' % (k + 1)) +
                    '-run-' + str('%02d' % (1 + 1)) + '.check',
                    monitor='val_loss', mode='min',
                    save_best_only=True,
                    verbose=1)
            ]
            model_final.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                                      steps_per_epoch=X_train.shape[0] // batch_size,
                                      epochs=epochs,
                                      verbose=1,
                                      validation_data=(X_test, y_test),
                                      callbacks=callbacks,
                                      class_weight={0: 0.012, 1: 0.12, 2: 0.058, 3: 0.36, 4: 0.43})

            # Reload the best checkpoint for this fold; the optimizer state is removed
            # from the HDF5 file so that load_model does not attempt to restore it
            model_name = 'kera1-5fold-run-01-v1-fold-' + str('%02d' % (k + 1)) + \
                         '-run-' + str('%02d' % (1 + 1)) + '.check'
            del model_final
            f = h5py.File(model_name, 'r+')
            del f['optimizer_weights']
            f.close()
            model_final = keras.models.load_model(model_name)
            model_name1 = self.outdir + str(model) + '___' + str(k)
            model_final.save(model_name1)
            model_save_dest[k] = model_name1

        return model_save_dest
We will also define an inference function for the holdout dataset, as follows:
    def inference_validation(self, test_X, test_y, model_save_dest, n_class=5, folds=5):
        # Average the predicted class probabilities of the fold models
        pred = np.zeros((len(test_X), n_class))
        for k in range(1, folds + 1):
            model = keras.models.load_model(model_save_dest[k])
            pred = pred + model.predict(test_X)
        pred = pred / (1.0 * folds)
        pred_class = np.argmax(pred, axis=1)
        act_class = np.argmax(test_y, axis=1)
        accuracy = np.sum(pred_class == act_class) * 1.0 / len(test_X)
        kappa = cohen_kappa_score(pred_class, act_class, weights='quadratic')
        return pred_class, accuracy, kappa
Now, let's define the main function, which ties everything together and triggers the training or inference process, as follows:
    def main(self):
        start_time = time.time()
        self.num_class = len(self.class_folders)
        if self.mode == 'train':
            print("Data Processing..")
            train_X, train_y = self.read_data(self.class_folders, self.path,
                                              self.num_class, self.dim,
                                              train_val='train')
            print(len(train_X), len(train_y))
            print(train_y[0], train_y[-1])
            self.model_save_dest = self.train_model(train_X, train_y,
                                                    n_fold=self.folds,
                                                    batch_size=self.batch_size,
                                                    epochs=self.epochs,
                                                    dim=self.dim,
                                                    lr=self.lr,
                                                    model=self.model)
            joblib.dump(self.model_save_dest, f'{self.outdir}/model_dict.pkl')
            print("Model saved to dest:", self.model_save_dest)
            model_save_dest = self.model_save_dest
        else:
            # In pure inference mode, restore the model dictionary written during training
            model_save_dest = joblib.load(f'{self.outdir}/model_dict.pkl')
            print('Models loaded from:', model_save_dest)

        # Do inference/validation on the holdout set
        # read_data already returns the pre-processed image arrays and one-hot labels
        test_X, test_y = self.read_data(self.class_folders, self.path,
                                        self.num_class, self.dim,
                                        train_val='validation')
        print(test_X.shape)
        print(len(test_y))
        pred_class, accuracy, kappa = self.inference_validation(test_X, test_y,
                                                                model_save_dest,
                                                                n_class=self.num_class,
                                                                folds=self.folds)
        results_df = pd.DataFrame()
        results_df['target'] = np.argmax(test_y, axis=1)
        results_df['prediction'] = pred_class
        results_df.to_csv(f'{self.outdir}/val_results_reg.csv', index=False)
        print("-----------------------------------------------------")
        print("Kappa score:", kappa)
        print("accuracy:", accuracy)
        print("End of training")
        print("-----------------------------------------------------")
        print("Processing Time", time.time() - start_time, ' secs')
We can experiment with several parameters, such as the learning rate, batch size, and image size, to come up with a decent model. During the training phase, the model locations are saved in the model_save_dest dictionary, which is written to the model_dict.pkl file.
During the inference phase, predictions on new test data are made with the previously trained fold models.
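To illustrate this, the following standalone sketch loads the saved model dictionary and averages the fold predictions for a single image, mirroring what inference_validation does internally. The output path and the dummy input image are hypothetical stand-ins:

from sklearn.externals import joblib
from keras.models import load_model
import numpy as np

# Hypothetical location of the dictionary written during training
model_dict = joblib.load('/path/to/outdir/model_dict.pkl')  # e.g. {1: '.../InceptionV3___1', ...}

# Stand-in for a single pre-processed retina image of shape (224, 224, 3)
img = np.random.rand(224, 224, 3).astype(np.float32)
img_batch = np.expand_dims(img, axis=0)

# Average the class probabilities over the fold models
probs = np.zeros((1, 5))
for fold, model_path in model_dict.items():
    model = load_model(model_path)
    probs += model.predict(img_batch)
probs /= len(model_dict)
print('Predicted class:', np.argmax(probs, axis=1)[0])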
The script for transfer learning named TransferLearning.py can be invoked as follows:
python TransferLearning.py --path '/media/santanu/9eb9b6dc-b380-486e-b4fd-c424a325b976/book AI/Diabetic Retinopathy/Extra/assignment2_train_dataset/' --class_folders '["class0","class1","class2","class3","class4"]' --dim 224 --lr 1e-4 --batch_size 16 --epochs 20 --initial_layers_to_freeze 10 --model InceptionV3 --folds 5 --outdir '/home/santanu/ML_DS_Catalog-/Transfer_Learning_DR/'
The output log of the script is as follows:
Model saved to dest: {1: '/home/santanu/ML_DS_Catalog-/Transfer_Learning_DR/categorical/InceptionV3___1', 2: '/home/santanu/ML_DS_Catalog-/Transfer_Learning_DR/categorical/InceptionV3___2', 3: '/home/santanu/ML_DS_Catalog-/Transfer_Learning_DR/categorical/InceptionV3___3', 4: '/home/santanu/ML_DS_Catalog-/Transfer_Learning_DR/categorical/InceptionV3___4', 5: '/home/santanu/ML_DS_Catalog-/Transfer_Learning_DR/categorical/InceptionV3___5'}
validation
-----------------------------------------------------
Kappa score: 0.42969781637876836
accuracy: 0.5553973227000855
End of training
-----------------------------------------------------
Processing Time 26009.3344039917 secs
As we can see from the results in the log, we achieve a decent cross-validation accuracy of around 56% and a quadratic Kappa of around 0.43.
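Since the five classes are ordered severity grades, the quadratic Kappa is a natural metric here: it penalizes predictions that land far from the true grade more heavily than near misses. The following toy snippet, with made-up labels, contrasts the plain and quadratically weighted Kappa using the same cohen_kappa_score function that inference_validation relies on:

from sklearn.metrics import cohen_kappa_score

actual    = [0, 0, 1, 2, 3, 4, 4, 2]
predicted = [0, 1, 1, 2, 4, 4, 3, 0]

# The unweighted kappa treats every misclassification equally, while the
# quadratic weighting penalizes larger grade differences (e.g. 2 -> 0) more
print(cohen_kappa_score(actual, predicted))
print(cohen_kappa_score(actual, predicted, weights='quadratic'))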
In this script, we have loaded all the data into memory and then fed the augmented images from the ImageDataGenerator to the model for training. If the training set is small and/or the images are of moderate dimensions, loading the data into memory might not be of great concern. However, if the image corpus is huge and/or we have limited resources, loading all the data into memory won't be a viable option. Since the machine on which these experiments were run has 64 GB of RAM, we were able to train these models without issues. Even a 16 GB RAM machine might not be sufficient to run these experiments with all the data loaded in memory, and you might run into a memory error.
The question is, do we need to load all the data into memory at once?
Since neural networks are trained on mini-batches, at any given time we only need the data for the current mini-batch in memory to run a forward and backward pass. Once that batch has been processed, its data can be discarded and the next batch loaded in its place. The memory footprint at each step is therefore only that of a single batch, which means we can train deep learning models on machines with limited memory by creating the batches dynamically at training time. Keras provides a convenient way to create such dynamic batches, which we will discuss in the next section.
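To make the idea concrete, here is a minimal, generic sketch of dynamic batching (it is not the Keras utility covered in the next section, and the file list passed to it is hypothetical): a Python generator that reads and resizes only the images of the current mini-batch from disk:

import numpy as np
import cv2

def batch_generator(file_paths, labels, batch_size=16, dim=224):
    """Yield (X, y) mini-batches, reading images from disk on demand."""
    n = len(file_paths)
    while True:
        # Shuffle the order of the files at the start of every epoch
        idx = np.random.permutation(n)
        for start in range(0, n, batch_size):
            batch_idx = idx[start:start + batch_size]
            X, y = [], []
            for i in batch_idx:
                img = cv2.imread(file_paths[i])
                img = cv2.resize(img, (dim, dim), interpolation=cv2.INTER_LINEAR)
                X.append(img.astype(np.float32))
                y.append(labels[i])
            yield np.array(X), np.array(y)

# Usage (with a hypothetical list of training files and labels):
# gen = batch_generator(train_files, train_labels, batch_size=16)
# model_final.fit_generator(gen, steps_per_epoch=len(train_files) // 16, epochs=20)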