MNIST CNN (LeNet)

In [1]:
import tensorflow as tf
from pathlib import Path
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

MNIST dataset reading pipeline

We want our iterator to be usable when fine-tuning models. Since it is not possible to serialize iterators into a model file, we use feedable iterators.
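A minimal sketch of the mechanism with two toy datasets (the names ds_a, ds_b, and handle_ph are illustrative only; tf is the import from the first cell):

# Feedable-iterator sketch: one generic iterator, concrete source chosen per sess.run.
ds_a = tf.data.Dataset.range(5)
ds_b = tf.data.Dataset.range(100, 105)

handle_ph = tf.placeholder(tf.string, shape=[])
generic_iterator = tf.data.Iterator.from_string_handle(
    handle_ph, ds_a.output_types, ds_a.output_shapes)
next_element = generic_iterator.get_next()

iter_a = ds_a.make_one_shot_iterator()
iter_b = ds_b.make_one_shot_iterator()

with tf.Session() as sess:
    hdl_a, hdl_b = sess.run([iter_a.string_handle(), iter_b.string_handle()])
    print(sess.run(next_element, {handle_ph: hdl_a}))  # 0   (from ds_a)
    print(sess.run(next_element, {handle_ph: hdl_b}))  # 100 (from ds_b)

The same pattern is used below: the MNIST graph is built once against the generic iterator, and the train/test/serving source is selected per call.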

In [2]:
p = Path('./datasets/MNIST_data/') 
train_images_path = p / 'train-images.idx3-ubyte'
train_label_path = p / 'train-labels.idx1-ubyte'
test_images_path = p / 't10k-images.idx3-ubyte'
test_label_path = p / 't10k-labels.idx1-ubyte'

def mnist_dataset(train:bool) -> tf.data.Dataset:
    if train:
        #16-byte header: magic number, image count, rows, cols (four 32-bit ints)
        im = tf.data.FixedLengthRecordDataset([str(train_images_path)],28*28,header_bytes=16)
        #8-byte header: magic number, label count (two 32-bit ints)
        label = tf.data.FixedLengthRecordDataset([str(train_label_path)],1,header_bytes=8)        
    else:
        im = tf.data.FixedLengthRecordDataset([str(test_images_path)],28*28,header_bytes=16)
        label = tf.data.FixedLengthRecordDataset([str(test_label_path)],1,header_bytes=8)
        
    im = im.map(lambda x: tf.decode_raw(x,tf.uint8),num_parallel_calls=4)
    im = im.map(lambda x: tf.reshape(x,(28,28,1)),num_parallel_calls=4) 
    im = im.map(lambda x: tf.image.convert_image_dtype(x,tf.float32),num_parallel_calls=4)
    #LeNet expects 32x32 inputs, so upscale the 28x28 MNIST images
    im = im.map(lambda x: tf.image.resize_images(x,(32,32)))
    
    label = label.map(lambda x: tf.decode_raw(x,tf.uint8), num_parallel_calls=4)
    #label = label.map(lambda x: tf.one_hot(x,10), num_parallel_calls=4)
    
    dataset = tf.data.Dataset.zip((im,label))
        
    return dataset

with tf.device('/cpu:0'):
    train_dataset = mnist_dataset(True)
    train_dataset = train_dataset.shuffle(20000) #shuffle buffer covers a third of the 60k training set
    train_dataset = train_dataset.repeat(10)     #10 epochs
    train_dataset = train_dataset.batch(10)
    train_dataset = train_dataset.prefetch(2)
    
with tf.device('/cpu:0'):
    test_dataset = mnist_dataset(False)
    test_dataset = test_dataset.batch(10)
    test_dataset = test_dataset.prefetch(2)
  
#It is not possible to save iterators to be used in other models. Therefore, we use feedable iterators
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle,train_dataset.output_types,train_dataset.output_shapes)
model_input,label = iterator.get_next()


reinit_iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
training_init_op = reinit_iterator.make_initializer(train_dataset)
test_init_op = reinit_iterator.make_initializer(test_dataset)

Check

In [3]:
train_dataset.output_shapes,train_dataset.output_types
Out[3]:
((TensorShape([Dimension(None), Dimension(32), Dimension(32), Dimension(1)]),
  TensorShape([Dimension(None), Dimension(None)])),
 (tf.float32, tf.uint8))

To pull data through a feedable iterator, we pass the concrete iterator's string handle to sess.run:

In [4]:
with tf.Session() as sess:
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    i,l = sess.run([model_input,label],{handle:hdl})
In [5]:
i.shape
Out[5]:
(10, 32, 32, 1)
In [6]:
plt.imshow(i[0].reshape(32,32))
Out[6]:
<matplotlib.image.AxesImage at 0x7fde14aea198>
In [7]:
l
Out[7]:
array([[7],
       [2],
       [1],
       [0],
       [4],
       [1],
       [4],
       [9],
       [5],
       [9]], dtype=uint8)

Building LeNet

Calculating the CNN output shape (the same formula applies to pooling):

out_W = (Width - Filter_W + 2*Padding)/Stride_W + 1

out_H = (Height - Filter_H + 2*Padding)/Stride_H + 1
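
A quick sanity check of the formula against the layer stack built below (out_size is an illustrative helper, not part of the model):

def out_size(size, filt, pad=0, stride=1):
    #Spatial output size of a conv/pool layer (valid padding when pad=0)
    return (size - filt + 2 * pad) // stride + 1

s = 32                        # input resized to 32x32
s = out_size(s, 5)            # Conv1 5x5, stride 1    -> 28
s = out_size(s, 2, stride=2)  # AvgPool1 2x2, stride 2 -> 14
s = out_size(s, 5)            # Conv2 5x5, stride 1    -> 10
s = out_size(s, 2, stride=2)  # AvgPool2 2x2, stride 2 -> 5
s = out_size(s, 5)            # Conv3 5x5, stride 1    -> 1
print(s)                      # 1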

In [8]:
model_input.shape
Out[8]:
TensorShape([Dimension(None), Dimension(32), Dimension(32), Dimension(1)])

Convolution 1

In [9]:
conv1 = tf.layers.conv2d(model_input,6,(5,5),(1,1),activation=tf.nn.tanh)
In [10]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(conv1,{handle:hdl})
In [11]:
o.shape #(32-5)/1. + 1 = 28
Out[11]:
(10, 28, 28, 6)

Average pool 1

In [12]:
avgpool1 = tf.layers.average_pooling2d(conv1,(2,2),(2,2))
In [13]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(avgpool1,{handle:hdl})
o.shape #(28-2)/2. + 1 = 14
Out[13]:
(10, 14, 14, 6)

Convolution 2

In [14]:
conv2 = tf.layers.conv2d(avgpool1,16,(5,5),(1,1),activation=tf.nn.tanh)
In [15]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(conv2,{handle:hdl})
o.shape# (14-5)/1. + 1 = 10
Out[15]:
(10, 10, 10, 16)

Average pool 2

In [16]:
avgpool2 = tf.layers.average_pooling2d(conv2,(2,2),(2,2))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(avgpool2,{handle:hdl})
o.shape #(10-2)/2. + 1 = 5
Out[16]:
(10, 5, 5, 16)

Convolution 3

In [17]:
conv3 = tf.layers.conv2d(avgpool2,120,(5,5),(1,1),activation=tf.nn.tanh)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(conv3,{handle:hdl})
o.shape #(5-5)/1. + 1 = 1
Out[17]:
(10, 1, 1, 120)

Flatten

In [18]:
flatten = tf.layers.Flatten()(conv3)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(flatten,{handle:hdl})
o.shape
Out[18]:
(10, 120)

CNN features

In [9]:
def cnn_features(model_input):
    
    conv1 = tf.layers.conv2d(model_input,6,(5,5),(1,1),activation=tf.nn.tanh,name="Conv1")
    avgpool1 = tf.layers.average_pooling2d(conv1,(2,2),(2,2),name="AvgPool1")
    
    conv2 = tf.layers.conv2d(avgpool1,16,(5,5),(1,1),activation=tf.nn.tanh,name="Conv2")
    avgpool2 = tf.layers.average_pooling2d(conv2,(2,2),(2,2),name="AvgPool2")
    
    conv3 = tf.layers.conv2d(avgpool2,120,(5,5),(1,1),activation=tf.nn.tanh,name="Conv3")
    
    flatten = tf.layers.Flatten()(conv3)
    
    features = tf.layers.dense(flatten,84,activation=tf.nn.tanh,name="Dense_84")
    
    return features
In [10]:
features = cnn_features(model_input)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(features,{handle:hdl})
o.shape
Out[10]:
(10, 84)

Classifier

We keep the classifier separate from the feature extractor so that the features can be fine-tuned on another dataset.
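As a rough sketch of what that fine-tuning could look like in a fresh process (hypothetical: the checkpoint path is the one saved in the Training section below, the tensor name 'Dense_84/Tanh:0' is inferred from tf.layers naming conventions rather than verified here, and 'Dense_new' is an invented head for some 5-class target task):

# Hypothetical fine-tuning sketch -- the names noted above are assumptions.
tf.reset_default_graph()
restorer = tf.train.import_meta_graph('models/MNIST_CNN/mnist_model.ckpt.meta')
graph = tf.get_default_graph()

# tanh output of the "Dense_84" layer (name inferred, not verified)
features = graph.get_tensor_by_name('Dense_84/Tanh:0')

# fresh head for the new task; only these variables start untrained
new_logits = tf.layers.dense(features, 5, name='Dense_new')

with tf.Session() as sess:
    restorer.restore(sess, 'models/MNIST_CNN/mnist_model.ckpt')
    # initialize only the new head; keep the restored weights
    sess.run(tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Dense_new')))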

In [11]:
classifier = tf.layers.dense(features,10,name="Dense_10")
In [12]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    o = sess.run(classifier,{handle:hdl})
o
Out[12]:
array([[-0.20531213,  0.04251097, -0.03315976,  0.03445453,  0.04151768,
         0.00456036, -0.12547094, -0.02607075,  0.02615655,  0.04311508],
       [ 0.07981408,  0.02922436,  0.02569927,  0.01603307, -0.05419018,
         0.02898067, -0.09507059, -0.06044836,  0.04393085,  0.10714986],
       [-0.01316756, -0.03947343,  0.04362742, -0.00307302, -0.06044022,
         0.02515776, -0.06662248, -0.01794723, -0.07488122, -0.02658559],
       [-0.20453325, -0.00027713,  0.03020309,  0.03719905, -0.02660405,
         0.11379763, -0.10966177, -0.06239372,  0.02662674,  0.11430524],
       [-0.1540167 ,  0.027791  ,  0.07667388,  0.07668108, -0.01214264,
         0.06469291, -0.10004922, -0.04193221,  0.05688235,  0.15320799],
       [-0.0189077 , -0.05867729,  0.06295789,  0.00058284, -0.08034124,
         0.0327592 , -0.10286202, -0.02338684, -0.10404497, -0.01895019],
       [-0.13319953,  0.05301343, -0.05021135, -0.00540791,  0.02219173,
         0.10332845, -0.10700808, -0.02184497,  0.04532049,  0.10607778],
       [-0.07240183,  0.01063994,  0.00943119, -0.03639436,  0.01682143,
        -0.00155305, -0.1551005 , -0.02203377,  0.01246432,  0.05506425],
       [-0.16981661,  0.03260854,  0.01053533,  0.01983605, -0.01343607,
         0.11462651, -0.15326248, -0.17958029,  0.06578241,  0.19804746],
       [-0.17639552, -0.07132781,  0.02309839,  0.06553371, -0.08243968,
         0.12195807, -0.18619922, -0.01408935,  0.02503699,  0.08158158]],
      dtype=float32)

Training

In [13]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf.one_hot(label,10),logits=classifier))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
In [14]:
!rm models/MNIST_CNN/*
In [15]:
import time
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hdl,_ = sess.run([reinit_iterator.string_handle(), training_init_op])


    start = time.time()
    try:
        i = 1
        tmp = []
        while True:
            i = i+1
            l,_ = sess.run([loss,train],{handle:hdl})
            tmp.append(l)
            if i%5000 == 0:
                avg_loss = np.array(tmp).mean()
                print("Batch: ",i,avg_loss)
                tmp = []
                
    except tf.errors.OutOfRangeError:
        pass
    
    end = time.time()
    elapsed = end-start
    print("Elapsed time : ", elapsed, " s")
    
    #tf.add_to_collection('iterator',reinit_iterator) #Not possible
    tf.add_to_collection('data_handle',handle)
    tf.add_to_collection('classifier',classifier)
    tf.add_to_collection('loss',loss)
    tf.add_to_collection('train',train)
    tf.add_to_collection('target',label)
    tf.add_to_collection('model_input',model_input)
    
    saver.save(sess,'models/MNIST_CNN/mnist_model.ckpt')
Batch:  5000 0.40081772
Batch:  10000 0.15119536
Batch:  15000 0.100981064
Batch:  20000 0.073384106
Batch:  25000 0.061649695
Batch:  30000 0.05345069
Batch:  35000 0.0459595
Batch:  40000 0.041632738
Batch:  45000 0.036608294
Batch:  50000 0.033815123
Batch:  55000 0.03124176
Batch:  60000 0.029135156
Elapsed time :  70.00993418693542  s

Evaluation

In [16]:
def get_accuracy(predict: 'e.g. [2,4,1,...]', true: 'e.g. [2,4,1,...]') -> tf.Tensor:
    correct_pred = tf.equal(predict,true)
    #Cast [True,False,True,...] --> [1.,0.,1.,...] so we can average
    acc = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
    return acc

with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_CNN/mnist_model.ckpt')
    hdl,_ = sess.run([reinit_iterator.string_handle(), test_init_op])
    
    #IMPORTANT:
    #Don't build these ops inside the loop below; adding new graph nodes on
    #every iteration slows everything down.
    #argmax(one_hot(label)) recovers the label as int64 (matching the dtype of
    #argmax(classifier)); the transpose lines the shapes up for tf.equal.
    acc = get_accuracy(tf.argmax(classifier,axis=1),tf.transpose(tf.argmax(tf.one_hot(label,10),axis=2)))
    
    try:
        i = 0
        acc_list = []
        while True:
            i = i+1
            a = sess.run(acc,{handle:hdl})
            acc_list.append(a)
            if i%100 == 0:
                print(i, "Mean Acc : ", np.array(acc_list).mean())
                acc_list = []
                           
    except tf.errors.OutOfRangeError:
        pass    
INFO:tensorflow:Restoring parameters from models/MNIST_CNN/mnist_model.ckpt
100 Mean Acc :  0.98800004
200 Mean Acc :  0.97900003
300 Mean Acc :  0.98399997
400 Mean Acc :  0.988
500 Mean Acc :  0.985
600 Mean Acc :  0.991
700 Mean Acc :  0.987
800 Mean Acc :  0.997
900 Mean Acc :  0.995
1000 Mean Acc :  0.987

Serving

In [17]:
test_ims_paths = tf.placeholder(tf.string)
serving_data = tf.data.Dataset.from_tensor_slices(test_ims_paths)

def read_img(filepath):
    image_string = tf.read_file(filepath)
    image = tf.image.decode_jpeg(image_string)
    image = tf.reshape(image,(28,28,1))
    image = tf.image.convert_image_dtype(image,tf.float32)
    image = tf.image.resize_images(image,(32,32))
    return image
    
serving_data = serving_data.map(lambda x: read_img(x))
serving_data = serving_data.map(lambda x: (x,tf.expand_dims(tf.cast(0,tf.uint8),axis=0))) #dummy uint8 label so the structure matches the model's feedable iterator
serving_data = serving_data.batch(5)
In [18]:
serving_data.output_types,serving_data.output_shapes
Out[18]:
((tf.float32, tf.uint8),
 (TensorShape([Dimension(None), Dimension(32), Dimension(32), Dimension(1)]),
  TensorShape([Dimension(None), Dimension(1)])))
In [19]:
#Do not reuse the reinitializable iterator from the model; it won't be available at serving time
serving_iterator = serving_data.make_initializable_iterator()
In [30]:
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_CNN/mnist_model.ckpt')

    ims_paths = ['pics/img_13.jpg','pics/img_24.jpg']
    hdl,_ = sess.run([serving_iterator.string_handle(),serving_iterator.initializer],{test_ims_paths:ims_paths})
    predictions = tf.argmax(classifier,axis=1)
    out = sess.run(predictions,{handle:hdl})
    print(out)  
INFO:tensorflow:Restoring parameters from models/MNIST_CNN/mnist_model.ckpt
[4 5]
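
Since the handle placeholder and classifier were stored in collections before saving, a fresh process can serve the checkpoint without rebuilding the model. A minimal sketch under that assumption (the serving dataset and read_img must be recreated in the new process, because iterators cannot be serialized):

# Hypothetical standalone serving script (sketch, not run in this notebook).
tf.reset_default_graph()
restorer = tf.train.import_meta_graph('models/MNIST_CNN/mnist_model.ckpt.meta')

handle = tf.get_collection('data_handle')[0]
classifier = tf.get_collection('classifier')[0]
predictions = tf.argmax(classifier, axis=1)

# Rebuild the serving pipeline; read_img is the same function defined above.
paths_ph = tf.placeholder(tf.string)
data = tf.data.Dataset.from_tensor_slices(paths_ph)
data = data.map(read_img)
data = data.map(lambda x: (x, tf.expand_dims(tf.cast(0, tf.uint8), axis=0)))
data = data.batch(5)
serving_it = data.make_initializable_iterator()

with tf.Session() as sess:
    restorer.restore(sess, 'models/MNIST_CNN/mnist_model.ckpt')
    hdl, _ = sess.run([serving_it.string_handle(), serving_it.initializer],
                      {paths_ph: ['pics/img_13.jpg', 'pics/img_24.jpg']})
    print(sess.run(predictions, {handle: hdl}))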