import tensorflow as tf
from pathlib import Path
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
# Locations of the four raw MNIST IDX files under ./datasets/MNIST_data/.
p = Path('./datasets/MNIST_data/')
train_images_path, train_label_path, test_images_path, test_label_path = (
    p / name
    for name in (
        'train-images.idx3-ubyte',
        'train-labels.idx1-ubyte',
        't10k-images.idx3-ubyte',
        't10k-labels.idx1-ubyte',
    )
)
First 16 bytes contain 4 big-endian int32 numbers with meta info (magic, count, rows, cols). The remaining bytes are uint8 pixel data, 1 byte each.
# Read the IDX image file: a 16-byte big-endian header (magic, count, rows,
# cols as uint32) followed by one unsigned byte per pixel.
# Use a context manager so the file handle is closed (the original leaked it).
with train_images_path.open('rb') as ft:
    m,n,h,w = np.frombuffer(ft.read(4*4),np.dtype('u4').newbyteorder('>'))
    o_im = np.frombuffer(ft.read(),np.dtype('u1')).reshape(n,h,w)
m,n,h,w
o_im.shape
Image.fromarray(o_im[0])
Check train label file
First 8 bytes contain 2 big-endian int32 numbers with meta info (magic, count). The remaining bytes are uint8 label data, 1 byte each.
# Read the IDX label file: an 8-byte big-endian header (magic, count as
# uint32) followed by one unsigned byte per label.
# Use a context manager so the file handle is closed (the original leaked it).
with train_label_path.open('rb') as ft:
    m,n = np.frombuffer(ft.read(2*4),np.dtype('u4').newbyteorder('>'))
    o = np.frombuffer(ft.read(),np.dtype('u1'))
m,n
o.shape
o[:5]
Create separate training and test dataset pipelines, and use a reinitializable iterator to switch between them. This type of iterator is useful when we want to fine-tune the model on a different dataset with a different preprocessing pipeline.
Make it possible not to depend on iterators for serving
def mnist_dataset(train:bool) -> tf.data.Dataset:
    """Build a tf.data pipeline of (image, one-hot label) pairs from the raw
    MNIST IDX files.

    train: use the 60K training files when True, the 10K test files otherwise.

    Images come out as float32 (28, 28, 1) in [0, 1].  Labels come out as
    one-hot vectors of shape (1, 10): tf.one_hot is applied to the length-1
    decoded byte, which keeps an extra leading axis — this is why later code
    uses argmax(axis=2) and a transpose on the batched labels.
    """
    if train:
        # Skip the 16-byte image header (4 big-endian int32 meta numbers);
        # each record is one 28x28 uint8 image.
        im = tf.data.FixedLengthRecordDataset([str(train_images_path)],28*28,header_bytes=16)
        # Skip the 8-byte label header (2 big-endian int32 meta numbers);
        # each record is a single uint8 label.
        label = tf.data.FixedLengthRecordDataset([str(train_label_path)],1,header_bytes=8)
    else:
        im = tf.data.FixedLengthRecordDataset([str(test_images_path)],28*28,header_bytes=16)
        label = tf.data.FixedLengthRecordDataset([str(test_label_path)],1,header_bytes=8)
    # Record bytes -> uint8 vector -> (28, 28, 1) -> float32 in [0, 1].
    im = im.map(lambda x: tf.decode_raw(x,tf.uint8),num_parallel_calls=4)
    im = im.map(lambda x: tf.reshape(x,(28,28,1)),num_parallel_calls=4)
    im = im.map(lambda x: tf.image.convert_image_dtype(x,tf.float32),num_parallel_calls=4)
    label = label.map(lambda x: tf.decode_raw(x,tf.uint8), num_parallel_calls=4)
    # x has shape (1,), so the resulting one-hot label has shape (1, 10).
    label = label.map(lambda x: tf.one_hot(x,10), num_parallel_calls=4)
    dataset = tf.data.Dataset.zip((im,label))
    return dataset
Training dataset
# Training input pipeline: shuffle with a 20K-element buffer, repeat for
# 10 epochs, batch size 10, prefetch 2 batches; built on the CPU.
with tf.device('/cpu:0'):
    train_dataset = mnist_dataset(True)
    train_dataset = train_dataset.shuffle(20000)
    train_dataset = train_dataset.repeat(10)
    train_dataset = train_dataset.batch(10)
    train_dataset = train_dataset.prefetch(2)
train_dataset.output_types,train_dataset.output_shapes
Test dataset
# Test input pipeline: no shuffle/repeat — a single pass in file order.
with tf.device('/cpu:0'):
    test_dataset = mnist_dataset(False)
    test_dataset = test_dataset.batch(10)
    test_dataset = test_dataset.prefetch(2)
test_dataset.output_types,test_dataset.output_shapes
While creating a reinitializable iterator, the output shapes and types of the datasets should match.
# Reinitializable iterator: one iterator built from the common structure
# (types/shapes) of both datasets; running one of the init ops below switches
# which dataset feeds `im` and `label`.
iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
im,label = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
test_init_op = iterator.make_initializer(test_dataset)
Production : In this case, dont use iterator (The if condition however increases training time by a minor fraction)
# Serving path: a placeholder that bypasses the iterator; `is_serving`
# selects at run time which source the model reads.
is_serving = tf.placeholder_with_default(tf.constant(False),[])
#Do not use tf.int32 or int64 images! This scales float values close to 0
serving_input = tf.placeholder_with_default(np.zeros((1,28,28,1),dtype=np.uint8),(None,28,28,1))
serving_input_float = tf.image.convert_image_dtype(serving_input,tf.float32,saturate=True)
Model input
#Using this if cond increases total training(60Kx10 images) time by 2s
model_input = tf.cond(is_serving,lambda:serving_input_float,lambda:im)
# Pull one test batch through the model input to sanity-check shapes/values.
with tf.Session() as sess:
    sess.run(test_init_op)
    i,l = sess.run([model_input,label])
Check
i.shape
l.shape
# Image is float after convert_image_dtype; reshape drops the channel axis.
plt.imshow(i[0].reshape(28,28))
It is a float image
i[0].min(),i[0].max()
Check label
# Labels are (batch, 1, 10); [0][0] is the first one-hot vector.
print(np.where(l[0][0]==1))
l
Check serving
Image.open('pics/img_13.jpg')
Although we dont use iterators, error was thrown when one of them were not initialized
# Stack two sample digits into one (2, 28, 28, 1) uint8 batch for serving.
test_im1 = np.asarray(Image.open('pics/img_13.jpg')).reshape(1,28,28,1)
test_im2 = np.asarray(Image.open('pics/img_24.jpg')).reshape(1,28,28,1)
test_ims = np.concatenate((test_im1,test_im2),axis=0) #(2,28,28,1)
print(test_ims.shape)
with tf.Session() as sess:
    sess.run(test_init_op) #Throws error when one of the iterators are not initialized
    it = sess.run(model_input,{is_serving:True,serving_input:test_ims})
it[0].min(), it[0].max(), it.shape
plt.imshow(it[0].reshape(28,28))
tf.layers.dense()
units : Number of output units
activation: (Default:No activation) Eg, tf.nn.relu (No brackets in the end)
kernel_initializer: (Default:glorot_uniform). Initializer for W Eg. tf.initializers.glorot_normal() (Need brackets in the end)
bias_initializer : (Default: zeros) Initializer for b
kernel_regularizer : (Default:None) Regularizer for W. Reg term should be manually added to the final loss
bias_regularizer : (Default:None) Regularizer for b. Eg. tf.contrib.layers.l2_regularizer(scale=0.01)
activity regularizer: (Default:None) Allows regularizer of the output from the layer to be computed.
Adding regularization to the loss,
l2_loss = tf.losses.get_regularization_loss() .... loss += l2_loss
kernel/bias constraint : TODO
def dense_model(model_input):
    """Two-layer fully connected classifier: 784 -> 300 (ReLU) -> 10 logits.

    model_input: float image tensor reshapeable to (batch, 784).
    Returns the raw (unactivated) 10-way logits.
    """
    flat = tf.reshape(model_input, (-1, 784))
    hidden = tf.layers.dense(
        flat,
        300,
        activation=tf.nn.relu,
        kernel_initializer=tf.initializers.glorot_normal(),
        bias_initializer=tf.initializers.zeros(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.01),
    )
    # No activation on the output layer: softmax cross-entropy expects logits.
    logits = tf.layers.dense(hidden, 10, kernel_initializer=tf.initializers.glorot_normal())
    return logits
check
# Build the model graph and run one untrained forward pass as a smoke test.
dense_net = dense_model(model_input)
dense_net
While reshaping tensors, get batchsize with tf.shape(tensor)[0] instead of tensor.shape[0]. This is because the former returns a tensor, while the latter returns just a number
print(dense_net.shape[0])  # static shape entry: a plain value, not a tensor
print(tf.shape(dense_net)[0])  # dynamic shape: a tensor evaluated at run time
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(training_init_op)
    o = sess.run(dense_net)
o #(Batch size x 10)
Loss
# Mean softmax cross-entropy over the batch; the L2 regularization terms
# created by the dense layers would be added via the commented lines below.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=label,logits=dense_net))
#l2_loss = tf.losses.get_regularization_loss()
#loss += l2_loss
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(training_init_op)
    print(sess.run(loss))
Optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
IMPORTANT: After training the model inside a session, SAVE IT. While evaluating in a different session, we are not supposed to init global variables! That would reset all the weights!
Create session and train
!rm models/MNIST_DENSE/*
import time
saver = tf.train.Saver()
# Train until the (10-epoch) training dataset is exhausted — the iterator
# raises OutOfRangeError at the end — then checkpoint the weights.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(training_init_op)
    start = time.time()
    try:
        i = 1
        tmp = []
        while True:
            i = i+1
            l,_ = sess.run([loss,train])
            tmp.append(l)
            # Report the mean loss over every 5000-batch window.
            if i%5000 == 0:
                avg_loss = np.array(tmp).mean()
                print("Batch: ",i,avg_loss)
                tmp = []
    except tf.errors.OutOfRangeError:
        pass
    end = time.time()
    elapsed = end-start
    print("Elapsed time : ", elapsed, " s")
    saver.save(sess,'models/MNIST_DENSE/mnist_model.ckpt')
Observation : Elapsed time is two seconds more when the if condition for serving is used
def get_accuracy(predict:'tf.Tensor, eg: [2,4,1,...]',true: 'tf.Tensor, eg: [2,4,1,...]') -> 'tf.Tensor':
    """Return a scalar float32 tensor: the fraction of entries where
    predict == true (element-wise, so both must have compatible shapes)."""
    correct_pred = tf.equal(predict,true)
    #We have to cast [True,False,True,...] --> [1,0,1...]
    acc = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
    return acc
Closer look at argmax reduction
# Reduce the (10, 1, 10) one-hot label batch along axis 2 -> (10, 1) ids.
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_DENSE/mnist_model.ckpt')
    sess.run(test_init_op)
    #label is (10x(1x10)) --> reduced to 10x1
    o = sess.run(tf.argmax(label,axis=2))
    print(o.shape)
    print(o)
# Reduce the (10, 10) logit batch along axis 1 -> (10,) predicted ids.
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_DENSE/mnist_model.ckpt')
    sess.run(test_init_op)
    #sm = tf.nn.softmax(dense_net) #We get the same answer. max(sm) = max(logit)
    #densenet is 10x10 --> reduced to 10
    o = sess.run(tf.argmax(dense_net,axis=1))
    print(o.shape)
    print(o)
IMPORTANT:
While using tf.equal we have to keep the broadcasting rule in mind
labels: 10x1, dense_net: 10, RESULT: 10x10. Therefore, transpose labels to get 1x10.
# Broadcasting check: comparing (10,) predictions against (10, 1) labels
# would broadcast to a (10, 10) bool matrix, so transpose the labels first.
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_DENSE/mnist_model.ckpt')
    sess.run(test_init_op)
    pred = tf.argmax(dense_net,axis=1) # shape (10,)
    true = tf.argmax(label,axis=2) # shape (10, 1)
    correct_pred = tf.equal(pred,tf.transpose(true)) #Without transpose, we get 10x10 bool matrix
    print(sess.run(correct_pred))
IMPORTANT:
Dont create new tensors inside a loop! This will slow down everything
# Evaluate mean accuracy over the whole test set, one batch at a time.
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_DENSE/mnist_model.ckpt')
    sess.run(test_init_op)
    #IMPORTANT:
    #Dont place this code inside the loop! This will slow down everything
    #(each call would keep adding new ops to the graph).
    acc = get_accuracy(tf.argmax(dense_net,axis=1),tf.transpose(tf.argmax(label,axis=2)))
    try:
        i = 0
        acc_list = []
        while True:
            i = i+1
            a = sess.run(acc)
            acc_list.append(a)
            # Report the mean accuracy over every 100-batch window.
            if i%100 == 0:
                print(i, "Mean Acc : ", np.array(acc_list).mean())
                acc_list = []
    except tf.errors.OutOfRangeError:
        pass
We test the following images
# Display the two sample digit images used for serving below.
Image.open('pics/img_13.jpg')
Image.open('pics/img_24.jpg')
Recommended when there is not much of preprocessing
# Serving via the placeholder path: restore the checkpoint, feed raw uint8
# images through serving_input, and read off argmax predictions.
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_DENSE/mnist_model.ckpt')
    sess.run(test_init_op)
    test_im1 = np.asarray(Image.open('pics/img_13.jpg')).reshape(1,28,28,1)
    test_im2 = np.asarray(Image.open('pics/img_24.jpg')).reshape(1,28,28,1)
    test_ims = np.concatenate((test_im1,test_im2),axis=0) #(2,28,28,1)
    predictions = tf.argmax(dense_net,axis=1)
    out = sess.run(predictions,{is_serving:True,serving_input:test_ims})
    print(out)
It is recommended to use dataset API whenever it is possible to match the dataset types and shapes with the training datasets
Thus we can see that reinitializable iterators are useful when we want to fine tune the model on different dataset with different preprocessing pipeline
# Serving via the dataset API: a third dataset (JPEG files on disk) feeding
# the same reinitializable iterator; its types/shapes must match the others.
test_ims_paths = tf.placeholder(tf.string)
serving_data = tf.data.Dataset.from_tensor_slices(test_ims_paths)
def read_img(filepath):
    """Load one JPEG path into a float32 (28, 28, 1) image in [0, 1]."""
    image_string = tf.read_file(filepath)
    image = tf.image.decode_jpeg(image_string)
    image = tf.reshape(image,(28,28,1))
    image = tf.image.convert_image_dtype(image,tf.float32)
    return image
serving_data = serving_data.map(lambda x: read_img(x))
# Pair each image with a dummy (1, 10) label so the structure matches.
serving_data = serving_data.map(lambda x: (x,tf.zeros((1,10)))) #To make output types match
serving_data = serving_data.batch(5)
serving_data.output_types,serving_data.output_shapes
serving_init_op = iterator.make_initializer(serving_data)
# Run serving through the iterator: initialize it with the image paths,
# then evaluate predictions with no per-run feed of image data.
with tf.Session() as sess:
    saver.restore(sess,'models/MNIST_DENSE/mnist_model.ckpt')
    ims_paths = ['pics/img_13.jpg','pics/img_24.jpg']
    sess.run(serving_init_op,{test_ims_paths:ims_paths})
    predictions = tf.argmax(dense_net,axis=1)
    out = sess.run(predictions)
    print(out)
1) Do not use tf.int32 or int64 images while converting to float image. This scales float values close to 0
2) We used placeholder to serve models, checking them with if condition. This increased training time by 2s for 60Kx10 images.
3) Although we dont use iterators while serving, one of the iterators had to be initialized.
4) While reshaping tensors, get batchsize with tf.shape(tensor)[0] instead of tensor.shape[0]. This is because the former returns a tensor, while the latter returns just a number
5) We looked into arguments of tf.layers.Dense. Familiarize with different kinds of regularizers and how to add them to loss function
6) After training the model inside a session, SAVE IT. While evaluating in a different session, we are not supposed to init global variables! That would reset all the weights!
7) Make a note of resulting dimensions of tf.argmax.
(10x1x10) ---> along axis 2 ---> (10x1); (10x10) ---> along axis 1 ---> (10)
8) While using tf.equal we have to keep the broadcasting rule in mind:
labels: 10x1, dense_net: 10, tf.equal: 10x10. Therefore, transpose/reshape labels to get the right dimension.
9) MOST IMPORTANT : Dont create new tensors inside a loop! This will slow down everything
10) We can see that reinitializable iterators are useful when we want to fine tune the model on different dataset with different preprocessing pipeline