Implementation of a Perceptron model using the TensorFlow library.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
plt.rcParams['figure.figsize'] = (16, 8)
# Load MNIST and keep only the digits 0 and 1 (binary classification task).
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
# data cleaning: restrict both splits to labels 0 and 1
keep_train = Y_train <= 1
keep_test = Y_test <= 1
X_train, Y_train = X_train[keep_train], Y_train[keep_train]
X_test, Y_test = X_test[keep_test], Y_test[keep_test]
for name, arr in (('X_train', X_train), ('Y_train', Y_train),
                  ('X_test', X_test), ('Y_test', Y_test)):
    print('{}: {}'.format(name, arr.shape))
X_train: (12665, 28, 28) Y_train: (12665,) X_test: (2115, 28, 28) Y_test: (2115,)
# Show a 3x6 grid of randomly chosen training digits with their labels.
fig, AX = plt.subplots(3, 6, sharex=True, sharey=True)
np.random.seed(1234)
for ax in AX.ravel():
    idx = np.random.randint(Y_train.size)
    ax.imshow(X_train[idx])
    ax.set_title('label: {}'.format(Y_train[idx]))
plt.grid(False)
# data preparation
# Scale pixels to [0, 1], flatten each 28x28 image to a 784-vector (float32),
# and reshape the labels into column vectors for the (m, 1) output layer.
x_train = (X_train / 255).astype(np.float32).reshape(-1, 28 * 28)
x_test = (X_test / 255).astype(np.float32).reshape(-1, 28 * 28)
y_train = Y_train.astype(np.float32).reshape(-1, 1)
y_test = Y_test.astype(np.float32).reshape(-1, 1)
for name, arr in (('x_train', x_train), ('y_train', y_train),
                  ('x_test', x_test), ('y_test', y_test)):
    print('{}: {}'.format(name, arr.shape))
x_train: (12665, 784) y_train: (12665, 1) x_test: (2115, 784) y_test: (2115, 1)
# Training hyper-parameters (names are referenced by the training cell below).
EPOCHS = 500 # epochs
ALPHA = 0.001 # learning rate
BATCH = 100 # batch size
# m is the number of examples
# n_x is the input size 28x28=784
m, n_x = x_train.shape
# Placeholders for a mini-batch of flattened images and their labels.
X = tf.placeholder(tf.float32, shape=[None, n_x], name='X')
Y = tf.placeholder(tf.float32, shape=[None, 1], name='Y')
# variables initialization
# FIX: the original passed tf.float32 as the second positional argument of
# tf.Variable, which is `trainable` (a bool), not a dtype; it only worked
# because tf.float32 is truthy. The dtype belongs to the initializer.
W = tf.Variable(tf.zeros([n_x, 1], dtype=tf.float32), name='W')
B = tf.Variable(tf.zeros([1, 1], dtype=tf.float32), name='B')
init_variables = tf.global_variables_initializer()
# model: a single sigmoid unit (logistic regression / perceptron)
Z = tf.add(tf.matmul(X, W), B)
A = tf.nn.sigmoid(Z)
# training graph and optimization: MSE loss, plain gradient descent
loss = tf.reduce_mean(tf.losses.mean_squared_error(predictions=A, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=ALPHA).minimize(loss)
# prediction graph: threshold the sigmoid activation at 0.5
prediction = tf.round(A)
compare = tf.equal(prediction, Y)
cast = tf.cast(compare, tf.float32)
accuracy = tf.reduce_mean(cast)*100
# Train the model, recording per-step batch loss and test-set accuracy.
loss_plot = []
accA_plot = []
with tf.Session() as sess:
    sess.run(init_variables)
    for epoch in range(EPOCHS + 1):
        # draw a random mini-batch of example indices (with replacement)
        batch = np.random.choice(Y_train.size, size=BATCH)
        feed = {X: x_train[batch], Y: y_train[batch]}
        # one gradient step, then measure batch loss and test accuracy
        sess.run(optimizer, feed_dict=feed)
        batch_loss = sess.run(loss, feed_dict=feed)
        test_acc = sess.run(accuracy, feed_dict={X: x_test, Y: y_test})
        loss_plot.append(batch_loss)
        accA_plot.append(test_acc)
        if epoch % 100 == 0 and epoch != 0:
            print('epoch: {0:04d} | loss: {1:.3f} | accuracy: {2:06.2f} %'.format(epoch, batch_loss, test_acc))
    w_ = sess.run(W) # store W and B for visualization and test
    b_ = sess.run(B)
epoch: 0100 | loss: 0.187 | accuracy: 099.72 % epoch: 0200 | loss: 0.144 | accuracy: 099.76 % epoch: 0300 | loss: 0.121 | accuracy: 099.76 % epoch: 0400 | loss: 0.096 | accuracy: 099.81 % epoch: 0500 | loss: 0.082 | accuracy: 099.81 %
# Left: the learned weight vector rendered as a 28x28 image.
# Right: loss (top) and test accuracy (bottom) over training steps.
ax_w = plt.subplot(121)
ax_w.imshow(w_.T.reshape(28, 28))
ax_w.set_title('W')
ax_loss = plt.subplot(222)
ax_loss.plot(loss_plot)
ax_loss.set_ylabel('loss')
ax_acc = plt.subplot(224)
ax_acc.plot(accA_plot)
ax_acc.set_ylabel('accuracy')
plt.xlabel('epochs')
plt.show()
# Run a manual (NumPy) forward pass with the trained weights on random test
# digits; misclassified digits are displayed with inverted colors.
fig, AX = plt.subplots(3, 6, figsize=(2048//72, 1024//72))
AX = [ax for row in AX for ax in row]
np.random.seed(1)
for ax in AX:
    idx = np.random.randint(y_test.size)
    # forward pass: z = w.x + b, a = sigmoid(z)
    z_ = np.dot(w_.T, x_test[idx]) + b_
    a_ = 1/(1 + np.exp(-z_))
    y_ = 1 if a_ > 0.5 else 0
    image = x_test[idx].reshape(28, 28)
    # invert the image when the prediction disagrees with the true label
    ax.imshow(image if y_ == y_test[idx] else 1 - image)
    ax.set_title(r'$\hat{y_i}$ = ' + str(y_) + r' ; $a_i$ = {:.02f}'.format(float(a_)), fontsize=20)