Implementation of multi-class logistic regression using the Keras library.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
plt.rcParams['figure.figsize'] = (16, 8)
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
# one hot for multi-class classification
# MNIST = 10 classes [0-9]
y_train_oh = np.zeros((Y_train.size, 10))
y_train_oh[np.arange(Y_train.size), Y_train] = 1
y_test_oh = np.zeros((Y_test.size, 10))
y_test_oh[np.arange(Y_test.size), Y_test] = 1
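As a side note, Keras ships a helper that produces the same encoding; an equivalent sketch using the built-in utility:
# Equivalent one-hot encoding with the built-in Keras helper
y_train_oh = tf.keras.utils.to_categorical(Y_train, num_classes=10)
y_test_oh = tf.keras.utils.to_categorical(Y_test, num_classes=10)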
print('X_train:', X_train.shape)
print('Y_train:', Y_train.shape)
print('y_train_oh:', y_train_oh.shape)
print('X_test:', X_test.shape)
print('Y_test:', Y_test.shape)
print('y_test_oh:', y_test_oh.shape)
X_train: (60000, 28, 28)
Y_train: (60000,)
y_train_oh: (60000, 10)
X_test: (10000, 28, 28)
Y_test: (10000,)
y_test_oh: (10000, 10)
fig, AX = plt.subplots(3, 6, sharex=True, sharey=True)
np.random.seed(1234)
for ax in AX.ravel():
    rindex = np.random.randint(Y_train.size)
    ax.imshow(X_train[rindex])
    # title: integer label + its one-hot encoding
    title = '{} :: '.format(Y_train[rindex])
    title += ''.join([str(int(e)) for e in y_train_oh[rindex]])
    ax.set_title(title)
    ax.grid(False)
x_train, y_train = X_train/255, Y_train[np.newaxis].T
x_test, y_test = X_test/255, Y_test[np.newaxis].T
x_train = x_train.astype(np.float32)
y_train = y_train.astype(np.float32)
x_test = x_test.astype(np.float32)
y_test = y_test.astype(np.float32)
print('x_train:', x_train.shape)
print('y_train:', y_train.shape)
print('x_test:', x_test.shape)
print('y_test:', y_test.shape)
x_train: (60000, 28, 28)
y_train: (60000, 1)
x_test: (10000, 28, 28)
y_test: (10000, 1)
# reshape Xs
x_train = x_train.reshape(-1, 28*28)
x_test = x_test.reshape(-1, 28*28)
print('x_train:', x_train.shape)
print('x_test:', x_test.shape)
x_train: (60000, 784)
x_test: (10000, 784)
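The scaling and flattening could also live inside the model itself; a minimal sketch, assuming TF >= 2.6 for the Rescaling layer (the name model_with_preproc is just for illustration):
# Sketch: fold the /255 scaling and the reshape into the model itself
# (Rescaling requires TF >= 2.6)
model_with_preproc = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),
    tf.keras.layers.Rescaling(1./255),  # replaces the manual x/255
    tf.keras.layers.Flatten(),          # replaces the manual reshape to 784
    tf.keras.layers.Dense(10, activation='softmax')
])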
EPOCHS = 5000 # training iterations (one random mini-batch per "epoch")
ALPHA = 0.005 # learning rate
BATCH = 100   # mini-batch size
# m is the number of examples
# n_x is the input size 28x28=784
m, n_x = x_train.shape
# model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(n_x,)),
    tf.keras.layers.Dense(10, activation='softmax')
])
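This single Dense layer is exactly multi-class logistic regression: it computes softmax(xW + b) with W of shape (784, 10) and b of length 10, i.e. 7,850 trainable parameters. A minimal NumPy sketch of the forward pass (the function name forward is just for illustration):
# What the one-layer softmax model computes, sketched in NumPy
def forward(x, W, b):
    z = x @ W + b                                   # logits, shape (m, 10)
    ez = np.exp(z - z.max(axis=1, keepdims=True))   # numerically stable softmax
    return ez / ez.sum(axis=1, keepdims=True)       # rows sum to 1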
# Compile model; the learning rate belongs on the optimizer object
# (compile() itself has no lr argument)
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(learning_rate=ALPHA),
    metrics=['accuracy']
)
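The one-hot step could also be skipped entirely by compiling with sparse_categorical_crossentropy, which accepts the integer labels directly; a sketch:
# Alternative compile: integer labels, no one-hot needed
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(learning_rate=ALPHA),
    metrics=['accuracy']
)
# then e.g.: model.fit(x_train[rbatch], Y_train[rbatch], epochs=1, verbose=0)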
# loss and accuracy storage
loss_plot = []; accA_plot = []
for epoch in range(EPOCHS + 1):
    # random mini-batch (sampled with replacement)
    rbatch = np.random.choice(Y_train.size, size=BATCH)
    # single gradient step on the batch, then store metrics
    hist = model.fit(x_train[rbatch], y_train_oh[rbatch], epochs=1, verbose=0)
    loss_plot += [e*100 for e in hist.history['loss']]  # x100 so loss shares a scale with accuracy %
    accA_plot += [e*100 for e in hist.history['accuracy']]
    if (not epoch % 1000) and (epoch != 0):
        print(f'epoch: {epoch:04d} | loss: {loss_plot[-1]:.3f} | accuracy: {accA_plot[-1]:06.2f} %')
W_ = model.weights[0].numpy()  # store W for visualization
epoch: 1000 | loss: 38.040 | accuracy: 092.00 %
epoch: 2000 | loss: 30.766 | accuracy: 091.00 %
epoch: 3000 | loss: 25.848 | accuracy: 094.00 %
epoch: 4000 | loss: 29.970 | accuracy: 090.00 %
epoch: 5000 | loss: 51.808 | accuracy: 084.00 %
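For reference, the manual batching above can be delegated to Keras, which shuffles and batches each pass over the data on its own; an equivalent-in-spirit sketch:
# Let Keras handle shuffling/batching: 10 full passes over the training set
history = model.fit(x_train, y_train_oh, batch_size=BATCH, epochs=10, verbose=0)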
fig, AX = plt.subplots(1, 10, sharey=True)
for i in range(10):
    AX[i].imshow(W_.T[i].reshape(28, 28))
    AX[i].set_title(r'$W_{}$'.format(i))
fig, [axA, axB] = plt.subplots(2, 1, sharex=True)
axA.plot(loss_plot)
axA.set_ylabel('loss')
axB.plot(accA_plot)
axB.set_ylabel('accuracy')
plt.xlabel('epochs')
plt.show()
fig, AX = plt.subplots(3, 6, figsize=(2048//72, 1024//72))
AX = AX.ravel()
pred = model.predict(x_test)
np.random.seed(1)
for ax in AX:
    index = np.random.randint(y_test.size)
    Z_ = pred[index]            # softmax probabilities from the model
    Y_ = np.argmax(Z_)          # predicted class
    if Y_ == y_test[index, 0]:  # correct prediction: show the digit as-is
        ax.imshow(x_test[index].reshape(28, 28))
    else:                       # misclassified: show the digit inverted
        ax.imshow(1 - x_test[index].reshape(28, 28))
    # the model output is already a softmax distribution, so the confidence
    # is simply the probability assigned to the predicted class
    A_ = float(Z_[Y_])
    ax.set_title(r'$\hat{Y_i}$ = ' + str(Y_) + ' ; $A_i$ = {:.03f}'.format(A_), fontsize=20)
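To complement the per-image inspection, overall held-out performance can be read off with model.evaluate:
# Aggregate test-set metrics; evaluate returns [loss, accuracy] as compiled
test_loss, test_acc = model.evaluate(x_test, y_test_oh, verbose=0)
print(f'test loss: {test_loss:.3f} | test accuracy: {test_acc*100:.2f} %')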