MNIST Neural Network From Scratch¶

The goal of this short project is to train a neural network on the MNIST dataset using only linear algebra, implemented with NumPy. The MNIST dataset is a collection of 28x28 pixel images of handwritten digits, each labeled with the integer it represents. The model as currently constructed correctly identifies about 90% of hand-drawn digits; with dedicated deep learning libraries, accuracy well over 98% can be achieved. The data for this project can be accessed here.
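
For reference, the claim above about dedicated deep learning libraries refers to off-the-shelf models; a minimal sketch of one such model is shown below, assuming Keras (already used below for loading the data) is available. It is not used anywhere else in this notebook, and the layer sizes and epoch count are arbitrary.

In [ ]:
#For comparison only: a small off-the-shelf Keras model (not part of the from-scratch implementation)
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Flatten, Dense

(kx_train, ky_train), (kx_test, ky_test) = mnist.load_data()
model = Sequential([
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(kx_train / 255.0, ky_train, epochs=5, verbose=0)
print(model.evaluate(kx_test / 255.0, ky_test, verbose=0))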

In [ ]:
#Import Modules
import numpy as np
#NumPy for all data processing and modeling
from matplotlib import pyplot as plt
#Pyplot for image generation
from keras.datasets import mnist
#mnist for loading the dataset
from time import perf_counter_ns
#perf_counter_ns for timing code
In [ ]:
#Load the data and reshape it so that each image becomes a column of 784 nodes
(train_X_2d, train_y), (test_X_2d, test_y) = mnist.load_data()
#Collapse the 28x28 images into 784-node rows and keep the first 10000 training samples
train_X = np.reshape(train_X_2d, (60000, 784))[0:10000, :]
train_X_2d = train_X_2d[0:10000, :, :]
train_y = train_y[0:10000]
test_X = np.reshape(test_X_2d, (10000, 784))
#m = number of training samples, n = nodes per image
m, n = train_X.shape
#Transpose so that each column holds one image
train_X = train_X.T
test_X = test_X.T
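
As a quick sanity check of the reshaping above (added for clarity), each column of train_X and test_X now holds one flattened image:

In [ ]:
#Confirm the data layout: 784 pixel rows by 10000 image columns
print(train_X.shape, train_y.shape)
print(test_X.shape, test_y.shape)
print(m, n)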
In [ ]:
#Generate random samples for analysis
nums = np.random.randint(0, 10000, 9)
plt.figure(1)
for i in range(9):
    plt.subplot(3,3,i+1)
    plt.imshow(test_X[:,nums[i]].reshape(28,28), cmap='Greys')
    plt.yticks([])
    plt.xticks([])
    plt.title('Actual:' + str(test_y[nums[i]]))
plt.suptitle('Sample of Images and Labels from the Test Dataset', y=1)
plt.subplots_adjust(hspace = .5, wspace = 1.5)

This project uses the sigmoid function as the activation for the hidden layers and the softmax function on the output layer to generate probabilities and thus predictions. While other activation functions train faster than the sigmoid, it was selected here because it keeps values bounded and prevents them from exceeding the limits of float64.

Sigmoid Function:¶
$$ A = \frac{1}{1 + e^{-Z}} $$
Softmax Function:¶
$$ A_{i} = \frac{e^{Z_{i}}}{\sum_{j=1}^K e^{Z_{j}}} \quad \text{for } i=1,2,\dots,K $$
In [ ]:
def sigmoid(Z):
    return (1/(1+np.exp(-Z)))

def softmax(Z):
    #Column-wise softmax: exponentiate and normalize each column so it sums to 1
    A = np.exp(Z) / np.sum(np.exp(Z), axis=0)
    return A

def sigmoid_deriv(Z):
    dsig = (np.exp(-Z))/((np.exp(-Z)+1)**2)
    #For inputs of very large magnitude np.exp overflows and this expression evaluates to nan, even though the true derivative is essentially 0. nan_to_num sets those values to 0.
    dsig = np.nan_to_num(dsig)
    return dsig

def y_col_arr(Y):
    #One-hot encode the labels: each column has a 1 at the correct digit and 0 everywhere else
    y_col_array = np.zeros((Y.size, Y.max() + 1))
    y_col_array[np.arange(Y.size), Y] = 1
    y_col_array = y_col_array.T
    return y_col_array

def get_predictions(A3, Y):
    #Find predicted values from probability vectors
    predictions = np.argmax(A3,0)
    print('Predictions:')
    print(predictions)
    print('Solutions:')
    print(Y)
    #Generate accuracy value
    accuracy = np.sum(predictions == Y)/ Y.size
    return accuracy, predictions
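
As a small illustration of the helpers above (the example values are arbitrary): the sigmoid stays bounded in (0, 1) even for large inputs, each softmax column sums to 1, and y_col_arr produces one-hot columns.

In [ ]:
#Small illustrative checks of the helper functions
print(sigmoid(np.array([-500.0, 0.0, 500.0])))   #bounded outputs: approximately 0, 0.5, 1
probs = softmax(np.array([[1.0], [2.0], [3.0]]))
print(probs, probs.sum())                        #a probability column that sums to 1
print(y_col_arr(np.array([2, 0, 1])))            #one-hot columns for the labels 2, 0, 1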
In [ ]:
def init_params():
    #Initialize the weights and biases as random values between -0.5 and 0.5
    W1 = np.random.rand(72, 784) - 0.5
    b1 = np.random.rand(72, 1) - 0.5
    W2 = np.random.rand(36, 72) - 0.5
    b2 = np.random.rand(36, 1) - 0.5
    W3 = np.random.rand(10, 36) - 0.5
    b3 = np.random.rand(10, 1) - 0.5
    return (W1, b1, W2, b2, W3, b3)

This neural network uses two hidden layers with 72 and 36 nodes respectively. This combination gives good accuracy without requiring excessive training time.

Forward Propagation:¶
$$ Z^{[1]} = W^{[1]} X + b^{[1]} $$
$$ A^{[1]} = g_{\text{sigmoid}}(Z^{[1]}) $$
$$ Z^{[2]} = W^{[2]} A^{[1]} + b^{[2]} $$
$$ A^{[2]} = g_{\text{sigmoid}}(Z^{[2]}) $$
$$ Z^{[3]} = W^{[3]} A^{[2]} + b^{[3]} $$
$$ A^{[3]} = g_{\text{softmax}}(Z^{[3]}) $$
Backward Propagation:¶
$$ dZ^{[3]} = A^{[3]} - Y $$
$$ dW^{[3]} = \frac{1}{m} \, dZ^{[3]} A^{[2]T} $$
$$ db^{[3]} = \frac{1}{m} \Sigma \, dZ^{[3]} $$
$$ dZ^{[2]} = W^{[3]T} dZ^{[3]} \odot g'_{\text{sigmoid}}(Z^{[2]}) $$
$$ dW^{[2]} = \frac{1}{m} \, dZ^{[2]} A^{[1]T} $$
$$ db^{[2]} = \frac{1}{m} \Sigma \, dZ^{[2]} $$
$$ dZ^{[1]} = W^{[2]T} dZ^{[2]} \odot g'_{\text{sigmoid}}(Z^{[1]}) $$
$$ dW^{[1]} = \frac{1}{m} \, dZ^{[1]} X^{T} $$
$$ db^{[1]} = \frac{1}{m} \Sigma \, dZ^{[1]} $$
Update Parameters:¶
$$ W^{[3]} = W^{[3]} - \alpha \, dW^{[3]} $$
$$ b^{[3]} = b^{[3]} - \alpha \, db^{[3]} $$
$$ W^{[2]} = W^{[2]} - \alpha \, dW^{[2]} $$
$$ b^{[2]} = b^{[2]} - \alpha \, db^{[2]} $$
$$ W^{[1]} = W^{[1]} - \alpha \, dW^{[1]} $$
$$ b^{[1]} = b^{[1]} - \alpha \, db^{[1]} $$

The training loop below repeats these three steps (forward propagation, backward propagation, parameter update) 500 times to train the model.

In [ ]:
def forward_prop(W1, b1, W2, b2, W3, b3, X):
    #Compute the layer activations from the input X and the current weights and biases
    Z1 = np.dot(W1, X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    Z3 = np.dot(W3, A2) + b3
    A3 = softmax(Z3)
    return (Z1, A1, Z2, A2, Z3, A3)

def backward_prop(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y):
    #Find the gradient of the cost function to determine the dW and db values used to adjust the parameters
    y_col_arr_Y = y_col_arr(Y)
    dZ3 = A3 - y_col_arr_Y
    dW3 = 1/m * dZ3.dot(A2.T)
    db3 = 1/m * np.sum(dZ3, axis=1, keepdims=True)
    dZ2 = W3.T.dot(dZ3) * sigmoid_deriv(Z2)
    dW2 = 1/m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = W2.T.dot(dZ2) * sigmoid_deriv(Z1)
    dW1 = 1/m * dZ1.dot(X.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)
    return (dW1, db1, dW2, db2, dW3, db3)

def update_par(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha):
    #Adjust the parameters using the gradients dW, db and the learning rate alpha
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    W3 = W3 - alpha * dW3
    b3 = b3 - alpha * db3
    return (W1, b1, W2, b2, W3, b3)
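
The formula for dZ[3] above implicitly assumes a cross-entropy loss on the softmax output. As a sanity check (not part of the original notebook), a single entry of dW3 can be compared against a finite-difference estimate of that loss; the cross_entropy helper, the step size eps, and the checked index (2, 7) are illustrative choices.

In [ ]:
#Finite-difference check of one entry of dW3 against the cross-entropy loss implied by dZ3 = A3 - Y
def cross_entropy(A3, Y):
    #Mean negative log-probability assigned to the correct class
    return -np.mean(np.log(A3[Y, np.arange(Y.size)] + 1e-12))

W1c, b1c, W2c, b2c, W3c, b3c = init_params()
Z1c, A1c, Z2c, A2c, Z3c, A3c = forward_prop(W1c, b1c, W2c, b2c, W3c, b3c, train_X)
dW1c, db1c, dW2c, db2c, dW3c, db3c = backward_prop(Z1c, A1c, Z2c, A2c, Z3c, A3c, W1c, W2c, W3c, train_X, train_y)

#Nudge a single weight up and down and compare the numeric slope to the analytic gradient
eps = 1e-5
W3_plus, W3_minus = W3c.copy(), W3c.copy()
W3_plus[2, 7] += eps
W3_minus[2, 7] -= eps
loss_plus = cross_entropy(forward_prop(W1c, b1c, W2c, b2c, W3_plus, b3c, train_X)[-1], train_y)
loss_minus = cross_entropy(forward_prop(W1c, b1c, W2c, b2c, W3_minus, b3c, train_X)[-1], train_y)
print('numeric: ', (loss_plus - loss_minus) / (2 * eps))
print('analytic:', dW3c[2, 7])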
In [ ]:
import warnings
warnings.filterwarnings('ignore')

def gradient_decent(X, Y, alpha, iterations):
    W1, b1, W2, b2, W3, b3 = init_params()
    #Iterate through forward prop - backward prop - update parameters for the requested number of iterations
    for i in range(iterations):
        Z1, A1, Z2, A2, Z3, A3 = forward_prop(W1, b1, W2, b2, W3, b3, X)
        dW1, db1, dW2, db2, dW3, db3 = backward_prop(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y)
        W1, b1, W2, b2, W3, b3 = update_par(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha)
        if i % 25 == 0:
            #Report the status of the model every 25th iteration
            print('Iterations: ', i)
            accuracy, predictions = get_predictions(A3, Y)
            print(str(accuracy*100) + '%')
    print('Final Accuracy')
    accuracy, predictions = get_predictions(A3, Y)
    print(str(accuracy*100) + '%')

    return (W1, b1, W2, b2, W3, b3)
In [ ]:
#Train Model
W1, b1, W2, b2, W3, b3 = gradient_decent(train_X, train_y, 1, 500)
print(np.shape(W1), np.shape(b1),np.shape(W2),np.shape(b2),np.shape(W3),np.shape(b3))
(72, 784) (72, 1) (36, 72) (36, 1) (10, 36) (10, 1)

92.9% accuracy on training data.
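
perf_counter_ns is imported at the top but never used elsewhere; as a small illustrative sketch, it can be used to time a single forward pass through the trained network:

In [ ]:
#Illustrative use of perf_counter_ns: time one forward pass over the training set
t_start = perf_counter_ns()
forward_prop(W1, b1, W2, b2, W3, b3, train_X)
t_end = perf_counter_ns()
print('Forward pass time: ' + str((t_end - t_start) / 1e6) + ' ms')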

In [ ]:
#Test Model
Z1, A1, Z2, A2, Z3, A3 = forward_prop(W1, b1, W2, b2, W3, b3, test_X)
accuracy, predictions = get_predictions(A3, test_y)
print(str(accuracy*100) + '%')

91.1% accuracy on test data.
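
Before looking at individual images, the misclassified test samples can be counted directly (a small illustrative addition using the predictions computed above):

In [ ]:
#Count the misclassified test images
wrong = np.where(predictions != test_y)[0]
print('Misclassified: ' + str(wrong.size) + ' of ' + str(test_y.size))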

In [ ]:
plt.figure(1)
for i in range(9):
    plt.subplot(3,3,i+1)
    plt.imshow(test_X[:,nums[i]].reshape(28,28), cmap='Greys')
    plt.yticks([])
    plt.xticks([])
    plt.title('Actual:' + str(test_y[nums[i]]) + '  Prediction:' + str(predictions[nums[i]]))
plt.suptitle("Sample of dataset Images and Values from Test Dataset With Predictions", y=1)
plt.subplots_adjust(hspace = .5, wspace = 1.5)

Here you can see examples of digits that the model identifies correctly, along with one example of a 2 that it mistakes for a 9.

By using only NumPy, linear algebra, and multivariable calculus, this neural network correctly identifies hand-drawn digits 91.1% of the time. With more tinkering or more training data it's possible this neural network could be more accurate, but considering its limited resources, 91.1% is a respectable success rate.