#!/usr/bin/python3

# ------------------------------------
# -- Imports

import os
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support

from keras.datasets import mnist
from keras.models import Sequential, model_from_json
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils

# ------------------------------------
# -- Functions

# plotting of data
# -------------------------------
def displayData(X, t, rows=10, cols=10, size=16):
    if len(X) > rows * cols:
        # -- get a random permutation of rows*cols indices into X.
        img_ind = np.random.permutation(len(X))[0:rows * cols]
    else:
        img_ind = range(len(X))

    fig = plt.figure(figsize=(size, size))
    fig.patch.set_facecolor('white')

    for i, ind in enumerate(img_ind):
        plt.subplot(rows, cols, 1 + i)
        plt.imshow(255 - X[ind], cmap='gray', interpolation='gaussian')
        # -- plt.axis('off') below already removes the x/y axes, so the
        # -- NullLocator trick is not needed here.
        plt.axis('off')

    plt.subplots_adjust(top=1)
    plt.show()

# NOTE: we never use this.
def plotData(X, Y, c, npixel=28):
    image = np.array(X[c, :])
    plt.figure(figsize=(6, 6))
    plt.imshow(image.reshape(npixel, npixel), cmap='Greys', interpolation='quadric')
    plt.show()

def plotAccuracy(acc_history_train, acc_history_test):
    plt.figure(figsize=(12, 8))
    plt.plot(acc_history_train, marker='o', markersize=5, label='Train')
    plt.plot(acc_history_test, marker='o', markersize=5, label='Test')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# saving/loading model parameters
# -------------------------------
def save_model(m, filename):
    # serialize the architecture to JSON
    model_json = m.to_json()
    with open("./models/" + filename + ".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    m.save_weights("./models/" + filename + ".h5")

def load_model(filename):
    with open("./models/" + filename + ".json", "r") as json_file:
        json_model = json_file.read()
    m = model_from_json(json_model)
    # load weights into the new model
    m.load_weights("./models/" + filename + ".h5")
    return m

def save_history(history, filename):
    with open("./models/" + filename + ".history", "wb") as history_file:
        pickle.dump(history.history, history_file)

def load_history(filename):
    with open("./models/" + filename + ".history", "rb") as history_file:
        return pickle.load(history_file)
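# A minimal usage sketch for the helpers above (illustrative only; assumes
# that a "./models" directory exists and that `m` is a trained Keras model):
#
#   save_model(m, "my_model")     # writes ./models/my_model.json and .h5
#   m2 = load_model("my_model")   # rebuilds the architecture plus weights
#   # note: a model rebuilt from JSON is not compiled; call m2.compile(...)
#   # again before using evaluate() or resuming training.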
# testing model
# -------------------------------
def test_model(model, history, X_test, t_test):
    # -- plot the accuracy recorded during training (if we have a history)
    if history is not None:
        history_df = pd.DataFrame(history)
        plotAccuracy(history_df.accuracy, history_df.val_accuracy)

    # -- do some prediction on test data
    predictions_test = model.predict_classes(X_test, verbose=0)

    # -- report the quality of said predictions
    print(confusion_matrix(t_test, predictions_test))
    print("Accuracy = {0:6.4f}".format(accuracy_score(t_test, predictions_test)))

    # -- compute precision, recall and f-measure for each class
    meas = precision_recall_fscore_support(t_test, predictions_test, average=None)
    for i in range(len(meas[0])):
        print("Class {0:d}: precision = {1:5.2f}, recall = {2:5.2f}, f-measure = {3:5.2f}".format(
            i, meas[0][i], meas[1][i], meas[2][i]))

# ------------------------------------
# -- Models

# model 0: softmax regression
# -------------------------------
def model_0(X_train, T_train, X_test, T_test, t_test):
    model_name = "softmax"

    if os.path.exists(f"./models/{model_name}.json") and os.path.exists(f"./models/{model_name}.h5"):
        print("Model found on disk, no need to train!")
        model0 = load_model(model_name)
        history0 = None
        if os.path.exists(f"./models/{model_name}.history"):
            history0 = load_history(model_name)
    else:
        model0 = Sequential()
        model0.add(Dense(10, input_shape=(784,)))
        model0.add(Activation('softmax'))
        model0.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='sgd')

        # Find the best possible function given the data
        history0 = model0.fit(X_train, T_train, batch_size=128, epochs=50,
                              verbose=1, validation_data=(X_test, T_test))
        save_model(model0, model_name)
        save_history(history0, model_name)
        history0 = history0.history

    test_model(model0, history0, X_test, t_test)

    # -- get the first layer's weights (kept for optional inspection)
    w0 = model0.layers[0].get_weights()

# model 1: 3-layer neural network
# -------------------------------
def model_1(X_train, T_train, X_test, T_test, t_test):
    model_name = "nn3"

    if os.path.exists(f"./models/{model_name}.json") and os.path.exists(f"./models/{model_name}.h5"):
        print("Model found on disk, no need to train!")
        model1 = load_model(model_name)
        history1 = None
        if os.path.exists(f"./models/{model_name}.history"):
            history1 = load_history(model_name)
    else:
        model1 = Sequential()
        model1.add(Dense(512, input_shape=(784,)))
        model1.add(Activation('relu'))
        model1.add(Dropout(rate=0.2))
        model1.add(Dense(10))
        model1.add(Activation('softmax'))
        model1.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

        # Find the best possible function given the data
        history1 = model1.fit(X_train, T_train, batch_size=1024, epochs=10,
                              verbose=1, validation_data=(X_test, T_test))
        save_model(model1, model_name)
        save_history(history1, model_name)
        history1 = history1.history

    test_model(model1, history1, X_test, t_test)

    # -- get the first layer's weights (kept for optional inspection)
    w0 = model1.layers[0].get_weights()

# model 2: 4-layer neural network
# -------------------------------
def model_2(X_train, T_train, X_test, T_test, t_test):
    model_name = "nn4"

    if os.path.exists(f"./models/{model_name}.json") and os.path.exists(f"./models/{model_name}.h5"):
        print("Model found on disk, no need to train!")
        model2 = load_model(model_name)
        history2 = None
        if os.path.exists(f"./models/{model_name}.history"):
            history2 = load_history(model_name)
    else:
        model2 = Sequential()
        model2.add(Dense(512, input_shape=(784,)))
        model2.add(Activation('relu'))
        model2.add(Dropout(0.2))
        model2.add(Dense(512))
        model2.add(Activation('relu'))
        model2.add(Dropout(0.2))
        model2.add(Dense(10))
        model2.add(Activation('softmax'))
        model2.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

        history2 = model2.fit(X_train, T_train, batch_size=1024, epochs=10,
                              verbose=1, validation_data=(X_test, T_test))
        save_model(model2, model_name)
        save_history(history2, model_name)
        history2 = history2.history

    test_model(model2, history2, X_test, t_test)

    # -- get the first layer's weights (kept for optional inspection)
    w0 = model2.layers[0].get_weights()
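# Conv2D layers expect image tensors of shape (samples, rows, cols, channels)
# rather than the flat 784-vectors used by the dense models above, which is
# why model_3 below reshapes its inputs first. A quick sanity check of that
# reshape (illustrative only, not part of the pipeline):
#
#   X = np.zeros((60000, 784), dtype='float32')
#   X.reshape(X.shape[0], 28, 28, 1).shape   # -> (60000, 28, 28, 1)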
if os.path.exists(f"./models/{model_name}.json") and os.path.exists(f"./models/{model_name}.h5"): print("Module found on disk, no need to train!") model3 = load_model(model_name) if os.path.exists(f"./models/{model_name}.history"): history3 = load_history(model_name) else: model3 = Sequential() model3.add(Conv2D(30, (5, 5), input_shape=(28, 28, 1), activation='relu')) model3.add(MaxPooling2D(pool_size=(2, 2))) model3.add(Conv2D(15, (3, 3), activation='relu')) model3.add(MaxPooling2D(pool_size=(2, 2))) model3.add(Dropout(0.2)) model3.add(Flatten()) model3.add(Dense(128, activation='relu')) model3.add(Dense(50, activation='relu')) model3.add(Dense(10, activation='softmax')) model3.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam') history3 = model3.fit(X_train_c, T_train, batch_size=1024, epochs=10, verbose=1, validation_data=(X_test_c, T_test)) save_model(model3, model_name) save_history(history3, model_name) test_model(model3, history3, X_test_c, t_test) # ------------------------------------ # -- Actual code if __name__ == "__main__": # number of classes, each class represents a digit nb_classes = 10 # load data, divided in train set and test set. (X_train, t_train), (X_test, t_test) = mnist.load_data() # this dataset contains images which are represented as 28x28 # matrices. # print(X_train[0,:,:]) # displayData(X_train[0:100], t_train[0:100]) # Change data representation from 28 x 28 matrices to 784 dimensional # vectors with elements in the range [0, 1]. # # After this change X_train (as well as X_test) will be matrix with # 6000 rows and 784 columns, where each column represents a particular # digit. X_train = X_train.reshape(X_train.shape[0], 784) X_train = X_train.astype('float32') X_train /= 255 X_test = X_test.reshape(X_test.shape[0], 784) X_test = X_test.astype('float32') X_test /= 255 # To extract, say, the first image, we have to extract the first # column from the 2D matrix. This is done with the comma operator, as # is shown below # -- print(X_train[0,:].shape) # Computes the one-hot-encoding of the various classes # -- print(t_train) T_train = np_utils.to_categorical(t_train, nb_classes) T_test = np_utils.to_categorical(t_test, nb_classes) # print(T_train[0]) model_0(X_train, T_train, X_test, T_test, t_test) # model_1(X_train, T_train, X_test, T_test, t_test) # model_2(X_train, T_train, X_test, T_test, t_test) # model_3(X_train, T_train, X_test, T_test, t_test)