
# Load and normalize data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import Callback
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold

data = pd.read_csv('../input/data.csv')


def diagnosisToBinary(a):
    # Convert the diagnosis labels to integer category codes (0/1) in place.
    a["diagnosis"] = a["diagnosis"].astype("category")
    a["diagnosis"].cat.categories = [0, 1]
    a["diagnosis"] = a["diagnosis"].astype("int")

diagnosisToBinary(data)
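
# Note: renaming the category levels to [0, 1] relies on pandas ordering the
# categories alphabetically, so 'B' (benign) -> 0 and 'M' (malignant) -> 1 for
# this dataset. An explicit, order-independent equivalent (a sketch, assuming
# the usual 'B'/'M' labels) would be:
#   data["diagnosis"] = data["diagnosis"].map({"B": 0, "M": 1})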

# Drop the label, the id column, and the empty trailing column from the features.
xValues = data.drop(["diagnosis", "Unnamed: 32", "id"], axis=1)
yValues = data["diagnosis"]
X_train, X_test, Y_train, Y_test = train_test_split(xValues, yValues, test_size=0.25)
X_train_Normalized = preprocessing.scale(X_train)
X_test_Normalized = preprocessing.scale(X_test)
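
# preprocessing.scale standardizes the train and test sets independently, so
# the test set is scaled with its own mean/std. A common alternative (my
# sketch, not part of the original kernel) fits the statistics on the training
# set only and reuses them for the test set:
#   from sklearn.preprocessing import StandardScaler
#   scaler = StandardScaler()
#   X_train_Normalized = scaler.fit_transform(X_train)  # learn mean/std from train
#   X_test_Normalized = scaler.transform(X_test)        # reuse train statistics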

# Run NN method

# Stores performance after every training epoch
class PerformanceHistory(Callback):
    def on_train_begin(self, logs={}):
        self.epochs = []
        self.train_losses = []
        self.val_losses = []
        self.logs = []

    def on_epoch_end(self, epoch, logs={}):
        self.logs.append(logs)
        self.epochs.append(epoch)
        self.train_losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
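
# For reference, the History object that model.fit returns records the same
# values in history.history['loss'] and history.history['val_loss']; the custom
# callback above just keeps them alongside explicit epoch numbers.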

def createNN(hidden_layer_size):
    # Single hidden layer: 30 input features -> hidden_layer_size ReLU units -> 1 sigmoid output.
    model = Sequential()
    model.add(Dense(input_dim=30, activation='relu', units=hidden_layer_size, kernel_initializer='glorot_normal'))
    model.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
    model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])
    return model
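
# Sanity check of the resulting architecture (illustrative): for
# hidden_layer_size=10 the network has (30+1)*10 + (10+1)*1 = 321 trainable
# parameters, which createNN(10).summary() confirms.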
   
def trainNN(model, x_train, y_train, x_test, y_test, early_stop=True, experiment_name="", draw=True):
    callbacks = []
    history = PerformanceHistory()
    callbacks += [history]
    # Stops training when validation loss doesn't decrease for 20 epochs.
    early_stop_callback = EarlyStopping(monitor='val_loss', patience=20)
    # Saves the NN after each epoch if validation loss decreased.
    checkpoint = ModelCheckpoint('TrainingProgress', save_best_only=True)
    if early_stop:
        callbacks += [early_stop_callback, checkpoint]
    model_out = model.fit(np.array(x_train), np.array(y_train), epochs=600, verbose=0, validation_split=0.2, callbacks=callbacks)
   
    df = pd.DataFrame(columns=['Description', 'Value'])
    df = df.append({"Description": "Experiment name", "Value": experiment_name}, ignore_index=True)
    # Printing error/accuracy for the validation set
    results = model.evaluate(model_out.validation_data[0], model_out.validation_data[1], verbose=0)
    df = df.append({"Description": "Validation set size", "Value": len(model_out.validation_data[0])}, ignore_index=True)
    df = df.append({"Description": "Validation set error", "Value": results[0]}, ignore_index=True)
    df = df.append({"Description": "Validation set Accuracy(%)", "Value": results[1]*100}, ignore_index=True)
    # Printing confusion matrix for the validation set
    y_pred = model.predict(np.array(model_out.validation_data[0]), batch_size=len(model_out.validation_data[0]), verbose=0)
    y_pred = np.round(y_pred).astype(int).reshape(1, -1)[0]
    tn, fp, fn, tp = confusion_matrix(model_out.validation_data[1], y_pred).ravel()
    df = df.append({"Description": "Validation Set Confusion Matrix", "Value": "TN: {}, FP: {}, FN: {}, TP: {}".format(tn, fp, fn, tp)}, ignore_index=True)
   
    # Calculating size of the training set. Keras's validation_split takes the
    # last 20% of the data, so the first training_size rows are the actual training set.
    validation_size = model_out.validation_data[0].shape[0]
    training_size = x_train.shape[0] - validation_size
    # Printing error/accuracy for the train set
    results = model.evaluate(x_train[0:training_size], y_train[0:training_size], verbose=0)
    df = df.append({"Description": "Training set size", "Value": training_size}, ignore_index=True)
    df = df.append({"Description": "Training set error", "Value": results[0]}, ignore_index=True)
    df = df.append({"Description": "Training set Accuracy(%)", "Value": results[1]*100}, ignore_index=True)
    # Printing confusion matrix for the train set
    y_pred = model.predict(np.array(x_train[0:training_size]), batch_size=training_size, verbose=0)
    y_pred = np.round(y_pred).astype(int).reshape(1, -1)[0]
    tn, fp, fn, tp = confusion_matrix(y_train[0:training_size], y_pred).ravel()
    df = df.append({"Description": "Training Set Confusion Matrix", "Value": "TN: {}, FP: {}, FN: {}, TP: {}".format(tn, fp, fn, tp)}, ignore_index=True)
   
    if (early_stop):
        results = model.evaluate(x_test, y_test, verbose=0)
        # Output results to CSV file
        df = df.append({"Description": "Test set size", "Value": len(x_test)}, ignore_index=True)
        df = df.append({"Description": "Test set error", "Value": results[0]}, ignore_index=True)
        df = df.append({"Description": "Test set Accuracy(%)", "Value": results[1]*100}, ignore_index=True)
        # Printing confusion matrix for the test set
        y_pred = model.predict(x_test, batch_size=len(x_test))
        y_pred = np.round(y_pred).astype(int).reshape(1, -1)[0]
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
        df = df.append({"Description": "Test Set Confusion Matrix", "Value": "TN: {}, FP: {}, FN: {}, TP: {}".format(tn, fp, fn, tp)}, ignore_index=True)

    if (draw):
        # Draw performance graph
        fig = plt.figure()
        plt.plot(history.epochs, history.train_losses, label="Training loss")
        plt.plot(history.epochs, history.val_losses, label="Validation loss")
        plt.legend()
        plt.show()
        fig.savefig(experiment_name)
        plt.close()
   
    return df

# Experiment 1: one neural network with 10 neurons, non-normalized data.
print("Not normalized data:")
model = createNN(10)
df = trainNN(model, X_train, Y_train, X_test, Y_test, early_stop=False, experiment_name="Experiment_1")
df.to_csv("Experiment_1.csv")
# Experiment 2: one neural network with 50 neurons, non-normalized data.
print("Not normalized data:")
model = createNN(50)
df = trainNN(model, X_train, Y_train, X_test, Y_test, early_stop=False, experiment_name="Experiment_2")
df.to_csv("Experiment_2.csv")

# Experiment 3: one neural network with 50 neurons, normalized data.
print("Normalized data:")
model = createNN(50)
df = trainNN(model, X_train_Normalized, Y_train, X_test_Normalized, Y_test, early_stop=False, experiment_name="Experiment_3")
df.to_csv(“Experiment_3.csv”)”,      “execution_count”: null,      “outputs”:     },    {      “metadata”: {        “_cell_guid”: “90b37118-d0c7-4342-8285-c16194e89a7d”,        “_uuid”: “6beaabfcdfabcb2d54068815f038bbc283198d4b”      },      “cell_type”: “markdown”,      “source”: “Experiment 4:  Early stopping. 50 neurons. Normalized data.
”    },    {      “metadata”: {        “_cell_guid”: “92e562b7-a780-465f-a63a-ec99f4e0af27”,        “_uuid”: “e144ea0f7920a70601aff32a8cf7871d17289266”,        “trusted”: false,        “collapsed”: true      },      “cell_type”: “code”,      “source”: “print(“Normalized data:”)
model = createNN(50)
df = trainNN(model, X_train_Normalized, Y_train, X_test_Normalized, Y_test, early_stop=True, experiment_name="Experiment_4")
df.to_csv("Experiment_4.csv")
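# The best weights from this run were saved by ModelCheckpoint under
# 'TrainingProgress'; they could be reloaded with (a sketch):
#   from keras.models import load_model
#   best_model = load_model('TrainingProgress')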
df

# Experiment 5: grid search over the number of hidden neurons, non-normalized data, early stopping.
neurons_num_list = [5, 10, 15, 20, 30, 50, 70, 90, 120, 150, 200, 250, 300]
accuracy_list = []

for neurons_num in neurons_num_list:
    model = createNN(neurons_num)
    df = trainNN(model, X_train, Y_train, X_test, Y_test, draw=False)
    results = model.evaluate(X_test, Y_test, verbose=0)
    accuracy_list += [results[1]*100]

#Draw total performance vs number of neurons graph
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xscale('log')
ax.set_xticks(neurons_num_list)
ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.set_xlim(4, 350)
labels = ax.get_xticklabels()
plt.setp(labels, rotation=90, horizontalalignment='right')
plt.grid(True)
plt.yscale('linear')
plt.xlabel('Neurons No')
plt.ylabel('Test Accuracy %')
ax = plt.plot(neurons_num_list, accuracy_list, 'o-', label='Raw data')
fig.savefig("Experiment_5.png")
# Experiment 6: grid search over the number of hidden neurons, normalized data, early stopping.
neurons_num_list = [5, 10, 15, 20, 30, 50, 70, 90, 120, 150, 200, 250, 300]
accuracy_list = []

for neurons_num in neurons_num_list:
    model = createNN(neurons_num)
    df = trainNN(model, X_train_Normalized, Y_train, X_test_Normalized, Y_test, draw=False)
    results = model.evaluate(X_test_Normalized, Y_test, verbose=0)
    accuracy_list += [results[1]*100]

#Draw total performance vs number of neurons graph
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xscale('log')
ax.set_xticks(neurons_num_list)
ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.set_xlim(4, 350)
labels = ax.get_xticklabels()
plt.setp(labels, rotation=90, horizontalalignment='right')
plt.grid(True)
plt.yscale('linear')
plt.xlabel('Neurons No')
plt.ylabel('Test Accuracy %')
ax = plt.plot(neurons_num_list, accuracy_list, 'o-', label='Raw data')
fig.savefig("Experiment_6.png")

# Experiment 7: grid search with stratified 10-fold cross-validation, normalized data.
neurons_num_list = [5, 10, 15, 20, 30, 50, 70, 90, 120, 170, 240, 300]
accuracy_list = []
err_list = []

kfold = StratifiedKFold(n_splits=10, shuffle=True)

for neurons_num in neurons_num_list:
    fold_num = 0
    kfolds_results = []
    # K-fold accuracy calculation on the normalized data
    for train_set, validation_set in kfold.split(X_train_Normalized, Y_train):
        fold_num += 1
        model = createNN(neurons_num)
        df = trainNN(model, X_train_Normalized[train_set], Y_train.iloc[train_set], X_train_Normalized[validation_set], Y_train.iloc[validation_set], draw=False)
        results = model.evaluate(X_train_Normalized[validation_set], Y_train.iloc[validation_set], verbose=0)
        kfolds_results += [results[1]*100]
    accuracy_list += [np.mean(kfolds_results)]
    # Compute the standard deviation across folds
    err_list += [np.std(kfolds_results)]
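
# The error bars in the plot below show +/-1 standard deviation of validation
# accuracy across the 10 folds for each hidden-layer size.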

fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xscale('log')
ax.set_xticks(neurons_num_list)
ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.set_xlim(4, 350)
labels = ax.get_xticklabels()
plt.setp(labels, rotation=90, horizontalalignment='right')
plt.grid(True)
plt.yscale('linear')
plt.xlabel('Neurons No')
plt.ylabel('Test Accuracy K Folds %')
plt.errorbar(neurons_num_list, accuracy_list, err_list, elinewidth=5, fmt='-o')
fig.savefig("Experiment_7.png")
plt.legend()