When creating a post, please add:
- Week # must be added in the tags option of the post. (Week 2)
- Link to the classroom item you are referring to:
- Description (include relevant info but please do not post solution code or your entire notebook):
I have the following simple neural network:
# Neural Network
def create_nn_clf(number_of_features, first_layer_neurons=25, second_layer_neurons=15):
    """Build an uncompiled binary classifier with two hidden layers.

    The network stacks two ReLU hidden layers and a single linear output
    unit, so the model emits one raw score per sample.

    Args:
        number_of_features: Length of each input vector.
        first_layer_neurons: Width of the first hidden layer.
        second_layer_neurons: Width of the second hidden layer.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    hidden_1 = Dense(
        first_layer_neurons,
        activation='relu',
        # input_shape seems necessary for KerasClassifier below
        input_shape=(number_of_features,),
    )
    hidden_2 = Dense(second_layer_neurons, activation='relu')
    output = Dense(1, activation='linear')
    return Sequential([hidden_1, hidden_2, output])
The easiest way of training is:
# Build the bare Keras model, compile it with Adam and a logits-based binary
# cross-entropy loss, train it, then predict on the training set.
nn_clf_1 = create_nn_clf(number_of_features=number_of_features)
nn_clf_1.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
    loss=BinaryCrossentropy(from_logits=True),
)
nn_clf_1.fit(
    X_train,
    y_train_5,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
y_predict_1 = nn_clf_1.predict(X_train)
However, sometimes the above approach is not sufficient, for example if you want to do cross-validation with `sklearn.model_selection`. For this case (as I learned from the internet), we can use `KerasClassifier` as follows:
# Second estimator
# Wrap the same model factory in KerasClassifier; the training settings, loss
# and optimizer are handed to the wrapper instead of compile()/fit().
nn_clf_2 = KerasClassifier(
    create_nn_clf,
    number_of_features=number_of_features,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    loss=BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
)
nn_clf_2.fit(X_train, y_train_5)
# NOTE(review): KerasClassifier.predict presumably returns class labels rather
# than the raw network outputs that Sequential.predict returns -- confirm
# against the scikeras documentation.
y_predict_2 = nn_clf_2.predict(X_train)
However, `y_predict_1` and `y_predict_2` are quite different (I expected them to be the same). For example, if we plot precision vs. recall, we get the following.
Why? Should I trust KerasClassifier?
The entire code is:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import precision_recall_curve
from sklearn.datasets import fetch_openml
# mnist dataset
mnist = fetch_openml('mnist_784', as_frame=False)
X = mnist.data
y = mnist.target
# Keep the first 60000 samples as the training set.
X_train = X[:60000]
y_train = y[:60000]
# binary classification if the digit is 5 or not
# (the comparison is against the string '5')
y_train_5 = y_train == '5'
# Neural Network
def create_nn_clf(number_of_features, first_layer_neurons=25, second_layer_neurons=15):
    """Build an uncompiled binary classifier with two hidden layers.

    The network stacks two ReLU hidden layers and a single linear output
    unit, so the model emits one raw score per sample.

    Args:
        number_of_features: Length of each input vector.
        first_layer_neurons: Width of the first hidden layer.
        second_layer_neurons: Width of the second hidden layer.

    Returns:
        The assembled ``Sequential`` model; compiling is left to the caller.
    """
    hidden_1 = Dense(
        first_layer_neurons,
        activation='relu',
        # input_shape seems necessary for KerasClassifier below
        input_shape=(number_of_features,),
    )
    hidden_2 = Dense(second_layer_neurons, activation='relu')
    output = Dense(1, activation='linear')
    return Sequential([hidden_1, hidden_2, output])
# Common parameters
# These settings are shared by both estimators.
number_of_features = X_train.shape[1]
learning_rate = 1e-5
epochs = 20
batch_size = 50

# First estimator
# Bare Keras model: compile with Adam and a logits-based binary cross-entropy
# loss, train, then predict on the training set.
nn_clf_1 = create_nn_clf(number_of_features=number_of_features)
nn_clf_1.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
    loss=BinaryCrossentropy(from_logits=True),
)
nn_clf_1.fit(
    X_train,
    y_train_5,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
y_predict_1 = nn_clf_1.predict(X_train)
# The model's predictions serve as the score argument of the curve.
precisions_1, recalls_1, thresholds_1 = precision_recall_curve(
    y_train_5,
    y_predict_1,
)
# Second estimator
# Wrap the same model factory in KerasClassifier so it can be used through the
# scikit-learn estimator API (e.g. with sklearn.model_selection).
nn_clf_2 = KerasClassifier(
    create_nn_clf,
    number_of_features=number_of_features,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    loss=BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
)
nn_clf_2.fit(X_train, y_train_5)
# KerasClassifier follows the scikit-learn classifier API: predict() returns
# hard class labels, not the continuous scores that Sequential.predict()
# returns for the bare model.
y_predict_2 = nn_clf_2.predict(X_train)
# precision_recall_curve sweeps a threshold over a continuous score; feeding
# it hard labels collapses the curve to a couple of points. Score with the
# positive-class column of predict_proba() so the curve is comparable with
# the bare Sequential model.
# NOTE(review): the last layer is linear (logits), so these scores are
# presumably not calibrated probabilities; the curve only depends on their
# ordering -- confirm against the scikeras docs.
y_scores_2 = nn_clf_2.predict_proba(X_train)[:, 1]
# The two networks are still trained from independent random initialisations,
# so the curves should be close but not identical.
precisions_2, recalls_2, thresholds_2 = precision_recall_curve(y_train_5, y_scores_2)
# Compare predictions
# Draw both precision/recall curves on one figure, save it to disk, then
# display it.
plt.figure(figsize=(6, 5))  # figure size is cosmetic only
plt.plot(recalls_1, precisions_1, "b-", linewidth=2, label="Bare sequential")
plt.plot(
    recalls_2,
    precisions_2,
    "--",
    linewidth=2,
    label="KerasClassifier",
)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.axis([0, 1, 0, 1])  # both axes span [0, 1]
plt.grid()
plt.legend(loc="lower left")
# Save before show().
plt.savefig("sequential_vs_kerasclassifier.png")
plt.show()