Beta
Table of Contents
The outline of your notebook will show up here. You can include headings in any text cell by starting a line with `#`, `##`, `###`, etc., depending on the desired title hierarchy.
Run cancelled
%%capture
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tensorflow import keras
import pandas as pd
import numpy as np
es_monitor = keras.callbacks.EarlyStopping(monitor='accuracy', patience=20)
model_checkpoint = keras.callbacks.ModelCheckpoint('best_preds.html', save_best_only=True)
df = pd.read_csv('titanic.csv')
predictors = df.drop(['survived'], axis=1)
target = keras.utils.to_categorical(df.survived)
predictors = np.array(predictors, dtype=np.float32)
target = np.array(target, dtype=np.float32)
X_train, X_test, y_train, y_test = train_test_split(predictors, target)
model = keras.Sequential()
model.add(keras.layers.Dense(892, input_shape=(10,), activation='tanh'))
model.add(keras.layers.Dense(700, activation='tanh'))
model.add(keras.layers.Dense(500, activation='tanh'))
model.add(keras.layers.Dense(250, activation='tanh'))
model.add(keras.layers.Dense(100, activation='tanh'))
model.add(keras.layers.Dense(50, activation='tanh'))
model.add(keras.layers.Dense(20, activation='tanh'))
model.add(keras.layers.Dense(10, activation='tanh'))
model.add(keras.layers.Dense(2, activation='tanh'))
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
initial_weights = model.get_weights()
train_scores = []
test_scores = []
for size in range(1,902,100):
predictors = df.drop(['survived'], axis=1).iloc[:size]
target = keras.utils.to_categorical(df.survived.iloc[:size])
predictors = np.array(predictors, dtype=np.float32)
target = np.array(target, dtype=np.float32)
model.set_weights(initial_weights)
model.fit(X_train, y_train, epochs=20, callbacks=[es_monitor, model_checkpoint], verbose=0)
# train_scores.append([model.evaluate(X_train, y_train)[1], size])
# test_scores.append([model.evaluate(X_test, y_test)[1], size])
train_scores.append(model.evaluate(X_train, y_train)[1])
test_scores.append(model.evaluate(X_test, y_test)[1])
#model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10000000000, callbacks=[es_monitor, model_checkpoint])
plt.plot(list(range(1,902,100)), train_scores, label='Training', marker='.', markersize=10, alpha=0.8)
plt.plot(list(range(1,902,100)), test_scores, label='Testing', marker='.', markersize=10, alpha=0.8)
plt.xlabel('Data points')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
Run cancelled
def make_model(activation):
    """Return a compiled 50-20-2 dense network using `activation` in every layer.

    Note that the 2-unit output layer also uses `activation`; comparing
    activations end-to-end is the point of this experiment.
    """
    net = keras.Sequential([
        keras.layers.Dense(50, input_shape=(10,), activation=activation),
        keras.layers.Dense(20, activation=activation),
        keras.layers.Dense(2, activation=activation),
    ])
    net.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return net
# Overlay accuracy/loss curves for several activation functions.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
for i in ('elu', 'relu', 'leaky_relu', 'sigmoid', 'tanh', 'softmax'):
    model = make_model(i)
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=25, callbacks=[model_checkpoint], verbose=0)
    history = model.history.history
    ax[0].plot(history['accuracy'], label=i)
    ax[1].plot(history['loss'], label=i)
# Axis labels and legends only need to be applied once, after the loop.
ax[0].set_xlabel('epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend(loc='lower right')
ax[1].set_xlabel('epoch')
ax[1].set_ylabel('Loss (MSE)')
ax[1].legend()
plt.show()
Run cancelled
def make_model():
    """Build and compile the baseline tanh network (50 -> 20 -> 2)."""
    net = keras.Sequential([
        keras.layers.Dense(50, input_shape=(10,), activation='tanh'),
        keras.layers.Dense(20, activation='tanh'),
        keras.layers.Dense(2, activation='tanh'),
    ])
    net.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return net
# Compare convergence across batch sizes; record each run's final-epoch
# training accuracy for the bar chart below.
batch_sizes = [1, 2, 4, 8, 16]
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
best_acc = {}
for i in batch_sizes:
    model = make_model()
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=25, callbacks=[model_checkpoint], verbose=0, batch_size=i)
    history = model.history.history
    best_acc[str(i)] = history['accuracy'][-1]  # final-epoch training accuracy
    ax[0].plot(history['accuracy'], label=f'batch_size={i}')
    ax[1].plot(history['loss'], label=f'batch_size={i}')
ax[0].set_xlabel('epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend(loc='lower right')
ax[1].set_xlabel('epoch')
ax[1].set_ylabel('Loss (MSE)')
ax[1].legend()
plt.show()

# Zoom the bar chart's y-axis to a tight band around the observed values
# (round(x, 2) replaces the float('{:.2f}'.format(x)) round-trip).
btm = round(min(best_acc.values()) - 0.0045, 2)
top = round(max(best_acc.values()) + 0.0045, 2)
plt.bar(best_acc.keys(), best_acc.values())
plt.xlabel('Batch size')
# BUG FIX: Keras reports accuracy as a 0-1 fraction, not a percentage.
plt.ylabel('Accuracy')
plt.ylim(btm, top)
plt.show()
Run cancelled
# Train the baseline model until training accuracy plateaus (early stopping),
# then print [loss, accuracy] for the train and test splits.
model = make_model()
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=1000, callbacks=[es_monitor, model_checkpoint], verbose=0)
train_metrics = model.evaluate(X_train, y_train)
test_metrics = model.evaluate(X_test, y_test)
print(train_metrics)
print(test_metrics)
Run cancelled
def make_model(norm):
    """Build the baseline tanh network, optionally with batch normalization.

    Parameters
    ----------
    norm : str
        'With' inserts a BatchNormalization layer after the first Dense
        layer; any other value builds the plain 50-20-2 network.

    Returns
    -------
    A compiled keras.Sequential model (adam / mse / accuracy).
    """
    # The two original branches duplicated the entire network; build it once
    # and insert the extra layer conditionally.
    model = keras.Sequential()
    model.add(keras.layers.Dense(50, input_shape=(10,), activation='tanh'))
    if norm == 'With':
        model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(20, activation='tanh'))
    model.add(keras.layers.Dense(2, activation='tanh'))
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return model
# Train the same architecture with and without batch normalization and
# overlay the resulting accuracy/loss curves.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
for i in ('With', 'Without'):
    model = make_model(i)
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=100, callbacks=[model_checkpoint], verbose=0, batch_size=256)
    history = model.history.history
    curve_label = f'{i} batch normalization'
    ax[0].plot(history['accuracy'], label=curve_label)
    ax[1].plot(history['loss'], label=curve_label)
ax[0].set_xlabel('epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend(loc='lower right')
ax[1].set_xlabel('epoch')
ax[1].set_ylabel('Loss (MSE)')
ax[1].legend()
plt.show()
Run cancelled
# Baseline comparison: random forest on the raw 0/1 labels (not one-hot).
# BUG FIX: the original seeded NumPy with np.random.randint(...) and drew a
# random random_state, which defeats the purpose of seeding — every run was
# irreproducible. Use one fixed seed throughout.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    predictors, df.survived, test_size=0.4, random_state=SEED)

params = {
    'max_depth': [100, 200, 300, 400, 500],
    'criterion': ['gini', 'entropy'],
    'min_samples_leaf': [5, 25, 50, 100],
    'min_samples_split': [5, 6, 7, 8, 9, 10],
}

rf = RandomForestClassifier(random_state=SEED)
grid = RandomizedSearchCV(param_distributions=params, estimator=rf,
                          scoring='accuracy', cv=5, refit=True, n_iter=50,
                          random_state=SEED)
grid.fit(X_train, y_train)
preds = grid.predict(X_test)

print(f'Best score: {grid.best_score_}')
print(f'Best parameters: {grid.best_params_}')
# Held-out accuracy (best_score_ is the CV score, not test performance);
# this also puts the previously unused accuracy_score import to work.
print(f'Test accuracy: {accuracy_score(y_test, preds)}')
print('\nPredictions:')
print(preds)