Simple neural network — Titanic survival experiments
Notebook export (author: Ilya Selivanov). The "Run cancelled" lines below are cell-boundary markers from the notebook UI, not code.
%%capture
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    import matplotlib.pyplot as plt
    from tensorflow import keras
    import pandas as pd
    import numpy as np
    
# Stop training once the monitored metric has not improved for 20 epochs.
# NOTE(review): monitors *training* accuracy; 'val_accuracy' is usually the
# better target when validation_data is supplied — confirm intent.
es_monitor = keras.callbacks.EarlyStopping(monitor='accuracy', patience=20)
# Fix: the checkpoint previously wrote to 'best_preds.html' — Keras model
# checkpoints must use a model file extension ('.keras' / '.h5'), not '.html'.
# NOTE(review): save_best_only monitors 'val_loss' by default, so fits run
# without validation_data will skip saving — confirm that is acceptable.
model_checkpoint = keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True)

df = pd.read_csv('titanic.csv')

# Features: every column except the label. Target: 'survived' one-hot encoded
# to two columns to match the 2-unit output layer.
predictors = np.array(df.drop(['survived'], axis=1), dtype=np.float32)
target = np.array(keras.utils.to_categorical(df.survived), dtype=np.float32)

X_train, X_test, y_train, y_test = train_test_split(predictors, target)
# Funnel architecture over the 10 feature columns: 892 -> 700 -> ... -> 2.
# Hidden layers keep the original tanh activations.
model = keras.Sequential()

model.add(keras.layers.Dense(892, input_shape=(10,), activation='tanh'))
model.add(keras.layers.Dense(700, activation='tanh'))
model.add(keras.layers.Dense(500, activation='tanh'))
model.add(keras.layers.Dense(250, activation='tanh'))
model.add(keras.layers.Dense(100, activation='tanh'))
model.add(keras.layers.Dense(50, activation='tanh'))
model.add(keras.layers.Dense(20, activation='tanh'))
model.add(keras.layers.Dense(10, activation='tanh'))
# Fix: the output layer used 'tanh', which ranges over (-1, 1) and cannot
# represent one-hot class probabilities; softmax is the standard choice.
model.add(keras.layers.Dense(2, activation='softmax'))

# Fix: categorical cross-entropy is the appropriate loss for one-hot
# classification targets (the original used 'mse').
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

initial_weights = model.get_weights()

train_scores = []
test_scores = []

# Learning curve: retrain from identical initial weights on progressively
# larger training subsets and track accuracy on that subset vs. the held-out
# test split. Fix: the original built the per-size slices but then fit on the
# full X_train every iteration, so the curve never actually varied with size.
sizes = list(range(1, 902, 100))
for size in sizes:
    sub_X = np.array(df.drop(['survived'], axis=1).iloc[:size], dtype=np.float32)
    # num_classes=2 keeps the one-hot width fixed even when a small subset
    # happens to contain only one class.
    sub_y = np.array(keras.utils.to_categorical(df.survived.iloc[:size], num_classes=2),
                     dtype=np.float32)

    # Restart from the same initialization so runs are comparable.
    model.set_weights(initial_weights)

    model.fit(sub_X, sub_y, epochs=20,
              callbacks=[es_monitor, model_checkpoint], verbose=0)

    train_scores.append(model.evaluate(sub_X, sub_y, verbose=0)[1])
    test_scores.append(model.evaluate(X_test, y_test, verbose=0)[1])

plt.plot(sizes, train_scores, label='Training', marker='.', markersize=10, alpha=0.8)
plt.plot(sizes, test_scores, label='Testing', marker='.', markersize=10, alpha=0.8)

plt.xlabel('Data points')
plt.ylabel('Accuracy')

plt.legend()
plt.show()
Run cancelled
def make_model(activation):
    """Build and compile a small 50-20-2 dense network that uses
    *activation* on every layer (10 input features, MSE loss)."""
    net = keras.Sequential([
        keras.layers.Dense(50, input_shape=(10,), activation=activation),
        keras.layers.Dense(20, activation=activation),
        keras.layers.Dense(2, activation=activation),
    ])
    net.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return net

# Compare training accuracy and loss curves across several activations.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))

for act in ['elu', 'relu', 'leaky_relu', 'sigmoid', 'tanh', 'softmax']:
    model = make_model(act)

    hist = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                     epochs=25, callbacks=[model_checkpoint], verbose=0).history

    ax[0].plot(hist['accuracy'], label=act)
    ax[0].set_xlabel('epoch')
    ax[0].set_ylabel('Accuracy')
    ax[0].legend(loc='lower right')

    ax[1].plot(hist['loss'], label=act)
    ax[1].set_xlabel('epoch')
    ax[1].set_ylabel('Loss (MSE)')
    ax[1].legend()

plt.show()
Run cancelled
def make_model():
    """Return a compiled 50-20-2 tanh network for the 10-feature data
    (adam optimizer, MSE loss, accuracy metric)."""
    net = keras.Sequential([
        keras.layers.Dense(50, input_shape=(10,), activation='tanh'),
        keras.layers.Dense(20, activation='tanh'),
        keras.layers.Dense(2, activation='tanh'),
    ])
    net.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return net

# Compare convergence across batch sizes; record each run's final training
# accuracy for the bar chart below.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
best_acc = {}

for batch_size in [1, 2, 4, 8, 16]:
    model = make_model()

    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=25, callbacks=[model_checkpoint], verbose=0,
              batch_size=batch_size)

    history = model.history.history

    # Final-epoch training accuracy (a fraction in [0, 1]).
    best_acc[str(batch_size)] = history['accuracy'][-1]

    ax[0].plot(history['accuracy'], label=f'batch_size={batch_size}')
    ax[0].set_xlabel('epoch')
    ax[0].set_ylabel('Accuracy')
    ax[0].legend(loc='lower right')

    ax[1].plot(history['loss'], label=f'batch_size={batch_size}')
    ax[1].set_xlabel('epoch')
    ax[1].set_ylabel('Loss (MSE)')
    ax[1].legend()

plt.show()

# Zoom the y-axis to a narrow band around the observed accuracies.
# Fix: replaced float('{:.2f}'.format(...)) with the equivalent round(..., 2).
btm = round(min(best_acc.values()) - 0.0045, 2)
top = round(max(best_acc.values()) + 0.0045, 2)

plt.bar(best_acc.keys(), best_acc.values())
plt.xlabel('Batch size')
# Fix: the values are fractions in [0, 1], so the old '(%)' label was wrong.
plt.ylabel('Accuracy')
plt.ylim(btm, top)
plt.show()
Run cancelled
# Train a fresh model until early stopping kicks in, then report
# [loss, accuracy] on the training and test splits.
model = make_model()

model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=1000, callbacks=[es_monitor, model_checkpoint], verbose=0)

for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(model.evaluate(features, labels))
Run cancelled
def make_model(norm):
    """Build the reference 50-20-2 tanh network, optionally with batch
    normalization after the first hidden layer.

    norm: 'With' inserts a BatchNormalization layer; any other value omits it.
    Returns a compiled model (adam, MSE loss, accuracy metric).
    """
    # Fix: the two branches previously duplicated the entire network
    # construction; only the BatchNormalization insertion differs.
    model = keras.Sequential()
    model.add(keras.layers.Dense(50, input_shape=(10,), activation='tanh'))
    if norm == 'With':
        model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(20, activation='tanh'))
    model.add(keras.layers.Dense(2, activation='tanh'))

    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return model

# Compare training curves with and without batch normalization.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))

for variant in ['With', 'Without']:
    model = make_model(variant)

    hist = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                     epochs=100, callbacks=[model_checkpoint],
                     verbose=0, batch_size=256).history

    tag = f'{variant} batch normalization'

    ax[0].plot(hist['accuracy'], label=tag)
    ax[0].set_xlabel('epoch')
    ax[0].set_ylabel('Accuracy')
    ax[0].legend(loc='lower right')

    ax[1].plot(hist['loss'], label=tag)
    ax[1].set_xlabel('epoch')
    ax[1].set_ylabel('Loss (MSE)')
    ax[1].legend()

plt.show()
Run cancelled
# Random-forest baseline on the raw (non-one-hot) labels.
X_train, X_test, y_train, y_test = train_test_split(predictors, df.survived, test_size=0.4)

# Hyperparameter search space for the forest.
params = {'max_depth': [100, 200, 300, 400, 500],
          'criterion': ['gini', 'entropy'],
          'min_samples_leaf': [5, 25, 50, 100],
          'min_samples_split': [5, 6, 7, 8, 9, 10]}

# Fix: removed np.random.seed(np.random.randint(0, 100000)) — reseeding the
# global RNG from itself does nothing useful for reproducibility.
# (The random_state below is still drawn at random, so runs stay non-deterministic,
# as in the original.)
rf = RandomForestClassifier(random_state=np.random.randint(0, 100000))

# 50 random draws from the 240-combination grid, 5-fold CV, refit on the best.
grid = RandomizedSearchCV(param_distributions=params, estimator=rf,
                          scoring='accuracy', cv=5, refit=True, n_iter=50)

grid.fit(X_train, y_train)

preds = grid.predict(X_test)

print(f'Best score: {grid.best_score_}')
print(f'Best parameters: {grid.best_params_}')

# Fix: accuracy_score was imported at the top of the file but never used;
# report held-out accuracy alongside the CV score.
print(f'\nTest accuracy: {accuracy_score(y_test, preds)}')

print('\nPredictions:')
print(preds)