Course Notes: Recurrent Neural Networks (RNN) for Language Modeling in Python

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from tensorflow.keras.layers import Dense, Embedding, Subtract, concatenate, LSTM, GRU, Bidirectional, Dropout, Input, SimpleRNN
    from tensorflow.keras.models import Sequential, Model
    from tensorflow import keras
    from tensorflow.keras.initializers import he_uniform
    from tensorflow.keras.optimizers import SGD
    

    # Read the text file
    with open('datasets/bigbang.txt', 'r') as file:
        bigbang = file.readlines()
    
    # Print and view the first 10 lines of the script
    N = 10
    with open("datasets/bigbang.txt", "r") as file: 
        for i in range(N):
            line = next(file).strip()
            print(line)
    # Read the English-Portuguese sample file (into its own variable,
    # not `bigbang`)
    with open('datasets/en-pt-sample5k.txt', 'r') as file:
        en_pt_sample = file.readlines()
    
    # Print and view the first 10 lines of the sample
    N = 10
    with open("datasets/en-pt-sample5k.txt", "r") as file: 
        for i in range(N):
            line = next(file).strip()
            print(line)

    REFRESHER

    # Define the Sheldon quotes as a single string of text
    sheldon_quotes="You're afraid of insects and women, Ladybugs must render you catatonic.Scissors cuts paper, paper covers rock, rock crushes lizard, lizard poisons Spock, Spock smashes scissors, scissors decapitates lizard, lizard eats paper, paper disproves Spock, Spock vaporizes rock, and as it always has, rock crushes scissors.For example, I cry because others are stupid, and that makes me sad.I'm not insane, my mother had me tested.Two days later, Penny moved in and so much blood rushed to your genitals, your brain became a ghost town.Amy's birthday present will be my genitals.(3 knocks) Penny! (3 knocks) Penny! (3 knocks) Penny!Thankfully all the things my girlfriend used to do can be taken care of with my right hand.I would have been here sooner but the bus kept stopping for other people to get on it.Oh gravity, thou art a heartless bitch.I am aware of the way humans usually reproduce which is messy, unsanitary and based on living next to you for three years, involves loud and unnecessary appeals to a deity.Well, today we tried masturbating for money.I think that you have as much of a chance of having a sexual relationship with Penny as the Hubble telescope does of discovering at the center of every black hole is a little man with a flashlight searching for a circuit breaker.Well, well, well, if it isn't Wil Wheaton! The Green Goblin to my Spider-Man, the Pope Paul V to my Galileo, the Internet Explorer to my Firefox.What computer do you have? And please don't say a white one.She calls me moon-pie because I'm nummy-nummy and she could just eat me up.Ah, memory impairment; the free prize at the bottom of every vodka bottle."
    
    # Transform the text into a list of words
    # (sheldon_quotes is a single string, so split it directly;
    # ' '.join() here would insert a space between every character)
    all_words = sheldon_quotes.split(' ')
    
    # Get the list of unique words
    unique_words = list(set(all_words))
    
    # Dictionary of indexes as keys and words as values
    index_to_word = {i:wd for i, wd in enumerate(sorted(unique_words))}
    
    # Dictionary of words as keys and indexes as values
    word_to_index = {wd:i for i,wd in enumerate(sorted(unique_words))}
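    
    # Quick check (illustrative): a word maps to an index and back
    # to the same word ('rock' appears in the quotes above)
    print(word_to_index['rock'], index_to_word[word_to_index['rock']])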
    
    # Create lists to keep the sentences and the next character
    sentences = []   # ~ Training data
    next_chars = []  # ~ Training labels
    
    # Define hyperparameters
    step = 2        # ~ Step to take when reading the texts in characters
    chars_window = 10 # ~ Number of characters to use to predict the next one  
    
    # Loop over the text: length `chars_window` per time with step equal to `step`
    for i in range(0, len(sheldon_quotes) - chars_window, step):
        sentences.append(sheldon_quotes[i:i + chars_window])
        next_chars.append(sheldon_quotes[i + chars_window])
    
    # Print 10 (window, next-char) pairs; `print_examples` is a course
    # helper that isn't defined in these notes, so use a minimal stand-in
    def print_examples(sentences, next_chars, n):
        for window, nxt in zip(sentences[:n], next_chars[:n]):
            print(repr(window), '->', repr(nxt))
    
    print_examples(sentences, next_chars, 10)  # first pair: "You're afr" -> 'a'
    
    # Instantiate the class
    model = Sequential(name='sequential_model')
    
    # One LSTM layer (defining the input shape because it is the 
    # initial layer)
    model.add(LSTM(128, input_shape=(None, 10), name="LSTM"))
    
    # Add a dense layer with one unit
    model.add(Dense(1, activation="sigmoid", name="output"))
    
    # The summary shows the layers and the number of parameters 
    # that will be trained
    model.summary()
    
    # Define the input layer
    main_input = Input(shape=(None, 10), name="input")
    
    # One LSTM layer (input shape is already defined)
    lstm_layer = LSTM(128, name="LSTM")(main_input)
    
    # Add a dense layer with one unit
    main_output = Dense(1, activation="sigmoid", name="output")(lstm_layer)
    
    # Instantiate the class at the end
    model = Model(inputs=main_input, outputs=main_output, name="modelclass_model")
    
    # Same amount of parameters to train as before (71,297)
    model.summary()
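    
    # Where 71,297 comes from: an LSTM layer has 4 gates, each with
    # weights for the input (10), the recurrent state (128) and a bias
    lstm_params = 4 * ((10 + 128) * 128 + 128)   # 71,168
    dense_params = 128 * 1 + 1                   # 129
    print(lstm_params + dense_params)            # 71,297
    
    # Two sample quotes to tokenize and pad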
    texts=['So if a photon is directed through a plane with two slits in it and either slit is observed it will not go through both slits. If it’s unobserved it will, however, if it’s observed after it’s left the plane but before it hits its target, it will not have gone through both slits.',
     'Hello, female children. Allow me to inspire you with a story about a great female scientist. Polish-born, French-educated Madame Curie. Co-discoverer of radioactivity, she was a hero of science, until her hair fell out, her vomit and stool became filled with blood, and she was poisoned to death by her own discovery. With a little hard work, I see no reason why that can’t happen to any of you. Are we done? Can we go?']
    
    # Import relevant classes/functions
    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    
    # Build the dictionary of indexes
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    
    # Change texts into sequence of indexes
    texts_numeric = tokenizer.texts_to_sequences(texts)
    print("Number of words in the sample texts: ({0}, {1})".format(len(texts_numeric[0]), len(texts_numeric[1])))
    
    # Pad the sequences to a fixed length of 60 (longer sequences are
    # truncated; by default both padding and truncation happen at the
    # start of the sequence)
    texts_pad = pad_sequences(texts_numeric, maxlen=60)
    print("Now the texts have fixed length: 60. Let's see the first one: \n{0}".format(texts_pad[0]))

Build a SimpleRNN model on the Keras built-in IMDB dataset

This is a sentiment-analysis model: the label 0 represents negative sentiment and 1 represents positive sentiment.

    import tensorflow as tf
    from tensorflow.keras.datasets import imdb
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import SimpleRNN, Dense
    import numpy as np
    
    # Load the IMDB dataset (capping the vocabulary at the 10,000 most
    # frequent words is an assumption to keep the model small)
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)
    
    # Pad the sequences to the same length (maxlen=200 is an assumed cap)
    # and add a trailing feature dimension, since the RNN layer expects
    # input of shape (timesteps, features)
    maxlen = 200
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)[..., np.newaxis]
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)[..., np.newaxis]
    
    # Build model
    model = Sequential()
    model.add(SimpleRNN(units=128, input_shape=(None, 1)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', 
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Train briefly so the evaluation is meaningful (an untrained model
    # would score ~50%; one epoch is a minimal, illustrative choice)
    model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
    
    # Method '.evaluate()' shows the loss and accuracy
    loss, acc = model.evaluate(x_test, y_test, verbose=0)
    print("Loss: {0} \nAccuracy: {1}".format(loss, acc))

    GRU and LSTM cells

    GRU cells were proposed in 2014 and add gating to the vanilla RNN cell (the full GRU has an update gate and a reset gate; the simplified view here uses only the update gate). Before updating the memory state, we first compute a candidate ã_t that carries the present information. We then compute the update gate g_u, which determines whether the candidate ã_t becomes the new memory state or whether we keep the previous state a_{t-1}. If the gate is 0 the network keeps the previous hidden state, and if it is 1 it uses the new value ã_t. Intermediate gate values blend the previous and candidate states, but during training the gate tends to saturate near 0 or 1.
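
    A minimal NumPy sketch of this simplified, update-gate-only GRU step (the weight names and sizes are illustrative, not from the course):

    import numpy as np
    
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))
    
    n_h, n_x = 4, 3                              # illustrative sizes
    rng = np.random.default_rng(0)
    W_a, U_a = rng.normal(size=(n_h, n_x)), rng.normal(size=(n_h, n_h))
    W_u, U_u = rng.normal(size=(n_h, n_x)), rng.normal(size=(n_h, n_h))
    
    def gru_step(a_prev, x_t):
        # Candidate state carrying the present information
        a_tilde = np.tanh(W_a @ x_t + U_a @ a_prev)
        # Update gate: 0 keeps a_prev, 1 switches to the candidate
        g_u = sigmoid(W_u @ x_t + U_u @ a_prev)
        return g_u * a_tilde + (1 - g_u) * a_prev
    
    print(gru_step(np.zeros(n_h), np.ones(n_x)))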

    LSTM was first proposed in 1997 and adds three gates to the vanilla RNN cell. The forget gate g_f determines whether the previous cell state c_{t-1} should be forgotten (that is, have its value set to zero) or kept. The update gate g_u does the same for the candidate state c̃_t, and the output gate g_o does the same for the new hidden state. The green circles in the course diagram represent the gates: think of each as an open or closed gate that blocks the value to its left when the gate is 0 and lets it pass when it is 1.
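
    A corresponding sketch of one LSTM step, using the standard formulation in which the three gates are computed from the previous hidden state a_{t-1} (again, weights and sizes are illustrative):

    import numpy as np
    
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))
    
    def lstm_step(a_prev, c_prev, x_t, params):
        W_f, U_f, W_u, U_u, W_o, U_o, W_c, U_c = params
        g_f = sigmoid(W_f @ x_t + U_f @ a_prev)      # forget gate
        g_u = sigmoid(W_u @ x_t + U_u @ a_prev)      # update gate
        g_o = sigmoid(W_o @ x_t + U_o @ a_prev)      # output gate
        c_tilde = np.tanh(W_c @ x_t + U_c @ a_prev)  # candidate state
        c_t = g_f * c_prev + g_u * c_tilde           # keep old, admit new
        a_t = g_o * np.tanh(c_t)                     # gated hidden state
        return a_t, c_t
    
    n_h, n_x = 4, 3                                  # illustrative sizes
    rng = np.random.default_rng(1)
    params = [rng.normal(size=s) for s in [(n_h, n_x), (n_h, n_h)] * 4]
    a_t, c_t = lstm_step(np.zeros(n_h), np.zeros(n_h), np.ones(n_x), params)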

    # Build simple RNN model
    SimpleRNN_model = Sequential()
    SimpleRNN_model.add(SimpleRNN(units=128, input_shape=(None, 1)))
    SimpleRNN_model.add(Dense(1, activation='sigmoid'))
    SimpleRNN_model.compile(loss='binary_crossentropy', 
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Build GRU model
    gru_model = Sequential()
    gru_model.add(GRU(units=128, input_shape=(None,1)))
    gru_model.add(Dense(1, activation='sigmoid'))
    gru_model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Print the old and new model summaries
    SimpleRNN_model.summary()
    gru_model.summary()
    
    # Train both models briefly so the comparison is meaningful
    # (a single epoch is a minimal, illustrative choice)
    SimpleRNN_model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
    gru_model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
    
    # Evaluate the models' performance (ignore the loss value)
    _, acc_simpleRNN = SimpleRNN_model.evaluate(x_test, y_test, verbose=0)
    _, acc_GRU = gru_model.evaluate(x_test, y_test, verbose=0)
    
    # Print the results
    print("SimpleRNN model's accuracy:\t{0}".format(acc_simpleRNN))
    print("GRU model's accuracy:\t{0}".format(acc_GRU))
    # Be warned! Running this cell will take 20+ minutes
    # Build model: stacked LSTM layers must return the full sequence
    # (return_sequences=True) so the next LSTM receives one vector per
    # timestep; only the last LSTM returns a single vector
    model = Sequential()
    model.add(LSTM(units=128, input_shape=(None, 1), return_sequences=True))
    model.add(LSTM(units=128, return_sequences=True))
    model.add(LSTM(units=128, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    # Train before evaluating (this is the slow part; three epochs is
    # an assumed setting)
    model.fit(x_train, y_train, epochs=3, batch_size=128, verbose=0)
    
    print("Loss: %0.04f\nAccuracy: %0.04f" % tuple(model.evaluate(x_test, y_test, verbose=0)))