Course Notes: Recurrent Neural Networks (RNN) for Language Modeling in Python

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from tensorflow.keras.layers import Dense, Embedding, Subtract, concatenate, LSTM, GRU, Bidirectional, Dropout, Input, SimpleRNN
    from tensorflow.keras.models import Sequential, Model
    from tensorflow import keras
    from tensorflow.keras.initializers import he_uniform
    from tensorflow.keras.optimizers import SGD
    

    # Read the text file
    with open('datasets/bigbang.txt', 'r') as file:
        bigbang = file.readlines()
    
    # Print and view the first 10 lines of the script
    N = 10
    with open("datasets/bigbang.txt", "r") as file: 
        for i in range(N):
            line = next(file).strip()
            print(line)
    # Read the English-Portuguese sample file (into its own variable,
    # not `bigbang`)
    with open('datasets/en-pt-sample5k.txt', 'r') as file:
        en_pt_sample = file.readlines()
    
    # Print and view the first 10 lines of the sample
    N = 10
    with open("datasets/en-pt-sample5k.txt", "r") as file: 
        for i in range(N):
            line = next(file).strip()
            print(line)

    REFRESHER

    # Define the Sheldon quotes as a single string of text
    sheldon_quotes="You're afraid of insects and women, Ladybugs must render you catatonic.Scissors cuts paper, paper covers rock, rock crushes lizard, lizard poisons Spock, Spock smashes scissors, scissors decapitates lizard, lizard eats paper, paper disproves Spock, Spock vaporizes rock, and as it always has, rock crushes scissors.For example, I cry because others are stupid, and that makes me sad.I'm not insane, my mother had me tested.Two days later, Penny moved in and so much blood rushed to your genitals, your brain became a ghost town.Amy's birthday present will be my genitals.(3 knocks) Penny! (3 knocks) Penny! (3 knocks) Penny!Thankfully all the things my girlfriend used to do can be taken care of with my right hand.I would have been here sooner but the bus kept stopping for other people to get on it.Oh gravity, thou art a heartless bitch.I am aware of the way humans usually reproduce which is messy, unsanitary and based on living next to you for three years, involves loud and unnecessary appeals to a deity.Well, today we tried masturbating for money.I think that you have as much of a chance of having a sexual relationship with Penny as the Hubble telescope does of discovering at the center of every black hole is a little man with a flashlight searching for a circuit breaker.Well, well, well, if it isn't Wil Wheaton! The Green Goblin to my Spider-Man, the Pope Paul V to my Galileo, the Internet Explorer to my Firefox.What computer do you have? And please don't say a white one.She calls me moon-pie because I'm nummy-nummy and she could just eat me up.Ah, memory impairment; the free prize at the bottom of every vodka bottle."
    
    # Transform the text into a list of words
    # (sheldon_quotes is a single string, so split it directly;
    # ' '.join() here would insert a space between every character)
    all_words = sheldon_quotes.split(' ')
    
    # Get the list of unique words
    unique_words = list(set(all_words))
    
    # Dictionary of indexes as keys and words as values
    index_to_word = {i:wd for i, wd in enumerate(sorted(unique_words))}
    
    # Dictionary of words as keys and indexes as values
    word_to_index = {wd:i for i,wd in enumerate(sorted(unique_words))}
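    
    # Quick check (illustrative): a word maps to an index and back
    # to the same word ('rock' appears in the quotes above)
    print(word_to_index['rock'], index_to_word[word_to_index['rock']])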
    
    # Create lists to keep the sentences and the next character
    sentences = []   # ~ Training data
    next_chars = []  # ~ Training labels
    
    # Define hyperparameters
    step = 2        # ~ Step to take when reading the texts in characters
    chars_window = 10 # ~ Number of characters to use to predict the next one  
    
    # Loop over the text: length `chars_window` per time with step equal to `step`
    for i in range(0, len(sheldon_quotes) - chars_window, step):
        sentences.append(sheldon_quotes[i:i + chars_window])
        next_chars.append(sheldon_quotes[i + chars_window])
    
    # Print 10 (window, next-char) pairs; `print_examples` is a course
    # helper that isn't defined in these notes, so use a minimal stand-in
    def print_examples(sentences, next_chars, n):
        for window, nxt in zip(sentences[:n], next_chars[:n]):
            print(repr(window), '->', repr(nxt))
    
    print_examples(sentences, next_chars, 10)  # first pair: "You're afr" -> 'a'
    
    # Instantiate the class
    model = Sequential(name='sequential_model')
    
    # One LSTM layer (defining the input shape because it is the 
    # initial layer)
    model.add(LSTM(128, input_shape=(None, 10), name="LSTM"))
    
    # Add a dense layer with one unit
    model.add(Dense(1, activation="sigmoid", name="output"))
    
    # The summary shows the layers and the number of parameters 
    # that will be trained
    model.summary()
    
    # Define the input layer
    main_input = Input(shape=(None, 10), name="input")
    
    # One LSTM layer (input shape is already defined)
    lstm_layer = LSTM(128, name="LSTM")(main_input)
    
    # Add a dense layer with one unit
    main_output = Dense(1, activation="sigmoid", name="output")(lstm_layer)
    
    # Instantiate the class at the end
    model = Model(inputs=main_input, outputs=main_output, name="modelclass_model")
    
    # Same amount of parameters to train as before (71,297)
    model.summary()
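    
    # Where 71,297 comes from: an LSTM layer has 4 gates, each with
    # weights for the input (10), the recurrent state (128) and a bias
    lstm_params = 4 * ((10 + 128) * 128 + 128)   # 71,168
    dense_params = 128 * 1 + 1                   # 129
    print(lstm_params + dense_params)            # 71,297
    
    # Two sample quotes to tokenize and pad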
    texts=['So if a photon is directed through a plane with two slits in it and either slit is observed it will not go through both slits. If it’s unobserved it will, however, if it’s observed after it’s left the plane but before it hits its target, it will not have gone through both slits.',
     'Hello, female children. Allow me to inspire you with a story about a great female scientist. Polish-born, French-educated Madame Curie. Co-discoverer of radioactivity, she was a hero of science, until her hair fell out, her vomit and stool became filled with blood, and she was poisoned to death by her own discovery. With a little hard work, I see no reason why that can’t happen to any of you. Are we done? Can we go?']
    
    # Import relevant classes/functions
    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    
    # Build the dictionary of indexes
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    
    # Change texts into sequence of indexes
    texts_numeric = tokenizer.texts_to_sequences(texts)
    print("Number of words in the sample texts: ({0}, {1})".format(len(texts_numeric[0]), len(texts_numeric[1])))
    
    # Pad the sequences to a fixed length of 60 (longer sequences are
    # truncated; by default both padding and truncation happen at the
    # start of the sequence)
    texts_pad = pad_sequences(texts_numeric, maxlen=60)
    print("Now the texts have fixed length: 60. Let's see the first one: \n{0}".format(texts_pad[0]))

Build a SimpleRNN model on the Keras built-in IMDB dataset

This is a sentiment-analysis model: the label 0 represents negative sentiment and 1 represents positive sentiment.

    import tensorflow as tf
    from tensorflow.keras.datasets import imdb
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import SimpleRNN, Dense
    import numpy as np
    
    # Load the IMDB dataset (capping the vocabulary at the 10,000 most
    # frequent words is an assumption to keep the model small)
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)
    
    # Pad the sequences to the same length (maxlen=200 is an assumed cap)
    # and add a trailing feature dimension, since the RNN layer expects
    # input of shape (timesteps, features)
    maxlen = 200
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)[..., np.newaxis]
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)[..., np.newaxis]
    
    # Build model
    model = Sequential()
    model.add(SimpleRNN(units=128, input_shape=(None, 1)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', 
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Train briefly so the evaluation is meaningful (an untrained model
    # would score ~50%; one epoch is a minimal, illustrative choice)
    model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
    
    # Method '.evaluate()' shows the loss and accuracy
    loss, acc = model.evaluate(x_test, y_test, verbose=0)
    print("Loss: {0} \nAccuracy: {1}".format(loss, acc))

    GRU and LSTM cells

    GRU cells were proposed in 2014 and add gating to the vanilla RNN cell (the full GRU has an update gate and a reset gate; the simplified view here uses only the update gate). Before updating the memory state, we first compute a candidate ã_t that carries the present information. We then compute the update gate g_u, which determines whether the candidate ã_t becomes the new memory state or whether we keep the previous state a_{t-1}. If the gate is 0 the network keeps the previous hidden state, and if it is 1 it uses the new value ã_t. Intermediate gate values blend the previous and candidate states, but during training the gate tends to saturate near 0 or 1.
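
    A minimal NumPy sketch of this simplified, update-gate-only GRU step (the weight names and sizes are illustrative, not from the course):

    import numpy as np
    
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))
    
    n_h, n_x = 4, 3                              # illustrative sizes
    rng = np.random.default_rng(0)
    W_a, U_a = rng.normal(size=(n_h, n_x)), rng.normal(size=(n_h, n_h))
    W_u, U_u = rng.normal(size=(n_h, n_x)), rng.normal(size=(n_h, n_h))
    
    def gru_step(a_prev, x_t):
        # Candidate state carrying the present information
        a_tilde = np.tanh(W_a @ x_t + U_a @ a_prev)
        # Update gate: 0 keeps a_prev, 1 switches to the candidate
        g_u = sigmoid(W_u @ x_t + U_u @ a_prev)
        return g_u * a_tilde + (1 - g_u) * a_prev
    
    print(gru_step(np.zeros(n_h), np.ones(n_x)))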

    LSTM was first proposed in 1997 and adds three gates to the vanilla RNN cell. The forget gate g_f determines whether the previous cell state c_{t-1} should be forgotten (that is, have its value set to zero) or kept. The update gate g_u does the same for the candidate state c̃_t, and the output gate g_o does the same for the new hidden state. The green circles in the course diagram represent the gates: think of each as an open or closed gate that blocks the value to its left when the gate is 0 and lets it pass when it is 1.
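
    A corresponding sketch of one LSTM step, using the standard formulation in which the three gates are computed from the previous hidden state a_{t-1} (again, weights and sizes are illustrative):

    import numpy as np
    
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))
    
    def lstm_step(a_prev, c_prev, x_t, params):
        W_f, U_f, W_u, U_u, W_o, U_o, W_c, U_c = params
        g_f = sigmoid(W_f @ x_t + U_f @ a_prev)      # forget gate
        g_u = sigmoid(W_u @ x_t + U_u @ a_prev)      # update gate
        g_o = sigmoid(W_o @ x_t + U_o @ a_prev)      # output gate
        c_tilde = np.tanh(W_c @ x_t + U_c @ a_prev)  # candidate state
        c_t = g_f * c_prev + g_u * c_tilde           # keep old, admit new
        a_t = g_o * np.tanh(c_t)                     # gated hidden state
        return a_t, c_t
    
    n_h, n_x = 4, 3                                  # illustrative sizes
    rng = np.random.default_rng(1)
    params = [rng.normal(size=s) for s in [(n_h, n_x), (n_h, n_h)] * 4]
    a_t, c_t = lstm_step(np.zeros(n_h), np.zeros(n_h), np.ones(n_x), params)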

    # Build simple RNN model
    SimpleRNN_model = Sequential()
    SimpleRNN_model.add(SimpleRNN(units=128, input_shape=(None, 1)))
    SimpleRNN_model.add(Dense(1, activation='sigmoid'))
    SimpleRNN_model.compile(loss='binary_crossentropy', 
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Build GRU model
    gru_model = Sequential()
    gru_model.add(GRU(units=128, input_shape=(None,1)))
    gru_model.add(Dense(1, activation='sigmoid'))
    gru_model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Print the old and new model summaries
    SimpleRNN_model.summary()
    gru_model.summary()
    
    # Train both models briefly so the comparison is meaningful
    # (a single epoch is a minimal, illustrative choice)
    SimpleRNN_model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
    gru_model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=0)
    
    # Evaluate the models' performance (ignore the loss value)
    _, acc_simpleRNN = SimpleRNN_model.evaluate(x_test, y_test, verbose=0)
    _, acc_GRU = gru_model.evaluate(x_test, y_test, verbose=0)
    
    # Print the results
    print("SimpleRNN model's accuracy:\t{0}".format(acc_simpleRNN))
    print("GRU model's accuracy:\t{0}".format(acc_GRU))
    # Be warned! Running this cell will take 20+ minutes
    # Build model: stacked LSTM layers must return the full sequence
    # (return_sequences=True) so the next LSTM receives one vector per
    # timestep; only the last LSTM returns a single vector
    model = Sequential()
    model.add(LSTM(units=128, input_shape=(None, 1), return_sequences=True))
    model.add(LSTM(units=128, return_sequences=True))
    model.add(LSTM(units=128, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    # Train before evaluating (this is the slow part; three epochs is
    # an assumed setting)
    model.fit(x_train, y_train, epochs=3, batch_size=128, verbose=0)
    
    print("Loss: %0.04f\nAccuracy: %0.04f" % tuple(model.evaluate(x_test, y_test, verbose=0)))