Language Translator

Made by SeanvonB | Source

This project was part of my Natural Language Processing Nanodegree, which I completed in late 2020. This particular Nanodegree – in fact, this particular project – had been my goal throughout my studies of machine learning. I was just so excited to work on it back then, and I'm still excited to share the work with you now. Machine translation has a long and fascinating history that involved many different approaches before the widespread commercial adoption of Neural Machine Translation (NMT) around 2016 or so. The following NMT pipeline, which I created with TensorFlow via Keras, reflects some of the state-of-the-art practices of that period, but it was already somewhat outdated when I built it in 2020, thanks largely to Google Brain's attention-based Transformer model.

This notebook includes three main sections:

  1. Preprocessing, where I examine, tokenize, and pad the dataset.
  2. Models, where I showcase three different network features on their own before combining them into the final model.
  3. Prediction, where I show how the trained model performs.

Let's get started with a whole bunch of workspace helpers and imports:

Ain't nobody got time for training networks on CPU, so this cell simply confirms that the running workspace has access to a GPU, whether through a Udacity Workspace, Amazon Web Services, Google Cloud Platform, or an onboard device. As you can see below, this notebook did:
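As a rough illustration, here's one way that check can be done in a recent TensorFlow 2.x environment; the exact call used in the original notebook may have differed:

```python
import tensorflow as tf

# List any GPUs visible to TensorFlow; an empty list means training will fall back to CPU.
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    print(f"GPU(s) available: {[gpu.name for gpu in gpus]}")
else:
    print("No GPU found -- expect very slow training.")
```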

1.0 Preprocessing

Gotta start with the data!

1.1 Dataset

Language datasets are some of the oldest, largest, and best-maintained datasets available to data science, and the most commonly used translation sets are apparently those from WMT. However, these sets are enormous, so Udacity provided truncated versions of these datasets as vocabulary subsets that can train simple networks much faster. These files, for English and French, are located in the data directory and will be loaded below using the provided helper.py package:
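Here's a minimal sketch of what that loading step might look like; I'm assuming helper.py exposes a load_data function that returns one sentence per line, so treat the exact call as illustrative:

```python
import helper

# Load the truncated English and French corpora from the data directory.
english_sentences = helper.load_data("data/small_vocab_en")
french_sentences = helper.load_data("data/small_vocab_fr")

print(f"Loaded {len(english_sentences)} English and {len(french_sentences)} French sentences.")
```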

1.2 Sample the Data

Each index of small_vocab_en and small_vocab_fr contains the same sentence in its respective language.

The following simply prints the first two pairs:
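Something along these lines, assuming the sentence lists loaded above:

```python
# Show the first two English/French pairs side by side.
for i in range(2):
    print(f"small_vocab_en line {i + 1}: {english_sentences[i]}")
    print(f"small_vocab_fr line {i + 1}: {french_sentences[i]}")
```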

Obviously, this data has already undergone some preprocessing, because everything is lowercase and the punctuation is delimited with spaces. This isn't surprising, as these samples come from established datasets that are used for research, but those steps would otherwise have been Steps 1 and 2.

1.3 Vocabulary Complexity

In this instance, "complexity" refers to the total number of words in the dataset and the number of unique words among them. You can probably intuit that more "complex" problems require more complex solutions, so the following will provide some insight into the complexity of what Udacity selected:
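A quick word count, sketched here with Python's collections.Counter, is enough to get those numbers:

```python
import collections

# Flatten each corpus into a list of words, then count totals and unique entries.
english_words = [word for sentence in english_sentences for word in sentence.split()]
french_words = [word for sentence in french_sentences for word in sentence.split()]

english_counter = collections.Counter(english_words)
french_counter = collections.Counter(french_words)

print(f"English: {len(english_words)} total words, {len(english_counter)} unique words")
print(f"French:  {len(french_words)} total words, {len(french_counter)} unique words")
print("Ten most common English words:", [word for word, _ in english_counter.most_common(10)])
```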

For comparison, Lewis Carroll's Alice's Adventures in Wonderland has 15,500 total words and 2,766 unique words.

So, there isn't that much complexity to this dataset.

1.4 Tokenize the Vocabulary

There are many steps involved in assembling a computer vision pipeline that a natural language processing pipeline can thankfully skip. However, there's one significant difference that must be addressed: unlike image data, language data isn't already numerical. Networks can't perform massive matrix maths on letters.

That's where tokenizing comes in. Tokenizing can occur at the character level; but, for this application, I'll tokenize at the word level. This will create a lookup table of word IDs, where each ID represents one word. Fortunately, this process is very easy with the Keras Tokenizer object.

I'll also print the outcome as an example:
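Here's a minimal sketch of that tokenize step; the sample sentences are made up for illustration:

```python
from tensorflow.keras.preprocessing.text import Tokenizer

def tokenize(sentences):
    """Fit a word-level Tokenizer on the sentences and return (ID sequences, tokenizer)."""
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sentences)
    return tokenizer.texts_to_sequences(sentences), tokenizer

# Tokenize a couple of sample sentences and print the mapping alongside the result.
sample_sentences = ["the quick brown fox jumps over the lazy dog", "the dog sleeps by the fire"]
sample_sequences, sample_tokenizer = tokenize(sample_sentences)
print(sample_tokenizer.word_index)
for sentence, sequence in zip(sample_sentences, sample_sequences):
    print(f"{sentence}  ->  {sequence}")
```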

As you can see, the Tokenizer assigns an ID to each word – more frequent words get lower IDs, with ties broken by order of appearance.

1.5 Pad the Inputs

The network will expect every batch of word ID sequences (an abstract way of saying "batch of sentences") to be the same length, but that doesn't naturally occur in either dimension: length varies between different sentences within each language and between the same sentence in different languages. Since sentences/sequences are fully dynamic in length, padding must be added to the end of each sequence to make them all as long as the longest sample in the batch.

Keras provides another function, pad_sequences, for just this purpose:
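A sketch of the padding helper, reusing the sample sequences from above:

```python
from tensorflow.keras.preprocessing.sequence import pad_sequences

def pad(sequences, length=None):
    """Pad every sequence with trailing zeros so they all match the longest one (or `length`)."""
    return pad_sequences(sequences, maxlen=length, padding="post")

# The shorter sample sentence gains trailing zeros to match the longer one.
print(pad(sample_sequences))
```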

1.6 Preprocess Pipeline

Here's the full preprocessing pipeline, which includes the above tokenize and pad functions, plus a .reshape() of the data to accommodate how Keras implements sparse categorical crossentropy, the loss function that I've chosen for this project. Finally, the vocabulary sizes must be increased by 1 to account for the new <PAD> token – this dumb thing had me stumped for a while.
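Sketched out, the pipeline looks roughly like this, building on the tokenize and pad helpers above; the reshape gives the sparse labels the explicit final dimension that Keras expects:

```python
def preprocess(x_sentences, y_sentences):
    """Tokenize and pad both corpora, then reshape the labels for sparse categorical crossentropy."""
    x_sequences, x_tokenizer = tokenize(x_sentences)
    y_sequences, y_tokenizer = tokenize(y_sentences)

    x_padded = pad(x_sequences)
    y_padded = pad(y_sequences)

    # Keras wants the sparse labels to carry an explicit final dimension of 1.
    y_padded = y_padded.reshape(*y_padded.shape, 1)

    return x_padded, y_padded, x_tokenizer, y_tokenizer

preproc_english, preproc_french, english_tokenizer, french_tokenizer = preprocess(
    english_sentences, french_sentences
)

# Add 1 to each vocabulary size to account for the <PAD> token, which occupies ID 0.
english_vocab_size = len(english_tokenizer.word_index) + 1
french_vocab_size = len(french_tokenizer.word_index) + 1
```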

And that's all for data preprocessing!

2.0 Models

This section showcases some experimentation with neural network architectures. From the start, I was pretty certain that the final architecture would use all of the tested features, so I was mostly just curious how much of an impact each would have on performance.

Here are the four architectures that will be shown in this section:

  1. Simple RNN
  2. RNN with Embedding
  3. Bidirectional RNN
  4. Final Model

But, first, there's an issue with what all of these models will output...

2.1 IDs to Text

Everything that was done to preprocess the data was done to help the network handle it. But, regardless of the architecture, every model must end with a function that converts the base output – a sequence of word IDs – back into sentences that humans can understand. That's what the following logits_to_text function does:

Note: the word logit, in this context, refers to the vector of scores that the network outputs over the French vocabulary for each position in the sequence; logits_to_text simply keeps the highest-scoring word ID at each index.
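Here's roughly how such a conversion can be written, assuming the tokenizer produced during preprocessing:

```python
import numpy as np

def logits_to_text(logits, tokenizer):
    """Turn one sequence of per-word output scores back into a readable sentence."""
    # Invert the tokenizer's word -> ID mapping, reserving ID 0 for the <PAD> token.
    index_to_words = {index: word for word, index in tokenizer.word_index.items()}
    index_to_words[0] = "<PAD>"
    # Keep the highest-scoring word at each position in the sequence.
    return " ".join(index_to_words[prediction] for prediction in np.argmax(logits, axis=1))
```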

2.2 Model #1: Simple RNN

It feels pedantic to say "a simple RNN" – y'know, just your run-of-the-mill Recurrent Neural Network. There isn't anything simple about RNNs, which I used previously in my Image Captioner project. What RNNs added that previous neural networks lacked is memory between steps. As you can see in the following diagram, each step passes information both out of the network and forward to the next step, which allows the network to handle sequential data, like language, where subsequent outputs are determined as much by previous outputs as they are by current inputs.

But this project will build upon this foundation with some new twists; so, for this notebook, I'll start with a simple RNN:
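Here's a minimal sketch of what such a model can look like in Keras; the GRU cells, layer sizes, learning rate, and training settings are illustrative assumptions rather than the notebook's exact values:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, TimeDistributed
from tensorflow.keras.optimizers import Adam

def simple_model(input_shape, french_vocab_size):
    """A plain recurrent model: padded English word IDs in, per-step French word probabilities out."""
    model = Sequential([
        GRU(256, input_shape=input_shape[1:], return_sequences=True),
        TimeDistributed(Dense(1024, activation="relu")),
        TimeDistributed(Dense(french_vocab_size, activation="softmax")),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    return model

# Pad the English IDs to the French sequence length and give each step a single feature.
tmp_x = pad(preproc_english, preproc_french.shape[1])
tmp_x = tmp_x.reshape((-1, preproc_french.shape[1], 1))

simple_rnn_model = simple_model(tmp_x.shape, french_vocab_size)
simple_rnn_model.fit(tmp_x, preproc_french, batch_size=1024, epochs=10, validation_split=0.2)
print(logits_to_text(simple_rnn_model.predict(tmp_x[:1])[0], french_tokenizer))
```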

Well, that's an actual sentence... with essentially the opposite of the intended meaning.

2.3 Model #2: RNN with Embedding

Word IDs are a pretty basic way to represent a word for the network; there's a better way: word embeddings. Unlike word IDs, which represent each word as a single integer, word embeddings represent words as vectors in n-dimensional space, i.e. a big cloud of words, where similar words can cluster closer to each other. Word embeddings can help the network understand nuances in language, like how hot can be closer to cold in one dimension and closer to sexy in another. In the example below, you can see the word the – with the word ID 8 – being embedded as the vector [0.2, 4, 2.4, 1.1, ...], which continues for n dimensions.

The following uses a Keras Embedding layer with n set to 256:
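A sketch of that version, again with assumed layer sizes; the Embedding layer takes the raw 2-D word IDs directly, so there's no need for the extra feature dimension this time:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, TimeDistributed
from tensorflow.keras.optimizers import Adam

def embed_model(input_shape, english_vocab_size, french_vocab_size):
    """Like the simple model, but each word ID is first mapped to a 256-dimensional vector."""
    model = Sequential([
        Embedding(english_vocab_size, 256, input_length=input_shape[1]),
        GRU(256, return_sequences=True),
        TimeDistributed(Dense(1024, activation="relu")),
        TimeDistributed(Dense(french_vocab_size, activation="softmax")),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    return model

# Feed 2-D (batch, sequence) word IDs, padded to the French sequence length.
tmp_x = pad(preproc_english, preproc_french.shape[1])
embed_rnn_model = embed_model(tmp_x.shape, english_vocab_size, french_vocab_size)
embed_rnn_model.fit(tmp_x, preproc_french, batch_size=1024, epochs=10, validation_split=0.2)
```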

Now that's pretty good!

2.4 Model #3: Bidirectional RNN

An RNN allows the model to handle sequential data, like language; but a bidirectional RNN allows the model to handle language better. That's because a bidirectional RNN can also see future inputs! That might not be necessary for rote and inflexible sentence structures, but most instances of English will feature split, subordinate, or conditional clauses, phrasal verb tenses, or prepositional phrases – these can cause all manner of unusual splices and inversions of sentence structure. And that's just English – I have no idea what linguistic chicanery French gets up to!

This time, the model features a Keras Bidirectional layer:
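A sketch of the bidirectional variant, kept deliberately parallel to the simple model so that only the wrapped recurrent layer changes; sizes are again assumptions:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, GRU, Dense, TimeDistributed
from tensorflow.keras.optimizers import Adam

def bd_model(input_shape, french_vocab_size):
    """The simple model again, but the recurrent layer also reads each sequence right-to-left."""
    model = Sequential([
        Bidirectional(GRU(256, return_sequences=True), input_shape=input_shape[1:]),
        TimeDistributed(Dense(1024, activation="relu")),
        TimeDistributed(Dense(french_vocab_size, activation="softmax")),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    return model

# Same 3-D input as the simple model: padded word IDs with a single feature per step.
tmp_x = pad(preproc_english, preproc_french.shape[1]).reshape((-1, preproc_french.shape[1], 1))
bd_rnn_model = bd_model(tmp_x.shape, french_vocab_size)
bd_rnn_model.fit(tmp_x, preproc_french, batch_size=1024, epochs=10, validation_split=0.2)
```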

Uh-oh, that's somehow worse... Oh, of course! Bidirectional must take twice as long to train!

2.5 Model #4: Final Model

At this point, the architecture is becoming a little complicated, and its training needs are becoming a little less reasonable. But you know I'm still gonna mash the three previous approaches together with some Dropout and see what happens. Clearly, the Embedding layer had by far the most significant impact; however, I'm curious whether the Bidirectional layer will perform better on word embeddings. The following model begs for more training time, but I gave this one the same 10 epochs that the previous models had.

Here's the final model:
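A sketch of how the pieces can be combined: embedding in front, a bidirectional recurrent layer in the middle, and some Dropout before the output. As before, the exact sizes and dropout rate are illustrative:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, GRU, Dense, TimeDistributed, Dropout
from tensorflow.keras.optimizers import Adam

def final_model(input_shape, english_vocab_size, french_vocab_size):
    """Embedding + bidirectional GRU + dropout, trained end to end like the previous models."""
    model = Sequential([
        Embedding(english_vocab_size, 256, input_length=input_shape[1]),
        Bidirectional(GRU(256, return_sequences=True)),
        TimeDistributed(Dense(1024, activation="relu")),
        Dropout(0.5),
        TimeDistributed(Dense(french_vocab_size, activation="softmax")),
    ])
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    return model

# Same 2-D embedded input as Model #2, trained for the same 10 epochs.
tmp_x = pad(preproc_english, preproc_french.shape[1])
final_rnn_model = final_model(tmp_x.shape, english_vocab_size, french_vocab_size)
final_rnn_model.fit(tmp_x, preproc_french, batch_size=1024, epochs=10, validation_split=0.2)
print(logits_to_text(final_rnn_model.predict(tmp_x[:1])[0], french_tokenizer))
```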

That's almost dead on, with only an errant space in l'automne from the printed sample.

3.0 Prediction

The following was provided by Udacity to assess my work on the Nanodegree assignment, which passed review:

I'm still floored by achieving 95% validation accuracy after only 10 epochs, because this model could still benefit from so much more training. Further enhancements to this architecture could also be made, like the encoder-decoder arrangement I used for the Image Captioner. I'm so proud to have reached this point, and I hope you found the journey interesting.

Thanks for reading!

Made by SeanvonB | Source