text = 'Hi this is a small sentence'

# We choose a sequence length
seq_len = 3

# Split text into a list of words
words = text.split()  # ['Hi', 'this', 'is', 'a', 'small', 'sentence']

# Make lines of 3 words each, like: ['Hi this is', 'this is a', 'is a small', 'a small sentence']
lines = []
for i in range(seq_len, len(words) + 1):
    line = ' '.join(words[i - seq_len:i])
    lines.append(line)

# Import Tokenizer from the Keras text-preprocessing module
from tensorflow.keras.preprocessing.text import Tokenizer

# Instantiate the Tokenizer
tokenizer = Tokenizer()

# Fit it on the lines built above
tokenizer.fit_on_texts(lines)

# Turn the lines into numeric sequences
sequences = tokenizer.texts_to_sequences(lines)
# [[5, 3, 1], [3, 1, 2], [1, 2, 4], [2, 4, 6]]

print(tokenizer.index_word)
# {1: 'is', 2: 'a', 3: 'this', 4: 'small', 5: 'hi', 6: 'sentence'}
# We can use this mapping to decode numeric sequences back into the original text.

# Import the Sequential model and the Dense, LSTM and Embedding layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding

model = Sequential()

# Vocabulary size: add 1 because the encoding starts at 1, not 0;
# index 0 is reserved (Keras uses it for padding)
vocab_size = len(tokenizer.index_word) + 1

# Start with an embedding layer. This layer is needed when we deal with categorical
# data like text in NLP: it lets the network learn dense vectors that capture
# similarity between tokens.
# input_dim = number of unique tokens, input_length = length of each input sequence,
# output_dim = number of columns of the dense embedding matrix.
# Each 3-word line is meant to be split into a 2-word input and a 1-word target,
# hence input_length=2.
model.add(Embedding(input_dim=vocab_size, output_dim=8, input_length=2))

# Adding an LSTM layer
model.add(LSTM(8))

# Adding a Dense hidden layer
model.add(Dense(8, activation='relu'))

# Adding an output layer with softmax; the last Dense layer must have as many units
# as the embedding layer's input_dim (one probability per vocabulary token)
model.add(Dense(vocab_size, activation='softmax'))


#### Example 2

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer

# Split text into an array of words
words = text.split()

# Make sentences of 4 words each, moving one word at a time
sentences = []
for i in range(4, len(words) + 1):
    sentences.append(' '.join(words[i - 4:i]))

# Instantiate a Tokenizer, then fit it on the sentences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

# Turn sentences into sequences of numbers
sequences = tokenizer.texts_to_sequences(sentences)
print("Sentences:\n {}\n Sequences:\n {}".format(sentences[:5], sequences[:5]))

vocab_size = len(tokenizer.index_word) + 1
print(tokenizer.index_word)

# Import the Embedding, LSTM and Dense layers
from tensorflow.keras.layers import LSTM, Dense, Embedding

model = Sequential()

# Add an Embedding layer with the right parameters; the model sees 3 words at a time
model.add(Embedding(input_dim=vocab_size, input_length=3, output_dim=8))

# Add a 32-unit LSTM layer
model.add(LSTM(32))

# Add a hidden Dense layer of 32 units and an output layer of vocab_size with softmax
model.add(Dense(32, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))
model.summary()


def predict_text(test_text, model=model):
    if len(test_text.split()) != 3:
        print('Text input should be 3 words!')
        return False

    # Turn the test_text into a sequence of numbers
    test_seq = tokenizer.texts_to_sequences([test_text])
    test_seq = np.array(test_seq)

    # Use the model passed as a parameter to predict the next word
    pred = model.predict(test_seq).argmax(axis=1)[0]

    # Return the word that maps to the predicted index
    return tokenizer.index_word[pred]


# Note: this call assumes `text` is a longer corpus that actually contains these
# words; with the toy sentence above they would not be in the vocabulary.
predict_text('meet revenge with')
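# Neither example above actually compiles or trains the text model: the tokenized
# sequences still have to be split into inputs (all words but the last) and a
# target (the last word) before fitting. A minimal sketch of that step, assuming
# the `sequences`, `vocab_size`, `model` and `tokenizer` from Example 2; the
# epoch count is illustrative, not from the original:
import numpy as np
from tensorflow.keras.utils import to_categorical

seq_arr = np.array(sequences)   # shape: (num_sentences, 4) - windows are equal length
X = seq_arr[:, :-1]             # first 3 words of each sentence as input
y = to_categorical(seq_arr[:, -1], num_classes=vocab_size)  # one-hot 4th word as target

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=100, verbose=0)

# The Tokenizer can also decode sequences back into text directly:
print(tokenizer.sequences_to_texts(sequences[:2]))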
#### Example 3

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# trainX is expected to have shape (samples, 1, look_back) and trainY shape (samples,);
# look_back, trainX and trainY are assumed to be defined elsewhere.
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)
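# The snippet above leaves `look_back`, `trainX` and `trainY` undefined. A hedged
# sketch of one common way to build them from a 1-D series with a sliding window;
# the names `series` and `create_dataset` are illustrative, not from the original:
import numpy as np

def create_dataset(series, look_back=1):
    # Slide a window of `look_back` values over the series; each window is an
    # input sample and the value right after it is the target.
    X, y = [], []
    for i in range(len(series) - look_back):
        X.append(series[i:i + look_back])
        y.append(series[i + look_back])
    return np.array(X), np.array(y)

look_back = 1
series = np.sin(np.linspace(0, 10, 200))  # toy series for illustration
trainX, trainY = create_dataset(series, look_back)
# Reshape to (samples, timesteps=1, features=look_back) to match input_shape above
trainX = trainX.reshape((trainX.shape[0], 1, look_back))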