# Tensorflow快餐教程(12) – 用机器写莎士比亚的戏剧

### 高层框架：TFLearn和Keras

#### 机器来写莎士比亚的戏剧

```python
from __future__ import absolute_import, division, print_function

import os
import pickle
from six.moves import urllib

import tflearn
from tflearn.data_utils import *

path = "shakespeare_input.txt"
char_idx_file = 'char_idx.pickle'

# Download the Shakespeare corpus on first run.
if not os.path.isfile(path):
    urllib.request.urlretrieve("https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt", path)

maxlen = 25  # length of each training sequence, in characters

# Reuse a previously saved char -> index mapping so checkpoints stay valid
# across runs.
char_idx = None
if os.path.isfile(char_idx_file):
    print('Loading previous char_idx')
    char_idx = pickle.load(open(char_idx_file, 'rb'))

# NOTE: the published text used '/' as a line continuation, which is a
# Python syntax error; '\' is the correct continuation character.
X, Y, char_idx = \
    textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3,
                                         pre_defined_char_idx=char_idx)

pickle.dump(char_idx, open(char_idx_file, 'wb'))

# Three stacked 512-unit LSTM layers interleaved with 50% dropout,
# ending in a softmax over the character vocabulary.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_shakespeare')

# Train one epoch at a time, sampling generated text after each epoch
# at two temperatures (lower temperature -> more conservative output).
for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=1, run_id='shakespeare')
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(600, temperature=0.5, seq_seed=seed))
```

```bash
pip install tflearn
```

TFLearn是专门为Tensorflow开发的高层次API框架。

```python
# Network definition: three 512-unit LSTM layers, each followed by 50%
# dropout, then a softmax over the character vocabulary trained with Adam.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

# Wrap the graph in a character-sequence generator.
m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_shakespeare')
```

```python
# Build neural network: two 32-unit hidden layers and a 2-way softmax.
net = tflearn.input_data(shape=[None, 6])
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 32)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net)

# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)
```

#### 从生成城市名字说起

``Zachary Zafra Zag Zahl Zaleski Zalma Zama Zanesfield Zanesville Zap Zapata Zarah Zavalla Zearing Zebina Zebulon Zeeland Zeigler Zela Zelienople Zell Zellwood Zemple Zena Zenda Zenith Zephyr Zephyr Cove Zephyrhills Zia Pueblo Zillah Zilwaukee Zim Zimmerman Zinc Zion Zionsville Zita Zoar Zolfo Springs Zona Zumbro Falls Zumbrota Zuni Zurich Zwingle Zwolle ``

```python
from __future__ import absolute_import, division, print_function

import os
from six import moves
import ssl

import tflearn
from tflearn.data_utils import *

# Download the list of US city names on first run.
path = "US_Cities.txt"
if not os.path.isfile(path):
    # Unverified SSL context works around certificate issues with the
    # raw.githubusercontent.com download.
    context = ssl._create_unverified_context()
    moves.urllib.request.urlretrieve("https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/US_Cities.txt", path, context=context)

maxlen = 20  # length of each training sequence, in characters

# NOTE(review): str.decode only exists under Python 2 (where read() returns
# bytes); under Python 3 use io.open(path, encoding='utf-8') instead.
string_utf8 = open(path, "r").read().decode('utf-8')
# NOTE: the published text used '/' as a line continuation, which is a
# Python syntax error; '\' is the correct continuation character.
X, Y, char_idx = \
    string_to_semi_redundant_sequences(string_utf8, seq_maxlen=maxlen, redun_step=3)

# Two 512-unit LSTM layers with 50% dropout, then a softmax over the
# character vocabulary trained with Adam.
g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_us_cities')

# Train one epoch at a time, sampling 30-character continuations at three
# temperatures (higher temperature -> more random output).
for i in range(40):
    seed = random_sequence_from_string(string_utf8, maxlen)
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=1, run_id='us_cities')
    print("-- TESTING...")
    print("-- Test with temperature of 1.2 --")
    print(m.generate(30, temperature=1.2, seq_seed=seed).encode('utf-8'))
    print("-- Test with temperature of 1.0 --")
    print(m.generate(30, temperature=1.0, seq_seed=seed).encode('utf-8'))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(30, temperature=0.5, seq_seed=seed).encode('utf-8'))
```

``t and Shoot Cuthbertd Lettfrecv El Ceoneel Sutd Sa ``

``stle Finchford Finch Dasthond madloogd Wlaycoyarfw ``

``averal Cape Carteret Acbiropa Heowar Sor Dittoy Do ``

``hoenchen Schofield Stcojos Schabell StcaKnerum Cri ``

Keras是可以跨TensorFlow、微软的CNTK等多种后端运行的高层API。

```bash
pip install keras
```

```python
# Model: a single 128-unit LSTM followed by a dense softmax layer over
# the character set, trained with RMSprop on categorical cross-entropy.
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
```

```python
'''Example script to generate text from Nietzsche's writings.

At least 20 epochs are required before the generated text starts sounding
coherent. It is recommended to run this script on GPU, as recurrent networks
are quite computationally intensive. If you try this script on new data, make
sure your corpus has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode inputs and targets.  NOTE: the original used dtype=np.bool,
# which was removed in NumPy 1.24+; the builtin bool is the equivalent dtype.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array:
    # rescale log-probabilities by 1/temperature, renormalize, then draw
    # one sample from the resulting multinomial distribution.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        # Generate 400 characters, feeding each prediction back in as
        # the last character of the sliding input window.
        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])
```
