forked from lcdevelop/ChatBotCourse
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhello_sequence.py
More file actions
98 lines (82 loc) · 3.65 KB
/
hello_sequence.py
File metadata and controls
98 lines (82 loc) · 3.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# coding:utf-8
from __future__ import print_function
import numpy as np
import tensorflow as tf
import sys
vocab_size=256
learning_rate=0.1
# 暂时只试验一个bucket
buckets=[(10, 10)]
bucket_id=0
# 填充0
PAD=[0]
sample_size=20
# LSTM中的记忆单元数目
num_units=100
# 多少层的lstm
num_layers=2
# sample_size个样本,每个样本有一个question、answer、weights,question、answer分别是10维的向量
# 这sample_size个样本有时间序上的依赖关系
question_sample_list = [map(ord, "hello?") + PAD * 4] * sample_size
answer_sample_list = [map(ord, "world!") + PAD * 4] * sample_size
init_weights_list = [[1.0]*7 + [0.0]*3] *sample_size # mask padding. todo: redundant --
with tf.Session() as session:
# 初始化神经网络单元
cell = single_cell = tf.nn.rnn_cell.LSTMCell(num_units)
if num_layers > 1:
cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
# 定义函数
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
return tf.nn.seq2seq.embedding_rnn_seq2seq(
encoder_inputs, decoder_inputs, cell,
num_encoder_symbols=vocab_size,
num_decoder_symbols=vocab_size,
embedding_size=num_units,
feed_previous=do_decode)
# 初始化训练用的变量,如果是多个层,权重共享
encoder_inputs = []
decoder_inputs = []
weights = []
for i in xrange(sample_size):
encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
for i in xrange(sample_size):
decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
weights.append(tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
targets = [decoder_inputs[i] for i in xrange(len(decoder_inputs))]
# 创建模型及损失计算方法
buckets_outputs, losses = tf.nn.seq2seq.model_with_buckets(
encoder_inputs, decoder_inputs, targets,
weights, buckets,
lambda x, y: seq2seq_f(x, y, False))
# 梯度更新算法
updates=[]
for b in xrange(len(buckets)):
updates.append(tf.train.AdamOptimizer(learning_rate).minimize(losses[b]))
# 用于保存模型
saver = tf.train.Saver(tf.all_variables())
# 初始化
session.run(tf.initialize_all_variables())
while True:
encoder_size = len(encoder_inputs)
decoder_size = len(decoder_inputs)
# 初始化feed_dict数据
feed_dict = {}
for i in xrange(encoder_size):
feed_dict[encoder_inputs[i].name] = question_sample_list[i]
for i in xrange(decoder_size):
feed_dict[decoder_inputs[i].name] = answer_sample_list[i]
feed_dict[weights[i].name] = init_weights_list[i]
# 初始化fetches模型相关信息,fetches就是想拿什么就拿什么,比如updates就是拿更新值,losses就是拿损失值,buckets_outputs就是拿输出值
fetches = [updates[bucket_id], losses[bucket_id]]
fetches.append(buckets_outputs[bucket_id][0])
# 这一句是为了拿输出,训练过程可以不要
for i in xrange(len(buckets_outputs[bucket_id])):
fetches.append(buckets_outputs[bucket_id][i])
# 参数传递进去的是数据和计算逻辑,具体执行时可以传到各种介质中执行
fetches_outputs = session.run(fetches, feed_dict)
perplexity = fetches_outputs[1]
outputs = fetches_outputs[2:]
print ("perplexity =", perplexity)
words = np.argmax(outputs, axis=2)
word = "".join(map(chr, words[0])).replace('\x00', '').replace('\n', '')
print("output: %s" % word)