
Commit fcef0ce

Author: shixiaowen03 (committed)
Commit message: AC, A2C, A3C
1 parent a626029 · commit fcef0ce

File tree

7 files changed: +104 −68 lines changed


.idea/misc.xml

Lines changed: 1 addition & 1 deletion

.idea/tensorflow1.2.iml

Lines changed: 1 addition & 1 deletion

.idea/workspace.xml

Lines changed: 81 additions & 60 deletions

RL/Basic-A2C-Demo/A2C.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -1,11 +1,12 @@
 import numpy as np
 import tensorflow as tf
 import gym
+import pandas as pd

 OUTPUT_GRAPH = False
-MAX_EPISODE = 3000
+MAX_EPISODE = 500
 DISPLAY_REWARD_THRESHOLD = 200  # renders environment if total episode reward is greater than this threshold
-MAX_EP_STEPS = 1000   # maximum time step in one episode
+MAX_EP_STEPS = 2000   # maximum time step in one episode
 RENDER = False  # rendering wastes time
 GAMMA = 0.9     # reward discount in TD error
 LR_A = 0.001    # learning rate for actor
@@ -100,7 +101,7 @@ def learn(self, s, r, s_):
         td_error, _ = self.sess.run([self.td_error, self.train_op],
                                     {self.s: s, self.v_: v_, self.r: r})
         return td_error
-
+
 # there are two actions: push the cart left or right
 # the state is four-dimensional

@@ -118,6 +119,7 @@ def learn(self, s, r, s_):

 sess.run(tf.global_variables_initializer())

+res = []
 for i_episode in range(MAX_EPISODE):
     s = env.reset()
     t = 0
@@ -148,5 +150,8 @@ def learn(self, s, r, s_):
             running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
             if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True  # rendering
             print("episode:", i_episode, "  reward:", int(running_reward))
+            res.append([i_episode, running_reward])
             break

+pd.DataFrame(res, columns=['episode', 'a2c_reward']).to_csv('../a2c_reward.csv')
+
```
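The change to A2C.py (and the matching one to AC.py further down) follows a simple pattern: collect (episode, running_reward) pairs while training, then dump them to a CSV once at the end. A minimal standalone sketch of that logging pattern, with a dummy loop standing in for the real training loop and the file written to the current directory rather than '../':

```python
import pandas as pd

res = []
for i_episode in range(3):              # stand-in for the real training loop
    running_reward = 10.0 * i_episode   # stand-in for the moving-average episode reward
    res.append([i_episode, running_reward])

# The commit writes to '../a2c_reward.csv'; this sketch stays in the working directory.
pd.DataFrame(res, columns=['episode', 'a2c_reward']).to_csv('a2c_reward.csv')
```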

RL/Basic-A3C-Demo/A3C.py

Lines changed: 5 additions & 1 deletion
```diff
@@ -6,13 +6,14 @@
 import os
 import shutil
 import matplotlib.pyplot as plt
+import pandas as pd


 GAME = 'CartPole-v0'
 OUTPUT_GRAPH = True
 LOG_DIR = './log'
 N_WORKERS = multiprocessing.cpu_count()
-MAX_GLOBAL_EP = 1000
+MAX_GLOBAL_EP = 500
 GLOBAL_NET_SCOPE = 'Global_Net'
 UPDATE_GLOBAL_ITER = 10
 GAMMA = 0.9
@@ -184,6 +185,9 @@ def work(self):
     worker_threads.append(t)
 COORD.join(worker_threads)  # add the started worker threads to the main thread and wait for them to finish

+res = np.concatenate([np.arange(len(GLOBAL_RUNNING_R)).reshape(-1, 1), np.array(GLOBAL_RUNNING_R).reshape(-1, 1)], axis=1)
+pd.DataFrame(res, columns=['episode', 'a3c_reward']).to_csv('../a3c_reward.csv')
+
 plt.plot(np.arange(len(GLOBAL_RUNNING_R)), GLOBAL_RUNNING_R)
 plt.xlabel('step')
 plt.ylabel('Total moving reward')
```
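In A3C.py the moving-average rewards are already accumulated by the workers in the global list GLOBAL_RUNNING_R, so the diff builds the (episode, reward) table in one step with np.concatenate instead of appending pairs inside the loop. A sketch of that construction with dummy values; the commented-out form is an equivalent, shorter alternative, not what the commit uses:

```python
import numpy as np
import pandas as pd

GLOBAL_RUNNING_R = [12.0, 15.5, 21.3]   # dummy stand-in for the shared reward list

# Pair each episode index with its moving-average reward, as in the diff.
res = np.concatenate([np.arange(len(GLOBAL_RUNNING_R)).reshape(-1, 1),
                      np.array(GLOBAL_RUNNING_R).reshape(-1, 1)], axis=1)
pd.DataFrame(res, columns=['episode', 'a3c_reward']).to_csv('a3c_reward.csv')

# Equivalent without the concatenate:
# pd.DataFrame({'episode': np.arange(len(GLOBAL_RUNNING_R)),
#               'a3c_reward': GLOBAL_RUNNING_R}).to_csv('a3c_reward.csv')
```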

RL/Basic-A3C-Demo/log/events.out.tfevents.1543473204.meituan-sxwdeMacBook-Pro-4.local renamed to RL/Basic-A3C-Demo/log/events.out.tfevents.1543484083.meituan-sxwdeMacBook-Pro-4.local

1.01 MB (binary file, not rendered)

RL/Basic-AC-Demo/AC.py

Lines changed: 8 additions & 2 deletions
```diff
@@ -1,11 +1,12 @@
 import numpy as np
 import tensorflow as tf
 import gym
+import pandas as pd

 OUTPUT_GRAPH = False
-MAX_EPISODE = 3000
+MAX_EPISODE = 500
 DISPLAY_REWARD_THRESHOLD = 200  # renders environment if total episode reward is greater than this threshold
-MAX_EP_STEPS = 1000   # maximum time step in one episode
+MAX_EP_STEPS = 2000   # maximum time step in one episode
 RENDER = False  # rendering wastes time
 GAMMA = 0.9     # reward discount in TD error
 LR_A = 0.001    # learning rate for actor
@@ -126,6 +127,7 @@ def learn(self, s, a, r, s_):

 sess.run(tf.global_variables_initializer())

+res = []
 for i_episode in range(MAX_EPISODE):
     s = env.reset()
     t = 0
@@ -156,5 +158,9 @@ def learn(self, s, a, r, s_):
             running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
             if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True  # rendering
             print("episode:", i_episode, "  reward:", int(running_reward))
+            res.append([i_episode, running_reward])
+
             break

+pd.DataFrame(res, columns=['episode', 'ac_reward']).to_csv('../ac_reward.csv')
+
```
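The three scripts now write ac_reward.csv, a2c_reward.csv and a3c_reward.csv into the same parent directory, presumably so the learning curves can be compared. A possible follow-up script for that comparison; it is not part of this commit and assumes all three CSVs have already been produced and sit next to it:

```python
import pandas as pd
import matplotlib.pyplot as plt

# Column names match the ones written by AC.py, A2C.py and A3C.py in this commit.
curves = {'AC':  ('ac_reward.csv',  'ac_reward'),
          'A2C': ('a2c_reward.csv', 'a2c_reward'),
          'A3C': ('a3c_reward.csv', 'a3c_reward')}

for label, (path, column) in curves.items():
    df = pd.read_csv(path)
    plt.plot(df['episode'], df[column], label=label)

plt.xlabel('episode')
plt.ylabel('running reward')
plt.legend()
plt.show()
```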
