Commit 6eaa0af

Update BipedalWalker-v3_PPO.py
1 parent dc1319c commit 6eaa0af

1 file changed: +7 -6 lines changed

BipedalWalker-v3_PPO/BipedalWalker-v3_PPO.py

Lines changed: 7 additions & 6 deletions
@@ -52,8 +52,8 @@ def run(self):
         self.child_conn.send(state)
         while True:
             action = self.child_conn.recv()
-            if self.is_render and self.env_idx == 0:
-                self.env.render()
+            #if self.is_render and self.env_idx == 0:
+                #self.env.render()
 
             state, reward, done, info = self.env.step(action)
             state = np.reshape(state, [1, self.state_size])
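For context, the loop above is the body of a pipe-based environment worker: the child process blocks on recv() for an action, steps its own copy of the env, and sends the observation back, which is why any render() call would have to run inside the child process. A minimal standalone sketch of that pattern (hypothetical worker function, classic Gym step API as used by this repo):

import multiprocessing as mp
import numpy as np
import gym

def worker(child_conn, env_name):
    # Each worker owns a private env instance; the trainer only exchanges
    # actions and observations over the pipe.
    env = gym.make(env_name)
    state = env.reset()
    child_conn.send(state)
    while True:
        action = child_conn.recv()                 # block until an action arrives
        state, reward, done, info = env.step(action)
        if done:
            state = env.reset()
        child_conn.send((state, reward, done))

if __name__ == '__main__':
    parent_conn, child_conn = mp.Pipe()
    proc = mp.Process(target=worker, args=(child_conn, 'BipedalWalker-v3'))
    proc.start()
    state = parent_conn.recv()                     # initial observation
    parent_conn.send(np.zeros(4))                  # BipedalWalker has 4 continuous actions
    state, reward, done = parent_conn.recv()
    proc.terminate()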
@@ -173,8 +173,9 @@ def act(self, state):
         # Use the network to predict the next action to take, using the model
         pred = self.Actor.predict(state)
 
-        action = pred + np.random.normal(size=pred.shape) * self.std
-        action = np.clip(action, -1, 1) # -1 and 1 are boundaries of tanh
+        low, high = -1.0, 1.0 # -1 and 1 are boundaries of tanh
+        action = pred + np.random.uniform(low, high, size=pred.shape) * self.std
+        action = np.clip(action, low, high)
 
         logp_t = self.gaussian_likelihood(action, pred, self.log_std)
 
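The hunk above swaps the exploration noise from zero-mean Gaussian to uniform on [low, high], scaled by self.std, and clips the result to the tanh range. A minimal before/after sketch of the sampling, with pred and std as stand-ins for the agent's attributes:

import numpy as np

pred = np.array([[0.2, -0.5, 0.1, 0.7]])  # stand-in for self.Actor.predict(state)
std = 0.3                                 # stand-in for self.std
low, high = -1.0, 1.0                     # tanh output bounds, as in the diff

# Before this commit: zero-mean Gaussian exploration noise
action_old = np.clip(pred + np.random.normal(size=pred.shape) * std, low, high)

# After this commit: noise drawn uniformly from [low, high], then scaled by std
action_new = np.clip(pred + np.random.uniform(low, high, size=pred.shape) * std, low, high)

Worth noting: gaussian_likelihood(action, pred, self.log_std) on the following line still evaluates a Gaussian log-density, so with uniform sampling the log-probability fed into the PPO ratio no longer matches the distribution the action was actually drawn from.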

@@ -413,7 +414,7 @@ def test(self, test_episodes = 100):#evaluate
             done = False
             score = 0
             while not done:
-                #self.env.render()
+                self.env.render()
                 action = self.Actor.predict(state)[0]
                 state, reward, done, _ = self.env.step(action)
                 state = np.reshape(state, [1, self.state_size[0]])
@@ -430,5 +431,5 @@ def test(self, test_episodes = 100):#evaluate
     env_name = 'BipedalWalker-v3'
     agent = PPOAgent(env_name)
     #agent.run_batch() # train as PPO
-    #agent.run_multiprocesses(num_worker = 2) # train PPO multiprocessed (fastest)
+    #agent.run_multiprocesses(num_worker = 16) # train PPO multiprocessed (fastest)
     agent.test()
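If the multiprocessed trainer is re-enabled, the worker count can be derived from the machine instead of hard-coded; a small sketch, assuming run_multiprocesses accepts any positive num_worker:

import multiprocessing

agent = PPOAgent('BipedalWalker-v3')
# One rollout worker per CPU core is a common starting point
agent.run_multiprocesses(num_worker = multiprocessing.cpu_count())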
