@@ -52,8 +52,8 @@ def run(self):
         self.child_conn.send(state)
         while True:
             action = self.child_conn.recv()
-            if self.is_render and self.env_idx == 0:
-                self.env.render()
+            # if self.is_render and self.env_idx == 0:
+            #     self.env.render()
 
             state, reward, done, info = self.env.step(action)
             state = np.reshape(state, [1, self.state_size])
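For context (not part of this commit), the worker loop above is one side of a Pipe protocol: the child process sends the initial state, then repeatedly receives an action and replies with the resulting transition. A minimal, self-contained sketch of both sides of that exchange, with illustrative names (dummy_worker, parent_conn) and BipedalWalker's state/action sizes assumed, could look like:

import numpy as np
from multiprocessing import Pipe, Process

def dummy_worker(child_conn, state_size):
    # Stand-in for Environment.run(): send the initial state, then answer
    # every received action with a fake (state, reward, done, info) tuple.
    child_conn.send(np.zeros((1, state_size)))
    while True:
        action = child_conn.recv()
        if action is None:          # shutdown signal used only by this sketch
            break
        next_state = np.random.randn(1, state_size)
        child_conn.send((next_state, 0.0, False, {}))

if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    worker = Process(target=dummy_worker, args=(child_conn, 24))
    worker.start()
    state = parent_conn.recv()                    # mirrors child_conn.send(state)
    parent_conn.send(np.zeros(4))                 # one action for one environment step
    state, reward, done, info = parent_conn.recv()
    parent_conn.send(None)
    worker.join()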
@@ -173,8 +173,9 @@ def act(self, state):
         # Use the network to predict the next action to take, using the model
         pred = self.Actor.predict(state)
 
-        action = pred + np.random.normal(size=pred.shape) * self.std
-        action = np.clip(action, -1, 1)  # -1 and 1 are boundaries of tanh
+        low, high = -1.0, 1.0  # -1 and 1 are boundaries of tanh
+        action = pred + np.random.uniform(low, high, size=pred.shape) * self.std
+        action = np.clip(action, low, high)
 
         logp_t = self.gaussian_likelihood(action, pred, self.log_std)
 
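The updated act() still scores the sampled action with self.gaussian_likelihood, which this diff does not show. A minimal NumPy sketch of such a diagonal-Gaussian log-likelihood, assuming log_std holds one log standard deviation per action dimension, could look like:

import numpy as np

def gaussian_likelihood(action, pred, log_std):
    # log p(action) under a diagonal Gaussian with mean `pred` and std exp(log_std)
    std = np.exp(log_std) + 1e-8
    pre_sum = -0.5 * (((action - pred) / std) ** 2 + 2 * log_std + np.log(2 * np.pi))
    return np.sum(pre_sum, axis=1)    # sum over action dimensions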
@@ -413,7 +414,7 @@ def test(self, test_episodes = 100):#evaluate
             done = False
             score = 0
             while not done:
-                # self.env.render()
+                self.env.render()
                 action = self.Actor.predict(state)[0]
                 state, reward, done, _ = self.env.step(action)
                 state = np.reshape(state, [1, self.state_size[0]])
@@ -430,5 +431,5 @@ def test(self, test_episodes = 100):#evaluate
     env_name = 'BipedalWalker-v3'
     agent = PPOAgent(env_name)
     #agent.run_batch() # train as PPO
-    #agent.run_multiprocesses(num_worker = 2) # train PPO multiprocessed (fastest)
+    #agent.run_multiprocesses(num_worker = 16) # train PPO multiprocessed (fastest)
     agent.test()