From da5ac61baa4a08c49bf39f0a41f9bc984aa6e5f8 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Sat, 29 Feb 2020 22:02:01 -0800 Subject: [PATCH 1/6] use PyBullet's own visualizer for enjoy script, + add a time.sleep since it is way too fast (1000FPS for cart pole) --- .../gym/pybullet_envs/stable_baselines/enjoy.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py b/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py index d887c41d3..3b8aa4098 100644 --- a/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py +++ b/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py @@ -6,7 +6,7 @@ # MIT License import argparse import multiprocessing - +import time import gym import numpy as np import pybullet_envs @@ -35,11 +35,7 @@ if __name__ == '__main__': # Use SubprocVecEnv for rendering if not args.no_render: - # Note: fork is not thread-safe but usually is faster - fork_available = 'fork' in multiprocessing.get_all_start_methods() - start_method = 'fork' if fork_available else 'spawn' - env = SubprocVecEnv([lambda: env], start_method=start_method) - + env.render(mode='human') algo = { 'sac': SAC, @@ -67,6 +63,8 @@ if __name__ == '__main__': episode_length += 1 if not args.no_render: env.render(mode='human') + dt = 1./240. + time.sleep(dt) episode_rewards.append(episode_reward) episode_lengths.append(episode_length) print("Episode {} reward={}, length={}".format(len(episode_rewards), episode_reward, episode_length)) From 7ecb769a9cfe8bce881283cae620c815843320d9 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Sun, 1 Mar 2020 12:22:47 -0800 Subject: [PATCH 2/6] add flag to assure deterministicOverlappingPairs --- examples/pybullet/gym/pybullet_envs/env_bases.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pybullet/gym/pybullet_envs/env_bases.py b/examples/pybullet/gym/pybullet_envs/env_bases.py index ff0589ae0..f9c51c051 100644 --- a/examples/pybullet/gym/pybullet_envs/env_bases.py +++ b/examples/pybullet/gym/pybullet_envs/env_bases.py @@ -57,6 +57,7 @@ class MJCFBaseBulletEnv(gym.Env): else: self._p = bullet_client.BulletClient() self._p.resetSimulation() + self._p.setPhysicsEngineParameter(deterministicOverlappingPairs=1) #optionally enable EGL for faster headless rendering try: if os.environ["PYBULLET_EGL"]: From 7c5073d3ab56a024d751570cc1b0ad980a7d37a4 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Sun, 1 Mar 2020 13:11:47 -0800 Subject: [PATCH 3/6] prepare towards HumanoidDeepMimicBackflipBulletEnv-v1 and HumanoidDeepMimicWalkBulletEnv-v1 remove unused SubprocVecEnv from stable_baselines/enjoy.py --- .../pybullet/gym/pybullet_envs/__init__.py | 11 +- .../deep_mimic/gym_env/__init__.py | 3 + .../deep_mimic/gym_env/deep_mimic_env.py | 175 ++++++++++++++++++ .../deep_mimic/gym_env/deepmimic_test.py | 23 +++ .../pybullet_envs/stable_baselines/enjoy.py | 2 +- 5 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/__init__.py create mode 100644 examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py create mode 100644 examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py index 83b536fdb..ed909978d 100644 --- a/examples/pybullet/gym/pybullet_envs/__init__.py +++ b/examples/pybullet/gym/pybullet_envs/__init__.py @@ -12,8 +12,15 @@ def register(id, *args, **kvargs): # ------------bullet------------- register( - id='HumanoidDeepMimicBulletEnv-v1', - entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv', + id='HumanoidDeepMimicBackflipBulletEnv-v1', + entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicBackflipBulletEnv', + max_episode_steps=1000, + reward_threshold=20000.0, +) + +register( + id='HumanoidDeepMimicWalkBulletEnv-v1', + entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicWalkBulletEnv', max_episode_steps=1000, reward_threshold=20000.0, ) diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/__init__.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/__init__.py new file mode 100644 index 000000000..44f79b499 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/__init__.py @@ -0,0 +1,3 @@ +from pybullet_envs.deep_mimic.gym_env.deep_mimic_env import HumanoidDeepMimicBackflipBulletEnv +from pybullet_envs.deep_mimic.gym_env.deep_mimic_env import HumanoidDeepMimicWalkBulletEnv + diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py new file mode 100644 index 000000000..ba77efddb --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py @@ -0,0 +1,175 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. +Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c +""" +import os, inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(os.path.dirname(currentdir)) +os.sys.path.insert(0, parentdir) + +import logging +import math +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +import time +import subprocess +import pybullet as p2 +import pybullet_data +import pybullet_utils.bullet_client as bc +from pkg_resources import parse_version +from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv +from pybullet_utils.arg_parser import ArgParser +from pybullet_utils.logger import Logger + +logger = logging.getLogger(__name__) + + +class HumanoidDeepBulletEnv(gym.Env): + metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50} + + def __init__(self, renders=False, arg_file=''): + + self._arg_parser = ArgParser() + Logger.print2("===========================================================") + succ = False + if (arg_file != ''): + path = pybullet_data.getDataPath() + "/args/" + arg_file + succ = self._arg_parser.load_file(path) + Logger.print2(arg_file) + assert succ, Logger.print2('Failed to load args from: ' + arg_file) + + self._internal_env = None + self._renders = renders + self._discrete_actions = False + self._arg_file=arg_file + self._render_height = 200 + self._render_width = 320 + + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 0.4 #2.4 + high = np.array([ + self.x_threshold * 2, + np.finfo(np.float32).max, self.theta_threshold_radians * 2, + np.finfo(np.float32).max + ]) + + self.force_mag = 10 + + if self._discrete_actions: + self.action_space = spaces.Discrete(2) + else: + action_dim = 1 + action_high = np.array([self.force_mag] * action_dim) + self.action_space = spaces.Box(-action_high, action_high) + + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + + self.viewer = None + self._configure() + + def _configure(self, display=None): + self.display = display + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + p = self._p + if self._discrete_actions: + force = self.force_mag if action == 1 else -self.force_mag + else: + force = action[0] + + p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force) + p.stepSimulation() + + self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2] + theta, theta_dot, x, x_dot = self.state + + done = x < -self.x_threshold \ + or x > self.x_threshold \ + or theta < -self.theta_threshold_radians \ + or theta > self.theta_threshold_radians + done = bool(done) + reward = 1.0 + #print("state=",self.state) + return np.array(self.state), reward, done, {} + + def reset(self): + # print("-----------reset simulation---------------") + if self._internal_env==None: + self._internal_env = PyBulletDeepMimicEnv(self._arg_parser, self._renders) + self._internal_env.reset() + agent_id = -1 #unused here + state = self._internal_env.record_state(agent_id) + return state + + def render(self, mode='human', close=False): + if mode == "human": + self._renders = True + if mode != "rgb_array": + return np.array([]) + base_pos=[0,0,0] + self._cam_dist = 2 + self._cam_pitch = 0.3 + self._cam_yaw = 0 + if (self._physics_client_id>=0): + view_matrix = self._p.computeViewMatrixFromYawPitchRoll( + cameraTargetPosition=base_pos, + distance=self._cam_dist, + yaw=self._cam_yaw, + pitch=self._cam_pitch, + roll=0, + upAxisIndex=2) + proj_matrix = self._p.computeProjectionMatrixFOV(fov=60, + aspect=float(self._render_width) / + self._render_height, + nearVal=0.1, + farVal=100.0) + (_, _, px, _, _) = self._p.getCameraImage( + width=self._render_width, + height=self._render_height, + renderer=self._p.ER_BULLET_HARDWARE_OPENGL, + viewMatrix=view_matrix, + projectionMatrix=proj_matrix) + else: + px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8) + rgb_array = np.array(px, dtype=np.uint8) + rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1)) + rgb_array = rgb_array[:, :, :3] + return rgb_array + + def configure(self, args): + pass + + def close(self): + if self._physics_client_id >= 0: + self._p.disconnect() + self._physics_client_id = -1 + +class HumanoidDeepMimicBackflipBulletEnv(HumanoidDeepBulletEnv): + metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50} + + def __init__(self, renders=False): + # start the bullet physics server + HumanoidDeepBulletEnv.__init__(self, renders, arg_file="run_humanoid3d_backflip_args.txt") + + +class HumanoidDeepMimicWalkBulletEnv(HumanoidDeepBulletEnv): + metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50} + + def __init__(self, renders=False): + # start the bullet physics server + HumanoidDeepBulletEnv.__init__(self, renders, arg_file="run_humanoid3d_walk_args.txt") + +class CartPoleContinuousBulletEnv5(HumanoidDeepBulletEnv): + metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50} + + def __init__(self, renders=False): + # start the bullet physics server + HumanoidDeepBulletEnv.__init__(self, renders, arg_file="") diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py new file mode 100644 index 000000000..789a04614 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py @@ -0,0 +1,23 @@ +import gym +import pybullet_envs +import time + +env = gym.make('HumanoidDeepMimicBackflipBulletEnv-v1') +env.render(mode='human') +env.reset() +print("------------------------------------") +print("env=",env) +print(dir(env)) +print(dir(env.env)) +dt = 1./240. +logId = env.env._internal_env._pybullet_client.startStateLogging(env.env._internal_env._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS, "perf.json") +for i in range (100): + env.env._internal_env._pybullet_client.submitProfileTiming("loop") + #time.sleep(dt) + #keys = env.env._internal_env._pybullet_client.getKeyboardEvents() + #if keys: + # print (keys) + env.reset() + env.env._internal_env._pybullet_client.submitProfileTiming() + +env.env._internal_env._pybullet_client.stopStateLogging(logId) \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py b/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py index 3b8aa4098..7f1ea5eae 100644 --- a/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py +++ b/examples/pybullet/gym/pybullet_envs/stable_baselines/enjoy.py @@ -12,7 +12,7 @@ import numpy as np import pybullet_envs from stable_baselines import SAC, TD3 -from stable_baselines.common.vec_env import SubprocVecEnv + from stable_baselines.common.evaluation import evaluate_policy from pybullet_envs.stable_baselines.utils import TimeFeatureWrapper From 81c1daacbbf98266fad034fc9c128fa8d74a3085 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Sun, 1 Mar 2020 21:13:10 -0800 Subject: [PATCH 4/6] Connect deep_mimic_env.py to internal pybullet_deep_mimic_env.py This adds two untested Gym environments to pybullet_envs. todo: train using PPO2 etc HumanoidDeepMimicBackflipBulletEnv-v1 HumanoidDeepMimicWalkBulletEnv-v1 --- .../deep_mimic/gym_env/deep_mimic_env.py | 84 ++++++++++++------- .../deep_mimic/gym_env/deepmimic_test.py | 25 ++++-- 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py index ba77efddb..55a50ef50 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deep_mimic_env.py @@ -40,6 +40,8 @@ class HumanoidDeepBulletEnv(gym.Env): Logger.print2(arg_file) assert succ, Logger.print2('Failed to load args from: ' + arg_file) + self._p = None + self._time_step = 1./240. self._internal_env = None self._renders = renders self._discrete_actions = False @@ -55,16 +57,37 @@ class HumanoidDeepBulletEnv(gym.Env): np.finfo(np.float32).max ]) - self.force_mag = 10 - - if self._discrete_actions: - self.action_space = spaces.Discrete(2) - else: - action_dim = 1 - action_high = np.array([self.force_mag] * action_dim) - self.action_space = spaces.Box(-action_high, action_high) - self.observation_space = spaces.Box(-high, high, dtype=np.float32) + ctrl_size = 43 #numDof + root_size = 7 # root + + action_dim = ctrl_size - root_size + + action_bound_min = np.array([ + -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000, + -1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000, + -1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000, + -1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000, + -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000, + -1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000, + -1.000000000, -4.710000000 + ]) + + #print("len(action_bound_min)=",len(action_bound_min)) + action_bound_max = np.array([ + 4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000, + 1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, + 1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000, + 8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000, + 1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000 + ]) + #print("len(action_bound_max)=",len(action_bound_max)) + + self.action_space = spaces.Box(action_bound_min, action_bound_max) + observation_min = np.array([0.0]+[-100.0]+[-4.0]*105+[-500.0]*90) + observation_max = np.array([1.0]+[100.0]+[4.0]*105+[500.0]*90) + state_size = 197 + self.observation_space = spaces.Box(observation_min, observation_min, dtype=np.float32) self.seed() @@ -79,25 +102,24 @@ class HumanoidDeepBulletEnv(gym.Env): return [seed] def step(self, action): - p = self._p - if self._discrete_actions: - force = self.force_mag if action == 1 else -self.force_mag - else: - force = action[0] + + #apply control action + agent_id = -1 + self._internal_env.set_action(agent_id, action) + + #step sim + self._internal_env.update(self._time_step) + - p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force) - p.stepSimulation() - - self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2] - theta, theta_dot, x, x_dot = self.state - - done = x < -self.x_threshold \ - or x > self.x_threshold \ - or theta < -self.theta_threshold_radians \ - or theta > self.theta_threshold_radians - done = bool(done) - reward = 1.0 - #print("state=",self.state) + #record state + self.state = self._internal_env.record_state(agent_id) + + #record reward + reward = self._internal_env.calc_reward(agent_id) + + #record done + done = self._internal_env.is_episode_end() + return np.array(self.state), reward, done, {} def reset(self): @@ -105,6 +127,7 @@ class HumanoidDeepBulletEnv(gym.Env): if self._internal_env==None: self._internal_env = PyBulletDeepMimicEnv(self._arg_parser, self._renders) self._internal_env.reset() + self._p = self._internal_env._pybullet_client agent_id = -1 #unused here state = self._internal_env.record_state(agent_id) return state @@ -118,7 +141,7 @@ class HumanoidDeepBulletEnv(gym.Env): self._cam_dist = 2 self._cam_pitch = 0.3 self._cam_yaw = 0 - if (self._physics_client_id>=0): + if (not self._p == None): view_matrix = self._p.computeViewMatrixFromYawPitchRoll( cameraTargetPosition=base_pos, distance=self._cam_dist, @@ -148,9 +171,8 @@ class HumanoidDeepBulletEnv(gym.Env): pass def close(self): - if self._physics_client_id >= 0: - self._p.disconnect() - self._physics_client_id = -1 + + pass class HumanoidDeepMimicBackflipBulletEnv(HumanoidDeepBulletEnv): metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50} diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py index 789a04614..46c875aba 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/gym_env/deepmimic_test.py @@ -2,7 +2,8 @@ import gym import pybullet_envs import time -env = gym.make('HumanoidDeepMimicBackflipBulletEnv-v1') +#env = gym.make('HumanoidDeepMimicBackflipBulletEnv-v1') +env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1') env.render(mode='human') env.reset() print("------------------------------------") @@ -13,11 +14,23 @@ dt = 1./240. logId = env.env._internal_env._pybullet_client.startStateLogging(env.env._internal_env._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS, "perf.json") for i in range (100): env.env._internal_env._pybullet_client.submitProfileTiming("loop") - #time.sleep(dt) - #keys = env.env._internal_env._pybullet_client.getKeyboardEvents() - #if keys: - # print (keys) + env.reset() env.env._internal_env._pybullet_client.submitProfileTiming() +env.env._internal_env._pybullet_client.stopStateLogging(logId) -env.env._internal_env._pybullet_client.stopStateLogging(logId) \ No newline at end of file +action = env.env.action_space.sample() +while (1): + time.sleep(dt) + #keys = env.env._internal_env._pybullet_client.getKeyboardEvents() + #if keys: + # env.reset() + #action=[0]*36 + action = env.env.action_space.sample() + state, reward, done, info = env.step(action) + #env.render(mode='rgb_array') + if done: + env.reset() + #action = env.env.action_space.sample() + #print("reward=",reward) + \ No newline at end of file From 6b393dffca806a2b6501f5647749a2a1c4ca4b55 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Sun, 1 Mar 2020 21:27:30 -0800 Subject: [PATCH 5/6] set correct reward for deep_mimic gymenv (max episode = 1000 steps, 1 reward at each step) terminate testrl.py at 1000, show reward --- examples/pybullet/gym/pybullet_envs/__init__.py | 4 ++-- .../pybullet/gym/pybullet_envs/deep_mimic/testrl.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py index ed909978d..c49a64166 100644 --- a/examples/pybullet/gym/pybullet_envs/__init__.py +++ b/examples/pybullet/gym/pybullet_envs/__init__.py @@ -15,14 +15,14 @@ register( id='HumanoidDeepMimicBackflipBulletEnv-v1', entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicBackflipBulletEnv', max_episode_steps=1000, - reward_threshold=20000.0, + reward_threshold=1000.0, ) register( id='HumanoidDeepMimicWalkBulletEnv-v1', entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicWalkBulletEnv', max_episode_steps=1000, - reward_threshold=20000.0, + reward_threshold=1000.0, ) register( diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py index bca52c161..e6be22277 100644 --- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py @@ -19,14 +19,25 @@ import random update_timestep = 1. / 240. animating = True step = False +total_reward = 0 +steps = 0 def update_world(world, time_elapsed): timeStep = update_timestep world.update(timeStep) reward = world.env.calc_reward(agent_id=0) + global total_reward + total_reward += reward + global steps + steps+=1 + #print("reward=",reward) + #print("steps=",steps) end_episode = world.env.is_episode_end() - if (end_episode): + if (end_episode or steps>= 1000): + print("total_reward=",total_reward) + total_reward=0 + steps = 0 world.end_episode() world.reset() return From 1f3c0057e5af04e6987a0df7a48588074a9a7ecc Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Mon, 2 Mar 2020 12:52:14 -0800 Subject: [PATCH 6/6] increase the number of steps from 1000 to 2000, to mimic multiple cycles --- examples/pybullet/gym/pybullet_envs/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/__init__.py b/examples/pybullet/gym/pybullet_envs/__init__.py index c49a64166..8f67de5d6 100644 --- a/examples/pybullet/gym/pybullet_envs/__init__.py +++ b/examples/pybullet/gym/pybullet_envs/__init__.py @@ -14,15 +14,15 @@ def register(id, *args, **kvargs): register( id='HumanoidDeepMimicBackflipBulletEnv-v1', entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicBackflipBulletEnv', - max_episode_steps=1000, - reward_threshold=1000.0, + max_episode_steps=2000, + reward_threshold=2000.0, ) register( id='HumanoidDeepMimicWalkBulletEnv-v1', entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicWalkBulletEnv', - max_episode_steps=1000, - reward_threshold=1000.0, + max_episode_steps=2000, + reward_threshold=2000.0, ) register(