Connect deep_mimic_env.py to internal pybullet_deep_mimic_env.py

This adds two still-untested Gym environments to pybullet_envs. TODO: train them using PPO2 etc.
HumanoidDeepMimicBackflipBulletEnv-v1
HumanoidDeepMimicWalkBulletEnv-v1
Erwin Coumans 2020-03-01 21:13:10 -08:00 committed by Xuchen Han
parent 812c675d4c
commit 9ecd0884d9
2 changed files with 72 additions and 37 deletions
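
The training TODO above is left open by this commit. For reference, a minimal sketch of how one of the new envs could be trained with the stable-baselines PPO2 implementation (the library choice and hyperparameters are assumptions, not part of this commit):

# Hypothetical PPO2 training sketch; assumes stable-baselines (TF 1.x era)
# is installed alongside pybullet. Not part of this commit.
import gym
import pybullet_envs  # registers the HumanoidDeepMimic*BulletEnv-v1 envs

from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: gym.make('HumanoidDeepMimicWalkBulletEnv-v1')])
model = PPO2('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=1000000)
model.save('deep_mimic_walk_ppo2')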

deep_mimic_env.py:

@@ -40,6 +40,8 @@ class HumanoidDeepBulletEnv(gym.Env):
      Logger.print2(arg_file)
    assert succ, Logger.print2('Failed to load args from: ' + arg_file)
+    self._p = None
+    self._time_step = 1./240.
    self._internal_env = None
    self._renders = renders
    self._discrete_actions = False
@@ -55,16 +57,37 @@ class HumanoidDeepBulletEnv(gym.Env):
        np.finfo(np.float32).max
    ])
    self.force_mag = 10
    if self._discrete_actions:
      self.action_space = spaces.Discrete(2)
    else:
      action_dim = 1
      action_high = np.array([self.force_mag] * action_dim)
      self.action_space = spaces.Box(-action_high, action_high)
    ctrl_size = 43  #numDof
    root_size = 7  #root
    self.observation_space = spaces.Box(-high, high, dtype=np.float32)
    # NOTE: the cartpole-era action_space/observation_space assignments above
    # are dead code; both are overwritten by the DeepMimic bounds below.
    action_dim = ctrl_size - root_size
    action_bound_min = np.array([
        -4.79999999999, -1.0, -1.0, -1.0, -4.0, -1.0, -1.0, -1.0, -7.77999999999,
        -1.0, -1.0, -1.0, -7.85, -6.28, -1.0, -1.0, -1.0, -12.56, -1.0, -1.0,
        -1.0, -4.71, -7.779999999, -1.0, -1.0, -1.0, -7.85, -6.28, -1.0, -1.0,
        -1.0, -8.46, -1.0, -1.0, -1.0, -4.71
    ])
    #print("len(action_bound_min)=",len(action_bound_min))
    action_bound_max = np.array([
        4.799999999, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 1.0, 8.779999999, 1.0,
        1.0, 1.0, 4.71, 6.28, 1.0, 1.0, 1.0, 12.56, 1.0, 1.0, 1.0, 7.85,
        8.7799999, 1.0, 1.0, 1.0, 4.71, 6.28, 1.0, 1.0, 1.0, 10.1, 1.0,
        1.0, 1.0, 7.85
    ])
    #print("len(action_bound_max)=",len(action_bound_max))
    self.action_space = spaces.Box(action_bound_min, action_bound_max)
    observation_min = np.array([0.0] + [-100.0] + [-4.0] * 105 + [-500.0] * 90)
    observation_max = np.array([1.0] + [100.0] + [4.0] * 105 + [500.0] * 90)
    state_size = 197  # 1 + 1 + 105 + 90
    self.observation_space = spaces.Box(observation_min, observation_max, dtype=np.float32)
    self.seed()
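
With these bounds, the action space is a 36-dimensional Box (ctrl_size 43 minus the 7 root coordinates) and the observation space a 197-dimensional Box (1 + 1 + 105 + 90 entries, matching state_size). A quick shape sanity check, assuming the envs are registered as named above (a sketch, not part of this commit):

import gym
import pybullet_envs

env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')
assert env.action_space.shape == (36,)        # ctrl_size - root_size
assert env.observation_space.shape == (197,)  # 1 + 1 + 105 + 90
print(env.action_space.low[:5], env.action_space.high[:5])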
@@ -79,25 +102,24 @@ class HumanoidDeepBulletEnv(gym.Env):
    return [seed]

  def step(self, action):
    p = self._p
-    if self._discrete_actions:
-      force = self.force_mag if action == 1 else -self.force_mag
-    else:
-      force = action[0]
-    p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
-    p.stepSimulation()
+    #apply control action
+    agent_id = -1
+    self._internal_env.set_action(agent_id, action)
-    self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
-    theta, theta_dot, x, x_dot = self.state
+    #step sim
+    self._internal_env.update(self._time_step)
+    #record state
+    self.state = self._internal_env.record_state(agent_id)
+    #record reward
+    reward = self._internal_env.calc_reward(agent_id)
+    #record done
+    done = self._internal_env.is_episode_end()
-    done = x < -self.x_threshold \
-        or x > self.x_threshold \
-        or theta < -self.theta_threshold_radians \
-        or theta > self.theta_threshold_radians
-    done = bool(done)
-    reward = 1.0
+    #print("state=",self.state)
    return np.array(self.state), reward, done, {}

  def reset(self):
@@ -105,6 +127,7 @@ class HumanoidDeepBulletEnv(gym.Env):
    if self._internal_env==None:
      self._internal_env = PyBulletDeepMimicEnv(self._arg_parser, self._renders)
    self._internal_env.reset()
+    self._p = self._internal_env._pybullet_client
    agent_id = -1  #unused here
    state = self._internal_env.record_state(agent_id)
    return state
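
reset() builds the internal PyBulletDeepMimicEnv lazily on first use and hands its bullet client to the wrapper. Driving the internal env directly follows the same set_action → update → record_state → calc_reward cycle that step() wraps; a minimal sketch, assuming only the method names visible in this diff (arg-parser setup elided, my_policy hypothetical):

# Sketch: the internal DeepMimic cycle without the Gym wrapper.
internal_env = PyBulletDeepMimicEnv(arg_parser, False)  # renders=False
internal_env.reset()
agent_id = -1
state = internal_env.record_state(agent_id)
while not internal_env.is_episode_end():
  internal_env.set_action(agent_id, my_policy(state))  # my_policy: hypothetical
  internal_env.update(1. / 240.)  # one fixed substep, as in step() above
  state = internal_env.record_state(agent_id)
  reward = internal_env.calc_reward(agent_id)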
@@ -118,7 +141,7 @@ class HumanoidDeepBulletEnv(gym.Env):
    self._cam_dist = 2
    self._cam_pitch = 0.3
    self._cam_yaw = 0
-    if (self._physics_client_id>=0):
+    if (not self._p == None):
      view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
          cameraTargetPosition=base_pos,
          distance=self._cam_dist,
@@ -148,9 +171,8 @@ class HumanoidDeepBulletEnv(gym.Env):
    pass

  def close(self):
-    if self._physics_client_id >= 0:
-      self._p.disconnect()
-    self._physics_client_id = -1
+    pass

class HumanoidDeepMimicBackflipBulletEnv(HumanoidDeepBulletEnv):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
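
Only the Backflip subclass's metadata is visible in this hunk. Presumably each subclass just forwards a motion-specific DeepMimic arg file to the base constructor; a sketch of that pattern (the constructor signature and arg-file name are assumptions):

class HumanoidDeepMimicWalkBulletEnv(HumanoidDeepBulletEnv):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False):
    # arg file name is a guess based on the DeepMimic data layout
    HumanoidDeepBulletEnv.__init__(self, renders, arg_file="run_humanoid3d_walk_args.txt")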

Example script:

@@ -2,7 +2,8 @@ import gym
import pybullet_envs
import time

-env = gym.make('HumanoidDeepMimicBackflipBulletEnv-v1')
+#env = gym.make('HumanoidDeepMimicBackflipBulletEnv-v1')
+env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')
env.render(mode='human')
env.reset()
print("------------------------------------")
@@ -13,11 +14,23 @@ dt = 1./240.
logId = env.env._internal_env._pybullet_client.startStateLogging(env.env._internal_env._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS, "perf.json")
for i in range(100):
  env.env._internal_env._pybullet_client.submitProfileTiming("loop")
  #time.sleep(dt)
  #keys = env.env._internal_env._pybullet_client.getKeyboardEvents()
  #if keys:
  #  print (keys)
  env.reset()
  env.env._internal_env._pybullet_client.submitProfileTiming()
env.env._internal_env._pybullet_client.stopStateLogging(logId)

+action = env.env.action_space.sample()
+while (1):
+  time.sleep(dt)
+  #keys = env.env._internal_env._pybullet_client.getKeyboardEvents()
+  #if keys:
+  #  env.reset()
+  #action=[0]*36
+  action = env.env.action_space.sample()
+  state, reward, done, info = env.step(action)
+  #env.render(mode='rgb_array')
+  if done:
+    env.reset()
+  #action = env.env.action_space.sample()
+  #print("reward=",reward)