Mirror of https://github.com/bulletphysics/bullet3
Merge pull request #2649 from erwincoumans/master
use PyBullet's own visualizer for enjoy script, + add a time.sleep si…
Commit 2b1e27834b
@@ -12,10 +12,17 @@ def register(id, *args, **kvargs):

# ------------bullet-------------

register(
    id='HumanoidDeepMimicBulletEnv-v1',
    entry_point='pybullet_envs.deep_mimic:HumanoidDeepMimicGymEnv',
    max_episode_steps=1000,
    reward_threshold=20000.0,
)

register(
    id='HumanoidDeepMimicBackflipBulletEnv-v1',
    entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicBackflipBulletEnv',
    max_episode_steps=2000,
    reward_threshold=2000.0,
)

register(
    id='HumanoidDeepMimicWalkBulletEnv-v1',
    entry_point='pybullet_envs.deep_mimic.gym_env:HumanoidDeepMimicWalkBulletEnv',
    max_episode_steps=2000,
    reward_threshold=2000.0,
)

register(
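A quick sanity check that the new IDs resolve once pybullet_envs is imported (a minimal sketch, not part of this commit; it assumes pybullet, gym and the bundled pybullet_data files are installed):

import gym
import pybullet_envs  # importing the package runs the register() calls above

env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')
print(env.action_space.shape, env.observation_space.shape)
env.close()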
@@ -0,0 +1,3 @@
from pybullet_envs.deep_mimic.gym_env.deep_mimic_env import HumanoidDeepMimicBackflipBulletEnv
from pybullet_envs.deep_mimic.gym_env.deep_mimic_env import HumanoidDeepMimicWalkBulletEnv
@@ -0,0 +1,197 @@
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
"""
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import logging
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import subprocess
import pybullet as p2
import pybullet_data
import pybullet_utils.bullet_client as bc
from pkg_resources import parse_version
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger

logger = logging.getLogger(__name__)


class HumanoidDeepBulletEnv(gym.Env):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False, arg_file=''):

    self._arg_parser = ArgParser()
    Logger.print2("===========================================================")
    succ = False
    if (arg_file != ''):
      path = pybullet_data.getDataPath() + "/args/" + arg_file
      succ = self._arg_parser.load_file(path)
      Logger.print2(arg_file)
    assert succ, Logger.print2('Failed to load args from: ' + arg_file)

    self._p = None
    self._time_step = 1. / 240.
    self._internal_env = None
    self._renders = renders
    self._discrete_actions = False
    self._arg_file = arg_file
    self._render_height = 200
    self._render_width = 320

    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 0.4  #2.4
    high = np.array([
        self.x_threshold * 2,
        np.finfo(np.float32).max, self.theta_threshold_radians * 2,
        np.finfo(np.float32).max
    ])

    ctrl_size = 43  #numDof
    root_size = 7  # root

    action_dim = ctrl_size - root_size

    action_bound_min = np.array([
        -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
        -1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
        -1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
        -1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
        -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
        -1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
        -1.000000000, -4.710000000
    ])

    #print("len(action_bound_min)=",len(action_bound_min))
    action_bound_max = np.array([
        4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
        1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
        1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
        8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
        1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
    ])
    #print("len(action_bound_max)=",len(action_bound_max))

    self.action_space = spaces.Box(action_bound_min, action_bound_max)
    observation_min = np.array([0.0] + [-100.0] + [-4.0] * 105 + [-500.0] * 90)
    observation_max = np.array([1.0] + [100.0] + [4.0] * 105 + [500.0] * 90)
    state_size = 197
    self.observation_space = spaces.Box(observation_min, observation_max, dtype=np.float32)

    self.seed()

    self.viewer = None
    self._configure()

  def _configure(self, display=None):
    self.display = display

  def seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def step(self, action):

    #apply control action
    agent_id = -1
    self._internal_env.set_action(agent_id, action)

    #step sim
    self._internal_env.update(self._time_step)

    #record state
    self.state = self._internal_env.record_state(agent_id)

    #record reward
    reward = self._internal_env.calc_reward(agent_id)

    #record done
    done = self._internal_env.is_episode_end()

    return np.array(self.state), reward, done, {}

  def reset(self):
    # print("-----------reset simulation---------------")
    if self._internal_env == None:
      self._internal_env = PyBulletDeepMimicEnv(self._arg_parser, self._renders)
    self._internal_env.reset()
    self._p = self._internal_env._pybullet_client
    agent_id = -1  #unused here
    state = self._internal_env.record_state(agent_id)
    return state

  def render(self, mode='human', close=False):
    if mode == "human":
      self._renders = True
    if mode != "rgb_array":
      return np.array([])
    base_pos = [0, 0, 0]
    self._cam_dist = 2
    self._cam_pitch = 0.3
    self._cam_yaw = 0
    if (not self._p == None):
      view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
          cameraTargetPosition=base_pos,
          distance=self._cam_dist,
          yaw=self._cam_yaw,
          pitch=self._cam_pitch,
          roll=0,
          upAxisIndex=2)
      proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
                                                       aspect=float(self._render_width) /
                                                       self._render_height,
                                                       nearVal=0.1,
                                                       farVal=100.0)
      (_, _, px, _, _) = self._p.getCameraImage(
          width=self._render_width,
          height=self._render_height,
          renderer=self._p.ER_BULLET_HARDWARE_OPENGL,
          viewMatrix=view_matrix,
          projectionMatrix=proj_matrix)
    else:
      px = np.array([[[255, 255, 255, 255]] * self._render_width] * self._render_height, dtype=np.uint8)
    rgb_array = np.array(px, dtype=np.uint8)
    rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
    rgb_array = rgb_array[:, :, :3]
    return rgb_array

  def configure(self, args):
    pass

  def close(self):
    pass


class HumanoidDeepMimicBackflipBulletEnv(HumanoidDeepBulletEnv):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False):
    # start the bullet physics server
    HumanoidDeepBulletEnv.__init__(self, renders, arg_file="run_humanoid3d_backflip_args.txt")


class HumanoidDeepMimicWalkBulletEnv(HumanoidDeepBulletEnv):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False):
    # start the bullet physics server
    HumanoidDeepBulletEnv.__init__(self, renders, arg_file="run_humanoid3d_walk_args.txt")


class CartPoleContinuousBulletEnv5(HumanoidDeepBulletEnv):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False):
    # start the bullet physics server
    HumanoidDeepBulletEnv.__init__(self, renders, arg_file="")
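The subclasses above locate their arg files under pybullet_data; a small sketch (an assumption about the installed data layout, not part of this diff) that lists the humanoid3d arg files available for arg_file:

import os
import pybullet_data

# the constructor builds: pybullet_data.getDataPath() + "/args/" + arg_file
args_dir = os.path.join(pybullet_data.getDataPath(), "args")
print(sorted(f for f in os.listdir(args_dir) if f.startswith("run_humanoid3d")))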
@@ -0,0 +1,36 @@
import gym
import pybullet_envs
import time

#env = gym.make('HumanoidDeepMimicBackflipBulletEnv-v1')
env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')
env.render(mode='human')
env.reset()
print("------------------------------------")
print("env=", env)
print(dir(env))
print(dir(env.env))
dt = 1. / 240.
logId = env.env._internal_env._pybullet_client.startStateLogging(
    env.env._internal_env._pybullet_client.STATE_LOGGING_PROFILE_TIMINGS, "perf.json")
for i in range(100):
  env.env._internal_env._pybullet_client.submitProfileTiming("loop")
  env.reset()
  env.env._internal_env._pybullet_client.submitProfileTiming()
env.env._internal_env._pybullet_client.stopStateLogging(logId)

action = env.env.action_space.sample()
while (1):
  time.sleep(dt)
  #keys = env.env._internal_env._pybullet_client.getKeyboardEvents()
  #if keys:
  #  env.reset()
  #action=[0]*36
  action = env.env.action_space.sample()
  state, reward, done, info = env.step(action)
  #env.render(mode='rgb_array')
  if done:
    env.reset()
  #action = env.env.action_space.sample()
  #print("reward=",reward)
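The script above wraps its reset loop in pybullet's profile-timing logger; stripped down to just that API, the pattern looks roughly like this (a sketch assuming only pybullet is installed, not part of the commit):

import pybullet as p

p.connect(p.DIRECT)
log_id = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "perf.json")
p.submitProfileTiming("loop")  # open a named timing block
p.stepSimulation()
p.submitProfileTiming()        # close the most recently opened block
p.stopStateLogging(log_id)
p.disconnect()

The resulting perf.json can usually be opened in a Chrome-tracing style viewer to inspect the per-block timings.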
@@ -19,14 +19,25 @@ import random
update_timestep = 1. / 240.
animating = True
step = False
total_reward = 0
steps = 0

def update_world(world, time_elapsed):
  timeStep = update_timestep
  world.update(timeStep)
  reward = world.env.calc_reward(agent_id=0)
  global total_reward
  total_reward += reward
  global steps
  steps += 1

  #print("reward=",reward)
  #print("steps=",steps)
  end_episode = world.env.is_episode_end()
  if (end_episode or steps >= 1000):
    print("total_reward=", total_reward)
    total_reward = 0
    steps = 0
    world.end_episode()
    world.reset()
  return
@@ -57,6 +57,7 @@ class MJCFBaseBulletEnv(gym.Env):
      else:
        self._p = bullet_client.BulletClient()
      self._p.resetSimulation()
      self._p.setPhysicsEngineParameter(deterministicOverlappingPairs=1)
      #optionally enable EGL for faster headless rendering
      try:
        if os.environ["PYBULLET_EGL"]:
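The hunk is truncated here; as a rough sketch of how the eglRenderer plugin is typically loaded when PYBULLET_EGL is set (the exact lines are not shown in this diff, so treat this as an assumption):

import os
import pkgutil
import pybullet_utils.bullet_client as bullet_client

p = bullet_client.BulletClient()  # DIRECT mode by default
if os.environ.get("PYBULLET_EGL"):
  egl = pkgutil.get_loader('eglRenderer')
  if egl:  # pip-installed plugin found
    p.loadPlugin(egl.get_filename(), "_eglRendererPlugin")
  else:  # fall back to the plugin bundled with pybullet
    p.loadPlugin("eglRendererPlugin")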
@@ -6,13 +6,13 @@
# MIT License
import argparse
import multiprocessing

import time
import gym
import numpy as np
import pybullet_envs

from stable_baselines import SAC, TD3
from stable_baselines.common.vec_env import SubprocVecEnv

from stable_baselines.common.evaluation import evaluate_policy

from pybullet_envs.stable_baselines.utils import TimeFeatureWrapper
@@ -35,11 +35,7 @@ if __name__ == '__main__':

    # Use SubprocVecEnv for rendering
    if not args.no_render:
        # Note: fork is not thread-safe but usually is faster
        fork_available = 'fork' in multiprocessing.get_all_start_methods()
        start_method = 'fork' if fork_available else 'spawn'
        env = SubprocVecEnv([lambda: env], start_method=start_method)

    env.render(mode='human')

    algo = {
        'sac': SAC,
@@ -67,6 +63,8 @@ if __name__ == '__main__':
            episode_length += 1
            if not args.no_render:
                env.render(mode='human')
                dt = 1. / 240.
                time.sleep(dt)
        episode_rewards.append(episode_reward)
        episode_lengths.append(episode_length)
        print("Episode {} reward={}, length={}".format(len(episode_rewards), episode_reward, episode_length))
|
Loading…
Reference in New Issue
Block a user