bullet3/examples/pybullet/gym/pybullet_envs/robot_manipulators.py
erwincoumans 851ca5bfb3 Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster.
Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1

Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym.
Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray)
2018-01-15 12:48:32 -08:00

310 lines
14 KiB
Python

from robot_bases import MJCFBasedRobot
import numpy as np
class Reacher(MJCFBasedRobot):
TARG_LIMIT = 0.27
def __init__(self):
MJCFBasedRobot.__init__(self, 'reacher.xml', 'body0', action_dim=2, obs_dim=9)
def robot_specific_reset(self):
self.jdict["target_x"].reset_current_position(
self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0)
self.jdict["target_y"].reset_current_position(
self.np_random.uniform(low=-self.TARG_LIMIT, high=self.TARG_LIMIT), 0)
self.fingertip = self.parts["fingertip"]
self.target = self.parts["target"]
self.central_joint = self.jdict["joint0"]
self.elbow_joint = self.jdict["joint1"]
self.central_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.elbow_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
def apply_action(self, a):
assert (np.isfinite(a).all())
self.central_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
self.elbow_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
def calc_state(self):
theta, self.theta_dot = self.central_joint.current_relative_position()
self.gamma, self.gamma_dot = self.elbow_joint.current_relative_position()
target_x, _ = self.jdict["target_x"].current_position()
target_y, _ = self.jdict["target_y"].current_position()
self.to_target_vec = np.array(self.fingertip.pose().xyz()) - np.array(self.target.pose().xyz())
return np.array([
target_x,
target_y,
self.to_target_vec[0],
self.to_target_vec[1],
np.cos(theta),
np.sin(theta),
self.theta_dot,
self.gamma,
self.gamma_dot,
])
def calc_potential(self):
return -100 * np.linalg.norm(self.to_target_vec)
class Pusher(MJCFBasedRobot):
min_target_placement_radius = 0.5
max_target_placement_radius = 0.8
min_object_to_target_distance = 0.1
max_object_to_target_distance = 0.4
def __init__(self):
MJCFBasedRobot.__init__(self, 'pusher.xml', 'body0', action_dim=7, obs_dim=55)
def robot_specific_reset(self):
# parts
self.fingertip = self.parts["fingertip"]
self.target = self.parts["target"]
self.object = self.parts["object"]
# joints
self.shoulder_pan_joint = self.jdict["shoulder_pan_joint"]
self.shoulder_lift_joint = self.jdict["shoulder_lift_joint"]
self.upper_arm_roll_joint = self.jdict["upper_arm_roll_joint"]
self.elbow_flex_joint = self.jdict["elbow_flex_joint"]
self.forearm_roll_joint = self.jdict["forearm_roll_joint"]
self.wrist_flex_joint = self.jdict["wrist_flex_joint"]
self.wrist_roll_joint = self.jdict["wrist_roll_joint"]
self.target_pos = np.concatenate([
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1)
])
# make length of vector between min and max_target_placement_radius
self.target_pos = self.target_pos \
/ np.linalg.norm(self.target_pos) \
* self.np_random.uniform(low=self.min_target_placement_radius,
high=self.max_target_placement_radius, size=1)
self.object_pos = np.concatenate([
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1)
])
# make length of vector between min and max_object_to_target_distance
self.object_pos = self.object_pos \
/ np.linalg.norm(self.object_pos - self.target_pos) \
* self.np_random.uniform(low=self.min_object_to_target_distance,
high=self.max_object_to_target_distance, size=1)
# set position of objects
self.zero_offset = np.array([0.45, 0.55])
self.jdict["target_x"].reset_current_position(self.target_pos[0] - self.zero_offset[0], 0)
self.jdict["target_y"].reset_current_position(self.target_pos[1] - self.zero_offset[1], 0)
self.jdict["object_x"].reset_current_position(self.object_pos[0] - self.zero_offset[0], 0)
self.jdict["object_y"].reset_current_position(self.object_pos[1] - self.zero_offset[1], 0)
# randomize all joints TODO: Will this work or do we have to constrain this resetting in some way?
self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
def apply_action(self, a):
assert (np.isfinite(a).all())
self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))
def calc_state(self):
self.to_target_vec = self.target_pos - self.object_pos
return np.concatenate([
np.array([j.current_position() for j in self.ordered_joints]).flatten(), # all positions
np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(), # all speeds
self.to_target_vec,
self.fingertip.pose().xyz(),
self.object.pose().xyz(),
self.target.pose().xyz(),
])
class Striker(MJCFBasedRobot):
min_target_placement_radius = 0.1
max_target_placement_radius = 0.8
min_object_placement_radius = 0.1
max_object_placement_radius = 0.8
def __init__(self):
MJCFBasedRobot.__init__(self, 'lstriker.xml', 'body0', action_dim=7, obs_dim=55)
def robot_specific_reset(self):
# parts
self.fingertip = self.parts["fingertip"]
self.target = self.parts["target"]
self.object = self.parts["object"]
# joints
self.shoulder_pan_joint = self.jdict["shoulder_pan_joint"]
self.shoulder_lift_joint = self.jdict["shoulder_lift_joint"]
self.upper_arm_roll_joint = self.jdict["upper_arm_roll_joint"]
self.elbow_flex_joint = self.jdict["elbow_flex_joint"]
self.forearm_roll_joint = self.jdict["forearm_roll_joint"]
self.wrist_flex_joint = self.jdict["wrist_flex_joint"]
self.wrist_roll_joint = self.jdict["wrist_roll_joint"]
self._min_strike_dist = np.inf
self._striked = False
self._strike_pos = None
# reset position and speed of manipulator
# TODO: Will this work or do we have to constrain this resetting in some way?
self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.zero_offset = np.array([0.45, 0.55, 0])
self.object_pos = np.concatenate([
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1)
])
# make length of vector between min and max_object_placement_radius
self.object_pos = self.object_pos \
/ np.linalg.norm(self.object_pos) \
* self.np_random.uniform(low=self.min_object_placement_radius,
high=self.max_object_placement_radius, size=1)
# reset object position
self.jdict["object_x"].reset_current_position(self.object_pos[0] - self.zero_offset[0], 0)
self.jdict["object_y"].reset_current_position(self.object_pos[1] - self.zero_offset[1], 0)
self.target_pos = np.concatenate([
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1)
])
# make length of vector between min and max_target_placement_radius
self.target_pos = self.target_pos \
/ np.linalg.norm(self.target_pos) \
* self.np_random.uniform(low=self.min_target_placement_radius,
high=self.max_target_placement_radius, size=1)
self.target.reset_pose(self.target_pos - self.zero_offset, np.array([0, 0, 0, 1]))
def apply_action(self, a):
assert (np.isfinite(a).all())
self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))
def calc_state(self):
self.to_target_vec = self.target_pos - self.object_pos
return np.concatenate([
np.array([j.current_position() for j in self.ordered_joints]).flatten(), # all positions
np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(), # all speeds
self.to_target_vec,
self.fingertip.pose().xyz(),
self.object.pose().xyz(),
self.target.pose().xyz(),
])
class Thrower(MJCFBasedRobot):
min_target_placement_radius = 0.1
max_target_placement_radius = 0.8
min_object_placement_radius = 0.1
max_object_placement_radius = 0.8
def __init__(self):
MJCFBasedRobot.__init__(self, 'thrower.xml', 'body0', action_dim=7, obs_dim=48)
def robot_specific_reset(self):
# parts
self.fingertip = self.parts["fingertip"]
self.target = self.parts["target"]
self.object = self.parts["object"]
# joints
self.shoulder_pan_joint = self.jdict["shoulder_pan_joint"]
self.shoulder_lift_joint = self.jdict["shoulder_lift_joint"]
self.upper_arm_roll_joint = self.jdict["upper_arm_roll_joint"]
self.elbow_flex_joint = self.jdict["elbow_flex_joint"]
self.forearm_roll_joint = self.jdict["forearm_roll_joint"]
self.wrist_flex_joint = self.jdict["wrist_flex_joint"]
self.wrist_roll_joint = self.jdict["wrist_roll_joint"]
self._object_hit_ground = False
self._object_hit_location = None
# reset position and speed of manipulator
# TODO: Will this work or do we have to constrain this resetting in some way?
self.shoulder_pan_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.shoulder_lift_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.elbow_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.upper_arm_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.wrist_flex_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.wrist_roll_joint.reset_current_position(self.np_random.uniform(low=-3.14, high=3.14), 0)
self.zero_offset = np.array([0.45, 0.55, 0])
self.object_pos = np.concatenate([
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1)
])
# make length of vector between min and max_object_placement_radius
self.object_pos = self.object_pos \
/ np.linalg.norm(self.object_pos) \
* self.np_random.uniform(low=self.min_object_placement_radius,
high=self.max_object_placement_radius, size=1)
# reset object position
self.parts["object"].reset_pose(self.object_pos - self.zero_offset, np.array([0, 0, 0, 1]))
self.target_pos = np.concatenate([
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1),
self.np_random.uniform(low=-1, high=1, size=1)
])
# make length of vector between min and max_target_placement_radius
self.target_pos = self.target_pos \
/ np.linalg.norm(self.target_pos) \
* self.np_random.uniform(low=self.min_target_placement_radius,
high=self.max_target_placement_radius, size=1)
self.parts["target"].reset_pose(self.target_pos - self.zero_offset, np.array([0, 0, 0, 1]))
def apply_action(self, a):
assert (np.isfinite(a).all())
self.shoulder_pan_joint.set_motor_torque(0.05 * float(np.clip(a[0], -1, +1)))
self.shoulder_lift_joint.set_motor_torque(0.05 * float(np.clip(a[1], -1, +1)))
self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[2], -1, +1)))
self.elbow_flex_joint.set_motor_torque(0.05 * float(np.clip(a[3], -1, +1)))
self.upper_arm_roll_joint.set_motor_torque(0.05 * float(np.clip(a[4], -1, +1)))
self.wrist_flex_joint.set_motor_torque(0.05 * float(np.clip(a[5], -1, +1)))
self.wrist_roll_joint.set_motor_torque(0.05 * float(np.clip(a[6], -1, +1)))
def calc_state(self):
self.to_target_vec = self.target_pos - self.object_pos
return np.concatenate([
np.array([j.current_position() for j in self.ordered_joints]).flatten(), # all positions
np.array([j.current_relative_position() for j in self.ordered_joints]).flatten(), # all speeds
self.to_target_vec,
self.fingertip.pose().xyz(),
self.object.pose().xyz(),
self.target.pose().xyz(),
])