mirror of
https://github.com/bulletphysics/bullet3
synced 2025-01-18 21:10:05 +00:00
add minitaur environment randomizer, enable it by default
This improves PPO training (more stable).
This commit is contained in:
parent
12f28c5f76
commit
c37919604f
@ -0,0 +1,25 @@
|
||||
"""Abstract base class for environment randomizer."""
|
||||
|
||||
import abc
|
||||
|
||||
|
||||
class EnvRandomizerBase(object, metaclass=abc.ABCMeta):
  """Abstract base class for environment randomizer.

  An EnvRandomizer is called in environment.reset(). It will
  randomize physical parameters of the objects in the simulation.
  The physical parameters will be fixed for that episode and be
  randomized again in the next environment.reset().
  """

  # NOTE: the original `__metaclass__ = abc.ABCMeta` class attribute is a
  # Python 2 idiom that Python 3 silently ignores, which left the abstract
  # method unenforced; the `metaclass=` keyword above restores enforcement.

  @abc.abstractmethod
  def randomize_env(self, env):
    """Randomize the simulated_objects in the environment.

    Args:
      env: The environment to be randomized.
    """
    pass
|
||||
|
@ -0,0 +1,68 @@
|
||||
"""Randomize the minitaur_gym_env when reset() is called."""
|
||||
import random
|
||||
import numpy as np
|
||||
from . import env_randomizer_base
|
||||
|
||||
# Relative range.
|
||||
MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
|
||||
MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
|
||||
# Absolute range.
|
||||
BATTERY_VOLTAGE_RANGE = (14.8, 16.8) # Unit: Volt
|
||||
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01) # Unit: N*m*s/rad (torque/angular vel)
|
||||
MINITAUR_LEG_FRICTION = (0.8, 1.5) # Unit: dimensionless
|
||||
|
||||
|
||||
class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
  """A randomizer that change the minitaur_gym_env during every reset."""

  def __init__(self,
               minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
               minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
               battery_voltage_range=BATTERY_VOLTAGE_RANGE,
               motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE,
               minitaur_leg_friction_range=MINITAUR_LEG_FRICTION):
    """Initializes the randomizer with the ranges used at each reset.

    Args:
      minitaur_base_mass_err_range: Relative (min, max) error applied to the
        base mass from the URDF, e.g. (-0.2, 0.2) means +/-20%.
      minitaur_leg_mass_err_range: Relative (min, max) error applied to each
        leg mass from the URDF.
      battery_voltage_range: Absolute (min, max) battery voltage in Volts.
      motor_viscous_damping_range: Absolute (min, max) motor viscous damping
        in N*m*s/rad.
      minitaur_leg_friction_range: Absolute (min, max) foot friction
        coefficient (dimensionless).
    """
    self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
    self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
    self._battery_voltage_range = battery_voltage_range
    self._motor_viscous_damping_range = motor_viscous_damping_range
    self._minitaur_leg_friction_range = minitaur_leg_friction_range

  def randomize_env(self, env):
    """Randomizes the minitaur inside the environment; called on reset().

    Args:
      env: The minitaur_gym_env environment holding a `minitaur` attribute.
    """
    self._randomize_minitaur(env.minitaur)

  def _randomize_minitaur(self, minitaur):
    """Randomize various physical properties of minitaur.

    It randomizes the mass/inertia of the base, mass/inertia of the legs,
    friction coefficient of the feet, the battery voltage and the motor damping
    at each reset() of the environment.

    Args:
      minitaur: the Minitaur instance in minitaur_gym_env environment.
    """
    base_mass = minitaur.GetBaseMassFromURDF()
    randomized_base_mass = random.uniform(
        base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
        base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
    minitaur.SetBaseMass(randomized_base_mass)

    # Each leg mass is perturbed independently within the relative range.
    leg_masses = minitaur.GetLegMassesFromURDF()
    leg_masses_lower_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[0])
    leg_masses_upper_bound = np.array(leg_masses) * (
        1.0 + self._minitaur_leg_mass_err_range[1])
    randomized_leg_masses = [
        np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
        for i in range(len(leg_masses))
    ]
    minitaur.SetLegMasses(randomized_leg_masses)

    # BUG FIX: sample from the ranges configured in __init__ instead of the
    # module-level constants, so constructor arguments are no longer silently
    # ignored.
    randomized_battery_voltage = random.uniform(
        self._battery_voltage_range[0], self._battery_voltage_range[1])
    minitaur.SetBatteryVoltage(randomized_battery_voltage)

    randomized_motor_damping = random.uniform(
        self._motor_viscous_damping_range[0],
        self._motor_viscous_damping_range[1])
    minitaur.SetMotorViscousDamping(randomized_motor_damping)

    randomized_foot_friction = random.uniform(
        self._minitaur_leg_friction_range[0],
        self._minitaur_leg_friction_range[1])
    minitaur.SetFootFriction(randomized_foot_friction)
|
@ -19,6 +19,7 @@ from . import bullet_client
|
||||
from . import minitaur
|
||||
import os
|
||||
import pybullet_data
|
||||
from . import minitaur_env_randomizer
|
||||
|
||||
NUM_SUBSTEPS = 5
|
||||
NUM_MOTORS = 8
|
||||
@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
|
||||
on_rack=False,
|
||||
render=False,
|
||||
kd_for_pd_controllers=0.3,
|
||||
env_randomizer=None):
|
||||
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
|
||||
"""Initialize the minitaur gym environment.
|
||||
|
||||
Args:
|
||||
|
@ -0,0 +1,167 @@
|
||||
r"""An example to run of the minitaur gym environment with sine gaits.
|
||||
|
||||
blaze run -c opt //robotics/reinforcement_learning/minitaur/\
|
||||
envs:minitaur_gym_env_example
|
||||
"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from pybullet_envs.bullet import minitaur_gym_env
|
||||
import argparse
|
||||
from pybullet_envs.bullet import minitaur_env_randomizer
|
||||
|
||||
def ResetPoseExample():
  """An example that the minitaur stands still using the reset pose."""
  num_steps = 1000
  env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  env = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      accurate_motor_model_enabled=True,
      motor_overheat_protection=True,
      env_randomizer=env_randomizer,
      hard_reset=False)
  # Holding every one of the eight motors at pi/2 keeps the reset pose.
  hold_pose = [math.pi / 2] * 8
  step_counter = 0
  while step_counter < num_steps:
    _, _, episode_over, _ = env.step(hold_pose)
    step_counter += 1
    if episode_over:
      break
  env.reset()
||||
|
||||
def MotorOverheatExample():
  """An example of minitaur motor overheat protection is triggered.

  The minitaur is leaning forward and the motors are getting above threshold
  torques. The overheat protection will be triggered in ~1 sec.
  """
  env = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.00,
      on_rack=False)

  # Build a leaning pose: offset each motor by +/-0.5 around 2.0 depending on
  # its pair parity and which side of the body it is on.
  pose = [2.0] * 8
  for i in range(8):
    pair_sign = -1 if i % 2 == 0 else 1
    side_sign = -1 if i < 4 else 1
    pose[i] = 2.0 - 0.5 * pair_sign * side_sign

  num_steps = 500
  # Matches the internal timestep.
  time_step = 0.01
  log_rows = []
  for step_index in range(num_steps):
    t = step_index * time_step
    row = [t]
    row.extend(pose)
    observation, _, _, _ = env.step(pose)
    row.extend(observation.tolist())
    log_rows.append(row)
  env.reset()
|
||||
|
||||
def SineStandExample():
  """An example of minitaur standing and squatting on the floor.

  To validate the accurate motor model we command the robot and sit and stand up
  periodically in both simulation and experiment. We compare the measured motor
  trajectories, torques and gains.
  """
  env = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      leg_model_enabled=False,
      motor_velocity_limit=np.inf,
      motor_overheat_protection=True,
      accurate_motor_model_enabled=True,
      motor_kp=1.20,
      motor_kd=0.02,
      on_rack=False)
  num_steps = 1000
  sine_amplitude = 0.5
  sine_speed = 3
  # Matches the internal timestep.
  time_step = 0.01

  log_rows = []
  for step_index in range(num_steps):
    t = step_index * time_step
    row = [t]
    # All eight motors follow the same sine wave centered at pi/2.
    motor_angle = math.sin(sine_speed * t) * sine_amplitude + math.pi / 2
    action = [motor_angle] * 8
    row.extend(action)
    observation, _, _, _ = env.step(action)
    row.extend(observation.tolist())
    log_rows.append(row)

  env.reset()
|
||||
|
||||
|
||||
def SinePolicyExample():
  """An example of minitaur walking with a sine gait."""
  randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
  env = minitaur_gym_env.MinitaurBulletEnv(
      render=True,
      motor_velocity_limit=np.inf,
      pd_control_enabled=True,
      hard_reset=False,
      env_randomizer=randomizer,
      on_rack=False)
  total_reward = 0
  num_steps = 20000
  front_amplitude_bound = 0.5
  back_amplitude_bound = 0.5
  gait_speed = 40
  time_step = 0.01

  for step_index in range(num_steps):
    t = step_index * time_step

    front_amplitude = front_amplitude_bound
    back_amplitude = back_amplitude_bound
    # Steer one way for the first 10 seconds, the other way for the next 10,
    # then walk straight.
    if t < 10:
      steering = 0.5
    elif t < 20:
      steering = -0.5
    else:
      steering = 0

    # Applying asymmetrical sine gaits to different legs can steer the minitaur.
    front_left = math.sin(t * gait_speed) * (front_amplitude + steering)
    front_right = math.sin(t * gait_speed + math.pi) * (
        front_amplitude - steering)
    back_left = math.sin(t * gait_speed) * back_amplitude
    back_right = math.sin(t * gait_speed + math.pi) * back_amplitude
    action = [front_left, front_right, front_right, front_left,
              back_left, back_right, back_right, back_left]
    _, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
      break
  env.reset()
|
||||
|
||||
|
||||
def main():
  """Parses --env and runs the selected minitaur example."""
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--env',
      help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',
      type=int,
      default=0)
  args = parser.parse_args()
  print("--env=" + str(args.env))

  # Dispatch table keyed by the --env id; an unknown id runs nothing, which
  # matches the behavior of the original if-chain.
  examples = {
      0: SinePolicyExample,
      1: SineStandExample,
      2: ResetPoseExample,
      3: MotorOverheatExample,
  }
  example = examples.get(args.env)
  if example is not None:
    example()


if __name__ == '__main__':
  main()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user