add minitaur environment randomizer, enable it by default

this improves PPO training ( more stable)
This commit is contained in:
Erwin Coumans 2017-09-27 09:07:21 -07:00
parent 12f28c5f76
commit c37919604f
4 changed files with 262 additions and 1 deletions

View File

@ -0,0 +1,25 @@
"""Abstract base class for environment randomizer."""
import abc
class EnvRandomizerBase(object):
"""Abstract base class for environment randomizer.
An EnvRandomizer is called in environment.reset(). It will
randomize physical parameters of the objects in the simulation.
The physical parameters will be fixed for that episode and be
randomized again in the next environment.reset().
"""
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def randomize_env(self, env):
"""Randomize the simulated_objects in the environment.
Args:
env: The environment to be randomized.
"""
pass

View File

@ -0,0 +1,68 @@
"""Randomize the minitaur_gym_env when reset() is called."""
import random
import numpy as np
from . import env_randomizer_base
# Relative range.
MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20%
# Absolute range.
BATTERY_VOLTAGE_RANGE = (14.8, 16.8) # Unit: Volt
MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01) # Unit: N*m*s/rad (torque/angular vel)
MINITAUR_LEG_FRICTION = (0.8, 1.5) # Unit: dimensionless
class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase):
"""A randomizer that change the minitaur_gym_env during every reset."""
def __init__(self,
minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE,
minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE,
battery_voltage_range=BATTERY_VOLTAGE_RANGE,
motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE):
self._minitaur_base_mass_err_range = minitaur_base_mass_err_range
self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range
self._battery_voltage_range = battery_voltage_range
self._motor_viscous_damping_range = motor_viscous_damping_range
def randomize_env(self, env):
self._randomize_minitaur(env.minitaur)
def _randomize_minitaur(self, minitaur):
"""Randomize various physical properties of minitaur.
It randomizes the mass/inertia of the base, mass/inertia of the legs,
friction coefficient of the feet, the battery voltage and the motor damping
at each reset() of the environment.
Args:
minitaur: the Minitaur instance in minitaur_gym_env environment.
"""
base_mass = minitaur.GetBaseMassFromURDF()
randomized_base_mass = random.uniform(
base_mass * (1.0 + self._minitaur_base_mass_err_range[0]),
base_mass * (1.0 + self._minitaur_base_mass_err_range[1]))
minitaur.SetBaseMass(randomized_base_mass)
leg_masses = minitaur.GetLegMassesFromURDF()
leg_masses_lower_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[0])
leg_masses_upper_bound = np.array(leg_masses) * (
1.0 + self._minitaur_leg_mass_err_range[1])
randomized_leg_masses = [
np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i])
for i in range(len(leg_masses))
]
minitaur.SetLegMasses(randomized_leg_masses)
randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0],
BATTERY_VOLTAGE_RANGE[1])
minitaur.SetBatteryVoltage(randomized_battery_voltage)
randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0],
MOTOR_VISCOUS_DAMPING_RANGE[1])
minitaur.SetMotorViscousDamping(randomized_motor_damping)
randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0],
MINITAUR_LEG_FRICTION[1])
minitaur.SetFootFriction(randomized_foot_friction)

View File

@ -19,6 +19,7 @@ from . import bullet_client
from . import minitaur
import os
import pybullet_data
from . import minitaur_env_randomizer
NUM_SUBSTEPS = 5
NUM_MOTORS = 8
@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env):
on_rack=False,
render=False,
kd_for_pd_controllers=0.3,
env_randomizer=None):
env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
"""Initialize the minitaur gym environment.
Args:

View File

@ -0,0 +1,167 @@
r"""An example to run of the minitaur gym environment with sine gaits.
blaze run -c opt //robotics/reinforcement_learning/minitaur/\
envs:minitaur_gym_env_example
"""
import math
import numpy as np
from pybullet_envs.bullet import minitaur_gym_env
import argparse
from pybullet_envs.bullet import minitaur_env_randomizer
def ResetPoseExample():
"""An example that the minitaur stands still using the reset pose."""
steps = 1000
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
environment = minitaur_gym_env.MinitaurBulletEnv(
render=True,
leg_model_enabled=False,
motor_velocity_limit=np.inf,
pd_control_enabled=True,
accurate_motor_model_enabled=True,
motor_overheat_protection=True,
env_randomizer = randomizer,
hard_reset=False)
action = [math.pi / 2] * 8
for _ in range(steps):
_, _, done, _ = environment.step(action)
if done:
break
environment.reset()
def MotorOverheatExample():
"""An example of minitaur motor overheat protection is triggered.
The minitaur is leaning forward and the motors are getting obove threshold
torques. The overheat protection will be triggered in ~1 sec.
"""
environment = minitaur_gym_env.MinitaurBulletEnv(
render=True,
leg_model_enabled=False,
motor_velocity_limit=np.inf,
motor_overheat_protection=True,
accurate_motor_model_enabled=True,
motor_kp=1.20,
motor_kd=0.00,
on_rack=False)
action = [2.0] * 8
for i in range(8):
action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1)
steps = 500
actions_and_observations = []
for step_counter in range(steps):
# Matches the internal timestep.
time_step = 0.01
t = step_counter * time_step
current_row = [t]
current_row.extend(action)
observation, _, _, _ = environment.step(action)
current_row.extend(observation.tolist())
actions_and_observations.append(current_row)
environment.reset()
def SineStandExample():
"""An example of minitaur standing and squatting on the floor.
To validate the accurate motor model we command the robot and sit and stand up
periodically in both simulation and experiment. We compare the measured motor
trajectories, torques and gains.
"""
environment = minitaur_gym_env.MinitaurBulletEnv(
render=True,
leg_model_enabled=False,
motor_velocity_limit=np.inf,
motor_overheat_protection=True,
accurate_motor_model_enabled=True,
motor_kp=1.20,
motor_kd=0.02,
on_rack=False)
steps = 1000
amplitude = 0.5
speed = 3
actions_and_observations = []
for step_counter in range(steps):
# Matches the internal timestep.
time_step = 0.01
t = step_counter * time_step
current_row = [t]
action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
current_row.extend(action)
observation, _, _, _ = environment.step(action)
current_row.extend(observation.tolist())
actions_and_observations.append(current_row)
environment.reset()
def SinePolicyExample():
"""An example of minitaur walking with a sine gait."""
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
environment = minitaur_gym_env.MinitaurBulletEnv(
render=True,
motor_velocity_limit=np.inf,
pd_control_enabled=True,
hard_reset=False,
env_randomizer = randomizer,
on_rack=False)
sum_reward = 0
steps = 20000
amplitude_1_bound = 0.5
amplitude_2_bound = 0.5
speed = 40
for step_counter in range(steps):
time_step = 0.01
t = step_counter * time_step
amplitude1 = amplitude_1_bound
amplitude2 = amplitude_2_bound
steering_amplitude = 0
if t < 10:
steering_amplitude = 0.5
elif t < 20:
steering_amplitude = -0.5
else:
steering_amplitude = 0
# Applying asymmetrical sine gaits to different legs can steer the minitaur.
a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
a3 = math.sin(t * speed) * amplitude2
a4 = math.sin(t * speed + math.pi) * amplitude2
action = [a1, a2, a2, a1, a3, a4, a4, a3]
_, reward, done, _ = environment.step(action)
sum_reward += reward
if done:
break
environment.reset()
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--env', help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',type=int, default=0)
args = parser.parse_args()
print("--env=" + str(args.env))
if (args.env == 0):
SinePolicyExample()
if (args.env == 1):
SineStandExample()
if (args.env == 2):
ResetPoseExample()
if (args.env == 3):
MotorOverheatExample()
if __name__ == '__main__':
main()