From c37919604fba7413c6a89e92b4641fb8572160c7 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 27 Sep 2017 09:07:21 -0700 Subject: [PATCH] add minitaur environment randomizer, enable it by default this improves PPO training ( more stable) --- .../bullet/env_randomizer_base.py | 25 +++ .../bullet/minitaur_env_randomizer.py | 68 +++++++ .../pybullet_envs/bullet/minitaur_gym_env.py | 3 +- .../examples/minitaur_gym_env_example.py | 167 ++++++++++++++++++ 4 files changed, 262 insertions(+), 1 deletion(-) create mode 100644 examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py create mode 100644 examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py create mode 100644 examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py diff --git a/examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py b/examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py new file mode 100644 index 000000000..c39f3b888 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/bullet/env_randomizer_base.py @@ -0,0 +1,25 @@ +"""Abstract base class for environment randomizer.""" + +import abc + + +class EnvRandomizerBase(object): + """Abstract base class for environment randomizer. + + An EnvRandomizer is called in environment.reset(). It will + randomize physical parameters of the objects in the simulation. + The physical parameters will be fixed for that episode and be + randomized again in the next environment.reset(). + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def randomize_env(self, env): + """Randomize the simulated_objects in the environment. + + Args: + env: The environment to be randomized. + """ + pass + diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py new file mode 100644 index 000000000..db139f3b0 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_env_randomizer.py @@ -0,0 +1,68 @@ +"""Randomize the minitaur_gym_env when reset() is called.""" +import random +import numpy as np +from . import env_randomizer_base + +# Relative range. +MINITAUR_BASE_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20% +MINITAUR_LEG_MASS_ERROR_RANGE = (-0.2, 0.2) # 0.2 means 20% +# Absolute range. +BATTERY_VOLTAGE_RANGE = (14.8, 16.8) # Unit: Volt +MOTOR_VISCOUS_DAMPING_RANGE = (0, 0.01) # Unit: N*m*s/rad (torque/angular vel) +MINITAUR_LEG_FRICTION = (0.8, 1.5) # Unit: dimensionless + + +class MinitaurEnvRandomizer(env_randomizer_base.EnvRandomizerBase): + """A randomizer that change the minitaur_gym_env during every reset.""" + + def __init__(self, + minitaur_base_mass_err_range=MINITAUR_BASE_MASS_ERROR_RANGE, + minitaur_leg_mass_err_range=MINITAUR_LEG_MASS_ERROR_RANGE, + battery_voltage_range=BATTERY_VOLTAGE_RANGE, + motor_viscous_damping_range=MOTOR_VISCOUS_DAMPING_RANGE): + self._minitaur_base_mass_err_range = minitaur_base_mass_err_range + self._minitaur_leg_mass_err_range = minitaur_leg_mass_err_range + self._battery_voltage_range = battery_voltage_range + self._motor_viscous_damping_range = motor_viscous_damping_range + + def randomize_env(self, env): + self._randomize_minitaur(env.minitaur) + + def _randomize_minitaur(self, minitaur): + """Randomize various physical properties of minitaur. + + It randomizes the mass/inertia of the base, mass/inertia of the legs, + friction coefficient of the feet, the battery voltage and the motor damping + at each reset() of the environment. + + Args: + minitaur: the Minitaur instance in minitaur_gym_env environment. + """ + base_mass = minitaur.GetBaseMassFromURDF() + randomized_base_mass = random.uniform( + base_mass * (1.0 + self._minitaur_base_mass_err_range[0]), + base_mass * (1.0 + self._minitaur_base_mass_err_range[1])) + minitaur.SetBaseMass(randomized_base_mass) + + leg_masses = minitaur.GetLegMassesFromURDF() + leg_masses_lower_bound = np.array(leg_masses) * ( + 1.0 + self._minitaur_leg_mass_err_range[0]) + leg_masses_upper_bound = np.array(leg_masses) * ( + 1.0 + self._minitaur_leg_mass_err_range[1]) + randomized_leg_masses = [ + np.random.uniform(leg_masses_lower_bound[i], leg_masses_upper_bound[i]) + for i in range(len(leg_masses)) + ] + minitaur.SetLegMasses(randomized_leg_masses) + + randomized_battery_voltage = random.uniform(BATTERY_VOLTAGE_RANGE[0], + BATTERY_VOLTAGE_RANGE[1]) + minitaur.SetBatteryVoltage(randomized_battery_voltage) + + randomized_motor_damping = random.uniform(MOTOR_VISCOUS_DAMPING_RANGE[0], + MOTOR_VISCOUS_DAMPING_RANGE[1]) + minitaur.SetMotorViscousDamping(randomized_motor_damping) + + randomized_foot_friction = random.uniform(MINITAUR_LEG_FRICTION[0], + MINITAUR_LEG_FRICTION[1]) + minitaur.SetFootFriction(randomized_foot_friction) diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py index 6e7872a61..38ec631a5 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py @@ -19,6 +19,7 @@ from . import bullet_client from . import minitaur import os import pybullet_data +from . import minitaur_env_randomizer NUM_SUBSTEPS = 5 NUM_MOTORS = 8 @@ -68,7 +69,7 @@ class MinitaurBulletEnv(gym.Env): on_rack=False, render=False, kd_for_pd_controllers=0.3, - env_randomizer=None): + env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()): """Initialize the minitaur gym environment. Args: diff --git a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py new file mode 100644 index 000000000..b71ee5fc6 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py @@ -0,0 +1,167 @@ +r"""An example to run of the minitaur gym environment with sine gaits. + +blaze run -c opt //robotics/reinforcement_learning/minitaur/\ +envs:minitaur_gym_env_example +""" + +import math +import numpy as np +from pybullet_envs.bullet import minitaur_gym_env +import argparse +from pybullet_envs.bullet import minitaur_env_randomizer + +def ResetPoseExample(): + """An example that the minitaur stands still using the reset pose.""" + steps = 1000 + randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer()) + environment = minitaur_gym_env.MinitaurBulletEnv( + render=True, + leg_model_enabled=False, + motor_velocity_limit=np.inf, + pd_control_enabled=True, + accurate_motor_model_enabled=True, + motor_overheat_protection=True, + env_randomizer = randomizer, + hard_reset=False) + action = [math.pi / 2] * 8 + for _ in range(steps): + _, _, done, _ = environment.step(action) + if done: + break + environment.reset() + +def MotorOverheatExample(): + """An example of minitaur motor overheat protection is triggered. + + The minitaur is leaning forward and the motors are getting obove threshold + torques. The overheat protection will be triggered in ~1 sec. + """ + + environment = minitaur_gym_env.MinitaurBulletEnv( + render=True, + leg_model_enabled=False, + motor_velocity_limit=np.inf, + motor_overheat_protection=True, + accurate_motor_model_enabled=True, + motor_kp=1.20, + motor_kd=0.00, + on_rack=False) + + action = [2.0] * 8 + for i in range(8): + action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1) + + steps = 500 + actions_and_observations = [] + for step_counter in range(steps): + # Matches the internal timestep. + time_step = 0.01 + t = step_counter * time_step + current_row = [t] + current_row.extend(action) + + observation, _, _, _ = environment.step(action) + current_row.extend(observation.tolist()) + actions_and_observations.append(current_row) + environment.reset() + +def SineStandExample(): + """An example of minitaur standing and squatting on the floor. + + To validate the accurate motor model we command the robot and sit and stand up + periodically in both simulation and experiment. We compare the measured motor + trajectories, torques and gains. + """ + environment = minitaur_gym_env.MinitaurBulletEnv( + render=True, + leg_model_enabled=False, + motor_velocity_limit=np.inf, + motor_overheat_protection=True, + accurate_motor_model_enabled=True, + motor_kp=1.20, + motor_kd=0.02, + on_rack=False) + steps = 1000 + amplitude = 0.5 + speed = 3 + + actions_and_observations = [] + + for step_counter in range(steps): + # Matches the internal timestep. + time_step = 0.01 + t = step_counter * time_step + current_row = [t] + + action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8 + current_row.extend(action) + + observation, _, _, _ = environment.step(action) + current_row.extend(observation.tolist()) + actions_and_observations.append(current_row) + + environment.reset() + + +def SinePolicyExample(): + """An example of minitaur walking with a sine gait.""" + randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer()) + environment = minitaur_gym_env.MinitaurBulletEnv( + render=True, + motor_velocity_limit=np.inf, + pd_control_enabled=True, + hard_reset=False, + env_randomizer = randomizer, + on_rack=False) + sum_reward = 0 + steps = 20000 + amplitude_1_bound = 0.5 + amplitude_2_bound = 0.5 + speed = 40 + + for step_counter in range(steps): + time_step = 0.01 + t = step_counter * time_step + + amplitude1 = amplitude_1_bound + amplitude2 = amplitude_2_bound + steering_amplitude = 0 + if t < 10: + steering_amplitude = 0.5 + elif t < 20: + steering_amplitude = -0.5 + else: + steering_amplitude = 0 + + # Applying asymmetrical sine gaits to different legs can steer the minitaur. + a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude) + a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude) + a3 = math.sin(t * speed) * amplitude2 + a4 = math.sin(t * speed + math.pi) * amplitude2 + action = [a1, a2, a2, a1, a3, a4, a4, a3] + _, reward, done, _ = environment.step(action) + sum_reward += reward + if done: + break + environment.reset() + + +def main(): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--env', help='environment ID (0==sine, 1==stand, 2=reset, 3=overheat)',type=int, default=0) + args = parser.parse_args() + print("--env=" + str(args.env)) + + if (args.env == 0): + SinePolicyExample() + if (args.env == 1): + SineStandExample() + if (args.env == 2): + ResetPoseExample() + if (args.env == 3): + MotorOverheatExample() + +if __name__ == '__main__': + main() + +