mirror of
https://github.com/bulletphysics/bullet3
synced 2024-12-13 21:30:09 +00:00
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env.
This commit is contained in:
parent
9f20e40541
commit
3191291748
@ -1,71 +1,29 @@
|
||||
from scene_abstract import SingleRobotEmptyScene
|
||||
from env_bases import MujocoXmlBaseBulletEnv
|
||||
from robot_pendula import InvertedPendulum, InvertedPendulumSwingup, InvertedDoublePendulum
|
||||
import gym, gym.spaces, gym.utils, gym.utils.seeding
|
||||
import numpy as np
|
||||
import os, sys
|
||||
|
||||
class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
|
||||
swingup = False
|
||||
force_gain = 12 # TODO: Try to find out why we need to scale the force
|
||||
|
||||
def __init__(self):
|
||||
MujocoXmlBaseBulletEnv.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)
|
||||
self.robot = InvertedPendulum()
|
||||
MujocoXmlBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
def create_single_player_scene(self):
|
||||
return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)
|
||||
|
||||
def robot_specific_reset(self):
|
||||
self.pole = self.parts["pole"]
|
||||
self.slider = self.jdict["slider"]
|
||||
self.j1 = self.jdict["hinge"]
|
||||
u = self.np_random.uniform(low=-.1, high=.1)
|
||||
self.j1.reset_current_position( u if not self.swingup else 3.1415+u , 0)
|
||||
self.j1.set_motor_torque(0)
|
||||
|
||||
def apply_action(self, a):
|
||||
#assert( np.isfinite(a).all() )
|
||||
if not np.isfinite(a).all():
|
||||
print("a is inf")
|
||||
a[0] = 0
|
||||
self.slider.set_motor_torque( self.force_gain * 100*float(np.clip(a[0], -1, +1)) )
|
||||
|
||||
def calc_state(self):
|
||||
self.theta, theta_dot = self.j1.current_position()
|
||||
x, vx = self.slider.current_position()
|
||||
#assert( np.isfinite(x) )
|
||||
|
||||
if not np.isfinite(x):
|
||||
print("x is inf")
|
||||
x = 0
|
||||
|
||||
if not np.isfinite(vx):
|
||||
print("vx is inf")
|
||||
vx = 0
|
||||
|
||||
if not np.isfinite(self.theta):
|
||||
print("theta is inf")
|
||||
self.theta = 0
|
||||
|
||||
if not np.isfinite(theta_dot):
|
||||
print("theta_dot is inf")
|
||||
theta_dot = 0
|
||||
|
||||
return np.array([
|
||||
x, vx,
|
||||
np.cos(self.theta), np.sin(self.theta), theta_dot
|
||||
])
|
||||
|
||||
def _step(self, a):
|
||||
self.apply_action(a)
|
||||
self.robot.apply_action(a)
|
||||
self.scene.global_step()
|
||||
state = self.calc_state() # sets self.pos_x self.pos_y
|
||||
state = self.robot.calc_state() # sets self.pos_x self.pos_y
|
||||
vel_penalty = 0
|
||||
if self.swingup:
|
||||
reward = np.cos(self.theta)
|
||||
if self.robot.swingup:
|
||||
reward = np.cos(self.robot.theta)
|
||||
done = False
|
||||
else:
|
||||
reward = 1.0
|
||||
done = np.abs(self.theta) > .2
|
||||
done = np.abs(self.robot.theta) > .2
|
||||
self.rewards = [float(reward)]
|
||||
self.HUD(state, a, done)
|
||||
return state, sum(self.rewards), done, {}
|
||||
@ -74,57 +32,30 @@ class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
|
||||
self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5)
|
||||
|
||||
class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):
|
||||
swingup = True
|
||||
force_gain = 2.2 # TODO: Try to find out why we need to scale the force
|
||||
def __init__(self):
|
||||
self.robot = InvertedPendulumSwingup()
|
||||
MujocoXmlBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
class InvertedDoublePendulumBulletEnv(MujocoXmlBaseBulletEnv):
|
||||
def __init__(self):
|
||||
MujocoXmlBaseBulletEnv.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)
|
||||
self.robot = InvertedDoublePendulum()
|
||||
MujocoXmlBaseBulletEnv.__init__(self, self.robot)
|
||||
|
||||
def create_single_player_scene(self):
|
||||
return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)
|
||||
|
||||
def robot_specific_reset(self):
|
||||
self.pole2 = self.parts["pole2"]
|
||||
self.slider = self.jdict["slider"]
|
||||
self.j1 = self.jdict["hinge"]
|
||||
self.j2 = self.jdict["hinge2"]
|
||||
u = self.np_random.uniform(low=-.1, high=.1, size=[2])
|
||||
self.j1.reset_current_position(float(u[0]), 0)
|
||||
self.j2.reset_current_position(float(u[1]), 0)
|
||||
self.j1.set_motor_torque(0)
|
||||
self.j2.set_motor_torque(0)
|
||||
|
||||
def apply_action(self, a):
|
||||
assert( np.isfinite(a).all() )
|
||||
force_gain = 0.78 # TODO: Try to find out why we need to scale the force
|
||||
self.slider.set_motor_torque( force_gain *200*float(np.clip(a[0], -1, +1)) )
|
||||
|
||||
def calc_state(self):
|
||||
theta, theta_dot = self.j1.current_position()
|
||||
gamma, gamma_dot = self.j2.current_position()
|
||||
x, vx = self.slider.current_position()
|
||||
self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
|
||||
assert( np.isfinite(x) )
|
||||
return np.array([
|
||||
x, vx,
|
||||
self.pos_x,
|
||||
np.cos(theta), np.sin(theta), theta_dot,
|
||||
np.cos(gamma), np.sin(gamma), gamma_dot,
|
||||
])
|
||||
|
||||
def _step(self, a):
|
||||
self.apply_action(a)
|
||||
self.robot.apply_action(a)
|
||||
self.scene.global_step()
|
||||
state = self.calc_state() # sets self.pos_x self.pos_y
|
||||
state = self.robot.calc_state() # sets self.pos_x self.pos_y
|
||||
# upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
|
||||
# using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
|
||||
dist_penalty = 0.01 * self.pos_x ** 2 + (self.pos_y + 0.3 - 2) ** 2
|
||||
dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2
|
||||
# v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
|
||||
#vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
|
||||
vel_penalty = 0
|
||||
alive_bonus = 10
|
||||
done = self.pos_y + 0.3 <= 1
|
||||
done = self.robot.pos_y + 0.3 <= 1
|
||||
self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
|
||||
self.HUD(state, a, done)
|
||||
return state, sum(self.rewards), done, {}
|
||||
|
87
examples/pybullet/gym/envs/robot_pendula.py
Normal file
87
examples/pybullet/gym/envs/robot_pendula.py
Normal file
@ -0,0 +1,87 @@
|
||||
from robot_bases import MujocoXmlBasedRobot
|
||||
import numpy as np
|
||||
|
||||
class InvertedPendulum(MujocoXmlBasedRobot):
|
||||
swingup = False
|
||||
force_gain = 12 # TODO: Try to find out why we need to scale the force
|
||||
def __init__(self):
|
||||
MujocoXmlBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)
|
||||
|
||||
def robot_specific_reset(self):
|
||||
self.pole = self.parts["pole"]
|
||||
self.slider = self.jdict["slider"]
|
||||
self.j1 = self.jdict["hinge"]
|
||||
u = self.np_random.uniform(low=-.1, high=.1)
|
||||
self.j1.reset_current_position( u if not self.swingup else 3.1415+u , 0)
|
||||
self.j1.set_motor_torque(0)
|
||||
|
||||
def apply_action(self, a):
|
||||
#assert( np.isfinite(a).all() )
|
||||
if not np.isfinite(a).all():
|
||||
print("a is inf")
|
||||
a[0] = 0
|
||||
self.slider.set_motor_torque( self.force_gain * 100*float(np.clip(a[0], -1, +1)) )
|
||||
|
||||
def calc_state(self):
|
||||
self.theta, theta_dot = self.j1.current_position()
|
||||
x, vx = self.slider.current_position()
|
||||
#assert( np.isfinite(x) )
|
||||
|
||||
if not np.isfinite(x):
|
||||
print("x is inf")
|
||||
x = 0
|
||||
|
||||
if not np.isfinite(vx):
|
||||
print("vx is inf")
|
||||
vx = 0
|
||||
|
||||
if not np.isfinite(self.theta):
|
||||
print("theta is inf")
|
||||
self.theta = 0
|
||||
|
||||
if not np.isfinite(theta_dot):
|
||||
print("theta_dot is inf")
|
||||
theta_dot = 0
|
||||
|
||||
return np.array([
|
||||
x, vx,
|
||||
np.cos(self.theta), np.sin(self.theta), theta_dot
|
||||
])
|
||||
|
||||
class InvertedPendulumSwingup(InvertedPendulum):
|
||||
swingup = True
|
||||
force_gain = 2.2 # TODO: Try to find out why we need to scale the force
|
||||
|
||||
|
||||
class InvertedDoublePendulum(MujocoXmlBasedRobot):
|
||||
def __init__(self):
|
||||
MujocoXmlBasedRobot.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)
|
||||
|
||||
def robot_specific_reset(self):
|
||||
self.pole2 = self.parts["pole2"]
|
||||
self.slider = self.jdict["slider"]
|
||||
self.j1 = self.jdict["hinge"]
|
||||
self.j2 = self.jdict["hinge2"]
|
||||
u = self.np_random.uniform(low=-.1, high=.1, size=[2])
|
||||
self.j1.reset_current_position(float(u[0]), 0)
|
||||
self.j2.reset_current_position(float(u[1]), 0)
|
||||
self.j1.set_motor_torque(0)
|
||||
self.j2.set_motor_torque(0)
|
||||
|
||||
def apply_action(self, a):
|
||||
assert( np.isfinite(a).all() )
|
||||
force_gain = 0.78 # TODO: Try to find out why we need to scale the force
|
||||
self.slider.set_motor_torque( force_gain *200*float(np.clip(a[0], -1, +1)) )
|
||||
|
||||
def calc_state(self):
|
||||
theta, theta_dot = self.j1.current_position()
|
||||
gamma, gamma_dot = self.j2.current_position()
|
||||
x, vx = self.slider.current_position()
|
||||
self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
|
||||
assert( np.isfinite(x) )
|
||||
return np.array([
|
||||
x, vx,
|
||||
self.pos_x,
|
||||
np.cos(theta), np.sin(theta), theta_dot,
|
||||
np.cos(gamma), np.sin(gamma), gamma_dot,
|
||||
])
|
Loading…
Reference in New Issue
Block a user