Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env.

2024-12-13 21:30:09 +00:00 · 2017-08-17 00:25:16 +02:00 · 2017-08-17 00:25:16 +02:00 · 3191291748
commit 3191291748
parent 9f20e40541
2 changed files with 104 additions and 86 deletions
--- a/examples/pybullet/gym/envs/gym_pendulum_envs.py
+++ b/examples/pybullet/gym/envs/gym_pendulum_envs.py
@ -1,71 +1,29 @@
 from scene_abstract import SingleRobotEmptyScene
 from env_bases import MujocoXmlBaseBulletEnv
+from robot_pendula import InvertedPendulum, InvertedPendulumSwingup, InvertedDoublePendulum
 import gym, gym.spaces, gym.utils, gym.utils.seeding
 import numpy as np
 import os, sys

 class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
-	swingup = False
-	force_gain = 12 # TODO: Try to find out why we need to scale the force
-
 	def __init__(self):
-		MujocoXmlBaseBulletEnv.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)
+		self.robot = InvertedPendulum()
+		MujocoXmlBaseBulletEnv.__init__(self, self.robot)

 	def create_single_player_scene(self):
 		return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)

-	def robot_specific_reset(self):
-		self.pole = self.parts["pole"]
-		self.slider = self.jdict["slider"]
-		self.j1 = self.jdict["hinge"]
-		u = self.np_random.uniform(low=-.1, high=.1)
-		self.j1.reset_current_position( u if not self.swingup else 3.1415+u , 0)
-		self.j1.set_motor_torque(0)
-
-	def apply_action(self, a):
-		#assert( np.isfinite(a).all() )
-		if not np.isfinite(a).all():
-			print("a is inf")
-			a[0] = 0
-		self.slider.set_motor_torque( self.force_gain * 100*float(np.clip(a[0], -1, +1)) )
-
-	def calc_state(self):
-		self.theta, theta_dot = self.j1.current_position()
-		x, vx = self.slider.current_position()
-		#assert( np.isfinite(x) )
-
-		if not np.isfinite(x):
-			print("x is inf")
-			x = 0
-
-		if not np.isfinite(vx):
-			print("vx is inf")
-			vx = 0
-
-		if not np.isfinite(self.theta):
-			print("theta is inf")
-			self.theta = 0
-
-		if not np.isfinite(theta_dot):
-			print("theta_dot is inf")
-			theta_dot = 0
-
-		return np.array([
-			x, vx,
-			np.cos(self.theta), np.sin(self.theta), theta_dot
-			])
-
 	def _step(self, a):
-		self.apply_action(a)
+		self.robot.apply_action(a)
 		self.scene.global_step()
-		state = self.calc_state()  # sets self.pos_x self.pos_y
+		state = self.robot.calc_state()  # sets self.pos_x self.pos_y
 		vel_penalty = 0
-		if self.swingup:
-			reward = np.cos(self.theta)
+		if self.robot.swingup:
+			reward = np.cos(self.robot.theta)
 			done = False
 		else:
 			reward = 1.0
-			done = np.abs(self.theta) > .2
+			done = np.abs(self.robot.theta) > .2
 		self.rewards = [float(reward)]
 		self.HUD(state, a, done)
 		return state, sum(self.rewards), done, {}
@ -74,57 +32,30 @@ class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
 		self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5)

 class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):
-	swingup = True
-	force_gain = 2.2  # TODO: Try to find out why we need to scale the force
+	def __init__(self):
+		self.robot = InvertedPendulumSwingup()
+		MujocoXmlBaseBulletEnv.__init__(self, self.robot)

 class InvertedDoublePendulumBulletEnv(MujocoXmlBaseBulletEnv):
 	def __init__(self):
-		MujocoXmlBaseBulletEnv.__init__(self, 'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)
+		self.robot = InvertedDoublePendulum()
+		MujocoXmlBaseBulletEnv.__init__(self, self.robot)

 	def create_single_player_scene(self):
 		return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)

-	def robot_specific_reset(self):
-		self.pole2 = self.parts["pole2"]
-		self.slider = self.jdict["slider"]
-		self.j1 = self.jdict["hinge"]
-		self.j2 = self.jdict["hinge2"]
-		u = self.np_random.uniform(low=-.1, high=.1, size=[2])
-		self.j1.reset_current_position(float(u[0]), 0)
-		self.j2.reset_current_position(float(u[1]), 0)
-		self.j1.set_motor_torque(0)
-		self.j2.set_motor_torque(0)
-
-	def apply_action(self, a):
-		assert( np.isfinite(a).all() )
-		force_gain = 0.78  # TODO: Try to find out why we need to scale the force
-		self.slider.set_motor_torque( force_gain *200*float(np.clip(a[0], -1, +1)) )
-
-	def calc_state(self):
-		theta, theta_dot = self.j1.current_position()
-		gamma, gamma_dot = self.j2.current_position()
-		x, vx = self.slider.current_position()
-		self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
-		assert( np.isfinite(x) )
-		return np.array([
-			x, vx,
-			self.pos_x,
-			np.cos(theta), np.sin(theta), theta_dot,
-			np.cos(gamma), np.sin(gamma), gamma_dot,
-			])
-
 	def _step(self, a):
-		self.apply_action(a)
+		self.robot.apply_action(a)
 		self.scene.global_step()
-		state = self.calc_state()  # sets self.pos_x self.pos_y
+		state = self.robot.calc_state()  # sets self.pos_x self.pos_y
 		# upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
 		# using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
-		dist_penalty = 0.01 * self.pos_x ** 2 + (self.pos_y + 0.3 - 2) ** 2
+		dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2
 		# v1, v2 = self.model.data.qvel[1:3]   TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
 		#vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
 		vel_penalty = 0
 		alive_bonus = 10
-		done = self.pos_y + 0.3 <= 1
+		done = self.robot.pos_y + 0.3 <= 1
 		self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]
 		self.HUD(state, a, done)
 		return state, sum(self.rewards), done, {}
--- a/examples/pybullet/gym/envs/robot_pendula.py
+++ b/examples/pybullet/gym/envs/robot_pendula.py
@ -0,0 +1,87 @@
+from robot_bases import MujocoXmlBasedRobot
+import numpy as np
+
+class InvertedPendulum(MujocoXmlBasedRobot):
+	swingup = False
+	force_gain = 12 # TODO: Try to find out why we need to scale the force
+	def __init__(self):
+		MujocoXmlBasedRobot.__init__(self, 'inverted_pendulum.xml', 'cart', action_dim=1, obs_dim=5)
+
+	def robot_specific_reset(self):
+		self.pole = self.parts["pole"]
+		self.slider = self.jdict["slider"]
+		self.j1 = self.jdict["hinge"]
+		u = self.np_random.uniform(low=-.1, high=.1)
+		self.j1.reset_current_position( u if not self.swingup else 3.1415+u , 0)
+		self.j1.set_motor_torque(0)
+
+	def apply_action(self, a):
+		#assert( np.isfinite(a).all() )
+		if not np.isfinite(a).all():
+			print("a is inf")
+			a[0] = 0
+		self.slider.set_motor_torque( self.force_gain * 100*float(np.clip(a[0], -1, +1)) )
+
+	def calc_state(self):
+		self.theta, theta_dot = self.j1.current_position()
+		x, vx = self.slider.current_position()
+		#assert( np.isfinite(x) )
+
+		if not np.isfinite(x):
+			print("x is inf")
+			x = 0
+
+		if not np.isfinite(vx):
+			print("vx is inf")
+			vx = 0
+
+		if not np.isfinite(self.theta):
+			print("theta is inf")
+			self.theta = 0
+
+		if not np.isfinite(theta_dot):
+			print("theta_dot is inf")
+			theta_dot = 0
+
+		return np.array([
+			x, vx,
+			np.cos(self.theta), np.sin(self.theta), theta_dot
+			])
+
+class InvertedPendulumSwingup(InvertedPendulum):
+	swingup = True
+	force_gain = 2.2  # TODO: Try to find out why we need to scale the force
+
+
+class InvertedDoublePendulum(MujocoXmlBasedRobot):
+	def __init__(self):
+		MujocoXmlBasedRobot.__init__(self,  'inverted_double_pendulum.xml', 'cart', action_dim=1, obs_dim=9)
+
+	def robot_specific_reset(self):
+		self.pole2 = self.parts["pole2"]
+		self.slider = self.jdict["slider"]
+		self.j1 = self.jdict["hinge"]
+		self.j2 = self.jdict["hinge2"]
+		u = self.np_random.uniform(low=-.1, high=.1, size=[2])
+		self.j1.reset_current_position(float(u[0]), 0)
+		self.j2.reset_current_position(float(u[1]), 0)
+		self.j1.set_motor_torque(0)
+		self.j2.set_motor_torque(0)
+
+	def apply_action(self, a):
+		assert( np.isfinite(a).all() )
+		force_gain = 0.78  # TODO: Try to find out why we need to scale the force
+		self.slider.set_motor_torque( force_gain *200*float(np.clip(a[0], -1, +1)) )
+
+	def calc_state(self):
+		theta, theta_dot = self.j1.current_position()
+		gamma, gamma_dot = self.j2.current_position()
+		x, vx = self.slider.current_position()
+		self.pos_x, _, self.pos_y = self.pole2.pose().xyz()
+		assert( np.isfinite(x) )
+		return np.array([
+			x, vx,
+			self.pos_x,
+			np.cos(theta), np.sin(theta), theta_dot,
+			np.cos(gamma), np.sin(gamma), gamma_dot,
+			])