bullet3/examples/pybullet/gym/pybullet_envs/gym_pendulum_envs.py

from .scene_abstract import SingleRobotEmptyScene
from .env_bases import MJCFBaseBulletEnv
from robot_pendula import InvertedPendulum, InvertedPendulumSwingup, InvertedDoublePendulum
import gym, gym.spaces, gym.utils, gym.utils.seeding
import numpy as np
import pybullet 
import os, sys

class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):
	"""Gym environment for a single inverted pendulum simulated with Bullet.

	The robot's ``swingup`` flag selects the task in ``_step``: when it is
	set, the reward is ``cos(theta)`` and this class never signals the end of
	an episode; otherwise the reward is a constant 1.0 per step and the
	episode ends once ``|theta|`` exceeds 0.2.
	"""

	def __init__(self):
		self.robot = InvertedPendulum()
		MJCFBaseBulletEnv.__init__(self, self.robot)
		# Id of the saved simulation state; stays negative until the first
		# _reset() has captured one.
		self.stateId = -1

	def create_single_player_scene(self, bullet_client):
		"""Build the empty single-robot scene used by this environment.

		:param bullet_client: client object forwarded to the scene.
		:return: a ``SingleRobotEmptyScene`` with gravity 9.8, a 0.0165
			timestep and a frame skip of 1.
		"""
		return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

	def _reset(self):
		"""Reset the environment and return the base class's reset result.

		The first call runs the base reset and then saves the simulation
		state; later calls restore that saved state before running the base
		reset again.
		"""
		if self.stateId >= 0:
			self._p.restoreState(self.stateId)
		r = MJCFBaseBulletEnv._reset(self)
		if self.stateId < 0:
			# First reset: remember the freshly built world for later restores.
			self.stateId = self._p.saveState()
		return r

	def _step(self, a):
		"""Apply action ``a``, advance the scene one step and score the result.

		:param a: action handed unchanged to ``self.robot.apply_action``.
		:return: ``(state, reward, done, info)`` where ``state`` comes from
			``self.robot.calc_state()``, ``reward`` is the sum of
			``self.rewards`` and ``info`` is an empty dict.
		"""
		self.robot.apply_action(a)
		self.scene.global_step()
		state = self.robot.calc_state()  # sets self.pos_x self.pos_y
		if self.robot.swingup:
			# Swing-up task: reward tracks how upright the pole is.
			reward = np.cos(self.robot.theta)
			done = False
		else:
			# Balancing task: constant reward until the pole tilts too far.
			reward = 1.0
			done = np.abs(self.robot.theta) > .2
		self.rewards = [float(reward)]
		self.HUD(state, a, done)
		return state, sum(self.rewards), done, {}

	def camera_adjust(self):
		"""Point the camera using this environment's fixed coordinates."""
		# NOTE(review): presumably (eye x, y, z, target x, y, z) -- confirm
		# against the camera class.
		self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)

class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):
	"""Inverted pendulum environment driven by the ``InvertedPendulumSwingup`` robot.

	Only the constructor differs from ``InvertedPendulumBulletEnv``; every
	other method is inherited.
	"""

	def __init__(self):
		swingup_robot = InvertedPendulumSwingup()
		self.robot = swingup_robot
		# The parent constructor is deliberately bypassed: it would build a
		# plain InvertedPendulum robot instead of the swing-up one.
		MJCFBaseBulletEnv.__init__(self, swingup_robot)
		# Negative means no simulation state has been saved yet.
		self.stateId = -1

class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv):
	"""Gym environment for a two-link inverted pendulum simulated with Bullet.

	Each step is rewarded with a fixed alive bonus minus a distance penalty
	computed from the robot's ``pos_x`` and ``pos_y``. The episode ends when
	``pos_y + 0.3`` drops to 1 or below.
	"""

	def __init__(self):
		self.robot = InvertedDoublePendulum()
		MJCFBaseBulletEnv.__init__(self, self.robot)
		# Negative means no simulation state has been saved yet.
		self.stateId = -1

	def create_single_player_scene(self, bullet_client):
		"""Return the empty single-robot scene bound to ``bullet_client``."""
		return SingleRobotEmptyScene(
			bullet_client,
			gravity=9.8,
			timestep=0.0165,
			frame_skip=1,
		)

	def _reset(self):
		"""Reset the environment and return the base class's reset result.

		A previously saved simulation state is restored first when one
		exists; otherwise the state is saved once the base reset has run.
		"""
		if self.stateId >= 0:
			self._p.restoreState(self.stateId)
		reset_result = MJCFBaseBulletEnv._reset(self)
		if self.stateId < 0:
			self.stateId = self._p.saveState()
		return reset_result

	def _step(self, a):
		"""Apply action ``a``, advance the scene and return ``(state, reward, done, info)``.

		``reward`` is the sum of ``self.rewards`` (alive bonus, negated
		distance penalty, negated velocity penalty) and ``info`` is an empty
		dict.
		"""
		self.robot.apply_action(a)
		self.scene.global_step()
		state = self.robot.calc_state()  # sets self.pos_x self.pos_y

		# upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9
		# using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3
		tip_height = self.robot.pos_y + 0.3
		dist_penalty = 0.01 * self.robot.pos_x ** 2 + (tip_height - 2) ** 2

		# Velocity penalty stays at zero for now:
		# TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040
		#   v1, v2 = self.model.data.qvel[1:3]
		#   vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
		vel_penalty = 0
		alive_bonus = 10

		done = tip_height <= 1
		self.rewards = [
			float(alive_bonus),
			float(-dist_penalty),
			float(-vel_penalty),
		]
		self.HUD(state, a, done)
		total_reward = sum(self.rewards)
		return state, total_reward, done, {}

	def camera_adjust(self):
		"""Point the camera using this environment's fixed coordinates."""
		self.camera.move_and_look_at(0, 1.2, 1.2, 0, 0, 0.5)
improve the new pybullet gym environments, follow camera, disable 2D GUI, disable rendering during loading (makes it faster) disable vsync on Mac fix setup.py file 2017-08-21 01:11:53 +00:00			`from .scene_abstract import SingleRobotEmptyScene`
Clarify in naming that the environment uses MJCF xml files with Bullet, it doesn't use MuJoCo. 2017-08-23 22:03:57 +00:00			`from .env_bases import MJCFBaseBulletEnv`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`from robot_pendula import InvertedPendulum, InvertedPendulumSwingup, InvertedDoublePendulum`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`import gym, gym.spaces, gym.utils, gym.utils.seeding`
			`import numpy as np`
Fix for 1643, allow to instantiate multiple PyBullet Gym environments (Ant, Humanoid, Hopper, Pendula etc) in the same process (same or other thread). It uses the pybullet_utils.bullet_client to achieve this. 2018-05-18 23:23:54 +00:00			`import pybullet`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`import os, sys`

Clarify in naming that the environment uses MJCF xml files with Bullet, it doesn't use MuJoCo. 2017-08-23 22:03:57 +00:00			`class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`def __init__(self):`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`self.robot = InvertedPendulum()`
Clarify in naming that the environment uses MJCF xml files with Bullet, it doesn't use MuJoCo. 2017-08-23 22:03:57 +00:00			`MJCFBaseBulletEnv.__init__(self, self.robot)`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`self.stateId=-1`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00
Fix for 1643, allow to instantiate multiple PyBullet Gym environments (Ant, Humanoid, Hopper, Pendula etc) in the same process (same or other thread). It uses the pybullet_utils.bullet_client to achieve this. 2018-05-18 23:23:54 +00:00			`def create_single_player_scene(self, bullet_client):`
			`return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`def _reset(self):`
			`if (self.stateId>=0):`
			`#print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")")`
Fix pendula to use the bullet-client. 2018-05-22 14:40:10 +00:00			`self._p.restoreState(self.stateId)`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`r = MJCFBaseBulletEnv._reset(self)`
			`if (self.stateId<0):`
Fix for 1643, allow to instantiate multiple PyBullet Gym environments (Ant, Humanoid, Hopper, Pendula etc) in the same process (same or other thread). It uses the pybullet_utils.bullet_client to achieve this. 2018-05-18 23:23:54 +00:00			`self.stateId = self._p.saveState()`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`#print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId)`
			`return r`

Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`def _step(self, a):`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`self.robot.apply_action(a)`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`self.scene.global_step()`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`state = self.robot.calc_state() # sets self.pos_x self.pos_y`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`vel_penalty = 0`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`if self.robot.swingup:`
			`reward = np.cos(self.robot.theta)`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`done = False`
			`else:`
			`reward = 1.0`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`done = np.abs(self.robot.theta) > .2`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`self.rewards = [float(reward)]`
			`self.HUD(state, a, done)`
			`return state, sum(self.rewards), done, {}`

			`def camera_adjust(self):`
			`self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5)`

Remove duplicate assets and use other assets. Rename classes appropriately for robot-scene-env split refactoring. 2017-08-15 13:31:24 +00:00			`class InvertedPendulumSwingupBulletEnv(InvertedPendulumBulletEnv):`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`def __init__(self):`
			`self.robot = InvertedPendulumSwingup()`
Clarify in naming that the environment uses MJCF xml files with Bullet, it doesn't use MuJoCo. 2017-08-23 22:03:57 +00:00			`MJCFBaseBulletEnv.__init__(self, self.robot)`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`self.stateId=-1`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00
Clarify in naming that the environment uses MJCF xml files with Bullet, it doesn't use MuJoCo. 2017-08-23 22:03:57 +00:00			`class InvertedDoublePendulumBulletEnv(MJCFBaseBulletEnv):`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`def __init__(self):`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`self.robot = InvertedDoublePendulum()`
Clarify in naming that the environment uses MJCF xml files with Bullet, it doesn't use MuJoCo. 2017-08-23 22:03:57 +00:00			`MJCFBaseBulletEnv.__init__(self, self.robot)`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`self.stateId = -1`
Fix for 1643, allow to instantiate multiple PyBullet Gym environments (Ant, Humanoid, Hopper, Pendula etc) in the same process (same or other thread). It uses the pybullet_utils.bullet_client to achieve this. 2018-05-18 23:23:54 +00:00			`def create_single_player_scene(self, bullet_client):`
			`return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`def _reset(self):`
			`if (self.stateId>=0):`
Fix pendula to use the bullet-client. 2018-05-22 14:40:10 +00:00			`self._p.restoreState(self.stateId)`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`r = MJCFBaseBulletEnv._reset(self)`
			`if (self.stateId<0):`
Fix for 1643, allow to instantiate multiple PyBullet Gym environments (Ant, Humanoid, Hopper, Pendula etc) in the same process (same or other thread). It uses the pybullet_utils.bullet_client to achieve this. 2018-05-18 23:23:54 +00:00			`self.stateId = self._p.saveState()`
Improve PyBullet ports of Roboschool envs: fix reset (it kept adding stadium objects, causing slowdown), now reset uses saveState/restoreState and reset becomes a few orders of magnitude faster. Use python -m pybullet_envs.examples.testEnv --env AntBulletEnv-v0 --render=1 --steps 1000 --resetbenchmark=1 Added environments: HumanoidFlagrunBulletEnv-v0, HumanoidFlagrunHarderBulletEnv-v0, StrikerBulletEnv-v0, ThrowerBulletEnv-v0, PusherBulletEnv-v0, ReacherBulletEnv-v0, CartPoleBulletEnv-v0 and register them to OpenAI Gym. Allow numpy/humanoid_running.py to use abtch or non-batch update (setJointMotorControl2/setJointMotorControlArray) 2018-01-15 20:48:32 +00:00			`return r`

Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`def _step(self, a):`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`self.robot.apply_action(a)`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`self.scene.global_step()`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`state = self.robot.calc_state() # sets self.pos_x self.pos_y`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`# upright position: 0.6 (one pole) + 0.6 (second pole) * 0.5 (middle of second pole) = 0.9`
			`# using <site> tag in original xml, upright position is 0.6 + 0.6 = 1.2, difference +0.3`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`dist_penalty = 0.01 * self.robot.pos_x ** 2 + (self.robot.pos_y + 0.3 - 2) ** 2`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`# v1, v2 = self.model.data.qvel[1:3] TODO when this fixed https://github.com/bulletphysics/bullet3/issues/1040`
			`#vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2`
			`vel_penalty = 0`
			`alive_bonus = 10`
Split pendula robot from the pendula envs due to changes in the underlying mujoco xml base env. 2017-08-16 22:25:16 +00:00			`done = self.robot.pos_y + 0.3 <= 1`
Add nearly all gym environments using pybullet together with the latest tf model from the roboschool model zoo. 2017-07-14 21:38:15 +00:00			`self.rewards = [float(alive_bonus), float(-dist_penalty), float(-vel_penalty)]`
			`self.HUD(state, a, done)`
			`return state, sum(self.rewards), done, {}`

			`def camera_adjust(self):`
			`self.camera.move_and_look_at(0,1.2,1.2, 0,0,0.5)`