bullet3/examples/pybullet/gym/pybullet_envs/bullet/cartpole_bullet.py

"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c
"""
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import logging
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import subprocess
import pybullet as p2
import pybullet_data
import pybullet_utils.bullet_client as bc
from pkg_resources import parse_version

logger = logging.getLogger(__name__)


class CartPoleBulletEnv(gym.Env):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False, discrete_actions=True):
    # start the bullet physics server
    self._renders = renders
    self._discrete_actions = discrete_actions
    self._render_height = 200
    self._render_width = 320
    self._physics_client_id = -1
    self.theta_threshold_radians = 12 * 2 * math.pi / 360
    self.x_threshold = 0.4  #2.4
    high = np.array([
        self.x_threshold * 2,
        np.finfo(np.float32).max, self.theta_threshold_radians * 2,
        np.finfo(np.float32).max
    ])

    self.force_mag = 10

    if self._discrete_actions:
      self.action_space = spaces.Discrete(2)
    else:
      action_dim = 1
      action_high = np.array([self.force_mag] * action_dim)
      self.action_space = spaces.Box(-action_high, action_high)

    self.observation_space = spaces.Box(-high, high, dtype=np.float32)

    self.seed()
    #    self.reset()
    self.viewer = None
    self._configure()

  def _configure(self, display=None):
    self.display = display

  def seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def step(self, action):
    p = self._p
    if self._discrete_actions:
      force = self.force_mag if action == 1 else -self.force_mag
    else:
      force = action[0]

    p.setJointMotorControl2(self.cartpole, 0, p.TORQUE_CONTROL, force=force)
    p.stepSimulation()

    self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
    theta, theta_dot, x, x_dot = self.state

    done =  x < -self.x_threshold \
                or x > self.x_threshold \
                or theta < -self.theta_threshold_radians \
                or theta > self.theta_threshold_radians
    done = bool(done)
    reward = 1.0
    #print("state=",self.state)
    return np.array(self.state), reward, done, {}

  def reset(self):
    #    print("-----------reset simulation---------------")
    if self._physics_client_id < 0:
      if self._renders:
        self._p = bc.BulletClient(connection_mode=p2.GUI)
      else:
        self._p = bc.BulletClient()
      self._physics_client_id = self._p._client

      p = self._p
      p.resetSimulation()
      self.cartpole = p.loadURDF(os.path.join(pybullet_data.getDataPath(), "cartpole.urdf"),
                                 [0, 0, 0])
      p.changeDynamics(self.cartpole, -1, linearDamping=0, angularDamping=0)
      p.changeDynamics(self.cartpole, 0, linearDamping=0, angularDamping=0)
      p.changeDynamics(self.cartpole, 1, linearDamping=0, angularDamping=0)
      self.timeStep = 0.02
      p.setJointMotorControl2(self.cartpole, 1, p.VELOCITY_CONTROL, force=0)
      p.setJointMotorControl2(self.cartpole, 0, p.VELOCITY_CONTROL, force=0)
      p.setGravity(0, 0, -9.8)
      p.setTimeStep(self.timeStep)
      p.setRealTimeSimulation(0)
    p = self._p
    randstate = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
    p.resetJointState(self.cartpole, 1, randstate[0], randstate[1])
    p.resetJointState(self.cartpole, 0, randstate[2], randstate[3])
    #print("randstate=",randstate)
    self.state = p.getJointState(self.cartpole, 1)[0:2] + p.getJointState(self.cartpole, 0)[0:2]
    #print("self.state=", self.state)
    return np.array(self.state)

  def render(self, mode='human', close=False):
    if mode == "human":
      self._renders = True
    if mode != "rgb_array":
      return np.array([])
    base_pos=[0,0,0]
    self._cam_dist = 2
    self._cam_pitch = 0.3
    self._cam_yaw = 0
    if (self._physics_client_id>=0):
      view_matrix = self._p.computeViewMatrixFromYawPitchRoll(
        cameraTargetPosition=base_pos,
        distance=self._cam_dist,
        yaw=self._cam_yaw,
        pitch=self._cam_pitch,
        roll=0,
        upAxisIndex=2)
      proj_matrix = self._p.computeProjectionMatrixFOV(fov=60,
             aspect=float(self._render_width) /
             self._render_height,
             nearVal=0.1,
             farVal=100.0)
      (_, _, px, _, _) = self._p.getCameraImage(
          width=self._render_width,
          height=self._render_height,
          renderer=self._p.ER_BULLET_HARDWARE_OPENGL,
          viewMatrix=view_matrix,
          projectionMatrix=proj_matrix)
    else:
      px = np.array([[[255,255,255,255]]*self._render_width]*self._render_height, dtype=np.uint8)
    rgb_array = np.array(px, dtype=np.uint8)
    rgb_array = np.reshape(np.array(px), (self._render_height, self._render_width, -1))
    rgb_array = rgb_array[:, :, :3]
    return rgb_array

  def configure(self, args):
    pass

  def close(self):
    if self._physics_client_id >= 0:
      self._p.disconnect()
    self._physics_client_id = -1

class CartPoleContinuousBulletEnv(CartPoleBulletEnv):
  metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}

  def __init__(self, renders=False):
    # start the bullet physics server
    CartPoleBulletEnv.__init__(self, renders, discrete_actions=False)