From 55f5e52ecd4d088ec9124da20a082fd4e66a5619 Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Tue, 31 Oct 2017 15:50:34 -0700 Subject: [PATCH] add continuous versions of kukaGymEnv, kukaCamGymEnv, racecarZEDGymEnv etc. should be trainable with PPO or evolution strategies (ES) now --- .../pybullet/gym/pybullet_envs/bullet/kuka.py | 16 ++-- .../gym/pybullet_envs/bullet/kukaCamGymEnv.py | 94 +++++++++++++------ .../gym/pybullet_envs/bullet/kukaGymEnv.py | 79 +++++++++++----- .../pybullet_envs/bullet/minitaur_gym_env.py | 15 ++- .../pybullet_envs/examples/kukaGymEnvTest.py | 10 +- .../examples/minitaur_gym_env_example.py | 14 +-- .../examples/racecarGymEnvTest.py | 2 +- .../examples/racecarZEDGymEnvTest.py | 38 ++++---- 8 files changed, 172 insertions(+), 96 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py index 3e7ee6c49..b2b02f2e7 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py @@ -17,12 +17,12 @@ class Kuka: self.timeStep = timeStep self.maxForce = 200. - self.fingerAForce = 6 - self.fingerBForce = 5.5 - self.fingerTipForce = 6 + self.fingerAForce = 2 + self.fingerBForce = 2.5 + self.fingerTipForce = 2 self.useInverseKinematics = 1 self.useSimulation = 1 - self.useNullSpace = 1 + self.useNullSpace =21 self.useOrientation = 1 self.kukaEndEffectorIndex = 6 #lower limits for null space @@ -120,10 +120,8 @@ class Kuka: #print (self.endEffectorPos[2]) #print("actualEndEffectorPos[2]") #print(actualEndEffectorPos[2]) - if (dz>0 or actualEndEffectorPos[2]>0.10): - self.endEffectorPos[2] = self.endEffectorPos[2]+dz - if (actualEndEffectorPos[2]<0.10): - self.endEffectorPos[2] = self.endEffectorPos[2]+0.0001 + #if (dz<0 or actualEndEffectorPos[2]<0.5): + self.endEffectorPos[2] = self.endEffectorPos[2]+dz self.endEffectorAngle = self.endEffectorAngle + da @@ -147,7 +145,7 @@ class Kuka: if (self.useSimulation): for i in range (self.kukaEndEffectorIndex+1): #print(i) - p.setJointMotorControl2(bodyIndex=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,positionGain=0.03,velocityGain=1) + p.setJointMotorControl2(bodyIndex=self.kukaUid,jointIndex=i,controlMode=p.POSITION_CONTROL,targetPosition=jointPoses[i],targetVelocity=0,force=self.maxForce,positionGain=0.3,velocityGain=1) else: #reset the joint state (ignoring all dynamics, not recommended to use during simulation) for i in range (self.numJoints): diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py index ea2b07b82..a1cdc8242 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaCamGymEnv.py @@ -14,7 +14,7 @@ import pybullet as p from . import kuka import random import pybullet_data - +maxSteps = 1000 class KukaCamGymEnv(gym.Env): metadata = { @@ -24,10 +24,10 @@ class KukaCamGymEnv(gym.Env): def __init__(self, urdfRoot=pybullet_data.getDataPath(), - actionRepeat=1, + actionRepeat=25, isEnableSelfCollision=True, - renders=True): - print("init") + renders=True, + isDiscrete=False): self._timeStep = 1./240. self._urdfRoot = urdfRoot self._actionRepeat = actionRepeat @@ -37,6 +37,7 @@ class KukaCamGymEnv(gym.Env): self._renders = renders self._width = 341 self._height = 256 + self._isDiscrete=isDiscrete self.terminated = 0 self._p = p if self._renders: @@ -54,12 +55,17 @@ class KukaCamGymEnv(gym.Env): #print(observationDim) observation_high = np.array([np.finfo(np.float32).max] * observationDim) - self.action_space = spaces.Discrete(7) + if (self._isDiscrete): + self.action_space = spaces.Discrete(7) + else: + action_dim = 3 + self._action_bound = 1 + action_high = np.array([self._action_bound] * action_dim) + self.action_space = spaces.Box(-action_high, action_high) self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4)) self.viewer = None def _reset(self): - print("reset") self.terminated = 0 p.resetSimulation() p.setPhysicsEngineParameter(numSolverIterations=150) @@ -68,8 +74,8 @@ class KukaCamGymEnv(gym.Env): p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0) - xpos = 0.5 +0.05*random.random() - ypos = 0 +0.05*random.random() + xpos = 0.5 +0.2*random.random() + ypos = 0 +0.25*random.random() ang = 3.1415925438*random.random() orn = p.getQuaternionFromEuler([0,0,ang]) self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3]) @@ -117,24 +123,36 @@ class KukaCamGymEnv(gym.Env): return self._observation def _step(self, action): - dv = 0.01 - dx = [0,-dv,dv,0,0,0,0][action] - dy = [0,0,0,-dv,dv,0,0][action] - da = [0,0,0,0,0,-0.1,0.1][action] - f = 0.3 - realAction = [dx,dy,-0.002,da,f] + if (self._isDiscrete): + dv = 0.01 + dx = [0,-dv,dv,0,0,0,0][action] + dy = [0,0,0,-dv,dv,0,0][action] + da = [0,0,0,0,0,-0.1,0.1][action] + f = 0.3 + realAction = [dx,dy,-0.002,da,f] + else: + dv = 0.01 + dx = action[0] * dv + dy = action[1] * dv + da = action[2] * 0.1 + f = 0.3 + realAction = [dx,dy,-0.002,da,f] + return self.step2( realAction) def step2(self, action): - self._kuka.applyAction(action) for i in range(self._actionRepeat): + self._kuka.applyAction(action) p.stepSimulation() - if self._renders: - time.sleep(self._timeStep) - self._observation = self.getExtendedObservation() if self._termination(): break + #self._observation = self.getExtendedObservation() self._envStepCounter += 1 + + self._observation = self.getExtendedObservation() + if self._renders: + time.sleep(self._timeStep) + #print("self._envStepCounter") #print(self._envStepCounter) @@ -154,25 +172,41 @@ class KukaCamGymEnv(gym.Env): #print("self._envStepCounter") #print(self._envStepCounter) - if (self.terminated or self._envStepCounter>1000): + if (self.terminated or self._envStepCounter>maxSteps): self._observation = self.getExtendedObservation() return True - - if (actualEndEffectorPos[2] <= 0.10): + maxDist = 0.005 + closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist) + + if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43): self.terminated = 1 #print("closing gripper, attempting grasp") #start grasp and terminate fingerAngle = 0.3 - - for i in range (1000): - graspAction = [0,0,0.001,0,fingerAngle] + for i in range (100): + graspAction = [0,0,0.0001,0,fingerAngle] self._kuka.applyAction(graspAction) p.stepSimulation() fingerAngle = fingerAngle-(0.3/100.) if (fingerAngle<0): fingerAngle=0 - + + for i in range (1000): + graspAction = [0,0,0.001,0,fingerAngle] + self._kuka.applyAction(graspAction) + p.stepSimulation() + blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) + if (blockPos[2] > 0.23): + #print("BLOCKPOS!") + #print(blockPos[2]) + break + state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex) + actualEndEffectorPos = state[0] + if (actualEndEffectorPos[2]>0.5): + break + + self._observation = self.getExtendedObservation() return True return False @@ -181,7 +215,7 @@ class KukaCamGymEnv(gym.Env): #rewards is height of target object blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) - closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000) + closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) reward = -1000 numPt = len(closestPoints) @@ -189,13 +223,13 @@ class KukaCamGymEnv(gym.Env): if (numPt>0): #print("reward:") reward = -closestPoints[0][8]*10 - if (blockPos[2] >0.2): - print("grasped a block!!!") - print("self._envStepCounter") - print(self._envStepCounter) + #print("grasped a block!!!") + #print("self._envStepCounter") + #print(self._envStepCounter) reward = reward+1000 #print("reward") #print(reward) return reward + diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py index 5928a7232..0dfb62490 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kukaGymEnv.py @@ -14,6 +14,8 @@ from . import kuka import random import pybullet_data +maxSteps = 1000 + class KukaGymEnv(gym.Env): metadata = { @@ -25,8 +27,9 @@ class KukaGymEnv(gym.Env): urdfRoot=pybullet_data.getDataPath(), actionRepeat=1, isEnableSelfCollision=True, - renders=True): - print("init") + renders=True, + isDiscrete=False): + self._isDiscrete = isDiscrete self._timeStep = 1./240. self._urdfRoot = urdfRoot self._actionRepeat = actionRepeat @@ -51,12 +54,17 @@ class KukaGymEnv(gym.Env): #print(observationDim) observation_high = np.array([np.finfo(np.float32).max] * observationDim) - self.action_space = spaces.Discrete(7) + if (self._isDiscrete): + self.action_space = spaces.Discrete(7) + else: + action_dim = 3 + self._action_bound = 1 + action_high = np.array([self._action_bound] * action_dim) + self.action_space = spaces.Box(-action_high, action_high) self.observation_space = spaces.Box(-observation_high, observation_high) self.viewer = None def _reset(self): - print("reset") self.terminated = 0 p.resetSimulation() p.setPhysicsEngineParameter(numSolverIterations=150) @@ -65,8 +73,8 @@ class KukaGymEnv(gym.Env): p.loadURDF(os.path.join(self._urdfRoot,"table/table.urdf"), 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0) - xpos = 0.5 +0.05*random.random() - ypos = 0 +0.05*random.random() + xpos = 0.5 +0.2*random.random() + ypos = 0 +0.25*random.random() ang = 3.1415925438*random.random() orn = p.getQuaternionFromEuler([0,0,ang]) self.blockUid =p.loadURDF(os.path.join(self._urdfRoot,"block.urdf"), xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3]) @@ -101,12 +109,20 @@ class KukaGymEnv(gym.Env): return self._observation def _step(self, action): - dv = 0.01 - dx = [0,-dv,dv,0,0,0,0][action] - dy = [0,0,0,-dv,dv,0,0][action] - da = [0,0,0,0,0,-0.1,0.1][action] - f = 0.3 - realAction = [dx,dy,-0.002,da,f] + if (self._isDiscrete): + dv = 0.01 + dx = [0,-dv,dv,0,0,0,0][action] + dy = [0,0,0,-dv,dv,0,0][action] + da = [0,0,0,0,0,-0.1,0.1][action] + f = 0.3 + realAction = [dx,dy,-0.002,da,f] + else: + dv = 0.01 + dx = action[0] * dv + dy = action[1] * dv + da = action[2] * 0.1 + f = 0.3 + realAction = [dx,dy,-0.002,da,f] return self.step2( realAction) def step2(self, action): @@ -138,25 +154,41 @@ class KukaGymEnv(gym.Env): #print("self._envStepCounter") #print(self._envStepCounter) - if (self.terminated or self._envStepCounter>1000): + if (self.terminated or self._envStepCounter>maxSteps): self._observation = self.getExtendedObservation() return True - - if (actualEndEffectorPos[2] <= 0.10): + maxDist = 0.005 + closestPoints = p.getClosestPoints(self._kuka.trayUid, self._kuka.kukaUid,maxDist) + + if (len(closestPoints)):#(actualEndEffectorPos[2] <= -0.43): self.terminated = 1 #print("closing gripper, attempting grasp") #start grasp and terminate fingerAngle = 0.3 - - for i in range (1000): - graspAction = [0,0,0.001,0,fingerAngle] + for i in range (100): + graspAction = [0,0,0.0001,0,fingerAngle] self._kuka.applyAction(graspAction) p.stepSimulation() fingerAngle = fingerAngle-(0.3/100.) if (fingerAngle<0): fingerAngle=0 - + + for i in range (1000): + graspAction = [0,0,0.001,0,fingerAngle] + self._kuka.applyAction(graspAction) + p.stepSimulation() + blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) + if (blockPos[2] > 0.23): + #print("BLOCKPOS!") + #print(blockPos[2]) + break + state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex) + actualEndEffectorPos = state[0] + if (actualEndEffectorPos[2]>0.5): + break + + self._observation = self.getExtendedObservation() return True return False @@ -165,7 +197,7 @@ class KukaGymEnv(gym.Env): #rewards is height of target object blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid) - closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000) + closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000, -1, self._kuka.kukaEndEffectorIndex) reward = -1000 numPt = len(closestPoints) @@ -173,11 +205,10 @@ class KukaGymEnv(gym.Env): if (numPt>0): #print("reward:") reward = -closestPoints[0][8]*10 - if (blockPos[2] >0.2): - print("grasped a block!!!") - print("self._envStepCounter") - print(self._envStepCounter) + #print("grasped a block!!!") + #print("self._envStepCounter") + #print(self._envStepCounter) reward = reward+1000 #print("reward") diff --git a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py index 38ec631a5..f553ac46e 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/minitaur_gym_env.py @@ -32,6 +32,9 @@ OBSERVATION_EPS = 0.01 RENDER_HEIGHT = 720 RENDER_WIDTH = 960 +duckStartPos = [0,0,0.25] +duckStartOrn = [0.5,0.5,0.5,0.5] + class MinitaurBulletEnv(gym.Env): """The gym environment for the minitaur. @@ -133,6 +136,7 @@ class MinitaurBulletEnv(gym.Env): self._on_rack = on_rack self._cam_dist = 1.0 self._cam_yaw = 0 + self._duckId = -1 self._cam_pitch = -30 self._hard_reset = True self._kd_for_pd_controllers = kd_for_pd_controllers @@ -176,7 +180,8 @@ class MinitaurBulletEnv(gym.Env): self._pybullet_client.setPhysicsEngineParameter( numSolverIterations=int(self._num_bullet_solver_iterations)) self._pybullet_client.setTimeStep(self._time_step) - self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root) + self._groundId = self._pybullet_client.loadURDF("%s/plane.urdf" % self._urdf_root) + self._duckId = self._pybullet_client.loadURDF("%s/duck_vhacd.urdf" % self._urdf_root,duckStartPos,duckStartOrn) self._pybullet_client.setGravity(0, 0, -10) acc_motor = self._accurate_motor_model_enabled motor_protect = self._motor_overheat_protection @@ -196,7 +201,7 @@ class MinitaurBulletEnv(gym.Env): kd_for_pd_controllers=self._kd_for_pd_controllers)) else: self.minitaur.Reset(reload_urdf=False) - + self._pybullet_client.resetBasePositionAndOrientation(self._duckId,duckStartPos,duckStartOrn) if self._env_randomizer is not None: self._env_randomizer.randomize_env(self) @@ -322,6 +327,10 @@ class MinitaurBulletEnv(gym.Env): """ return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:]) + def lost_duck(self): + points = self._pybullet_client.getContactPoints(self._duckId, self._groundId); + return len(points)>0 + def is_fallen(self): """Decide whether the minitaur has fallen. @@ -342,7 +351,7 @@ class MinitaurBulletEnv(gym.Env): def _termination(self): position = self.minitaur.GetBasePosition() distance = math.sqrt(position[0]**2 + position[1]**2) - return self.is_fallen() or distance > self._distance_limit + return self.lost_duck() or self.is_fallen() or distance > self._distance_limit def _reward(self): current_base_position = self.minitaur.GetBasePosition() diff --git a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py index 65120c5f8..5649063b2 100644 --- a/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py +++ b/examples/pybullet/gym/pybullet_envs/examples/kukaGymEnvTest.py @@ -9,7 +9,7 @@ import time def main(): - environment = KukaGymEnv(renders=True) + environment = KukaGymEnv(renders=True,isDiscrete=False) motorsIds=[] @@ -19,10 +19,10 @@ def main(): #motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0)) #motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3)) - dv = 0.001 + dv = 1 motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0)) motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0)) - motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,-dv)) + motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,0)) motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0)) motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3)) @@ -33,8 +33,8 @@ def main(): for motorId in motorsIds: action.append(environment._p.readUserDebugParameter(motorId)) - state, reward, done, info = environment.step2(action) + state, reward, done, info = environment.step(action) obs = environment.getExtendedObservation() if __name__=="__main__": - main() \ No newline at end of file + main() diff --git a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py index d75d2254c..2aebfd79a 100644 --- a/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py +++ b/examples/pybullet/gym/pybullet_envs/examples/minitaur_gym_env_example.py @@ -44,9 +44,9 @@ def MotorOverheatExample(): motor_kd=0.00, on_rack=False) - action = [2.0] * 8 + action = [.0] * 8 for i in range(8): - action[i] = 2.0 - 0.5 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1) + action[i] = .0 - 0.1 * (-1 if i % 2 == 0 else 1) * (-1 if i < 4 else 1) steps = 500 actions_and_observations = [] @@ -112,9 +112,9 @@ def SinePolicyExample(): on_rack=False) sum_reward = 0 steps = 20000 - amplitude_1_bound = 0.5 - amplitude_2_bound = 0.5 - speed = 40 + amplitude_1_bound = 0.1 + amplitude_2_bound = 0.1 + speed = 1 for step_counter in range(steps): time_step = 0.01 @@ -124,9 +124,9 @@ def SinePolicyExample(): amplitude2 = amplitude_2_bound steering_amplitude = 0 if t < 10: - steering_amplitude = 0.5 + steering_amplitude = 0.1 elif t < 20: - steering_amplitude = -0.5 + steering_amplitude = -0.1 else: steering_amplitude = 0 diff --git a/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py index aa78baa32..d640951b0 100644 --- a/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py +++ b/examples/pybullet/gym/pybullet_envs/examples/racecarGymEnvTest.py @@ -41,4 +41,4 @@ def main(): print(obs) if __name__=="__main__": - main() \ No newline at end of file + main() diff --git a/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py b/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py index 64521b121..cee459e70 100644 --- a/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py +++ b/examples/pybullet/gym/pybullet_envs/examples/racecarZEDGymEnvTest.py @@ -3,12 +3,13 @@ import os, inspect currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(os.path.dirname(currentdir)) os.sys.path.insert(0,parentdir) +isDiscrete = False from pybullet_envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv def main(): - environment = RacecarZEDGymEnv(renders=True, isDiscrete=True) + environment = RacecarZEDGymEnv(renders=True, isDiscrete=isDiscrete) targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity",-1,1,0) steeringSlider = environment._p.addUserDebugParameter("steering",-1,1,0) @@ -16,25 +17,28 @@ def main(): while (True): targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider) steeringAngle = environment._p.readUserDebugParameter(steeringSlider) - discreteAction = 0 - if (targetVelocity<-0.33): - discreteAction=0 + if (isDiscrete): + discreteAction = 0 + if (targetVelocity<-0.33): + discreteAction=0 + else: + if (targetVelocity>0.33): + discreteAction=6 + else: + discreteAction=3 + if (steeringAngle>-0.17): + if (steeringAngle>0.17): + discreteAction=discreteAction+2 + else: + discreteAction=discreteAction+1 + action=discreteAction else: - if (targetVelocity>0.33): - discreteAction=6 - else: - discreteAction=3 - if (steeringAngle>-0.17): - if (steeringAngle>0.17): - discreteAction=discreteAction+2 - else: - discreteAction=discreteAction+1 - action=discreteAction - + action=[targetVelocity,steeringAngle] + state, reward, done, info = environment.step(action) obs = environment.getExtendedObservation() - print("obs") - print(obs) + #print("obs") + #print(obs) if __name__=="__main__": main()