diff --git a/examples/pybullet/gym/enjoy_kuka_grasping.py b/examples/pybullet/gym/enjoy_kuka_grasping.py
new file mode 100644
index 000000000..2814e1b01
--- /dev/null
+++ b/examples/pybullet/gym/enjoy_kuka_grasping.py
@@ -0,0 +1,26 @@
+import gym
+from envs.bullet.kukaGymEnv import KukaGymEnv
+
+from baselines import deepq
+
+
+def main():
+
+  env = KukaGymEnv(renders=True)
+  act = deepq.load("kuka_model.pkl")
+  print(act)
+  while True:
+    obs, done = env.reset(), False
+    print("===================================")
+    print("obs")
+    print(obs)
+    episode_rew = 0
+    while not done:
+      env.render()
+      obs, rew, done, _ = env.step(act(obs[None])[0])
+      episode_rew += rew
+    print("Episode reward", episode_rew)
+
+
+if __name__ == '__main__':
+  main()
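Note that the script above requires a trained kuka_model.pkl (produced by train_kuka_grasping.py, added at the end of this patch). To smoke-test KukaGymEnv without baselines or a trained model, a random policy can stand in for the loaded act function; this is a hypothetical sketch, not part of the patch, assuming the same repository layout as the script above:

  from envs.bullet.kukaGymEnv import KukaGymEnv

  env = KukaGymEnv(renders=True)
  obs, done = env.reset(), False
  episode_rew = 0
  while not done:
    # sample one of the 7 discrete actions instead of querying the DQN
    action = env.action_space.sample()
    obs, rew, done, _ = env.step(action)
    episode_rew += rew
  print("Episode reward", episode_rew)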
diff --git a/examples/pybullet/gym/envs/bullet/kuka.py b/examples/pybullet/gym/envs/bullet/kuka.py
index 6c25dd25d..e86bb2a0a 100644
--- a/examples/pybullet/gym/envs/bullet/kuka.py
+++ b/examples/pybullet/gym/envs/bullet/kuka.py
@@ -10,8 +10,8 @@ class Kuka:
     self.timeStep = timeStep
     self.reset()
     self.maxForce = 90
-    self.fingerAForce = 12
-    self.fingerBForce = 10
+    self.fingerAForce = 6
+    self.fingerBForce = 5
     self.fingerTipForce = 3
     self.useInverseKinematics = 1
@@ -34,17 +34,18 @@ class Kuka:
 
     objects = p.loadSDF("kuka_iiwa/kuka_with_gripper2.sdf")
     self.kukaUid = objects[0]
-    for i in range (p.getNumJoints(self.kukaUid)):
-      print(p.getJointInfo(self.kukaUid,i))
+    #for i in range (p.getNumJoints(self.kukaUid)):
+    #  print(p.getJointInfo(self.kukaUid,i))
     p.resetBasePositionAndOrientation(self.kukaUid,[-0.100000,0.000000,0.070000],[0.000000,0.000000,0.000000,1.000000])
-    self.jointPositions=[ -0.196884, 0.857586, -0.023543, -1.664977, 0.030403, 0.624786, -0.232294, 0.000000, -0.296450, 0.000000, 0.100002, 0.284399, 0.000000, -0.099999 ]
+    self.jointPositions=[ 0.006418, 0.413184, -0.011401, -1.589317, 0.005379, 1.137684, -0.006539, 0.000048, -0.299912, 0.000000, -0.000043, 0.299960, 0.000000, -0.000200 ]
     self.numJoints = p.getNumJoints(self.kukaUid)
     for jointIndex in range (self.numJoints):
       p.resetJointState(self.kukaUid,jointIndex,self.jointPositions[jointIndex])
     self.trayUid = p.loadURDF("tray/tray.urdf", 0.640000,0.075000,-0.190000,0.000000,0.000000,1.000000,0.000000)
-    self.blockUid =p.loadURDF("block.urdf", 0.604746,0.080626,-0.180069,0.000050,-0.000859,-0.824149,0.566372)
-    p.loadURDF("table/table.urdf", 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
+    self.endEffectorPos = [0.537,0.0,0.5]
+    self.endEffectorAngle = 0
+
     self.motorNames = []
     self.motorIndices = []
@@ -68,21 +69,47 @@ class Kuka:
 
   def getObservation(self):
     observation = []
-    pos,orn=p.getBasePositionAndOrientation(self.blockUid)
-
+    state = p.getLinkState(self.kukaUid,self.kukaEndEffectorIndex)
+    pos = state[0]
+    orn = state[1]
+
     observation.extend(list(pos))
     observation.extend(list(orn))
     return observation
 
   def applyAction(self, motorCommands):
+    #print ("self.numJoints")
     #print (self.numJoints)
     if (self.useInverseKinematics):
-      pos = [motorCommands[0],motorCommands[1],motorCommands[2]]
-      yaw = motorCommands[3]
+
+      dx = motorCommands[0]
+      dy = motorCommands[1]
+      dz = motorCommands[2]
+      da = motorCommands[3]
       fingerAngle = motorCommands[4]
-      #roll = motorCommands[5]
+
+      self.endEffectorPos[0] = self.endEffectorPos[0]+dx
+      if (self.endEffectorPos[0]>0.7):
+        self.endEffectorPos[0]=0.7
+      if (self.endEffectorPos[0]<0.45):
+        self.endEffectorPos[0]=0.45
+      self.endEffectorPos[1] = self.endEffectorPos[1]+dy
+      if (self.endEffectorPos[1]<-0.2):
+        self.endEffectorPos[1]=-0.2
+      if (self.endEffectorPos[1]>0.3):
+        self.endEffectorPos[1]=0.3
+
+      #print (self.endEffectorPos[2])
+      self.endEffectorPos[2] = self.endEffectorPos[2]+dz
+      if (self.endEffectorPos[2]<0.1):
+        self.endEffectorPos[2] = 0.1
+      if (self.endEffectorPos[2]>0.5):
+        self.endEffectorPos[2] = 0.5
+
+      self.endEffectorAngle = self.endEffectorAngle + da
+      pos = self.endEffectorPos
       orn = p.getQuaternionFromEuler([0,-math.pi,0]) # -math.pi,yaw])
       if (self.useNullSpace==1):
         if (self.useOrientation==1):
@@ -108,7 +135,7 @@ class Kuka:
       for i in range (self.numJoints):
         p.resetJointState(self.kukaUid,i,jointPoses[i])
       #fingers
-      p.setJointMotorControl2(self.kukaUid,7,p.POSITION_CONTROL,targetPosition=yaw,force=self.maxForce)
+      p.setJointMotorControl2(self.kukaUid,7,p.POSITION_CONTROL,targetPosition=self.endEffectorAngle,force=self.maxForce)
       p.setJointMotorControl2(self.kukaUid,8,p.POSITION_CONTROL,targetPosition=-fingerAngle,force=self.fingerAForce)
       p.setJointMotorControl2(self.kukaUid,11,p.POSITION_CONTROL,targetPosition=fingerAngle,force=self.fingerBForce)
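With this change, applyAction no longer treats motorCommands as an absolute IK target: it integrates per-step deltas [dx, dy, dz, da, fingerAngle] into endEffectorPos and clamps each axis so the gripper stays above the tray. The repeated if-pairs are a clamp; a hypothetical helper (not part of the patch) expressing the same limits hard-coded above:

  # Sketch of the workspace clamping in applyAction, factored into a helper.
  # Limits from the diff: x in [0.45, 0.7], y in [-0.2, 0.3], z in [0.1, 0.5].
  def clamp(value, lo, hi):
    return max(lo, min(hi, value))

  endEffectorPos = [0.537, 0.0, 0.5]  # initial pose set in reset()
  dx, dy, dz = 0.002, -0.001, -0.005  # example per-step deltas

  endEffectorPos[0] = clamp(endEffectorPos[0] + dx, 0.45, 0.7)
  endEffectorPos[1] = clamp(endEffectorPos[1] + dy, -0.2, 0.3)
  endEffectorPos[2] = clamp(endEffectorPos[2] + dz, 0.1, 0.5)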
diff --git a/examples/pybullet/gym/envs/bullet/kukaGymEnv.py b/examples/pybullet/gym/envs/bullet/kukaGymEnv.py
index 6ee08a7b6..0baa0f3ca 100644
--- a/examples/pybullet/gym/envs/bullet/kukaGymEnv.py
+++ b/examples/pybullet/gym/envs/bullet/kukaGymEnv.py
@@ -16,7 +16,7 @@ class KukaGymEnv(gym.Env):
   def __init__(self,
                urdfRoot="",
-               actionRepeat=50,
+               actionRepeat=1,
                isEnableSelfCollision=True,
                renders=True):
     print("init")
@@ -32,6 +32,7 @@ class KukaGymEnv(gym.Env):
       p.connect(p.GUI)
     else:
       p.connect(p.DIRECT)
+    #timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
     self._seed()
     self.reset()
     observationDim = len(self.getExtendedObservation())
@@ -39,7 +40,7 @@ class KukaGymEnv(gym.Env):
     #print(observationDim)
 
     observation_high = np.array([np.finfo(np.float32).max] * observationDim)
-    self.action_space = spaces.Discrete(9)
+    self.action_space = spaces.Discrete(7)
     self.observation_space = spaces.Box(-observation_high, observation_high)
     self.viewer = None
@@ -48,14 +49,16 @@ class KukaGymEnv(gym.Env):
     p.setPhysicsEngineParameter(numSolverIterations=150)
     p.setTimeStep(self._timeStep)
     p.loadURDF("%splane.urdf" % self._urdfRoot,[0,0,-1])
+    if self._renders:
+      p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
+    p.loadURDF("table/table.urdf", 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
-    dist = 5 +2.*random.random()
-    ang = 2.*3.1415925438*random.random()
-
-    ballx = dist * math.sin(ang)
-    bally = dist * math.cos(ang)
-    ballz = 1
-
+    xpos = 0.5 +0.25*random.random()
+    ypos = 0 +0.22*random.random()
+    ang = 3.1415925438*random.random()
+    orn = p.getQuaternionFromEuler([0,0,ang])
+    self.blockUid =p.loadURDF("block.urdf", xpos,ypos,0.02,orn[0],orn[1],orn[2],orn[3])
+
     p.setGravity(0,0,-10)
     self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
     self._envStepCounter = 0
@@ -72,9 +75,22 @@ class KukaGymEnv(gym.Env):
 
   def getExtendedObservation(self):
     self._observation = self._kuka.getObservation()
+    pos,orn = p.getBasePositionAndOrientation(self.blockUid)
+    self._observation.extend(list(pos))
+    self._observation.extend(list(orn))
+
     return self._observation
-
+
   def _step(self, action):
+    dv = 0.005
+    dx = [0,-dv,dv,0,0,0,0][action]
+    dy = [0,0,0,-dv,dv,0,0][action]
+    da = [0,0,0,0,0,-dv,dv][action]
+    f = 0.3
+    realAction = [dx,dy,-0.1,da,f]
+    return self.step2( realAction)
+
+  def step2(self, action):
     self._kuka.applyAction(action)
     for i in range(self._actionRepeat):
       p.stepSimulation()
@@ -84,8 +100,11 @@ class KukaGymEnv(gym.Env):
       if self._termination():
         break
      self._envStepCounter += 1
-    reward = self._reward()
+    #print("self._envStepCounter")
+    #print(self._envStepCounter)
+
     done = self._termination()
+    reward = self._reward()
     #print("len=%r" % len(self._observation))
     return np.array(self._observation), reward, done, {}
@@ -94,10 +113,36 @@ class KukaGymEnv(gym.Env):
     return
 
   def _termination(self):
-    return self._envStepCounter>1000
+    #print (self._kuka.endEffectorPos[2])
+    if (self._envStepCounter>1000):
+      for i in range (1000):
+        p.stepSimulation()
+      #start grasp and terminate
+      fingerAngle = 0.3
+
+
+      for i in range (5000):
+        graspAction = [0,0,0,0,fingerAngle]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        fingerAngle = fingerAngle-(0.3/5000.)
+
+      for i in range (5000):
+        graspAction = [0,0,0.0001,0,0]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        fingerAngle = fingerAngle-(0.3/10000.)
+
+      self._observation = self.getExtendedObservation()
+      return True
+    return False
 
   def _reward(self):
-    #todo
-    reward=0
+    #reward is the height of the target object
+    pos,orn=p.getBasePositionAndOrientation(self.blockUid)
+    reward = pos[2]
+    if (reward>0.2):
+      print("reward")
+      print(reward)
     return reward
\ No newline at end of file
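The new _step maps the Discrete(7) action space onto the 5-component continuous command consumed by Kuka.applyAction ([dx, dy, dz, da, fingerAngle]). A standalone restatement of that lookup with a small self-check (hypothetical helper name; the dv, dz and fingerAngle values are the ones from the diff):

  # Sketch of the Discrete(7) -> continuous mapping used by _step.
  # Action 0 is a no-op in x/y/yaw; 1/2 step -x/+x, 3/4 step -y/+y, 5/6 rotate the wrist.
  # dz is fixed at -0.1 so the gripper keeps descending; fingerAngle is held open at 0.3.
  dv = 0.005

  def to_real_action(action):
    dx = [0, -dv, dv, 0, 0, 0, 0][action]
    dy = [0, 0, 0, -dv, dv, 0, 0][action]
    da = [0, 0, 0, 0, 0, -dv, dv][action]
    return [dx, dy, -0.1, da, 0.3]

  assert to_real_action(2) == [dv, 0, -0.1, 0, 0.3]  # action 2 steps +x

Grasping itself is not part of the action space: once _termination fires after 1000 steps, the environment scripts a close-fingers-and-lift sequence, and _reward is simply the block's height afterwards.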
diff --git a/examples/pybullet/gym/kukaGymEnvTest.py b/examples/pybullet/gym/kukaGymEnvTest.py
index e58ade822..05ca9497f 100644
--- a/examples/pybullet/gym/kukaGymEnvTest.py
+++ b/examples/pybullet/gym/kukaGymEnvTest.py
@@ -7,18 +7,26 @@
 environment = KukaGymEnv(renders=True)
 
 motorsIds=[]
-motorsIds.append(environment._p.addUserDebugParameter("posX",0.4,0.75,0.537))
-motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0))
-motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2))
-motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
+#motorsIds.append(environment._p.addUserDebugParameter("posX",0.4,0.75,0.537))
+#motorsIds.append(environment._p.addUserDebugParameter("posY",-.22,.3,0.0))
+#motorsIds.append(environment._p.addUserDebugParameter("posZ",0.1,1,0.2))
+#motorsIds.append(environment._p.addUserDebugParameter("yaw",-3.14,3.14,0))
+#motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
+
+dv = 0.001
+motorsIds.append(environment._p.addUserDebugParameter("posX",-dv,dv,0))
+motorsIds.append(environment._p.addUserDebugParameter("posY",-dv,dv,0))
+motorsIds.append(environment._p.addUserDebugParameter("posZ",-dv,dv,-dv))
+motorsIds.append(environment._p.addUserDebugParameter("yaw",-dv,dv,0))
 motorsIds.append(environment._p.addUserDebugParameter("fingerAngle",0,0.3,.3))
 
-while (True):
+done = False
+while (not done):
   action=[]
   for motorId in motorsIds:
     action.append(environment._p.readUserDebugParameter(motorId))
-  state, reward, done, info = environment.step(action)
+
+  state, reward, done, info = environment.step2(action)
   obs = environment.getExtendedObservation()
diff --git a/examples/pybullet/gym/train_kuka_grasping.py b/examples/pybullet/gym/train_kuka_grasping.py
new file mode 100644
index 000000000..10bb5c1ee
--- /dev/null
+++ b/examples/pybullet/gym/train_kuka_grasping.py
@@ -0,0 +1,40 @@
+import gym
+from envs.bullet.kukaGymEnv import KukaGymEnv
+
+from baselines import deepq
+
+import datetime
+
+
+
+def callback(lcl, glb):
+  # stop training once the mean reward over the last 100 episodes reaches 10
+  total = sum(lcl['episode_rewards'][-101:-1]) / 100
+  totalt = lcl['t']
+  #print("totalt")
+  #print(totalt)
+  is_solved = totalt > 2000 and total >= 10
+  return is_solved
+
+
+def main():
+
+  env = KukaGymEnv(renders=False)
+  model = deepq.models.mlp([64])
+  act = deepq.learn(
+    env,
+    q_func=model,
+    lr=1e-3,
+    max_timesteps=10000000,
+    buffer_size=50000,
+    exploration_fraction=0.1,
+    exploration_final_eps=0.02,
+    print_freq=10,
+    callback=callback
+  )
+  print("Saving model to kuka_model.pkl")
+  act.save("kuka_model.pkl")
+
+
+if __name__ == '__main__':
+  main()
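deepq.learn invokes callback(lcl, glb) with its local and global variables each iteration, and stops training when the callback returns True. The stopping rule above, restated as a hypothetical standalone function with example checks:

  # Sketch of the stopping rule in callback (hypothetical helper, not part of the patch):
  # stop once more than 2000 steps have elapsed and the mean reward over the
  # last 100 finished episodes is at least 10.
  def is_solved(episode_rewards, t):
    total = sum(episode_rewards[-101:-1]) / 100
    return t > 2000 and total >= 10

  print(is_solved([12.0] * 200, t=5000))  # True: mean of last 100 episodes is 12
  print(is_solved([12.0] * 200, t=1000))  # False: too few steps so far

Once training saves kuka_model.pkl, enjoy_kuka_grasping.py at the top of this patch replays the learned policy with rendering enabled.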