Add files for multiclip reward

2024-12-13 13:20:07 +00:00 · 2020-09-17 18:04:41 +01:00 · 2020-09-17 18:04:41 +01:00 · ab8d927845
commit ab8d927845
parent 5ce97ffdb1
14 changed files with 12367 additions and 0 deletions
--- a/examples/pybullet/gym/pybullet_data/args/run_humanoid3d_walker_args.txt
+++ b/examples/pybullet/gym/pybullet_data/args/run_humanoid3d_walker_args.txt
@ -0,0 +1,25 @@
+--scene imitate
+
+--num_update_substeps 10
+--num_sim_substeps 2
+--world_scale 4
+
+--terrain_file data/terrain/plane.txt
+
+--char_types general
+--character_files data/characters/humanoid3d.txt
+--enable_char_soft_contact false
+--fall_contact_bodies 0 1 2 3 4 6 7 8 9 10 12 13 14
+
+--char_ctrls ct_pd
+--char_ctrl_files data/controllers/humanoid3d_ctrl.txt
+--motion_file data/motions/walker/walk_forward0.txt
+--sync_char_root_pos true
+--sync_char_root_rot false
+
+--agent_files data/agents/ct_agent_humanoid_ppo.txt
+--train_agents false
+
+#--output_path output
+#--int_output_path output/intermediate
+--model_files data/policies/humanoid3d/humanoid3d_walker.ckpt
--- a/examples/pybullet/gym/pybullet_data/args/train_humanoid3d_walker_args.txt
+++ b/examples/pybullet/gym/pybullet_data/args/train_humanoid3d_walker_args.txt
@ -0,0 +1,33 @@
+--scene imitate
+
+--time_lim_min 0.5
+--time_lim_max 0.5
+--time_lim_exp 0.2
+--time_end_lim_min 20
+--time_end_lim_max 20
+--time_end_lim_exp 50
+--anneal_samples 32000000
+
+--num_update_substeps 10
+--num_sim_substeps 2
+--world_scale 4
+
+--terrain_file data/terrain/plane.txt
+
+--char_types general
+--character_files data/characters/humanoid3d.txt
+--enable_char_soft_contact false
+--fall_contact_bodies 0 1 2 3 4 6 7 8 9 10 12 13 14
+
+--char_ctrls ct_pd
+--char_ctrl_files data/controllers/humanoid3d_ctrl.txt
+--motion_file data/motions/walker/
+--sync_char_root_pos true
+--sync_char_root_rot false
+
+--agent_files data/agents/ct_agent_humanoid_ppo.txt
+#--train_agents false
+
+--output_path output
+#--int_output_path output/intermediate
+#--model_files data/policies/humanoid3d/agent0_model.ckpt
--- a/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_left0.txt
+++ b/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_left0.txt
--- a/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_left1.txt
+++ b/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_left1.txt
--- a/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_right0.txt
+++ b/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_right0.txt
--- a/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_right1.txt
+++ b/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_right1.txt
--- a/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_right2.txt
+++ b/examples/pybullet/gym/pybullet_data/data/motions/walker/turn_right2.txt
--- a/examples/pybullet/gym/pybullet_data/data/motions/walker/walk_forward0.txt
+++ b/examples/pybullet/gym/pybullet_data/data/motions/walker/walk_forward0.txt
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer_multiclip.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer_multiclip.py
@ -0,0 +1,54 @@
+import numpy as np
+import sys
+import os
+import inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0, parentdir)
+print("parentdir=", parentdir)
+
+from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env_multiclip import PyBulletDeepMimicEnvMultiClip
+from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
+from pybullet_utils.logger import Logger
+from pybullet_envs.deep_mimic.testrl_multiclip import update_world, update_timestep, build_world
+import pybullet_utils.mpi_util as MPIUtil
+
+args = []
+world = None
+
+
+def run():
+  global update_timestep
+  global world
+  done = False
+  while not (done):
+    update_world(world, update_timestep)
+
+  return
+
+
+def shutdown():
+  global world
+
+  Logger.print2('Shutting down...')
+  world.shutdown()
+  return
+
+
+def main():
+  global args
+  global world
+
+  # Command line arguments
+  args = sys.argv[1:]
+  enable_draw = False
+  world = build_world(args, enable_draw)
+
+  run()
+  shutdown()
+
+  return
+
+
+if __name__ == '__main__':
+  main()
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd_multiclip.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd_multiclip.py
@ -0,0 +1,997 @@
+from pybullet_utils import pd_controller_stable
+from pybullet_envs.deep_mimic.env import humanoid_pose_interpolator
+import math
+import numpy as np
+
+chest = 1
+neck = 2
+rightHip = 3
+rightKnee = 4
+rightAnkle = 5
+rightShoulder = 6
+rightElbow = 7
+leftHip = 9
+leftKnee = 10
+leftAnkle = 11
+leftShoulder = 12
+leftElbow = 13
+jointFrictionForce = 0
+
+
+class HumanoidStablePDMultiClip(object):
+
+    def __init__(self, pybullet_client, mocap_data, timeStep,
+                 useFixedBase=True, arg_parser=None, useComReward=False):
+        self._pybullet_client = pybullet_client
+        self._mocap_data = mocap_data # this is a dictionary
+        self._arg_parser = arg_parser
+        self._n_clips = self._mocap_data.getNumClips()
+        print("LOADING humanoid!")
+        flags = self._pybullet_client.URDF_MAINTAIN_LINK_ORDER + self._pybullet_client.URDF_USE_SELF_COLLISION + self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
+        self._sim_model = self._pybullet_client.loadURDF(
+            "humanoid/humanoid.urdf", [0, 0.889540259, 0],
+            globalScaling=0.25,
+            useFixedBase=useFixedBase,
+            flags=flags)
+
+        # self._pybullet_client.setCollisionFilterGroupMask(self._sim_model,-1,collisionFilterGroup=0,collisionFilterMask=0)
+        # for j in range (self._pybullet_client.getNumJoints(self._sim_model)):
+        #  self._pybullet_client.setCollisionFilterGroupMask(self._sim_model,j,collisionFilterGroup=0,collisionFilterMask=0)
+
+        self._end_effectors = [5, 8, 11, 14]  # ankle and wrist, both left and right
+
+        self._kin_models = {}
+        self._poseInterpolators = {}
+
+        for i in range(self._n_clips):
+            self._kin_models[i] = self._pybullet_client.loadURDF(
+                "humanoid/humanoid.urdf", [0, 0.85, 0],
+                globalScaling=0.25,
+                useFixedBase=True,
+                flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER)
+
+        self._pybullet_client.changeDynamics(self._sim_model, -1, lateralFriction=0.9)
+        for j in range(self._pybullet_client.getNumJoints(self._sim_model)):
+            self._pybullet_client.changeDynamics(self._sim_model, j, lateralFriction=0.9)
+
+        self._pybullet_client.changeDynamics(self._sim_model, -1, linearDamping=0, angularDamping=0)
+        for i in range(self._n_clips):
+            self._pybullet_client.changeDynamics(self._kin_models[i], -1, linearDamping=0, angularDamping=0)
+
+            # todo: add feature to disable simulation for a particular object. Until then, disable all collisions
+            self._pybullet_client.setCollisionFilterGroupMask(self._kin_models[i],
+                                                              -1,
+                                                              collisionFilterGroup=0,
+                                                              collisionFilterMask=0)
+            self._pybullet_client.changeDynamics(
+                self._kin_models[i],
+                -1,
+                activationState=self._pybullet_client.ACTIVATION_STATE_SLEEP +
+                                self._pybullet_client.ACTIVATION_STATE_ENABLE_SLEEPING +
+                                self._pybullet_client.ACTIVATION_STATE_DISABLE_WAKEUP)
+            alpha = 0.4
+            self._pybullet_client.changeVisualShape(self._kin_models[i], -1, rgbaColor=[1, 1, 1, alpha])
+            for j in range(self._pybullet_client.getNumJoints(self._kin_models[i])):
+                self._pybullet_client.setCollisionFilterGroupMask(self._kin_models[i],
+                                                                  j,
+                                                                  collisionFilterGroup=0,
+                                                                  collisionFilterMask=0)
+                self._pybullet_client.changeDynamics(
+                    self._kin_models[i],
+                    j,
+                    activationState=self._pybullet_client.ACTIVATION_STATE_SLEEP +
+                                    self._pybullet_client.ACTIVATION_STATE_ENABLE_SLEEPING +
+                                    self._pybullet_client.ACTIVATION_STATE_DISABLE_WAKEUP)
+                self._pybullet_client.changeVisualShape(self._kin_models[i], j, rgbaColor=[1, 1, 1, alpha])
+
+            self._poseInterpolators[i] = humanoid_pose_interpolator.HumanoidPoseInterpolator()
+
+            for j in range(self._mocap_data.getNumFrames() - 1):
+                frameData = self._mocap_data._motion_data[i]['Frames'][j]
+                self._poseInterpolators[i].PostProcessMotionData(frameData)
+
+        self._stablePD = pd_controller_stable.PDControllerStableMultiDof(self._pybullet_client)
+        self._timeStep = timeStep
+        self._kpOrg = [
+            0, 0, 0, 0, 0, 0, 0, 1000, 1000, 1000, 1000, 100, 100, 100, 100, 500, 500, 500, 500, 500,
+            400, 400, 400, 400, 400, 400, 400, 400, 300, 500, 500, 500, 500, 500, 400, 400, 400, 400,
+            400, 400, 400, 400, 300
+        ]
+        self._kdOrg = [
+            0, 0, 0, 0, 0, 0, 0, 100, 100, 100, 100, 10, 10, 10, 10, 50, 50, 50, 50, 50, 40, 40, 40,
+            40, 40, 40, 40, 40, 30, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 30
+        ]
+
+        self._jointIndicesAll = [
+            chest, neck, rightHip, rightKnee, rightAnkle, rightShoulder, rightElbow, leftHip, leftKnee,
+            leftAnkle, leftShoulder, leftElbow
+        ]
+        for j in self._jointIndicesAll:
+            # self._pybullet_client.setJointMotorControlMultiDof(self._sim_model, j, self._pybullet_client.POSITION_CONTROL, force=[1,1,1])
+            self._pybullet_client.setJointMotorControl2(self._sim_model,
+                                                        j,
+                                                        self._pybullet_client.POSITION_CONTROL,
+                                                        targetPosition=0,
+                                                        positionGain=0,
+                                                        targetVelocity=0,
+                                                        force=jointFrictionForce)
+            self._pybullet_client.setJointMotorControlMultiDof(
+                self._sim_model,
+                j,
+                self._pybullet_client.POSITION_CONTROL,
+                targetPosition=[0, 0, 0, 1],
+                targetVelocity=[0, 0, 0],
+                positionGain=0,
+                velocityGain=1,
+                force=[jointFrictionForce, jointFrictionForce, jointFrictionForce])
+
+            for i in range(self._n_clips):
+                self._pybullet_client.setJointMotorControl2(self._kin_models[i],
+                                                            j,
+                                                            self._pybullet_client.POSITION_CONTROL,
+                                                            targetPosition=0,
+                                                            positionGain=0,
+                                                            targetVelocity=0,
+                                                            force=0)
+                self._pybullet_client.setJointMotorControlMultiDof(
+                    self._kin_models[i],
+                    j,
+                    self._pybullet_client.POSITION_CONTROL,
+                    targetPosition=[0, 0, 0, 1],
+                    targetVelocity=[0, 0, 0],
+                    positionGain=0,
+                    velocityGain=1,
+                    force=[jointFrictionForce, jointFrictionForce, 0])
+
+        self._jointDofCounts = [4, 4, 4, 1, 4, 4, 1, 4, 1, 4, 4, 1]
+
+        # only those body parts/links are allowed to touch the ground, otherwise the episode terminates
+        fall_contact_bodies = []
+        if self._arg_parser is not None:
+            fall_contact_bodies = self._arg_parser.parse_ints("fall_contact_bodies")
+        self._fall_contact_body_parts = fall_contact_bodies
+
+        # [x,y,z] base position and [x,y,z,w] base orientation!
+        self._totalDofs = 7
+        for dof in self._jointDofCounts:
+            self._totalDofs += dof
+        self.setSimTime(0)
+
+        self._useComReward = useComReward
+
+        self.resetPose()
+
+    def resetPose(self):
+        # print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction)
+        pose = self.computePose(self._frameFraction, 0)
+        self.initializePose(self._poseInterpolators[0], self._sim_model, initBase=True)
+        self.initializePose(self._poseInterpolators[0], self._kin_models[0], initBase=False)
+        for i in range(1, self._n_clips):
+            _ = self.computePose(self._frameFraction, i)
+            # self.initializePose(self._poseInterpolators[i], self._sim_model, initBase=True)
+            self.initializePose(self._poseInterpolators[i], self._kin_models[i], initBase=False)
+
+    def initializePose(self, pose, phys_model, initBase, initializeVelocity=True):
+
+        useArray = True
+        if initializeVelocity:
+            if initBase:
+                self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos,
+                                                                      pose._baseOrn)
+                self._pybullet_client.resetBaseVelocity(phys_model, pose._baseLinVel, pose._baseAngVel)
+            if useArray:
+                indices = [chest, neck, rightHip, rightKnee,
+                           rightAnkle, rightShoulder, rightElbow, leftHip,
+                           leftKnee, leftAnkle, leftShoulder, leftElbow]
+                jointPositions = [pose._chestRot, pose._neckRot, pose._rightHipRot, pose._rightKneeRot,
+                                  pose._rightAnkleRot, pose._rightShoulderRot, pose._rightElbowRot, pose._leftHipRot,
+                                  pose._leftKneeRot, pose._leftAnkleRot, pose._leftShoulderRot, pose._leftElbowRot]
+
+                jointVelocities = [pose._chestVel, pose._neckVel, pose._rightHipVel, pose._rightKneeVel,
+                                   pose._rightAnkleVel, pose._rightShoulderVel, pose._rightElbowVel, pose._leftHipVel,
+                                   pose._leftKneeVel, pose._leftAnkleVel, pose._leftShoulderVel, pose._leftElbowVel]
+                self._pybullet_client.resetJointStatesMultiDof(phys_model, indices,
+                                                               jointPositions, jointVelocities)
+            else:
+                self._pybullet_client.resetJointStateMultiDof(phys_model, chest, pose._chestRot,
+                                                              pose._chestVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, neck, pose._neckRot, pose._neckVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightHip, pose._rightHipRot,
+                                                              pose._rightHipVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightKnee, pose._rightKneeRot,
+                                                              pose._rightKneeVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightAnkle, pose._rightAnkleRot,
+                                                              pose._rightAnkleVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightShoulder,
+                                                              pose._rightShoulderRot, pose._rightShoulderVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightElbow, pose._rightElbowRot,
+                                                              pose._rightElbowVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftHip, pose._leftHipRot,
+                                                              pose._leftHipVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftKnee, pose._leftKneeRot,
+                                                              pose._leftKneeVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftAnkle, pose._leftAnkleRot,
+                                                              pose._leftAnkleVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftShoulder,
+                                                              pose._leftShoulderRot, pose._leftShoulderVel)
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftElbow, pose._leftElbowRot,
+                                                              pose._leftElbowVel)
+        else:
+
+            if initBase:
+                self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos,
+                                                                      pose._baseOrn)
+            if useArray:
+                indices = [chest, neck, rightHip, rightKnee,
+                           rightAnkle, rightShoulder, rightElbow, leftHip,
+                           leftKnee, leftAnkle, leftShoulder, leftElbow]
+                jointPositions = [pose._chestRot, pose._neckRot, pose._rightHipRot, pose._rightKneeRot,
+                                  pose._rightAnkleRot, pose._rightShoulderRot, pose._rightElbowRot, pose._leftHipRot,
+                                  pose._leftKneeRot, pose._leftAnkleRot, pose._leftShoulderRot, pose._leftElbowRot]
+                self._pybullet_client.resetJointStatesMultiDof(phys_model, indices, jointPositions)
+
+            else:
+                self._pybullet_client.resetJointStateMultiDof(phys_model, chest, pose._chestRot, [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, neck, pose._neckRot, [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightHip, pose._rightHipRot,
+                                                              [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightKnee, pose._rightKneeRot, [0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightAnkle, pose._rightAnkleRot,
+                                                              [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightShoulder,
+                                                              pose._rightShoulderRot, [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, rightElbow, pose._rightElbowRot,
+                                                              [0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftHip, pose._leftHipRot,
+                                                              [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftKnee, pose._leftKneeRot, [0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftAnkle, pose._leftAnkleRot,
+                                                              [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftShoulder,
+                                                              pose._leftShoulderRot, [0, 0, 0])
+                self._pybullet_client.resetJointStateMultiDof(phys_model, leftElbow, pose._leftElbowRot, [0])
+
+    def calcCycleCount(self, simTime, cycleTime):
+        phases = simTime / cycleTime
+        count = math.floor(phases)
+        loop = True
+        # count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
+        return count
+
+    def getCycleTime(self):
+        keyFrameDuration = self._mocap_data.getKeyFrameDuration()
+        cycleTime = keyFrameDuration * (self._mocap_data.getNumFrames() - 1)
+        return cycleTime
+
+    def setSimTime(self, t):
+        self._simTime = t
+        # print("SetTimeTime time =",t)
+        keyFrameDuration = self._mocap_data.getKeyFrameDuration()
+        cycleTime = self.getCycleTime()
+        # print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
+        self._cycleCount = self.calcCycleCount(t, cycleTime)
+        # print("cycles=",cycles)
+        frameTime = t - self._cycleCount * cycleTime
+        if (frameTime < 0):
+            frameTime += cycleTime
+
+        # print("keyFrameDuration=",keyFrameDuration)
+        # print("frameTime=",frameTime)
+        self._frame = int(frameTime / keyFrameDuration)
+        # print("self._frame=",self._frame)
+
+        self._frameNext = self._frame + 1
+        if (self._frameNext >= self._mocap_data.getNumFrames()):
+            self._frameNext = self._frame
+
+        self._frameFraction = (frameTime - self._frame * keyFrameDuration) / (keyFrameDuration)
+
+    def computeCycleOffset(self, i=0):
+        firstFrame = 0
+        lastFrame = self._mocap_data.getNumFrames() - 1
+        frameData = self._mocap_data._motion_data[i]['Frames'][0]
+        frameDataNext = self._mocap_data._motion_data[i]['Frames'][lastFrame]
+
+        basePosStart = [frameData[1], frameData[2], frameData[3]]
+        basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+        self._cycleOffset = [
+            basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
+            basePosEnd[2] - basePosStart[2]
+        ]
+        return self._cycleOffset
+
+    def computePose(self, frameFraction, i=0):
+        frameData = self._mocap_data._motion_data[i]['Frames'][self._frame]
+        frameDataNext = self._mocap_data._motion_data[i]['Frames'][self._frameNext]
+
+        self._poseInterpolators[i].Slerp(frameFraction, frameData, frameDataNext, self._pybullet_client)
+        # print("self._poseInterpolator.Slerp(", frameFraction,")=", pose)
+        self.computeCycleOffset()
+        oldPos = self._poseInterpolators[i]._basePos
+        self._poseInterpolators[i]._basePos = [
+            oldPos[0] + self._cycleCount * self._cycleOffset[0],
+            oldPos[1] + self._cycleCount * self._cycleOffset[1],
+            oldPos[2] + self._cycleCount * self._cycleOffset[2]
+        ]
+        pose = self._poseInterpolators[i].GetPose()
+
+        return pose
+
+    def convertActionToPose(self, action, i):
+        pose = self._poseInterpolators[i].ConvertFromAction(self._pybullet_client, action)
+        return pose
+
+    def computeAndApplyPDForces(self, desiredPositions, maxForces):
+        dofIndex = 7
+        scaling = 1
+        indices = []
+        forces = []
+        targetPositions = []
+        targetVelocities = []
+        kps = []
+        kds = []
+
+        for index in range(len(self._jointIndicesAll)):
+            jointIndex = self._jointIndicesAll[index]
+            indices.append(jointIndex)
+            kps.append(self._kpOrg[dofIndex])
+            kds.append(self._kdOrg[dofIndex])
+            if self._jointDofCounts[index] == 4:
+                force = [
+                    scaling * maxForces[dofIndex + 0],
+                    scaling * maxForces[dofIndex + 1],
+                    scaling * maxForces[dofIndex + 2]
+                ]
+                targetVelocity = [0, 0, 0]
+                targetPosition = [
+                    desiredPositions[dofIndex + 0],
+                    desiredPositions[dofIndex + 1],
+                    desiredPositions[dofIndex + 2],
+                    desiredPositions[dofIndex + 3]
+                ]
+            if self._jointDofCounts[index] == 1:
+                force = [scaling * maxForces[dofIndex]]
+                targetPosition = [desiredPositions[dofIndex + 0]]
+                targetVelocity = [0]
+            forces.append(force)
+            targetPositions.append(targetPosition)
+            targetVelocities.append(targetVelocity)
+            dofIndex += self._jointDofCounts[index]
+
+        # static char* kwlist[] = { "bodyUniqueId",
+        # "jointIndices",
+        # "controlMode", "targetPositions", "targetVelocities", "forces", "positionGains", "velocityGains", "maxVelocities", "physicsClientId", NULL };
+        self._pybullet_client.setJointMotorControlMultiDofArray(self._sim_model,
+                                                                indices,
+                                                                self._pybullet_client.STABLE_PD_CONTROL,
+                                                                targetPositions=targetPositions,
+                                                                targetVelocities=targetVelocities,
+                                                                forces=forces,
+                                                                positionGains=kps,
+                                                                velocityGains=kds,
+                                                                )
+
+    def computePDForces(self, desiredPositions, desiredVelocities, maxForces):
+        """Compute torques from the PD controller."""
+        if desiredVelocities == None:
+            desiredVelocities = [0] * self._totalDofs
+
+        taus = self._stablePD.computePD(bodyUniqueId=self._sim_model,
+                                        jointIndices=self._jointIndicesAll,
+                                        desiredPositions=desiredPositions,
+                                        desiredVelocities=desiredVelocities,
+                                        kps=self._kpOrg,
+                                        kds=self._kdOrg,
+                                        maxForces=maxForces,
+                                        timeStep=self._timeStep)
+        return taus
+
+    def applyPDForces(self, taus):
+        """Apply pre-computed torques."""
+        dofIndex = 7
+        scaling = 1
+        useArray = True
+        indices = []
+        forces = []
+
+        if (useArray):
+            for index in range(len(self._jointIndicesAll)):
+                jointIndex = self._jointIndicesAll[index]
+                indices.append(jointIndex)
+                if self._jointDofCounts[index] == 4:
+                    force = [
+                        scaling * taus[dofIndex + 0], scaling * taus[dofIndex + 1],
+                        scaling * taus[dofIndex + 2]
+                    ]
+                if self._jointDofCounts[index] == 1:
+                    force = [scaling * taus[dofIndex]]
+                    # print("force[", jointIndex,"]=",force)
+                forces.append(force)
+                dofIndex += self._jointDofCounts[index]
+            self._pybullet_client.setJointMotorControlMultiDofArray(self._sim_model,
+                                                                    indices,
+                                                                    self._pybullet_client.TORQUE_CONTROL,
+                                                                    forces=forces)
+        else:
+            for index in range(len(self._jointIndicesAll)):
+                jointIndex = self._jointIndicesAll[index]
+                if self._jointDofCounts[index] == 4:
+                    force = [
+                        scaling * taus[dofIndex + 0], scaling * taus[dofIndex + 1],
+                        scaling * taus[dofIndex + 2]
+                    ]
+                    # print("force[", jointIndex,"]=",force)
+                    self._pybullet_client.setJointMotorControlMultiDof(self._sim_model,
+                                                                       jointIndex,
+                                                                       self._pybullet_client.TORQUE_CONTROL,
+                                                                       force=force)
+                if self._jointDofCounts[index] == 1:
+                    force = [scaling * taus[dofIndex]]
+                    # print("force[", jointIndex,"]=",force)
+                    self._pybullet_client.setJointMotorControlMultiDof(
+                        self._sim_model,
+                        jointIndex,
+                        controlMode=self._pybullet_client.TORQUE_CONTROL,
+                        force=force)
+                dofIndex += self._jointDofCounts[index]
+
+    def setJointMotors(self, desiredPositions, maxForces, i=0):
+        controlMode = self._pybullet_client.POSITION_CONTROL
+        startIndex = 7
+        chest = 1
+        neck = 2
+        rightHip = 3
+        rightKnee = 4
+        rightAnkle = 5
+        rightShoulder = 6
+        rightElbow = 7
+        leftHip = 9
+        leftKnee = 10
+        leftAnkle = 11
+        leftShoulder = 12
+        leftElbow = 13
+        kp = 0.2
+
+        forceScale = 1
+        # self._jointDofCounts=[4,4,4,1,4,4,1,4,1,4,4,1]
+        maxForce = [
+            forceScale * maxForces[startIndex], forceScale * maxForces[startIndex + 1],
+            forceScale * maxForces[startIndex + 2], forceScale * maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            chest,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._chestRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            maxForces[startIndex], maxForces[startIndex + 1], maxForces[startIndex + 2],
+            maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            neck,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._neckRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            maxForces[startIndex], maxForces[startIndex + 1], maxForces[startIndex + 2],
+            maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            rightHip,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._rightHipRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [forceScale * maxForces[startIndex]]
+        startIndex += 1
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            rightKnee,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._rightKneeRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            maxForces[startIndex], maxForces[startIndex + 1], maxForces[startIndex + 2],
+            maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            rightAnkle,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._rightAnkleRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            forceScale * maxForces[startIndex], forceScale * maxForces[startIndex + 1],
+            forceScale * maxForces[startIndex + 2], forceScale * maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        maxForce = [forceScale * maxForces[startIndex]]
+        startIndex += 1
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            rightElbow,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._rightElbowRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            maxForces[startIndex], maxForces[startIndex + 1], maxForces[startIndex + 2],
+            maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            leftHip,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._leftHipRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [forceScale * maxForces[startIndex]]
+        startIndex += 1
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            leftKnee,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._leftKneeRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            maxForces[startIndex], maxForces[startIndex + 1], maxForces[startIndex + 2],
+            maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            leftAnkle,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._leftAnkleRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [
+            maxForces[startIndex], maxForces[startIndex + 1], maxForces[startIndex + 2],
+            maxForces[startIndex + 3]
+        ]
+        startIndex += 4
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            leftShoulder,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._leftShoulderRot,
+            positionGain=kp,
+            force=maxForce)
+        maxForce = [forceScale * maxForces[startIndex]]
+        startIndex += 1
+        self._pybullet_client.setJointMotorControlMultiDof(
+            self._sim_model,
+            leftElbow,
+            controlMode,
+            targetPosition=self._poseInterpolators[i]._leftElbowRot,
+            positionGain=kp,
+            force=maxForce)
+        # print("startIndex=",startIndex)
+
+    def getPhase(self):
+        keyFrameDuration = self._mocap_data.getKeyFrameDuration()
+        cycleTime = keyFrameDuration * (self._mocap_data.getNumFrames() - 1)
+        phase = self._simTime / cycleTime
+        phase = math.fmod(phase, 1.0)
+        if (phase < 0):
+            phase += 1
+        return phase
+
+    def buildHeadingTrans(self, rootOrn):
+        # align root transform 'forward' with world-space x axis
+        eul = self._pybullet_client.getEulerFromQuaternion(rootOrn)
+        refDir = [1, 0, 0]
+        rotVec = self._pybullet_client.rotateVector(rootOrn, refDir)
+        heading = math.atan2(-rotVec[2], rotVec[0])
+        heading2 = eul[1]
+        # print("heading=",heading)
+        headingOrn = self._pybullet_client.getQuaternionFromAxisAngle([0, 1, 0], -heading)
+        return headingOrn
+
+    def buildOriginTrans(self):
+        rootPos, rootOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
+
+        # print("rootPos=",rootPos, " rootOrn=",rootOrn)
+        invRootPos = [-rootPos[0], 0, -rootPos[2]]
+        # invOrigTransPos, invOrigTransOrn = self._pybullet_client.invertTransform(rootPos,rootOrn)
+        headingOrn = self.buildHeadingTrans(rootOrn)
+        # print("headingOrn=",headingOrn)
+        headingMat = self._pybullet_client.getMatrixFromQuaternion(headingOrn)
+        # print("headingMat=",headingMat)
+        # dummy, rootOrnWithoutHeading = self._pybullet_client.multiplyTransforms([0,0,0],headingOrn, [0,0,0], rootOrn)
+        # dummy, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0,0,0],rootOrnWithoutHeading, invOrigTransPos, invOrigTransOrn)
+
+        invOrigTransPos, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0, 0, 0],
+                                                                                    headingOrn,
+                                                                                    invRootPos,
+                                                                                    [0, 0, 0, 1])
+        # print("invOrigTransPos=",invOrigTransPos)
+        # print("invOrigTransOrn=",invOrigTransOrn)
+        invOrigTransMat = self._pybullet_client.getMatrixFromQuaternion(invOrigTransOrn)
+        # print("invOrigTransMat =",invOrigTransMat )
+        return invOrigTransPos, invOrigTransOrn
+
+    def getState(self):
+
+        stateVector = []
+        phase = self.getPhase()
+        # print("phase=",phase)
+        stateVector.append(phase)
+
+        rootTransPos, rootTransOrn = self.buildOriginTrans()
+        basePos, baseOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
+
+        rootPosRel, dummy = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn,
+                                                                     basePos, [0, 0, 0, 1])
+        # print("!!!rootPosRel =",rootPosRel )
+        # print("rootTransPos=",rootTransPos)
+        # print("basePos=",basePos)
+        localPos, localOrn = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn,
+                                                                      basePos, baseOrn)
+
+        localPos = [
+            localPos[0] - rootPosRel[0], localPos[1] - rootPosRel[1], localPos[2] - rootPosRel[2]
+        ]
+        # print("localPos=",localPos)
+
+        stateVector.append(rootPosRel[1])
+
+        # self.pb2dmJoints=[0,1,2,9,10,11,3,4,5,12,13,14,6,7,8]
+        self.pb2dmJoints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+
+        linkIndicesSim = []
+        for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
+            linkIndicesSim.append(self.pb2dmJoints[pbJoint])
+
+        linkStatesSim = self._pybullet_client.getLinkStates(self._sim_model, linkIndicesSim,
+                                                            computeForwardKinematics=True, computeLinkVelocity=True)
+
+        for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
+            j = self.pb2dmJoints[pbJoint]
+            # print("joint order:",j)
+            # ls = self._pybullet_client.getLinkState(self._sim_model, j, computeForwardKinematics=True)
+            ls = linkStatesSim[pbJoint]
+            linkPos = ls[0]
+            linkOrn = ls[1]
+            linkPosLocal, linkOrnLocal = self._pybullet_client.multiplyTransforms(
+                rootTransPos, rootTransOrn, linkPos, linkOrn)
+            if (linkOrnLocal[3] < 0):
+                linkOrnLocal = [-linkOrnLocal[0], -linkOrnLocal[1], -linkOrnLocal[2], -linkOrnLocal[3]]
+            linkPosLocal = [
+                linkPosLocal[0] - rootPosRel[0], linkPosLocal[1] - rootPosRel[1],
+                linkPosLocal[2] - rootPosRel[2]
+            ]
+            for l in linkPosLocal:
+                stateVector.append(l)
+            # re-order the quaternion, DeepMimic uses w,x,y,z
+
+            if (linkOrnLocal[3] < 0):
+                linkOrnLocal[0] *= -1
+                linkOrnLocal[1] *= -1
+                linkOrnLocal[2] *= -1
+                linkOrnLocal[3] *= -1
+
+            stateVector.append(linkOrnLocal[3])
+            stateVector.append(linkOrnLocal[0])
+            stateVector.append(linkOrnLocal[1])
+            stateVector.append(linkOrnLocal[2])
+
+        for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
+            j = self.pb2dmJoints[pbJoint]
+            # ls = self._pybullet_client.getLinkState(self._sim_model, j, computeLinkVelocity=True)
+            ls = linkStatesSim[pbJoint]
+
+            linkLinVel = ls[6]
+            linkAngVel = ls[7]
+            linkLinVelLocal, unused = self._pybullet_client.multiplyTransforms([0, 0, 0], rootTransOrn,
+                                                                               linkLinVel, [0, 0, 0, 1])
+            # linkLinVelLocal=[linkLinVelLocal[0]-rootPosRel[0],linkLinVelLocal[1]-rootPosRel[1],linkLinVelLocal[2]-rootPosRel[2]]
+            linkAngVelLocal, unused = self._pybullet_client.multiplyTransforms([0, 0, 0], rootTransOrn,
+                                                                               linkAngVel, [0, 0, 0, 1])
+
+            for l in linkLinVelLocal:
+                stateVector.append(l)
+            for l in linkAngVelLocal:
+                stateVector.append(l)
+
+        # print("stateVector len=",len(stateVector))
+        # for st in range (len(stateVector)):
+        #  print("state[",st,"]=",stateVector[st])
+        return stateVector
+
+    def terminates(self):
+        # check if any non-allowed body part hits the ground
+        terminates = False
+        pts = self._pybullet_client.getContactPoints()
+        for p in pts:
+            part = -1
+            # ignore self-collision
+            if (p[1] == p[2]):
+                continue
+            if (p[1] == self._sim_model):
+                part = p[3]
+            if (p[2] == self._sim_model):
+                part = p[4]
+            if (part >= 0 and part in self._fall_contact_body_parts):
+                # print("terminating part:", part)
+                terminates = True
+
+        return terminates
+
+    def quatMul(self, q1, q2):
+        return [
+            q1[3] * q2[0] + q1[0] * q2[3] + q1[1] * q2[2] - q1[2] * q2[1],
+            q1[3] * q2[1] + q1[1] * q2[3] + q1[2] * q2[0] - q1[0] * q2[2],
+            q1[3] * q2[2] + q1[2] * q2[3] + q1[0] * q2[1] - q1[1] * q2[0],
+            q1[3] * q2[3] - q1[0] * q2[0] - q1[1] * q2[1] - q1[2] * q2[2]
+        ]
+
+    def calcRootAngVelErr(self, vel0, vel1):
+        diff = [vel0[0] - vel1[0], vel0[1] - vel1[1], vel0[2] - vel1[2]]
+        return diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2]
+
+    def calcRootRotDiff(self, orn0, orn1):
+        orn0Conj = [-orn0[0], -orn0[1], -orn0[2], orn0[3]]
+        q_diff = self.quatMul(orn1, orn0Conj)
+        axis, angle = self._pybullet_client.getAxisAngleFromQuaternion(q_diff)
+        return angle * angle
+
+    def getReward(self, pose, i=0):
+        """Compute and return the pose-based reward."""
+        # from DeepMimic double cSceneImitate::CalcRewardImitate
+        # todo: compensate for ground height in some parts, once we move to non-flat terrain
+        # not values from the paper, but from the published code.
+        pose_w = 0.5
+        vel_w = 0.05
+        end_eff_w = 0.15
+        # does not exist in paper
+        root_w = 0.2
+        if self._useComReward:
+            com_w = 0.1
+        else:
+            com_w = 0
+
+        total_w = pose_w + vel_w + end_eff_w + root_w + com_w
+        pose_w /= total_w
+        vel_w /= total_w
+        end_eff_w /= total_w
+        root_w /= total_w
+        com_w /= total_w
+
+        pose_scale = 2
+        vel_scale = 0.1
+        end_eff_scale = 40
+        root_scale = 5
+        com_scale = 10
+        err_scale = 1  # error scale
+
+        reward = 0
+
+        pose_err = 0
+        vel_err = 0
+        end_eff_err = 0
+        root_err = 0
+        com_err = 0
+        heading_err = 0
+
+        # create a mimic reward, comparing the dynamics humanoid with a kinematic one
+
+        # pose = self.InitializePoseFromMotionData()
+        # print("self._kin_model=",self._kin_model)
+        # print("kinematicHumanoid #joints=",self._pybullet_client.getNumJoints(self._kin_model))
+        # self.ApplyPose(pose, True, True, self._kin_model, self._pybullet_client)
+
+        # const Eigen::VectorXd& pose0 = sim_char.GetPose();
+        # const Eigen::VectorXd& vel0 = sim_char.GetVel();
+        # const Eigen::VectorXd& pose1 = kin_char.GetPose();
+        # const Eigen::VectorXd& vel1 = kin_char.GetVel();
+        # tMatrix origin_trans = sim_char.BuildOriginTrans();
+        # tMatrix kin_origin_trans = kin_char.BuildOriginTrans();
+        #
+        # tVector com0_world = sim_char.CalcCOM();
+        if self._useComReward:
+            comSim, comSimVel = self.computeCOMposVel(self._sim_model)
+            comKin, comKinVel = self.computeCOMposVel(self._kin_models[i])
+        # tVector com_vel0_world = sim_char.CalcCOMVel();
+        # tVector com1_world;
+        # tVector com_vel1_world;
+        # cRBDUtil::CalcCoM(joint_mat, body_defs, pose1, vel1, com1_world, com_vel1_world);
+        #
+        root_id = 0
+        # tVector root_pos0 = cKinTree::GetRootPos(joint_mat, pose0);
+        # tVector root_pos1 = cKinTree::GetRootPos(joint_mat, pose1);
+        # tQuaternion root_rot0 = cKinTree::GetRootRot(joint_mat, pose0);
+        # tQuaternion root_rot1 = cKinTree::GetRootRot(joint_mat, pose1);
+        # tVector root_vel0 = cKinTree::GetRootVel(joint_mat, vel0);
+        # tVector root_vel1 = cKinTree::GetRootVel(joint_mat, vel1);
+        # tVector root_ang_vel0 = cKinTree::GetRootAngVel(joint_mat, vel0);
+        # tVector root_ang_vel1 = cKinTree::GetRootAngVel(joint_mat, vel1);
+
+        mJointWeights = [
+            0.20833, 0.10416, 0.0625, 0.10416, 0.0625, 0.041666666666666671, 0.0625, 0.0416, 0.00,
+            0.10416, 0.0625, 0.0416, 0.0625, 0.0416, 0.0000
+        ]
+
+        num_end_effs = 0
+        num_joints = 15
+
+        root_rot_w = mJointWeights[root_id]
+        rootPosSim, rootOrnSim = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
+        rootPosKin, rootOrnKin = self._pybullet_client.getBasePositionAndOrientation(self._kin_models[i])
+        linVelSim, angVelSim = self._pybullet_client.getBaseVelocity(self._sim_model)
+        # don't read the velocities from the kinematic model (they are zero), use the pose interpolator velocity
+        # see also issue https://github.com/bulletphysics/bullet3/issues/2401
+        linVelKin = self._poseInterpolators[i]._baseLinVel
+        angVelKin = self._poseInterpolators[i]._baseAngVel
+
+        root_rot_err = self.calcRootRotDiff(rootOrnSim, rootOrnKin)
+        pose_err += root_rot_w * root_rot_err
+
+        root_vel_diff = [
+            linVelSim[0] - linVelKin[0], linVelSim[1] - linVelKin[1], linVelSim[2] - linVelKin[2]
+        ]
+        root_vel_err = root_vel_diff[0] * root_vel_diff[0] + root_vel_diff[1] * root_vel_diff[
+            1] + root_vel_diff[2] * root_vel_diff[2]
+
+        root_ang_vel_err = self.calcRootAngVelErr(angVelSim, angVelKin)
+        vel_err += root_rot_w * root_ang_vel_err
+
+        useArray = True
+
+        if useArray:
+            jointIndices = range(num_joints)
+            simJointStates = self._pybullet_client.getJointStatesMultiDof(self._sim_model, jointIndices)
+            kinJointStates = self._pybullet_client.getJointStatesMultiDof(self._kin_models[i], jointIndices)
+            linkStatesSim = self._pybullet_client.getLinkStates(self._sim_model, jointIndices)
+            linkStatesKin = self._pybullet_client.getLinkStates(self._kin_models[i], jointIndices)
+        for j in range(num_joints):
+            curr_pose_err = 0
+            curr_vel_err = 0
+            w = mJointWeights[j]
+            if useArray:
+                simJointInfo = simJointStates[j]
+            else:
+                simJointInfo = self._pybullet_client.getJointStateMultiDof(self._sim_model, j)
+
+            # print("simJointInfo.pos=",simJointInfo[0])
+            # print("simJointInfo.vel=",simJointInfo[1])
+            if useArray:
+                kinJointInfo = kinJointStates[j]
+            else:
+                kinJointInfo = self._pybullet_client.getJointStateMultiDof(self._kin_models[i], j)
+            # print("kinJointInfo.pos=",kinJointInfo[0])
+            # print("kinJointInfo.vel=",kinJointInfo[1])
+            if (len(simJointInfo[0]) == 1):
+                angle = simJointInfo[0][0] - kinJointInfo[0][0]
+                curr_pose_err = angle * angle
+                velDiff = simJointInfo[1][0] - kinJointInfo[1][0]
+                curr_vel_err = velDiff * velDiff
+            if (len(simJointInfo[0]) == 4):
+                # print("quaternion diff")
+                diffQuat = self._pybullet_client.getDifferenceQuaternion(simJointInfo[0], kinJointInfo[0])
+                axis, angle = self._pybullet_client.getAxisAngleFromQuaternion(diffQuat)
+                curr_pose_err = angle * angle
+                diffVel = [
+                    simJointInfo[1][0] - kinJointInfo[1][0], simJointInfo[1][1] - kinJointInfo[1][1],
+                    simJointInfo[1][2] - kinJointInfo[1][2]
+                ]
+                curr_vel_err = diffVel[0] * diffVel[0] + diffVel[1] * diffVel[1] + diffVel[2] * diffVel[2]
+
+            pose_err += w * curr_pose_err
+            vel_err += w * curr_vel_err
+
+            is_end_eff = j in self._end_effectors
+
+            if is_end_eff:
+
+                if useArray:
+                    linkStateSim = linkStatesSim[j]
+                    linkStateKin = linkStatesKin[j]
+                else:
+                    linkStateSim = self._pybullet_client.getLinkState(self._sim_model, j)
+                    linkStateKin = self._pybullet_client.getLinkState(self._kin_models[i], j)
+                linkPosSim = linkStateSim[0]
+                linkPosKin = linkStateKin[0]
+                linkPosDiff = [
+                    linkPosSim[0] - linkPosKin[0], linkPosSim[1] - linkPosKin[1],
+                    linkPosSim[2] - linkPosKin[2]
+                ]
+                curr_end_err = linkPosDiff[0] * linkPosDiff[0] + linkPosDiff[1] * linkPosDiff[
+                    1] + linkPosDiff[2] * linkPosDiff[2]
+                end_eff_err += curr_end_err
+                num_end_effs += 1
+
+        if (num_end_effs > 0):
+            end_eff_err /= num_end_effs
+
+        # double root_ground_h0 = mGround->SampleHeight(sim_char.GetRootPos())
+        # double root_ground_h1 = kin_char.GetOriginPos()[1]
+        # root_pos0[1] -= root_ground_h0
+        # root_pos1[1] -= root_ground_h1
+        root_pos_diff = [
+            rootPosSim[0] - rootPosKin[0], rootPosSim[1] - rootPosKin[1], rootPosSim[2] - rootPosKin[2]
+        ]
+        root_pos_err = root_pos_diff[0] * root_pos_diff[0] + root_pos_diff[1] * root_pos_diff[
+            1] + root_pos_diff[2] * root_pos_diff[2]
+        #
+        # root_rot_err = cMathUtil::QuatDiffTheta(root_rot0, root_rot1)
+        # root_rot_err *= root_rot_err
+
+        # root_vel_err = (root_vel1 - root_vel0).squaredNorm()
+        # root_ang_vel_err = (root_ang_vel1 - root_ang_vel0).squaredNorm()
+
+        root_err = root_pos_err + 0.1 * root_rot_err + 0.01 * root_vel_err + 0.001 * root_ang_vel_err
+
+        # COM error in initial code -> COM velocities
+        if self._useComReward:
+            com_err = 0.1 * np.sum(np.square(comKinVel - comSimVel))
+        # com_err = 0.1 * np.sum(np.square(comKin - comSim))
+        # com_err = 0.1 * (com_vel1_world - com_vel0_world).squaredNorm()
+
+        # print("pose_err=",pose_err)
+        # print("vel_err=",vel_err)
+        pose_reward = math.exp(-err_scale * pose_scale * pose_err)
+        vel_reward = math.exp(-err_scale * vel_scale * vel_err)
+        end_eff_reward = math.exp(-err_scale * end_eff_scale * end_eff_err)
+        root_reward = math.exp(-err_scale * root_scale * root_err)
+        com_reward = math.exp(-err_scale * com_scale * com_err)
+
+        reward = pose_w * pose_reward + vel_w * vel_reward + end_eff_w * end_eff_reward + root_w * root_reward + com_w * com_reward
+
+        # pose_reward,vel_reward,end_eff_reward, root_reward, com_reward);
+        # print("reward=",reward)
+        # print("pose_reward=",pose_reward)
+        # print("vel_reward=",vel_reward)
+        # print("end_eff_reward=",end_eff_reward)
+        # print("root_reward=",root_reward)
+        # print("com_reward=",com_reward)
+
+        info_rew = dict(
+            pose_reward=pose_reward,
+            vel_reward=vel_reward,
+            end_eff_reward=end_eff_reward,
+            root_reward=root_reward,
+            com_reward=com_reward
+        )
+
+        info_errs = dict(
+            pose_err=pose_err,
+            vel_err=vel_err,
+            end_eff_err=end_eff_err,
+            root_err=root_err,
+            com_err=com_err
+        )
+
+        return reward
+
+    def computeCOMposVel(self, uid: int):
+        """Compute center-of-mass position and velocity."""
+        pb = self._pybullet_client
+        num_joints = 15
+        jointIndices = range(num_joints)
+        link_states = pb.getLinkStates(uid, jointIndices, computeLinkVelocity=1)
+        link_pos = np.array([s[0] for s in link_states])
+        link_vel = np.array([s[-2] for s in link_states])
+        tot_mass = 0.
+        masses = []
+        for j in jointIndices:
+            mass_, *_ = pb.getDynamicsInfo(uid, j)
+            masses.append(mass_)
+            tot_mass += mass_
+        masses = np.asarray(masses)[:, None]
+        com_pos = np.sum(masses * link_pos, axis=0) / tot_mass
+        com_vel = np.sum(masses * link_vel, axis=0) / tot_mass
+        return com_pos, com_vel
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data_multiclip.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data_multiclip.py
@ -0,0 +1,239 @@
+import json
+import math
+import random
+import numpy as np
+from os import listdir
+from os.path import isfile, join
+
+from scipy.spatial.transform import Rotation as R
+from scipy.spatial.transform import Slerp
+
+
+class MotionCaptureDataMultiClip(object):
+
+    def __init__(self):
+        self.Reset()
+
+    def Reset(self):
+        self._motion_data = {}
+        self._num_frames_min = 9999
+        self._num_frames_max = -1
+        self._num_clips = -1
+        self._names = []
+
+    def Load(self, path):
+        if isfile(path):
+            files = [path]
+        else:
+            files = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
+        for i in range(len(files)):
+            with open(files[i], 'r') as f:
+                self._names.append(files[i].split('/')[-1])
+                self._motion_data[i] = json.load(f)
+                t = len(self._motion_data[i]['Frames'])
+                self._num_frames_min = min(t, self._num_frames_min)
+                self._num_frames_max = max(t, self._num_frames_max)
+        self._num_clips = len(self._motion_data)
+        downsample = False
+        if downsample:
+            self.downsampleClips()
+        else:
+            self.upsampleClips()
+
+    def getNumFrames(self):
+        return self._num_frames_min
+
+    def getKeyFrameDuration(self, id=0):
+        return self._motion_data[id]['Frames'][0][0]
+
+    def getCycleTime(self):
+        keyFrameDuration = self.getKeyFrameDuration()
+        cycleTime = keyFrameDuration * (self.getNumFrames() - 1)
+        return cycleTime
+
+    def calcCycleCount(self, simTime, cycleTime):
+        phases = simTime / cycleTime
+        count = math.floor(phases)
+        return count
+
+    def computeCycleOffset(self, id=0):
+        lastFrame = self.getNumFrames() - 1
+        frameData = self._motion_data[id]['Frames'][0]
+        frameDataNext = self._motion_data[id]['Frames'][lastFrame]
+
+        basePosStart = [frameData[1], frameData[2], frameData[3]]
+        basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+        self._cycleOffset = [
+            basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
+            basePosEnd[2] - basePosStart[2]
+        ]
+        return self._cycleOffset
+
+    def getNumClips(self):
+        return self._num_clips
+
+    def downsampleClips(self):
+        for i in range(self._num_clips):
+            n_frames = len(self._motion_data[i]['Frames'])
+            if n_frames != self._num_frames_min:
+                sample = random.sample(range(n_frames), self._num_frames_min)
+                sample.sort()
+                downsampled = np.array(self._motion_data[i]['Frames'])[sample]
+                self._motion_data[i]['Frames'] = downsampled.tolist()
+                #s = json.dumps(self._motion_data[i])
+                #with open("output/{}".format(self._names[i]), 'w') as f:
+                #    f.writelines(s)
+
+    def upsampleClips(self):
+        print("Max number of frames: ", self._num_frames_max)
+        for i in range(self._num_clips):
+            print("Uspsampling clip number: ", i)
+            keyframe_duration = self.getKeyFrameDuration(i)
+            old_times = np.arange(0, len(self._motion_data[i]['Frames']) * keyframe_duration, keyframe_duration)
+            while len(old_times) < self._num_frames_max:
+                new_times, new_vals = self.slerpSingleClip(self._motion_data[i]['Frames'], old_times)
+                #print("Number of final frames: ", len(new_vals))
+                self._motion_data[i]['Frames'] = new_vals
+                old_times = new_times
+            #s = json.dumps(self._motion_data[i])
+            #with open("output/{}".format(self._names[i]), 'w') as f:
+            #    f.writelines(s)
+        self._num_frames_min = self._num_frames_max
+
+
+    def slerpSingleClip(self, clip, key_times):
+        print("Number of initial frames: ", len(key_times))
+        dict_clip = self.extractQuaternions(clip)
+        dict_clip = np.asarray(dict_clip)
+        t = dict_clip[:, 0]
+        root_pos = dict_clip[:, 1]
+        key_rots = dict_clip[:, 2]
+        n_frames = len(key_rots)
+        assert len(key_times) == n_frames
+        needed_frames = self._num_frames_max - n_frames
+        print("Needed frames: ", needed_frames)
+        inter_times = self.calc_inter_times(key_times)
+        inter_times = sorted(random.sample(inter_times, min(len(inter_times), needed_frames)))
+        print("Number of frames to interpolate: ", len(inter_times))
+        print("Number of rots: ", len(key_rots[0]))
+        inter_joint = []
+        for i in range(len(key_rots[0])):
+            quats = [rot[i] for rot in key_rots]
+            if len(quats[0]) == 4:
+                joint = R.from_quat(quats)
+                slerp = Slerp(key_times, joint)
+                interp_rots = slerp(inter_times)
+                interp_rots = interp_rots.as_quat().tolist()
+            else:
+                interp_rots = []
+                for tim in range(len(inter_times)):
+                    lb = key_times.tolist().index(max([st for st in key_times if st < inter_times[tim]]))
+                    ub = lb + 1
+                    #print(lb, ub)
+                    new_rot = (quats[ub][0] + quats[lb][0])/2
+                    interp_rots.append([new_rot])
+            inter_joint.append(interp_rots)
+        inter_joint = np.array(inter_joint).T
+        #print("Shape of interpolated joints: ", inter_joint.shape)
+        old_dict = dict(zip(key_times, key_rots))
+        new_dict = dict(zip(inter_times, inter_joint))
+        old_root_pos = dict(zip(key_times, root_pos))
+        inter_root_pos = self.calc_inter_root_pos(root_pos, key_times, inter_times)
+        new_root_pos = dict(zip(inter_times, inter_root_pos))
+        new_dict = {**old_dict, **new_dict}
+        new_rp_dict = {**old_root_pos, **new_root_pos}
+        ord_keys = sorted(new_dict.keys())
+        ord_rots = [new_dict[k] for k in ord_keys]
+        ord_root_pos = [new_rp_dict[k] for k in ord_keys]
+        new_clip = self.insertClip(t, ord_root_pos, ord_rots)
+
+        return np.array(ord_keys), new_clip
+
+    def extractQuaternions(self, clip):
+        new_clips = []
+        for c in clip:
+            t = c[0]
+            root_pos = c[1:4]
+            root_rotation = c[4:8]
+            chest_rotation = c[8:12]
+            neck_rotation = c[12:16]
+            right_hip_rotation = c[16:20]
+            right_knee_rotation = c[20]
+            right_ankle_rotation = c[21:25]
+            right_shoulder_rotation = c[25:29]
+            right_elbow_rotation = c[29]
+            left_hip_rotation = c[30:34]
+            left_knee_rotation = c[34]
+            left_ankle_rotation = c[35:39]
+            left_shoulder_rotation = c[39:43]
+            left_elbow_rotation = c[43]
+            d = [
+                t,
+                root_pos,
+                [
+                    self.deepmimic_to_scipy_quaternion(root_rotation),
+                    self.deepmimic_to_scipy_quaternion(chest_rotation),
+                    self.deepmimic_to_scipy_quaternion(neck_rotation),
+                    self.deepmimic_to_scipy_quaternion(right_hip_rotation),
+                    [right_knee_rotation],
+                    self.deepmimic_to_scipy_quaternion(right_ankle_rotation),
+                    self.deepmimic_to_scipy_quaternion(right_shoulder_rotation),
+                    [right_elbow_rotation],
+                    self.deepmimic_to_scipy_quaternion(left_hip_rotation),
+                    [left_knee_rotation],
+                    self.deepmimic_to_scipy_quaternion(left_ankle_rotation),
+                    self.deepmimic_to_scipy_quaternion(left_shoulder_rotation),
+                    [left_elbow_rotation]
+                ]
+            ]
+            new_clips.append(d)
+        return new_clips
+
+    def deepmimic_to_scipy_quaternion(self, quat):
+        return quat[1:] + [quat[0]]
+
+    def scipy_to_deepmimic_quaternion(self, quat):
+        return [quat[-1]] + quat[:-1]
+
+    def calc_inter_times(self, times, method="intermediate"):
+        if method == "intermediate":
+            inter_times = []
+            for i in range(1, len(times)):
+                it = (times[i] - times[i-1])/2 + times[i-1]
+                inter_times.append(it)
+            return inter_times
+
+    def calc_inter_root_pos(self, root_pos, times, inter_times):
+        inter_root_pos = []
+        all_times = sorted([*times.tolist(), *inter_times])
+        for i in range(len(inter_times)):
+            low_index = times.tolist().index(all_times[all_times.index(inter_times[i]) - 1])
+            up_index = times.tolist().index(all_times[all_times.index(inter_times[i]) + 1])
+            assert low_index == up_index - 1
+            inter_root_pos.append(((np.array(root_pos[up_index]) + np.array(root_pos[low_index]))/2).tolist())
+        return inter_root_pos
+
+    def insertClip(self, t, ord_root_pos, ord_rots):
+        delta_t = t[0]
+        new_quats = self.insertQuaternions(ord_rots)
+        a = []
+        for i in range(len(ord_root_pos)):
+            rot = new_quats[i]
+
+            a.append([
+                delta_t,
+                *ord_root_pos[i],
+                *rot
+            ])
+        return a
+
+    def insertQuaternions(self, rotations):
+        quats = []
+        for rot in rotations:
+            rots = []
+            for r in rot:
+                if len(r) == 4:
+                    r = self.scipy_to_deepmimic_quaternion(r)
+                rots += [el for el in r]
+            quats.append(rots)
+        return quats
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env_multiclip.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env_multiclip.py
@ -0,0 +1,358 @@
+import numpy as np
+import math
+from pybullet_envs.deep_mimic.env.env import Env
+from pybullet_envs.deep_mimic.env.action_space import ActionSpace
+from pybullet_utils import bullet_client
+import time
+from pybullet_envs.deep_mimic.env import motion_capture_data_multiclip
+from pybullet_envs.deep_mimic.env import humanoid_stable_pd_multiclip
+import pybullet_data
+import pybullet as p1
+import random
+
+from enum import Enum
+
+
+class InitializationStrategy(Enum):
+    """Set how the environment is initialized."""
+    START = 0
+    RANDOM = 1  # random state initialization (RSI)
+
+
+class PyBulletDeepMimicEnvMultiClip(Env):
+
+    def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None,
+                 time_step=1. / 240,
+                 init_strategy=InitializationStrategy.RANDOM):
+        super().__init__(arg_parser, enable_draw)
+        self._num_agents = 1
+        self._pybullet_client = pybullet_client
+        self._isInitialized = False
+        self._useStablePD = True
+        self._arg_parser = arg_parser
+        self.timeStep = time_step
+        self._init_strategy = init_strategy
+        self._n_clips = -1
+        print("Initialization strategy: {:s}".format(init_strategy))
+        self.reset()
+
+    def reset(self):
+
+        if not self._isInitialized:
+            if self.enable_draw:
+                self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
+                # disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
+                self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
+            else:
+                self._pybullet_client = bullet_client.BulletClient()
+
+            self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
+            z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
+            self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
+                                                           z2y,
+                                                           useMaximalCoordinates=True)
+            # print("planeId=",self._planeId)
+            self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
+            self._pybullet_client.setGravity(0, -9.8, 0)
+
+            self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
+            self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
+
+            self._mocapData = motion_capture_data_multiclip.MotionCaptureDataMultiClip()
+
+            motion_file = self._arg_parser.parse_strings('motion_file')
+            print("motion_file=", motion_file[0])
+
+            motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
+            # motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
+            self._mocapData.Load(motionPath)
+            self._n_clips = self._mocapData.getNumClips()
+            timeStep = self.timeStep
+            useFixedBase = False
+            self._humanoid = humanoid_stable_pd_multiclip.HumanoidStablePDMultiClip(self._pybullet_client, self._mocapData,
+                                                                                    timeStep, useFixedBase, self._arg_parser)
+            self._isInitialized = True
+
+            self._pybullet_client.setTimeStep(timeStep)
+            self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
+
+            selfCheck = False
+            if (selfCheck):
+                curTime = 0
+                while self._pybullet_client.isConnected():
+                    self._humanoid.setSimTime(curTime)
+                    state = self._humanoid.getState()
+                    # print("state=",state)
+                    # pose = self._humanoid.computePose(self._humanoid._frameFraction)  this would need an id
+                    for i in range(10):
+                        curTime += timeStep
+                        # taus = self._humanoid.computePDForces(pose)
+                        # self._humanoid.applyPDForces(taus)
+                        # self._pybullet_client.stepSimulation()
+                    time.sleep(timeStep)
+        # print("numframes = ", self._humanoid._mocap_data.NumFrames())
+        # startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2)
+
+        if self._init_strategy == InitializationStrategy.RANDOM:
+            rnrange = 1000
+            rn = random.randint(0, rnrange)
+            startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
+        elif self._init_strategy == InitializationStrategy.START:
+            startTime = 0
+
+        self.t = startTime
+
+        self._humanoid.setSimTime(startTime)
+
+        self._humanoid.resetPose()
+        # this clears the contact points. Todo: add API to explicitly clear all contact points?
+        # self._pybullet_client.stepSimulation()
+        self._humanoid.resetPose()
+        self.needs_update_time = self.t - 1  # force update
+
+    def get_num_agents(self):
+        return self._num_agents
+
+    def get_action_space(self, agent_id):
+        return ActionSpace(ActionSpace.Continuous)
+
+    def get_reward_min(self, agent_id):
+        return 0
+
+    def get_reward_max(self, agent_id):
+        return 1
+
+    def get_reward_fail(self, agent_id):
+        return self.get_reward_min(agent_id)
+
+    def get_reward_succ(self, agent_id):
+        return self.get_reward_max(agent_id)
+
+    # scene_name == "imitate" -> cDrawSceneImitate
+    def get_state_size(self, agent_id):
+        # cCtController::GetStateSize()
+        # int state_size = cDeepMimicCharController::GetStateSize();
+        #                     state_size += GetStatePoseSize();#106
+        #                     state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
+        # state_size += GetStatePhaseSize();#1
+        # 197
+        return 197
+
+    def build_state_norm_groups(self, agent_id):
+        # if (mEnablePhaseInput)
+        # {
+        # int phase_group = gNormGroupNone;
+        # int phase_offset = GetStatePhaseOffset();
+        # int phase_size = GetStatePhaseSize();
+        # out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
+        groups = [0] * self.get_state_size(agent_id)
+        groups[0] = -1
+        return groups
+
+    def build_state_offset(self, agent_id):
+        out_offset = [0] * self.get_state_size(agent_id)
+        phase_offset = -0.5
+        out_offset[0] = phase_offset
+        return np.array(out_offset)
+
+    def build_state_scale(self, agent_id):
+        out_scale = [1] * self.get_state_size(agent_id)
+        phase_scale = 2
+        out_scale[0] = phase_scale
+        return np.array(out_scale)
+
+    def get_goal_size(self, agent_id):
+        return 0
+
+    def get_action_size(self, agent_id):
+        ctrl_size = 43  # numDof
+        root_size = 7
+        return ctrl_size - root_size
+
+    def build_goal_norm_groups(self, agent_id):
+        return np.array([])
+
+    def build_goal_offset(self, agent_id):
+        return np.array([])
+
+    def build_goal_scale(self, agent_id):
+        return np.array([])
+
+    def build_action_offset(self, agent_id):
+        out_offset = [0] * self.get_action_size(agent_id)
+        out_offset = [
+            0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
+            0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
+            0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
+            -0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
+            0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
+            -0.2000000, -1.5700000
+        ]
+        # see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
+        # see cCtCtrlUtil::BuildOffsetScalePDSpherical
+        return np.array(out_offset)
+
+    def build_action_scale(self, agent_id):
+        out_scale = [1] * self.get_action_size(agent_id)
+        # see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
+        # see cCtCtrlUtil::BuildOffsetScalePDSpherical
+        out_scale = [
+            0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
+            1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
+            1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
+            1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
+            1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
+            1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
+            1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
+            0.159235668789
+        ]
+        return np.array(out_scale)
+
+    def build_action_bound_min(self, agent_id):
+        # see cCtCtrlUtil::BuildBoundsPDSpherical
+        out_scale = [-1] * self.get_action_size(agent_id)
+        out_scale = [
+            -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
+            -1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
+            -1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
+            -1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
+            -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
+            -1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
+            -1.000000000, -4.710000000
+        ]
+
+        return out_scale
+
+    def build_action_bound_max(self, agent_id):
+        out_scale = [1] * self.get_action_size(agent_id)
+        out_scale = [
+            4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
+            1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
+            1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
+            8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
+            1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
+        ]
+        return out_scale
+
+    def set_mode(self, mode):
+        self._mode = mode
+
+    def need_new_action(self, agent_id):
+        if self.t >= self.needs_update_time:
+            self.needs_update_time = self.t + 1. / 30.
+            return True
+        return False
+
+    def record_state(self, agent_id):
+        state = self._humanoid.getState()
+
+        return np.array(state)
+
+    def record_goal(self, agent_id):
+        return np.array([])
+
+    def calc_reward(self, agent_id):
+        rewards = {}
+        for i in range(self._n_clips):
+            kinPose = self._humanoid.computePose(self._humanoid._frameFraction, i)
+            rewards[i] = self._humanoid.getReward(kinPose, i)
+        reward = self.select_reward(rewards)
+        return reward
+
+    def set_action(self, agent_id, action):
+        # print("action=",)
+        # for a in action:
+        #  print(a)
+        # np.savetxt("pb_action.csv", action, delimiter=",")
+        self.desiredPose = {} # TODO: is this necessary???
+        for i in range(self._n_clips):
+            self.desiredPose[i] = self._humanoid.convertActionToPose(action, i)
+            # we need the target root positon and orientation to be zero, to be compatible with deep mimic
+            self.desiredPose[i][0] = 0
+            self.desiredPose[i][1] = 0
+            self.desiredPose[i][2] = 0
+            self.desiredPose[i][3] = 0
+            self.desiredPose[i][4] = 0
+            self.desiredPose[i][5] = 0
+            self.desiredPose[i][6] = 0
+            target_pose = np.array(self.desiredPose[i])
+
+        # np.savetxt("pb_target_pose.csv", target_pose, delimiter=",")
+
+        # print("set_action: desiredPose=", self.desiredPose)
+
+    def log_val(self, agent_id, val):
+        pass
+
+    def update(self, timeStep):
+        # print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
+        self._pybullet_client.setTimeStep(timeStep)
+        self._humanoid._timeStep = timeStep
+        self.timeStep = timeStep
+
+        for j in range(1):
+            self.t += timeStep
+            self._humanoid.setSimTime(self.t)
+
+            if self.desiredPose:
+                for i in range(self._n_clips):
+                    kinPose = self._humanoid.computePose(self._humanoid._frameFraction, i)
+                    self._humanoid.initializePose(self._humanoid._poseInterpolators[i],
+                                                  self._humanoid._kin_models[i],
+                                                  initBase=True)
+                # pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
+                # self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0]+3,pos[1],pos[2]],orn)
+                # print("desiredPositions=",self.desiredPose[i])
+                maxForces = [
+                    0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
+                    90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
+                    100, 100, 60
+                ]
+
+                if self._useStablePD:
+                    usePythonStablePD = False
+                    if usePythonStablePD:
+                        taus = self._humanoid.computePDForces(self.desiredPose[0], # TODO: check if correct
+                                                              desiredVelocities=None,
+                                                              maxForces=maxForces)
+                        # taus = [0]*43
+                        self._humanoid.applyPDForces(taus)
+                    else:
+                        self._humanoid.computeAndApplyPDForces(self.desiredPose[0],
+                                                               maxForces=maxForces)
+                else:
+                    self._humanoid.setJointMotors(self.desiredPose[0], maxForces=maxForces, i=0)
+
+                self._pybullet_client.stepSimulation()
+
+    def set_sample_count(self, count):
+        return
+
+    def check_terminate(self, agent_id):
+        return Env.Terminate(self.is_episode_end())
+
+    def is_episode_end(self):
+        isEnded = self._humanoid.terminates()
+        # also check maximum time, 20 seconds (todo get from file)
+        # print("self.t=",self.t)
+        if (self.t > 20):
+            isEnded = True
+        return isEnded
+
+    def check_valid_episode(self):
+        # could check if limbs exceed velocity threshold
+        return True
+
+    def getKeyboardEvents(self):
+        return self._pybullet_client.getKeyboardEvents()
+
+    def isKeyTriggered(self, keys, key):
+        o = ord(key)
+        # print("ord=",o)
+        if o in keys:
+            return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
+        return False
+
+    def select_reward(self, rewards, criterion=None):
+        # todo create enum for criterion
+        return max(rewards.values())
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run_multiclip.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run_multiclip.py
@ -0,0 +1,25 @@
+import sys
+import subprocess
+from pybullet_utils.arg_parser import ArgParser
+from pybullet_utils.logger import Logger
+
+
+def main():
+  # Command line arguments
+  args = sys.argv[1:]
+  arg_parser = ArgParser()
+  arg_parser.load_args(args)
+
+  num_workers = arg_parser.parse_int('num_workers', 1)
+  assert (num_workers > 0)
+
+  Logger.print2('Running with {:d} workers'.format(num_workers))
+  cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer_multiclip.py '.format(num_workers)
+  cmd += ' '.join(args)
+  Logger.print2('cmd: ' + cmd)
+  subprocess.call(cmd, shell=True)
+  return
+
+
+if __name__ == '__main__':
+  main()
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl_multiclip.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl_multiclip.py
@ -0,0 +1,112 @@
+import time
+import os
+import inspect
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0, parentdir)
+print("parentdir=", parentdir)
+import json
+from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
+from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent
+
+import pybullet_data
+from pybullet_utils.arg_parser import ArgParser
+from pybullet_utils.logger import Logger
+from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env_multiclip import PyBulletDeepMimicEnvMultiClip
+import sys
+import random
+
+update_timestep = 1. / 240.
+animating = True
+step = False
+total_reward = 0
+steps = 0
+
+
+def update_world(world, time_elapsed):
+    timeStep = update_timestep
+    world.update(timeStep)
+    reward = world.env.calc_reward(agent_id=0)
+    global total_reward
+    total_reward += reward
+    global steps
+    steps += 1
+
+    end_episode = world.env.is_episode_end()
+    if (end_episode or steps >= 1000):
+        print("total_reward=", total_reward)
+        total_reward = 0
+        steps = 0
+        world.end_episode()
+        world.reset()
+
+
+def build_arg_parser(args):
+    arg_parser = ArgParser()
+    arg_parser.load_args(args)
+
+    arg_file = arg_parser.parse_string('arg_file', '')
+    if arg_file == '':
+        arg_file = "run_humanoid3d_backflip_args.txt"
+    if (arg_file != ''):
+        path = pybullet_data.getDataPath() + "/args/" + arg_file
+        if os.path.isfile(path):
+            succ = arg_parser.load_file(path)
+        else:
+            files = [arg_parser.load_file(f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+            succ = all(files)
+        Logger.print2(arg_file)
+        assert succ, Logger.print2('Failed to load args from: ' + arg_file)
+    return arg_parser
+
+
+args = sys.argv[1:]
+
+
+def build_world(args, enable_draw):
+    arg_parser = build_arg_parser(args)
+    print("enable_draw=", enable_draw)
+    env = PyBulletDeepMimicEnvMultiClip(arg_parser, enable_draw)
+    world = RLWorld(env, arg_parser)
+
+    motion_file = arg_parser.parse_string("motion_file")
+    print("motion_file=", motion_file)
+    bodies = arg_parser.parse_ints("fall_contact_bodies")
+    print("bodies=", bodies)
+    int_output_path = arg_parser.parse_string("int_output_path")
+    print("int_output_path=", int_output_path)
+    agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")
+
+    AGENT_TYPE_KEY = "AgentType"
+
+    print("agent_file=", agent_files)
+    with open(agent_files) as data_file:
+        json_data = json.load(data_file)
+        print("json_data=", json_data)
+        assert AGENT_TYPE_KEY in json_data
+        agent_type = json_data[AGENT_TYPE_KEY]
+        print("agent_type=", agent_type)
+        agent = PPOAgent(world, id, json_data)
+
+        agent.set_enable_training(False)
+        world.reset()
+    return world
+
+
+if __name__ == '__main__':
+
+    world = build_world(args, True)
+    while (world.env._pybullet_client.isConnected()):
+
+        timeStep = update_timestep
+        time.sleep(timeStep)
+        keys = world.env.getKeyboardEvents()
+
+        if world.env.isKeyTriggered(keys, ' '):
+            animating = not animating
+        if world.env.isKeyTriggered(keys, 'i'):
+            step = True
+        if (animating or step):
+            update_world(world, timeStep)
+            step = False