Merge pull request #3076 from tfederico/master

Add multiclip loading and reward
This commit is contained in:
erwincoumans 2020-11-13 10:02:49 -08:00 committed by GitHub
commit 2a0f811c3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 10274 additions and 0 deletions

View File

@ -0,0 +1,25 @@
--scene imitate
--num_update_substeps 10
--num_sim_substeps 2
--world_scale 4
--terrain_file data/terrain/plane.txt
--char_types general
--character_files data/characters/humanoid3d.txt
--enable_char_soft_contact false
--fall_contact_bodies 0 1 2 3 4 6 7 8 9 10 12 13 14
--char_ctrls ct_pd
--char_ctrl_files data/controllers/humanoid3d_ctrl.txt
--motion_file data/motions/walker/walk_forward0.txt
--sync_char_root_pos true
--sync_char_root_rot false
--agent_files data/agents/ct_agent_humanoid_ppo.txt
--train_agents false
#--output_path output
#--int_output_path output/intermediate
--model_files data/policies/humanoid3d/humanoid3d_walker.ckpt

View File

@ -0,0 +1,33 @@
--scene imitate
--time_lim_min 0.5
--time_lim_max 0.5
--time_lim_exp 0.2
--time_end_lim_min 20
--time_end_lim_max 20
--time_end_lim_exp 50
--anneal_samples 32000000
--num_update_substeps 10
--num_sim_substeps 2
--world_scale 4
--terrain_file data/terrain/plane.txt
--char_types general
--character_files data/characters/humanoid3d.txt
--enable_char_soft_contact false
--fall_contact_bodies 0 1 2 3 4 6 7 8 9 10 12 13 14
--char_ctrls ct_pd
--char_ctrl_files data/controllers/humanoid3d_ctrl.txt
--motion_file data/motions/walker/
--sync_char_root_pos true
--sync_char_root_rot false
--agent_files data/agents/ct_agent_humanoid_ppo.txt
#--train_agents false
--output_path output
#--int_output_path output/intermediate
#--model_files data/policies/humanoid3d/agent0_model.ckpt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,54 @@
import numpy as np
import sys
import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env_multiclip import PyBulletDeepMimicEnvMultiClip
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.testrl_multiclip import update_world, update_timestep, build_world
import pybullet_utils.mpi_util as MPIUtil
args = []
world = None
def run():
global update_timestep
global world
done = False
while not (done):
update_world(world, update_timestep)
return
def shutdown():
global world
Logger.print2('Shutting down...')
world.shutdown()
return
def main():
global args
global world
# Command line arguments
args = sys.argv[1:]
enable_draw = False
world = build_world(args, enable_draw)
run()
shutdown()
return
if __name__ == '__main__':
main()

View File

@ -0,0 +1,668 @@
from pybullet_utils import pd_controller_stable
from pybullet_envs.deep_mimic.env import humanoid_pose_interpolator
import math
import numpy as np
chest = 1
neck = 2
rightHip = 3
rightKnee = 4
rightAnkle = 5
rightShoulder = 6
rightElbow = 7
leftHip = 9
leftKnee = 10
leftAnkle = 11
leftShoulder = 12
leftElbow = 13
jointFrictionForce = 0
class HumanoidStablePDMultiClip(object):
def __init__(self, pybullet_client, mocap_data, timeStep,
useFixedBase=True, arg_parser=None, useComReward=False):
self._pybullet_client = pybullet_client
self._mocap_data = mocap_data # this is a dictionary
self._arg_parser = arg_parser
self._n_clips = self._mocap_data.getNumClips()
print("LOADING humanoid!")
flags = self._pybullet_client.URDF_MAINTAIN_LINK_ORDER + self._pybullet_client.URDF_USE_SELF_COLLISION + self._pybullet_client.URDF_USE_SELF_COLLISION_EXCLUDE_ALL_PARENTS
self._sim_model = self._pybullet_client.loadURDF(
"humanoid/humanoid.urdf", [0, 0.889540259, 0],
globalScaling=0.25,
useFixedBase=useFixedBase,
flags=flags)
self._end_effectors = [5, 8, 11, 14] # ankle and wrist, both left and right
self._kin_models = {}
self._poseInterpolators = {}
for i in range(self._n_clips):
self._kin_models[i] = self._pybullet_client.loadURDF(
"humanoid/humanoid.urdf", [0, 0.85, 0],
globalScaling=0.25,
useFixedBase=True,
flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER)
self._pybullet_client.changeDynamics(self._sim_model, -1, lateralFriction=0.9)
for j in range(self._pybullet_client.getNumJoints(self._sim_model)):
self._pybullet_client.changeDynamics(self._sim_model, j, lateralFriction=0.9)
self._pybullet_client.changeDynamics(self._sim_model, -1, linearDamping=0, angularDamping=0)
for i in range(self._n_clips):
self._pybullet_client.changeDynamics(self._kin_models[i], -1, linearDamping=0, angularDamping=0)
# todo: add feature to disable simulation for a particular object. Until then, disable all collisions
self._pybullet_client.setCollisionFilterGroupMask(self._kin_models[i],
-1,
collisionFilterGroup=0,
collisionFilterMask=0)
self._pybullet_client.changeDynamics(
self._kin_models[i],
-1,
activationState=self._pybullet_client.ACTIVATION_STATE_SLEEP +
self._pybullet_client.ACTIVATION_STATE_ENABLE_SLEEPING +
self._pybullet_client.ACTIVATION_STATE_DISABLE_WAKEUP)
alpha = 0.4
self._pybullet_client.changeVisualShape(self._kin_models[i], -1, rgbaColor=[1, 1, 1, alpha])
for j in range(self._pybullet_client.getNumJoints(self._kin_models[i])):
self._pybullet_client.setCollisionFilterGroupMask(self._kin_models[i],
j,
collisionFilterGroup=0,
collisionFilterMask=0)
self._pybullet_client.changeDynamics(
self._kin_models[i],
j,
activationState=self._pybullet_client.ACTIVATION_STATE_SLEEP +
self._pybullet_client.ACTIVATION_STATE_ENABLE_SLEEPING +
self._pybullet_client.ACTIVATION_STATE_DISABLE_WAKEUP)
self._pybullet_client.changeVisualShape(self._kin_models[i], j, rgbaColor=[1, 1, 1, alpha])
self._poseInterpolators[i] = humanoid_pose_interpolator.HumanoidPoseInterpolator()
for j in range(self._mocap_data.getNumFrames() - 1):
frameData = self._mocap_data._motion_data[i]['Frames'][j]
self._poseInterpolators[i].PostProcessMotionData(frameData)
self._stablePD = pd_controller_stable.PDControllerStableMultiDof(self._pybullet_client)
self._timeStep = timeStep
self._kpOrg = [
0, 0, 0, 0, 0, 0, 0, 1000, 1000, 1000, 1000, 100, 100, 100, 100, 500, 500, 500, 500, 500,
400, 400, 400, 400, 400, 400, 400, 400, 300, 500, 500, 500, 500, 500, 400, 400, 400, 400,
400, 400, 400, 400, 300
]
self._kdOrg = [
0, 0, 0, 0, 0, 0, 0, 100, 100, 100, 100, 10, 10, 10, 10, 50, 50, 50, 50, 50, 40, 40, 40,
40, 40, 40, 40, 40, 30, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 30
]
self._jointIndicesAll = [
chest, neck, rightHip, rightKnee, rightAnkle, rightShoulder, rightElbow, leftHip, leftKnee,
leftAnkle, leftShoulder, leftElbow
]
for j in self._jointIndicesAll:
self._pybullet_client.setJointMotorControl2(self._sim_model,
j,
self._pybullet_client.POSITION_CONTROL,
targetPosition=0,
positionGain=0,
targetVelocity=0,
force=jointFrictionForce)
self._pybullet_client.setJointMotorControlMultiDof(
self._sim_model,
j,
self._pybullet_client.POSITION_CONTROL,
targetPosition=[0, 0, 0, 1],
targetVelocity=[0, 0, 0],
positionGain=0,
velocityGain=1,
force=[jointFrictionForce, jointFrictionForce, jointFrictionForce])
for i in range(self._n_clips):
self._pybullet_client.setJointMotorControl2(self._kin_models[i],
j,
self._pybullet_client.POSITION_CONTROL,
targetPosition=0,
positionGain=0,
targetVelocity=0,
force=0)
self._pybullet_client.setJointMotorControlMultiDof(
self._kin_models[i],
j,
self._pybullet_client.POSITION_CONTROL,
targetPosition=[0, 0, 0, 1],
targetVelocity=[0, 0, 0],
positionGain=0,
velocityGain=1,
force=[jointFrictionForce, jointFrictionForce, 0])
self._jointDofCounts = [4, 4, 4, 1, 4, 4, 1, 4, 1, 4, 4, 1]
# only those body parts/links are allowed to touch the ground, otherwise the episode terminates
fall_contact_bodies = []
if self._arg_parser is not None:
fall_contact_bodies = self._arg_parser.parse_ints("fall_contact_bodies")
self._fall_contact_body_parts = fall_contact_bodies
# [x,y,z] base position and [x,y,z,w] base orientation
self._totalDofs = 7
for dof in self._jointDofCounts:
self._totalDofs += dof
self.setSimTime(0)
self._useComReward = useComReward
self.resetPose()
def resetPose(self):
# print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction)
pose = self.computePose(self._frameFraction, 0)
self.initializePose(self._poseInterpolators[0], self._sim_model, initBase=True)
self.initializePose(self._poseInterpolators[0], self._kin_models[0], initBase=False)
for i in range(1, self._n_clips):
_ = self.computePose(self._frameFraction, i)
self.initializePose(self._poseInterpolators[i], self._kin_models[i], initBase=False)
def initializePose(self, pose, phys_model, initBase, initializeVelocity=True):
if initializeVelocity:
if initBase:
self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos,
pose._baseOrn)
self._pybullet_client.resetBaseVelocity(phys_model, pose._baseLinVel, pose._baseAngVel)
indices = [chest, neck, rightHip, rightKnee,
rightAnkle, rightShoulder, rightElbow, leftHip,
leftKnee, leftAnkle, leftShoulder, leftElbow]
jointPositions = [pose._chestRot, pose._neckRot, pose._rightHipRot, pose._rightKneeRot,
pose._rightAnkleRot, pose._rightShoulderRot, pose._rightElbowRot, pose._leftHipRot,
pose._leftKneeRot, pose._leftAnkleRot, pose._leftShoulderRot, pose._leftElbowRot]
jointVelocities = [pose._chestVel, pose._neckVel, pose._rightHipVel, pose._rightKneeVel,
pose._rightAnkleVel, pose._rightShoulderVel, pose._rightElbowVel, pose._leftHipVel,
pose._leftKneeVel, pose._leftAnkleVel, pose._leftShoulderVel, pose._leftElbowVel]
self._pybullet_client.resetJointStatesMultiDof(phys_model, indices,
jointPositions, jointVelocities)
else:
if initBase:
self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos,
pose._baseOrn)
indices = [chest, neck, rightHip, rightKnee,
rightAnkle, rightShoulder, rightElbow, leftHip,
leftKnee, leftAnkle, leftShoulder, leftElbow]
jointPositions = [pose._chestRot, pose._neckRot, pose._rightHipRot, pose._rightKneeRot,
pose._rightAnkleRot, pose._rightShoulderRot, pose._rightElbowRot, pose._leftHipRot,
pose._leftKneeRot, pose._leftAnkleRot, pose._leftShoulderRot, pose._leftElbowRot]
self._pybullet_client.resetJointStatesMultiDof(phys_model, indices, jointPositions)
def calcCycleCount(self, simTime, cycleTime):
phases = simTime / cycleTime
count = math.floor(phases)
return count
def getCycleTime(self):
keyFrameDuration = self._mocap_data.getKeyFrameDuration()
cycleTime = keyFrameDuration * (self._mocap_data.getNumFrames() - 1)
return cycleTime
def setSimTime(self, t):
self._simTime = t
# print("SetTimeTime time =",t)
keyFrameDuration = self._mocap_data.getKeyFrameDuration()
cycleTime = self.getCycleTime()
# print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
self._cycleCount = self.calcCycleCount(t, cycleTime)
# print("cycles=",cycles)
frameTime = t - self._cycleCount * cycleTime
if (frameTime < 0):
frameTime += cycleTime
# print("keyFrameDuration=",keyFrameDuration)
# print("frameTime=",frameTime)
self._frame = int(frameTime / keyFrameDuration)
# print("self._frame=",self._frame)
self._frameNext = self._frame + 1
if (self._frameNext >= self._mocap_data.getNumFrames()):
self._frameNext = self._frame
self._frameFraction = (frameTime - self._frame * keyFrameDuration) / (keyFrameDuration)
def computeCycleOffset(self, i=0):
lastFrame = self._mocap_data.getNumFrames() - 1
frameData = self._mocap_data._motion_data[i]['Frames'][0]
frameDataNext = self._mocap_data._motion_data[i]['Frames'][lastFrame]
basePosStart = [frameData[1], frameData[2], frameData[3]]
basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._cycleOffset = [
basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
basePosEnd[2] - basePosStart[2]
]
return self._cycleOffset
def computePose(self, frameFraction, i=0):
frameData = self._mocap_data._motion_data[i]['Frames'][self._frame]
frameDataNext = self._mocap_data._motion_data[i]['Frames'][self._frameNext]
self._poseInterpolators[i].Slerp(frameFraction, frameData, frameDataNext, self._pybullet_client)
# print("self._poseInterpolator.Slerp(", frameFraction,")=", pose)
self.computeCycleOffset()
oldPos = self._poseInterpolators[i]._basePos
self._poseInterpolators[i]._basePos = [
oldPos[0] + self._cycleCount * self._cycleOffset[0],
oldPos[1] + self._cycleCount * self._cycleOffset[1],
oldPos[2] + self._cycleCount * self._cycleOffset[2]
]
pose = self._poseInterpolators[i].GetPose()
return pose
def convertActionToPose(self, action, i):
pose = self._poseInterpolators[i].ConvertFromAction(self._pybullet_client, action)
return pose
def computeAndApplyPDForces(self, desiredPositions, maxForces):
dofIndex = 7
scaling = 1
indices = []
forces = []
targetPositions = []
targetVelocities = []
kps = []
kds = []
for index in range(len(self._jointIndicesAll)):
jointIndex = self._jointIndicesAll[index]
indices.append(jointIndex)
kps.append(self._kpOrg[dofIndex])
kds.append(self._kdOrg[dofIndex])
if self._jointDofCounts[index] == 4:
force = [
scaling * maxForces[dofIndex + 0],
scaling * maxForces[dofIndex + 1],
scaling * maxForces[dofIndex + 2]
]
targetVelocity = [0, 0, 0]
targetPosition = [
desiredPositions[dofIndex + 0],
desiredPositions[dofIndex + 1],
desiredPositions[dofIndex + 2],
desiredPositions[dofIndex + 3]
]
if self._jointDofCounts[index] == 1:
force = [scaling * maxForces[dofIndex]]
targetPosition = [desiredPositions[dofIndex + 0]]
targetVelocity = [0]
forces.append(force)
targetPositions.append(targetPosition)
targetVelocities.append(targetVelocity)
dofIndex += self._jointDofCounts[index]
self._pybullet_client.setJointMotorControlMultiDofArray(self._sim_model,
indices,
self._pybullet_client.STABLE_PD_CONTROL,
targetPositions=targetPositions,
targetVelocities=targetVelocities,
forces=forces,
positionGains=kps,
velocityGains=kds,
)
def getPhase(self):
keyFrameDuration = self._mocap_data.getKeyFrameDuration()
cycleTime = keyFrameDuration * (self._mocap_data.getNumFrames() - 1)
phase = self._simTime / cycleTime
phase = math.fmod(phase, 1.0)
if (phase < 0):
phase += 1
return phase
def buildHeadingTrans(self, rootOrn):
# align root transform 'forward' with world-space x axis
eul = self._pybullet_client.getEulerFromQuaternion(rootOrn)
refDir = [1, 0, 0]
rotVec = self._pybullet_client.rotateVector(rootOrn, refDir)
heading = math.atan2(-rotVec[2], rotVec[0])
heading2 = eul[1]
# print("heading=",heading)
headingOrn = self._pybullet_client.getQuaternionFromAxisAngle([0, 1, 0], -heading)
return headingOrn
def buildOriginTrans(self):
rootPos, rootOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
# print("rootPos=",rootPos, " rootOrn=",rootOrn)
invRootPos = [-rootPos[0], 0, -rootPos[2]]
# invOrigTransPos, invOrigTransOrn = self._pybullet_client.invertTransform(rootPos,rootOrn)
headingOrn = self.buildHeadingTrans(rootOrn)
# print("headingOrn=",headingOrn)
headingMat = self._pybullet_client.getMatrixFromQuaternion(headingOrn)
# print("headingMat=",headingMat)
# dummy, rootOrnWithoutHeading = self._pybullet_client.multiplyTransforms([0,0,0],headingOrn, [0,0,0], rootOrn)
# dummy, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0,0,0],rootOrnWithoutHeading, invOrigTransPos, invOrigTransOrn)
invOrigTransPos, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0, 0, 0],
headingOrn,
invRootPos,
[0, 0, 0, 1])
# print("invOrigTransPos=",invOrigTransPos)
# print("invOrigTransOrn=",invOrigTransOrn)
invOrigTransMat = self._pybullet_client.getMatrixFromQuaternion(invOrigTransOrn)
# print("invOrigTransMat =",invOrigTransMat )
return invOrigTransPos, invOrigTransOrn
def getState(self):
stateVector = []
phase = self.getPhase()
# print("phase=",phase)
stateVector.append(phase)
rootTransPos, rootTransOrn = self.buildOriginTrans()
basePos, baseOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
rootPosRel, dummy = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn,
basePos, [0, 0, 0, 1])
# print("!!!rootPosRel =",rootPosRel )
# print("rootTransPos=",rootTransPos)
# print("basePos=",basePos)
localPos, localOrn = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn,
basePos, baseOrn)
localPos = [
localPos[0] - rootPosRel[0], localPos[1] - rootPosRel[1], localPos[2] - rootPosRel[2]
]
# print("localPos=",localPos)
stateVector.append(rootPosRel[1])
self.pb2dmJoints = range(15)
linkIndicesSim = []
for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
linkIndicesSim.append(self.pb2dmJoints[pbJoint])
linkStatesSim = self._pybullet_client.getLinkStates(self._sim_model, linkIndicesSim,
computeForwardKinematics=True, computeLinkVelocity=True)
for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
j = self.pb2dmJoints[pbJoint]
# print("joint order:",j)
ls = linkStatesSim[pbJoint]
linkPos = ls[0]
linkOrn = ls[1]
linkPosLocal, linkOrnLocal = self._pybullet_client.multiplyTransforms(
rootTransPos, rootTransOrn, linkPos, linkOrn)
if (linkOrnLocal[3] < 0):
linkOrnLocal = [-linkOrnLocal[0], -linkOrnLocal[1], -linkOrnLocal[2], -linkOrnLocal[3]]
linkPosLocal = [
linkPosLocal[0] - rootPosRel[0], linkPosLocal[1] - rootPosRel[1],
linkPosLocal[2] - rootPosRel[2]
]
for l in linkPosLocal:
stateVector.append(l)
# re-order the quaternion, DeepMimic uses w,x,y,z
if (linkOrnLocal[3] < 0):
linkOrnLocal[0] *= -1
linkOrnLocal[1] *= -1
linkOrnLocal[2] *= -1
linkOrnLocal[3] *= -1
stateVector.append(linkOrnLocal[3])
stateVector.append(linkOrnLocal[0])
stateVector.append(linkOrnLocal[1])
stateVector.append(linkOrnLocal[2])
for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
j = self.pb2dmJoints[pbJoint]
ls = linkStatesSim[pbJoint]
linkLinVel = ls[6]
linkAngVel = ls[7]
linkLinVelLocal, unused = self._pybullet_client.multiplyTransforms([0, 0, 0], rootTransOrn,
linkLinVel, [0, 0, 0, 1])
linkAngVelLocal, unused = self._pybullet_client.multiplyTransforms([0, 0, 0], rootTransOrn,
linkAngVel, [0, 0, 0, 1])
for l in linkLinVelLocal:
stateVector.append(l)
for l in linkAngVelLocal:
stateVector.append(l)
# print("stateVector len=",len(stateVector))
# for st in range (len(stateVector)):
# print("state[",st,"]=",stateVector[st])
return stateVector
def terminates(self):
# check if any non-allowed body part hits the ground
terminates = False
pts = self._pybullet_client.getContactPoints()
for p in pts:
part = -1
# ignore self-collision
if (p[1] == p[2]):
continue
if (p[1] == self._sim_model):
part = p[3]
if (p[2] == self._sim_model):
part = p[4]
if (part >= 0 and part in self._fall_contact_body_parts):
# print("terminating part:", part)
terminates = True
return terminates
def quatMul(self, q1, q2):
return [
q1[3] * q2[0] + q1[0] * q2[3] + q1[1] * q2[2] - q1[2] * q2[1],
q1[3] * q2[1] + q1[1] * q2[3] + q1[2] * q2[0] - q1[0] * q2[2],
q1[3] * q2[2] + q1[2] * q2[3] + q1[0] * q2[1] - q1[1] * q2[0],
q1[3] * q2[3] - q1[0] * q2[0] - q1[1] * q2[1] - q1[2] * q2[2]
]
def calcRootAngVelErr(self, vel0, vel1):
diff = [vel0[0] - vel1[0], vel0[1] - vel1[1], vel0[2] - vel1[2]]
return diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2]
def calcRootRotDiff(self, orn0, orn1):
orn0Conj = [-orn0[0], -orn0[1], -orn0[2], orn0[3]]
q_diff = self.quatMul(orn1, orn0Conj)
axis, angle = self._pybullet_client.getAxisAngleFromQuaternion(q_diff)
return angle * angle
def getReward(self, pose, i=0):
"""Compute and return the pose-based reward."""
# from DeepMimic double cSceneImitate::CalcRewardImitate
# todo: compensate for ground height in some parts, once we move to non-flat terrain
# not values from the paper, but from the published code.
pose_w = 0.5
vel_w = 0.05
end_eff_w = 0.15
# does not exist in paper
root_w = 0.2
if self._useComReward:
com_w = 0.1
else:
com_w = 0
total_w = pose_w + vel_w + end_eff_w + root_w + com_w
pose_w /= total_w
vel_w /= total_w
end_eff_w /= total_w
root_w /= total_w
com_w /= total_w
pose_scale = 2
vel_scale = 0.1
end_eff_scale = 40
root_scale = 5
com_scale = 10
err_scale = 1 # error scale
reward = 0
pose_err = 0
vel_err = 0
end_eff_err = 0
root_err = 0
com_err = 0
heading_err = 0
# create a mimic reward, comparing the dynamics humanoid with a kinematic one
if self._useComReward:
comSim, comSimVel = self.computeCOMposVel(self._sim_model)
comKin, comKinVel = self.computeCOMposVel(self._kin_models[i])
root_id = 0
mJointWeights = [
0.20833, 0.10416, 0.0625, 0.10416, 0.0625, 0.041666666666666671, 0.0625, 0.0416, 0.00,
0.10416, 0.0625, 0.0416, 0.0625, 0.0416, 0.0000
]
num_end_effs = 0
num_joints = 15
root_rot_w = mJointWeights[root_id]
rootPosSim, rootOrnSim = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)
rootPosKin, rootOrnKin = self._pybullet_client.getBasePositionAndOrientation(self._kin_models[i])
linVelSim, angVelSim = self._pybullet_client.getBaseVelocity(self._sim_model)
# don't read the velocities from the kinematic model (they are zero), use the pose interpolator velocity
# see also issue https://github.com/bulletphysics/bullet3/issues/2401
linVelKin = self._poseInterpolators[i]._baseLinVel
angVelKin = self._poseInterpolators[i]._baseAngVel
root_rot_err = self.calcRootRotDiff(rootOrnSim, rootOrnKin)
pose_err += root_rot_w * root_rot_err
root_vel_diff = [
linVelSim[0] - linVelKin[0], linVelSim[1] - linVelKin[1], linVelSim[2] - linVelKin[2]
]
root_vel_err = root_vel_diff[0] * root_vel_diff[0] + root_vel_diff[1] * root_vel_diff[
1] + root_vel_diff[2] * root_vel_diff[2]
root_ang_vel_err = self.calcRootAngVelErr(angVelSim, angVelKin)
vel_err += root_rot_w * root_ang_vel_err
jointIndices = range(num_joints)
simJointStates = self._pybullet_client.getJointStatesMultiDof(self._sim_model, jointIndices)
kinJointStates = self._pybullet_client.getJointStatesMultiDof(self._kin_models[i], jointIndices)
linkStatesSim = self._pybullet_client.getLinkStates(self._sim_model, jointIndices)
linkStatesKin = self._pybullet_client.getLinkStates(self._kin_models[i], jointIndices)
for j in range(num_joints):
curr_pose_err = 0
curr_vel_err = 0
w = mJointWeights[j]
simJointInfo = simJointStates[j]
# print("simJointInfo.pos=",simJointInfo[0])
# print("simJointInfo.vel=",simJointInfo[1])
kinJointInfo = kinJointStates[j]
# print("kinJointInfo.pos=",kinJointInfo[0])
# print("kinJointInfo.vel=",kinJointInfo[1])
if (len(simJointInfo[0]) == 1):
angle = simJointInfo[0][0] - kinJointInfo[0][0]
curr_pose_err = angle * angle
velDiff = simJointInfo[1][0] - kinJointInfo[1][0]
curr_vel_err = velDiff * velDiff
if (len(simJointInfo[0]) == 4):
# print("quaternion diff")
diffQuat = self._pybullet_client.getDifferenceQuaternion(simJointInfo[0], kinJointInfo[0])
axis, angle = self._pybullet_client.getAxisAngleFromQuaternion(diffQuat)
curr_pose_err = angle * angle
diffVel = [
simJointInfo[1][0] - kinJointInfo[1][0], simJointInfo[1][1] - kinJointInfo[1][1],
simJointInfo[1][2] - kinJointInfo[1][2]
]
curr_vel_err = diffVel[0] * diffVel[0] + diffVel[1] * diffVel[1] + diffVel[2] * diffVel[2]
pose_err += w * curr_pose_err
vel_err += w * curr_vel_err
is_end_eff = j in self._end_effectors
if is_end_eff:
linkStateSim = linkStatesSim[j]
linkStateKin = linkStatesKin[j]
linkPosSim = linkStateSim[0]
linkPosKin = linkStateKin[0]
linkPosDiff = [
linkPosSim[0] - linkPosKin[0], linkPosSim[1] - linkPosKin[1],
linkPosSim[2] - linkPosKin[2]
]
curr_end_err = linkPosDiff[0] * linkPosDiff[0] + linkPosDiff[1] * linkPosDiff[
1] + linkPosDiff[2] * linkPosDiff[2]
end_eff_err += curr_end_err
num_end_effs += 1
if (num_end_effs > 0):
end_eff_err /= num_end_effs
root_pos_diff = [
rootPosSim[0] - rootPosKin[0], rootPosSim[1] - rootPosKin[1], rootPosSim[2] - rootPosKin[2]
]
root_pos_err = root_pos_diff[0] * root_pos_diff[0] + root_pos_diff[1] * root_pos_diff[
1] + root_pos_diff[2] * root_pos_diff[2]
root_err = root_pos_err + 0.1 * root_rot_err + 0.01 * root_vel_err + 0.001 * root_ang_vel_err
# COM error in initial code -> COM velocities
if self._useComReward:
com_err = 0.1 * np.sum(np.square(comKinVel - comSimVel))
# com_err = 0.1 * np.sum(np.square(comKin - comSim))
# com_err = 0.1 * (com_vel1_world - com_vel0_world).squaredNorm()
# print("pose_err=",pose_err)
# print("vel_err=",vel_err)
pose_reward = math.exp(-err_scale * pose_scale * pose_err)
vel_reward = math.exp(-err_scale * vel_scale * vel_err)
end_eff_reward = math.exp(-err_scale * end_eff_scale * end_eff_err)
root_reward = math.exp(-err_scale * root_scale * root_err)
com_reward = math.exp(-err_scale * com_scale * com_err)
reward = pose_w * pose_reward + vel_w * vel_reward + end_eff_w * end_eff_reward + root_w * root_reward + com_w * com_reward
# pose_reward,vel_reward,end_eff_reward, root_reward, com_reward);
# print("reward=",reward)
# print("pose_reward=",pose_reward)
# print("vel_reward=",vel_reward)
# print("end_eff_reward=",end_eff_reward)
# print("root_reward=",root_reward)
# print("com_reward=",com_reward)
return reward
def computeCOMposVel(self, uid: int):
"""Compute center-of-mass position and velocity."""
pb = self._pybullet_client
num_joints = 15
jointIndices = range(num_joints)
link_states = pb.getLinkStates(uid, jointIndices, computeLinkVelocity=1)
link_pos = np.array([s[0] for s in link_states])
link_vel = np.array([s[-2] for s in link_states])
tot_mass = 0.
masses = []
for j in jointIndices:
mass_, *_ = pb.getDynamicsInfo(uid, j)
masses.append(mass_)
tot_mass += mass_
masses = np.asarray(masses)[:, None]
com_pos = np.sum(masses * link_pos, axis=0) / tot_mass
com_vel = np.sum(masses * link_vel, axis=0) / tot_mass
return com_pos, com_vel

View File

@ -0,0 +1,244 @@
import json
import math
import random
import numpy as np
from os import listdir
from os.path import isfile, join
from scipy.spatial.transform import Rotation as R
from scipy.spatial.transform import Slerp
class MotionCaptureDataMultiClip(object):
def __init__(self):
self.Reset()
def Reset(self):
self._motion_data = {}
self._num_frames_min = 9999
self._num_frames_max = -1
self._num_clips = -1
self._names = []
def Load(self, path):
if isfile(path):
files = [path]
else:
files = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
files.sort()
for i in range(len(files)):
with open(files[i], 'r') as f:
self._names.append(files[i].split('/')[-1])
self._motion_data[i] = json.load(f)
t = len(self._motion_data[i]['Frames'])
self._num_frames_min = min(t, self._num_frames_min)
self._num_frames_max = max(t, self._num_frames_max)
self._num_clips = len(self._motion_data)
self._downsample = True
if self._downsample:
self.downsampleClips()
self._num_frames_max = self._num_frames_min
else:
self.upsampleClips()
self._num_frames_min = self._num_frames_max
def getNumFrames(self):
if self._downsample:
return self._num_frames_min
else:
return self._num_frames_max
def getKeyFrameDuration(self, id=0):
return self._motion_data[id]['Frames'][0][0]
def getCycleTime(self):
keyFrameDuration = self.getKeyFrameDuration()
cycleTime = keyFrameDuration * (self.getNumFrames() - 1)
return cycleTime
def calcCycleCount(self, simTime, cycleTime):
phases = simTime / cycleTime
count = math.floor(phases)
return count
def computeCycleOffset(self, id=0):
lastFrame = self.getNumFrames() - 1
frameData = self._motion_data[id]['Frames'][0]
frameDataNext = self._motion_data[id]['Frames'][lastFrame]
basePosStart = [frameData[1], frameData[2], frameData[3]]
basePosEnd = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
self._cycleOffset = [
basePosEnd[0] - basePosStart[0], basePosEnd[1] - basePosStart[1],
basePosEnd[2] - basePosStart[2]
]
return self._cycleOffset
def getNumClips(self):
return self._num_clips
def downsampleClips(self):
for i in range(self._num_clips):
n_frames = len(self._motion_data[i]['Frames'])
if n_frames != self._num_frames_min:
sample = random.sample(range(n_frames), self._num_frames_min)
sample.sort()
downsampled = np.array(self._motion_data[i]['Frames'])[sample]
self._motion_data[i]['Frames'] = downsampled.tolist()
#s = json.dumps(self._motion_data[i])
#with open("output/{}".format(self._names[i]), 'w') as f:
# f.writelines(s)
def upsampleClips(self):
print("Max number of frames: ", self._num_frames_max)
for i in range(self._num_clips):
#print("Uspsampling clip number: ", i)
keyframe_duration = self.getKeyFrameDuration(i)
old_times = np.arange(0, len(self._motion_data[i]['Frames']) * keyframe_duration, keyframe_duration)
while len(old_times) < self._num_frames_max:
new_times, new_vals = self.slerpSingleClip(self._motion_data[i]['Frames'], old_times)
#print("Number of final frames: ", len(new_vals))
self._motion_data[i]['Frames'] = new_vals
old_times = new_times
#s = json.dumps(self._motion_data[i])
#with open("output/{}".format(self._names[i]), 'w') as f:
# f.writelines(s)
def slerpSingleClip(self, clip, key_times):
#print("Number of initial frames: ", len(key_times))
org_clip = self.quatlist_to_quatlists(clip)
org_clip = np.asarray(org_clip)
t = org_clip[:, 0]
root_pos = org_clip[:, 1]
key_rots = org_clip[:, 2]
n_frames = len(key_rots)
assert len(key_times) == n_frames
needed_frames = self._num_frames_max - n_frames
#print("Needed frames: ", needed_frames)
inter_times = self.calc_inter_times(key_times)
inter_times = sorted(random.sample(inter_times, min(len(inter_times), needed_frames)))
#print("Number of frames to interpolate: ", len(inter_times))
#print("Number of rots: ", len(key_rots[0]))
inter_joint = []
for i in range(len(key_rots[0])):
quats = [rot[i] for rot in key_rots]
if len(quats[0]) == 4:
joint = R.from_quat(quats)
slerp = Slerp(key_times, joint)
interp_rots = slerp(inter_times)
interp_rots = interp_rots.as_quat().tolist()
else:
interp_rots = []
for tim in range(len(inter_times)):
lb = key_times.tolist().index(max([st for st in key_times if st < inter_times[tim]]))
ub = lb + 1
#print(lb, ub)
new_rot = (quats[ub][0] + quats[lb][0])/2
interp_rots.append([new_rot])
inter_joint.append(interp_rots)
inter_joint = np.array(inter_joint).T
#print("Shape of interpolated joints: ", inter_joint.shape)
old_dict = dict(zip(key_times, key_rots))
new_dict = dict(zip(inter_times, inter_joint))
old_root_pos = dict(zip(key_times, root_pos))
inter_root_pos = self.calc_inter_root_pos(root_pos, key_times, inter_times)
new_root_pos = dict(zip(inter_times, inter_root_pos))
new_dict = {**old_dict, **new_dict}
new_rp_dict = {**old_root_pos, **new_root_pos}
ord_keys = sorted(new_dict.keys())
ord_rots = [new_dict[k] for k in ord_keys]
ord_root_pos = [new_rp_dict[k] for k in ord_keys]
new_clip = self.quatlists_to_quatlist(t, ord_root_pos, ord_rots)
return np.array(ord_keys), new_clip
def quatlist_to_quatlists(self, clip):
new_clips = []
for c in clip:
t = c[0]
root_pos = c[1:4]
root_rotation = c[4:8]
chest_rotation = c[8:12]
neck_rotation = c[12:16]
right_hip_rotation = c[16:20]
right_knee_rotation = c[20]
right_ankle_rotation = c[21:25]
right_shoulder_rotation = c[25:29]
right_elbow_rotation = c[29]
left_hip_rotation = c[30:34]
left_knee_rotation = c[34]
left_ankle_rotation = c[35:39]
left_shoulder_rotation = c[39:43]
left_elbow_rotation = c[43]
d = [
t,
root_pos,
[
self.deepmimic_to_scipy_quaternion(root_rotation),
self.deepmimic_to_scipy_quaternion(chest_rotation),
self.deepmimic_to_scipy_quaternion(neck_rotation),
self.deepmimic_to_scipy_quaternion(right_hip_rotation),
[right_knee_rotation],
self.deepmimic_to_scipy_quaternion(right_ankle_rotation),
self.deepmimic_to_scipy_quaternion(right_shoulder_rotation),
[right_elbow_rotation],
self.deepmimic_to_scipy_quaternion(left_hip_rotation),
[left_knee_rotation],
self.deepmimic_to_scipy_quaternion(left_ankle_rotation),
self.deepmimic_to_scipy_quaternion(left_shoulder_rotation),
[left_elbow_rotation]
]
]
new_clips.append(d)
return new_clips
def deepmimic_to_scipy_quaternion(self, quat):
return quat[1:] + [quat[0]]
def scipy_to_deepmimic_quaternion(self, quat):
return [quat[-1]] + quat[:-1]
def calc_inter_times(self, times, method="intermediate"):
if method == "intermediate":
inter_times = []
for i in range(1, len(times)):
it = (times[i] - times[i-1])/2 + times[i-1]
inter_times.append(it)
return inter_times
def calc_inter_root_pos(self, root_pos, times, inter_times):
inter_root_pos = []
all_times = sorted([*times.tolist(), *inter_times])
for i in range(len(inter_times)):
low_index = times.tolist().index(all_times[all_times.index(inter_times[i]) - 1])
up_index = times.tolist().index(all_times[all_times.index(inter_times[i]) + 1])
assert low_index == up_index - 1
inter_root_pos.append(((np.array(root_pos[up_index]) + np.array(root_pos[low_index]))/2).tolist())
return inter_root_pos
def quatlists_to_quatlist(self, t, ord_root_pos, ord_rots):
delta_t = t[0]
new_quats = self.merge_quaternions(ord_rots)
a = []
for i in range(len(ord_root_pos)):
rot = new_quats[i]
a.append([
delta_t,
*ord_root_pos[i],
*rot
])
return a
def merge_quaternions(self, rotations):
quats = []
for rot in rotations:
rots = []
for r in rot:
if len(r) == 4:
r = self.scipy_to_deepmimic_quaternion(r)
rots += [el for el in r]
quats.append(rots)
return quats

View File

@ -0,0 +1,313 @@
import numpy as np
import math
from pybullet_envs.deep_mimic.env.env import Env
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
from pybullet_utils import bullet_client
import time
from pybullet_envs.deep_mimic.env import motion_capture_data_multiclip
from pybullet_envs.deep_mimic.env import humanoid_stable_pd_multiclip
import pybullet_data
import pybullet as p1
import random
from enum import Enum
class InitializationStrategy(Enum):
"""Set how the environment is initialized."""
START = 0
RANDOM = 1 # random state initialization (RSI)
class PyBulletDeepMimicEnvMultiClip(Env):
def __init__(self, arg_parser=None, enable_draw=False, pybullet_client=None,
time_step=1. / 240,
init_strategy=InitializationStrategy.RANDOM):
super().__init__(arg_parser, enable_draw)
self._num_agents = 1
self._pybullet_client = pybullet_client
self._isInitialized = False
#self._useStablePD = True
self._arg_parser = arg_parser
self.timeStep = time_step
self._init_strategy = init_strategy
self._n_clips = -1
print("Initialization strategy: {:s}".format(init_strategy))
self.reset()
def reset(self):
if not self._isInitialized:
if self.enable_draw:
self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
# disable 'GUI' since it slows down a lot on Mac OSX and some other platforms
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_GUI, 0)
else:
self._pybullet_client = bullet_client.BulletClient()
self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
self._planeId = self._pybullet_client.loadURDF("plane_implicit.urdf", [0, 0, 0],
z2y,
useMaximalCoordinates=True)
# print("planeId=",self._planeId)
self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
self._pybullet_client.setGravity(0, -9.8, 0)
self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)
self._mocapData = motion_capture_data_multiclip.MotionCaptureDataMultiClip()
motion_file = self._arg_parser.parse_strings('motion_file')
print("motion_file=", motion_file[0])
motionPath = pybullet_data.getDataPath() + "/" + motion_file[0]
self._mocapData.Load(motionPath)
self._n_clips = self._mocapData.getNumClips()
timeStep = self.timeStep
useFixedBase = False
self._humanoid = humanoid_stable_pd_multiclip.HumanoidStablePDMultiClip(self._pybullet_client, self._mocapData,
timeStep, useFixedBase, self._arg_parser)
self._isInitialized = True
self._pybullet_client.setTimeStep(timeStep)
self._pybullet_client.setPhysicsEngineParameter(numSubSteps=1)
# print("numframes = ", self._humanoid._mocap_data.NumFrames())
if self._init_strategy == InitializationStrategy.RANDOM:
rnrange = 1000
rn = random.randint(0, rnrange)
startTime = float(rn) / rnrange * self._humanoid.getCycleTime()
elif self._init_strategy == InitializationStrategy.START:
startTime = 0
self.t = startTime
self._humanoid.setSimTime(startTime)
self._humanoid.resetPose()
# this clears the contact points. Todo: add API to explicitly clear all contact points?
self._humanoid.resetPose()
self.needs_update_time = self.t - 1 # force update
def get_num_agents(self):
return self._num_agents
def get_action_space(self, agent_id):
return ActionSpace(ActionSpace.Continuous)
def get_reward_min(self, agent_id):
return 0
def get_reward_max(self, agent_id):
return 1
def get_reward_fail(self, agent_id):
return self.get_reward_min(agent_id)
def get_reward_succ(self, agent_id):
return self.get_reward_max(agent_id)
# scene_name == "imitate" -> cDrawSceneImitate
def get_state_size(self, agent_id):
# cCtController::GetStateSize()
# int state_size = cDeepMimicCharController::GetStateSize();
# state_size += GetStatePoseSize();#106
# state_size += GetStateVelSize(); #(3+3)*numBodyParts=90
# state_size += GetStatePhaseSize();#1
# 197
return 197
def build_state_norm_groups(self, agent_id):
groups = [0] * self.get_state_size(agent_id)
groups[0] = -1
return groups
def build_state_offset(self, agent_id):
out_offset = [0] * self.get_state_size(agent_id)
phase_offset = -0.5
out_offset[0] = phase_offset
return np.array(out_offset)
def build_state_scale(self, agent_id):
out_scale = [1] * self.get_state_size(agent_id)
phase_scale = 2
out_scale[0] = phase_scale
return np.array(out_scale)
def get_goal_size(self, agent_id):
return 0
def get_action_size(self, agent_id):
ctrl_size = 43 # numDof
root_size = 7
return ctrl_size - root_size
def build_goal_norm_groups(self, agent_id):
return np.array([])
def build_goal_offset(self, agent_id):
return np.array([])
def build_goal_scale(self, agent_id):
return np.array([])
def build_action_offset(self, agent_id):
out_offset = [
0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
-0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
-0.2000000, -1.5700000
]
# see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
# see cCtCtrlUtil::BuildOffsetScalePDSpherical
return np.array(out_offset)
def build_action_scale(self, agent_id):
# see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
# see cCtCtrlUtil::BuildOffsetScalePDSpherical
out_scale = [
0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
0.159235668789
]
return np.array(out_scale)
def build_action_bound_min(self, agent_id):
# see cCtCtrlUtil::BuildBoundsPDSpherical
out_scale = [
-4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
-1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
-1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
-1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
-7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
-1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
-1.000000000, -4.710000000
]
return out_scale
def build_action_bound_max(self, agent_id):
out_scale = [
4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
]
return out_scale
def set_mode(self, mode):
self._mode = mode
def need_new_action(self, agent_id):
if self.t >= self.needs_update_time:
self.needs_update_time = self.t + 1. / 30.
return True
return False
def record_state(self, agent_id):
state = self._humanoid.getState()
return np.array(state)
def record_goal(self, agent_id):
return np.array([])
def calc_reward(self, agent_id):
rewards = {}
for i in range(self._n_clips):
kinPose = self._humanoid.computePose(self._humanoid._frameFraction, i)
rewards[i] = self._humanoid.getReward(kinPose, i)
reward = self.select_reward(rewards)
return reward
def set_action(self, agent_id, action):
# print("action=",)
# for a in action:
# print(a)
self.desiredPose = {}
for i in range(self._n_clips):
self.desiredPose[i] = self._humanoid.convertActionToPose(action, i)
# we need the target root positon and orientation to be zero, to be compatible with deep mimic
self.desiredPose[i][0] = 0
self.desiredPose[i][1] = 0
self.desiredPose[i][2] = 0
self.desiredPose[i][3] = 0
self.desiredPose[i][4] = 0
self.desiredPose[i][5] = 0
self.desiredPose[i][6] = 0
target_pose = np.array(self.desiredPose[i])
# print("set_action: desiredPose=", self.desiredPose)
def log_val(self, agent_id, val):
pass
def update(self, timeStep):
# print("pybullet_deep_mimic_env:update timeStep=",timeStep," t=",self.t)
self._pybullet_client.setTimeStep(timeStep)
self._humanoid._timeStep = timeStep
self.timeStep = timeStep
self.t += timeStep
self._humanoid.setSimTime(self.t)
if self.desiredPose:
for i in range(self._n_clips):
kinPose = self._humanoid.computePose(self._humanoid._frameFraction, i)
self._humanoid.initializePose(self._humanoid._poseInterpolators[i],
self._humanoid._kin_models[i],
initBase=True)
# print("desiredPositions=",self.desiredPose[i])
maxForces = [
0, 0, 0, 0, 0, 0, 0, 200, 200, 200, 200, 50, 50, 50, 50, 200, 200, 200, 200, 150, 90,
90, 90, 90, 100, 100, 100, 100, 60, 200, 200, 200, 200, 150, 90, 90, 90, 90, 100, 100,
100, 100, 60
]
self._humanoid.computeAndApplyPDForces(self.desiredPose[0], maxForces=maxForces) # TODO check if this should be inside the for loop
self._pybullet_client.stepSimulation()
def set_sample_count(self, count):
return
def check_terminate(self, agent_id):
return Env.Terminate(self.is_episode_end())
def is_episode_end(self):
isEnded = self._humanoid.terminates()
# also check maximum time, 20 seconds (todo get from file)
# print("self.t=",self.t)
if (self.t > 20):
isEnded = True
return isEnded
def check_valid_episode(self):
# could check if limbs exceed velocity threshold
return True
def getKeyboardEvents(self):
return self._pybullet_client.getKeyboardEvents()
def isKeyTriggered(self, keys, key):
o = ord(key)
# print("ord=",o)
if o in keys:
return keys[ord(key)] & self._pybullet_client.KEY_WAS_TRIGGERED
return False
def select_reward(self, rewards, criterion=None):
# todo create enum for criterion
return max(rewards.values())

View File

@ -0,0 +1,25 @@
import sys
import subprocess
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger
def main():
# Command line arguments
args = sys.argv[1:]
arg_parser = ArgParser()
arg_parser.load_args(args)
num_workers = arg_parser.parse_int('num_workers', 1)
assert (num_workers > 0)
Logger.print2('Running with {:d} workers'.format(num_workers))
cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer_multiclip.py '.format(num_workers)
cmd += ' '.join(args)
Logger.print2('cmd: ' + cmd)
subprocess.call(cmd, shell=True)
return
if __name__ == '__main__':
main()

View File

@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
content = []
with open("output/agent0_log.txt", "r") as f:
content = f.readlines()
for i in range(len(content)):
content[i] = ' '.join(content[i].split())
content[i] = content[i].split(" ")
header = content[0]
df = pd.DataFrame(content[1:], columns=header, dtype=float)
tr_vals = df["Train_Return"].values
te_vals = df["Test_Return"].values
x = range(0, len(tr_vals)*400, 400)
fig, ax = plt.subplots()
ax.plot(x, tr_vals, label="Train")
ax.plot(x, te_vals, label="Test")
ax.set_xlim(0, (len(tr_vals)+1)*400)
ax.set_xlabel('Iterations')
ax.set_ylim(0, 100, 10)
ax.set_ylabel('Return')
ax.legend()
plt.savefig("output/rewards_log.png")

View File

@ -0,0 +1,112 @@
import time
import os
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)
import json
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from pybullet_envs.deep_mimic.learning.ppo_agent import PPOAgent
import pybullet_data
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env_multiclip import PyBulletDeepMimicEnvMultiClip
import sys
import random
update_timestep = 1. / 240.
animating = True
step = False
total_reward = 0
steps = 0
def update_world(world, time_elapsed):
timeStep = update_timestep
world.update(timeStep)
reward = world.env.calc_reward(agent_id=0)
global total_reward
total_reward += reward
global steps
steps += 1
end_episode = world.env.is_episode_end()
if (end_episode or steps >= 1000):
print("total_reward=", total_reward)
total_reward = 0
steps = 0
world.end_episode()
world.reset()
def build_arg_parser(args):
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_file = arg_parser.parse_string('arg_file', '')
if arg_file == '':
arg_file = "run_humanoid3d_backflip_args.txt"
if (arg_file != ''):
path = pybullet_data.getDataPath() + "/args/" + arg_file
if os.path.isfile(path):
succ = arg_parser.load_file(path)
else:
files = [arg_parser.load_file(f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
succ = all(files)
Logger.print2(arg_file)
assert succ, Logger.print2('Failed to load args from: ' + arg_file)
return arg_parser
args = sys.argv[1:]
def build_world(args, enable_draw):
arg_parser = build_arg_parser(args)
print("enable_draw=", enable_draw)
env = PyBulletDeepMimicEnvMultiClip(arg_parser, enable_draw)
world = RLWorld(env, arg_parser)
motion_file = arg_parser.parse_string("motion_file")
print("motion_file=", motion_file)
bodies = arg_parser.parse_ints("fall_contact_bodies")
print("bodies=", bodies)
int_output_path = arg_parser.parse_string("int_output_path")
print("int_output_path=", int_output_path)
agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")
AGENT_TYPE_KEY = "AgentType"
print("agent_file=", agent_files)
with open(agent_files) as data_file:
json_data = json.load(data_file)
print("json_data=", json_data)
assert AGENT_TYPE_KEY in json_data
agent_type = json_data[AGENT_TYPE_KEY]
print("agent_type=", agent_type)
agent = PPOAgent(world, id, json_data)
agent.set_enable_training(False)
world.reset()
return world
if __name__ == '__main__':
world = build_world(args, True)
while (world.env._pybullet_client.isConnected()):
timeStep = update_timestep
time.sleep(timeStep)
keys = world.env.getKeyboardEvents()
if world.env.isKeyTriggered(keys, ' '):
animating = not animating
if world.env.isKeyTriggered(keys, 'i'):
step = True
if (animating or step):
update_world(world, timeStep)
step = False