Merge remote-tracking branch 'bp/master'

Erwin Coumans 2017-05-24 19:57:28 -07:00
commit 40db4998b9
12 changed files with 26 additions and 15 deletions

View File

@@ -16,7 +16,7 @@ class SimpleAgent():
       session,
       ckpt_path,
       actor_layer_size,
-      observation_dim=31
+      observation_dim=28
   ):
     self._ckpt_path = ckpt_path
     self._session = session

View File

@@ -18,7 +18,7 @@ class SimpleAgent():
       session,
       ckpt_path,
       actor_layer_size,
-      observation_size=(31,),
+      observation_size=(28,),
       action_size=8,
   ):
     self._ckpt_path = ckpt_path
@@ -30,7 +30,7 @@ class SimpleAgent():
   def _build(self):
     self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
-    self._obs = tf.placeholder(tf.float32, (31,))
+    self._obs = tf.placeholder(tf.float32, self._observation_size)
     with tf.name_scope('Act'):
       batch_obs = snt.nest.pack_iterable_as(self._obs,
                                             snt.nest.map(lambda x: tf.expand_dims(x, 0),
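Replacing the hardcoded (31,) with self._observation_size keeps the placeholder in sync with the constructor argument. A minimal sketch of the pattern, assuming the TF1 API and leaving the Sonnet batching aside:

```python
import tensorflow as tf  # TF1-style API, matching the diff

class SimpleAgent:
  def __init__(self, observation_size=(28,)):
    self._observation_size = observation_size

  def _build(self):
    # The placeholder shape now follows the constructor argument, so a
    # change to the observation layout needs one edit instead of two.
    self._obs = tf.placeholder(tf.float32, self._observation_size)
    return tf.expand_dims(self._obs, 0)  # add a batch dimension of 1
```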

View File

@@ -1,2 +0,0 @@
-model_checkpoint_path: "tf_graph_data_converted.ckpt-0"
-all_model_checkpoint_paths: "tf_graph_data_converted.ckpt-0"
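The deleted file is TensorFlow's text-format CheckpointState index, which tf.train.get_checkpoint_state() consults to find the latest checkpoint prefix. A minimal reading sketch (the directory comes from the ckpt_path in the test script below):

```python
import tensorflow as tf  # TF1

# Reads the text-proto "checkpoint" index file in the given directory.
state = tf.train.get_checkpoint_state('data/agent/tf_graph_data')
if state is not None:
  print(state.model_checkpoint_path)       # latest checkpoint prefix
  print(state.all_model_checkpoint_paths)  # all retained checkpoints
```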

View File

@@ -16,10 +16,14 @@ class MinitaurGymEnv(gym.Env):
   def __init__(self,
                urdfRoot="",
                actionRepeat=1,
+               isEnableSelfCollision=True,
+               motorVelocityLimit=10.0,
                render=False):
     self._timeStep = 0.01
     self._urdfRoot = urdfRoot
     self._actionRepeat = actionRepeat
+    self._motorVelocityLimit = motorVelocityLimit
+    self._isEnableSelfCollision = isEnableSelfCollision
     self._observation = []
     self._envStepCounter = 0
     self._render = render
@@ -44,7 +48,7 @@ class MinitaurGymEnv(gym.Env):
     p.setTimeStep(self._timeStep)
     p.loadURDF("%splane.urdf" % self._urdfRoot)
     p.setGravity(0,0,-10)
-    self._minitaur = minitaur_new.Minitaur(self._urdfRoot)
+    self._minitaur = minitaur_new.Minitaur(urdfRootPath=self._urdfRoot, timeStep=self._timeStep, isEnableSelfCollision=self._isEnableSelfCollision, motorVelocityLimit=self._motorVelocityLimit)
     self._envStepCounter = 0
     self._lastBasePosition = [0, 0, 0]
     for i in range(100):
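The environment now threads the self-collision and velocity-limit settings through to the Minitaur it constructs. A usage sketch with the new keyword arguments (names and defaults are taken from the hunk above):

```python
from envs.bullet.minitaurGymEnv import MinitaurGymEnv

# All arguments shown at their defaults from the diff; only render differs.
env = MinitaurGymEnv(urdfRoot="",
                     actionRepeat=1,
                     isEnableSelfCollision=True,
                     motorVelocityLimit=10.0,
                     render=True)
```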

View File

@@ -4,8 +4,12 @@ import copy
 import math

 class Minitaur:
-  def __init__(self, urdfRootPath=''):
+  def __init__(self, urdfRootPath='', timeStep=0.01, isEnableSelfCollision=True, motorVelocityLimit=10.0):
     self.urdfRootPath = urdfRootPath
+    self.isEnableSelfCollision = isEnableSelfCollision
+    self.motorVelocityLimit = motorVelocityLimit
+    self.timeStep = timeStep
     self.reset()

   def buildJointNameToIdDict(self):
@@ -27,8 +31,10 @@ class Minitaur:
     self.motorIdList.append(self.jointNameToId['motor_back_rightR_joint'])

   def reset(self):
-    # self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION)
-    self.quadruped = p.loadURDF("%squadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2])
+    if self.isEnableSelfCollision:
+      self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2], flags=p.URDF_USE_SELF_COLLISION)
+    else:
+      self.quadruped = p.loadURDF("%s/quadruped/minitaur.urdf" % self.urdfRootPath, [0,0,.2])
     self.kp = 1
     self.kd = 1
     self.maxForce = 3.5
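In pybullet, self-collision is opt-in at load time, which is why reset() branches on the flag instead of toggling it on an already-loaded body. A standalone sketch (DIRECT mode; the URDF path is illustrative and assumes pybullet's data directory is the working directory):

```python
import pybullet as p

p.connect(p.DIRECT)
# Self-collision must be requested via a flag when the URDF is loaded;
# it cannot be switched on afterwards.
quadruped = p.loadURDF("quadruped/minitaur.urdf", [0, 0, .2],
                       flags=p.URDF_USE_SELF_COLLISION)
```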
@@ -116,11 +122,15 @@ class Minitaur:
     observation.extend(self.getMotorVelocities().tolist())
     observation.extend(self.getMotorTorques().tolist())
     observation.extend(list(self.getBaseOrientation()))
-    observation.extend(list(self.getBasePosition()))
     return observation

   def applyAction(self, motorCommands):
+    if self.motorVelocityLimit < np.inf:
+      currentMotorAngle = self.getMotorAngles()
+      motorCommandsMax = currentMotorAngle + self.timeStep * self.motorVelocityLimit
+      motorCommandsMin = currentMotorAngle - self.timeStep * self.motorVelocityLimit
+      motorCommands = np.clip(motorCommands, motorCommandsMin, motorCommandsMax)
     motorCommandsWithDir = np.multiply(motorCommands, self.motorDir)
     # print('action: {}'.format(motorCommands))
     # print('motor: {}'.format(motorCommandsWithDir))
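The new clamp in applyAction() bounds how far a position command may move each motor in one control step: at most timeStep * motorVelocityLimit radians from the current angle. A worked numeric sketch with the defaults from the constructor above:

```python
import numpy as np

timeStep, motorVelocityLimit = 0.01, 10.0  # defaults from the diff
currentMotorAngle = np.zeros(8)            # assume motors at the zero pose
motorCommands = np.array([0.5, -0.3, 0.05, 0.0, 1.0, -1.0, 0.08, -0.08])

lo = currentMotorAngle - timeStep * motorVelocityLimit  # -0.1 rad
hi = currentMotorAngle + timeStep * motorVelocityLimit  # +0.1 rad
print(np.clip(motorCommands, lo, hi))
# -> [ 0.1  -0.1   0.05  0.    0.1  -0.1   0.08 -0.08]
```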

View File

@@ -14,10 +14,9 @@ from envs.bullet.minitaurGymEnv import MinitaurGymEnv
 try:
   import sonnet
   from agents import simpleAgentWithSonnet as agent_lib
-  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
 except ImportError:
   from agents import simpleAgent as agent_lib
-  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data.ckpt'

 def testSinePolicy():
   """Tests sine policy
@@ -60,10 +59,10 @@ def testDDPGPolicy():
   environment = MinitaurGymEnv(render=True)
   sum_reward = 0
   steps = 1000
-  observation_shape = (31,)
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
+  observation_shape = (28,)
   action_size = 8
-  actor_layer_size = (100, 181)
+  actor_layer_size = (297, 158)
   n_steps = 0
   tf.reset_default_graph()
   with tf.Session() as session:
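With the checkpoint path now set inside testDDPGPolicy(), the agent construction presumably follows the sketch below. The values come from the hunks above; the agent import mirrors the try/except at the top of the file, and the SimpleAgent arguments past the actor layer sizes are left at their new 28-dim defaults:

```python
import tensorflow as tf
from agents import simpleAgent as agent_lib  # fallback branch from the diff
from envs.bullet.minitaurGymEnv import MinitaurGymEnv

# Hedged wiring sketch; the act/step loop of the test is elided here.
environment = MinitaurGymEnv(render=True)
ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'

tf.reset_default_graph()
with tf.Session() as session:
  agent = agent_lib.SimpleAgent(session, ckpt_path, (297, 158))
```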