Add back the sonnet dependency. If sonnet is not installed, fall back to the simpleAgent that does not need sonnet.

2024-12-13 21:30:09 +00:00 · 2017-05-22 20:57:18 -07:00 · 2017-05-22 20:57:18 -07:00 · 671c4bf10e
commit 671c4bf10e
parent 2f3844e5db
7 changed files with 80 additions and 5 deletions
--- a/examples/pybullet/gym/agents/actor_net.py
+++ b/examples/pybullet/gym/agents/actor_net.py
@ -0,0 +1,21 @@
+"""An actor network."""
+import tensorflow as tf
+import sonnet as snt
+
+class ActorNetwork(snt.AbstractModule):
+  """Sonnet module that maps its inputs to tanh-squashed actions.
+
+  The network is a stack of ReLU-activated linear layers, one per entry of
+  `layer_sizes`, followed by a linear layer named 'action' of width
+  `action_size` whose output is passed through tanh.
+  """
+
+  def __init__(self, layer_sizes, action_size, name='target_actor'):
+    """Stores the layer configuration; the graph itself is made in `_build`.
+
+    Args:
+      layer_sizes: Iterable of output sizes, one per hidden linear layer.
+      action_size: Output size of the final 'action' linear layer.
+      name: Name handed to the sonnet module base class.
+    """
+    super(ActorNetwork, self).__init__(name=name)
+    self._action_size = action_size
+    self._layer_sizes = layer_sizes
+
+  def _build(self, inputs):
+    """Applies the hidden layers and then the tanh action head to `inputs`."""
+    hidden = inputs
+    for width in self._layer_sizes:
+      hidden = tf.nn.relu(snt.Linear(width)(hidden))
+
+    action_layer = snt.Linear(self._action_size, name='action')
+    return tf.tanh(action_layer(hidden))
--- a/examples/pybullet/gym/agents/simplerAgent.py
+++ b/examples/pybullet/gym/agents/simplerAgent.py
@ -10,11 +10,12 @@ import numpy as np
 import tensorflow as tf
 import pdb

-class SimplerAgent():
+class SimpleAgent():
  def __init__(
      self,
      session,
      ckpt_path,
+      actor_layer_size,
      observation_dim=31
  ):
    self._ckpt_path = ckpt_path
--- a/examples/pybullet/gym/agents/simpleAgentWithSonnet.py
+++ b/examples/pybullet/gym/agents/simpleAgentWithSonnet.py
@ -0,0 +1,46 @@
+"""Loads a DDPG agent without too many external dependencies.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import collections
+import numpy as np
+import tensorflow as tf
+
+import sonnet as snt
+from agents import actor_net
+
+class SimpleAgent():
+  """Restores a checkpointed actor network and evaluates it on observations."""
+
+  def __init__(
+      self,
+      session,
+      ckpt_path,
+      actor_layer_size,
+      observation_size=(31,),
+      action_size=8,
+  ):
+    """Builds the actor graph and restores its variables from `ckpt_path`.
+
+    Args:
+      session: TensorFlow session used to restore and run the actor.
+      ckpt_path: Checkpoint path passed to `tf.train.Saver.restore`.
+      actor_layer_size: Hidden layer sizes forwarded to `ActorNetwork`.
+      observation_size: Shape of a single (unbatched) observation.
+      action_size: Output size forwarded to `ActorNetwork`.
+    """
+    self._ckpt_path = ckpt_path
+    self._actor_layer_size = actor_layer_size
+    self._observation_size = observation_size
+    self._action_size = action_size
+    self._session = session
+    self._build()
+
+  def _build(self):
+    """Creates the observation placeholder and action op, then restores weights."""
+    self._agent_net = actor_net.ActorNetwork(self._actor_layer_size, self._action_size)
+    # Use the configured observation shape instead of a hard-coded (31,), so a
+    # non-default `observation_size` is actually honoured.
+    self._obs = tf.placeholder(tf.float32, self._observation_size)
+    with tf.name_scope('Act'):
+      # Add a leading batch dimension of size 1 before feeding the network.
+      batch_obs = snt.nest.pack_iterable_as(self._obs,
+                                            snt.nest.map(lambda x: tf.expand_dims(x, 0),
+                                                         snt.nest.flatten_iterable(self._obs)))
+      self._action = self._agent_net(batch_obs)
+      saver = tf.train.Saver()
+      saver.restore(
+          sess=self._session,
+          save_path=self._ckpt_path)
+
+  def __call__(self, observation):
+    """Runs the actor on one observation and returns the first batch entry."""
+    out_action = self._session.run(self._action, feed_dict={self._obs: observation})
+    return out_action[0]
--- a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.data-00000-of-00001
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.data-00000-of-00001
--- a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.index
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.index
--- a/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.meta
+++ b/examples/pybullet/gym/data/agent/tf_graph_data/tf_graph_data.ckpt.meta
--- a/examples/pybullet/gym/minitaurGymEnvTest.py
+++ b/examples/pybullet/gym/minitaurGymEnvTest.py
@ -10,8 +10,15 @@ import numpy as np
 import tensorflow as tf

 from envs.bullet.minitaurGymEnv import MinitaurGymEnv
-from agents import simplerAgent

+# Prefer the sonnet-based agent when sonnet can be imported; otherwise fall
+# back to the agent module that does not need sonnet. Each variant is paired
+# with its own checkpoint path.
+try:
+  import sonnet
+  from agents import simpleAgentWithSonnet as agent_lib
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
+except ImportError:
+  # NOTE(review): this imports `agents.simpleAgent`, while the sonnet-free
+  # SimpleAgent class touched by this change is in agents/simplerAgent.py --
+  # confirm that the module name resolves.
+  from agents import simpleAgent as agent_lib
+  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data.ckpt'
+  
 def testSinePolicy():
  """Tests sine policy
    """
@ -53,14 +60,14 @@ def testDDPGPolicy():
  environment = MinitaurGymEnv(render=True)
  sum_reward = 0
  steps = 1000
-  ckpt_path = 'data/agent/tf_graph_data/tf_graph_data_converted.ckpt-0'
+
  observation_shape = (31,)
  action_size = 8
-  actor_layer_sizes = (100, 181)
+  actor_layer_size = (100, 181)
  n_steps = 0
  tf.reset_default_graph()
  with tf.Session() as session:
-    agent = simplerAgent.SimplerAgent(session, ckpt_path)
+    agent = agent_lib.SimpleAgent(session=session, ckpt_path=ckpt_path, actor_layer_size=actor_layer_size)
    state = environment.reset()
    action = agent(state)
    for _ in range(steps):