# Code adapted from https://github.com/araffin/rl-baselines-zoo
# it requires stable-baselines to be installed
# Colab Notebook: https://colab.research.google.com/drive/1nZkHO4QTYfAksm9ZTaZ5vXyC7szZxC3F
# You can run it using: python -m pybullet_envs.stable_baselines.enjoy --algo td3 --env HalfCheetahBulletEnv-v0
# Author: Antonin RAFFIN
# MIT License
import argparse
import multiprocessing
import time

import gym
import numpy as np
import pybullet_envs

from stable_baselines import SAC, TD3
from stable_baselines.common.evaluation import evaluate_policy
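# evaluate_policy could compute the mean return in a single call; the manual loop below is
# used instead so that each episode can be rendered and printed individually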
from pybullet_envs.stable_baselines.utils import TimeFeatureWrapper
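# TimeFeatureWrapper appends the remaining episode time to each observation,
# so the observation space matches the one the agent saw during training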
if __name__ == '__main__':
  parser = argparse.ArgumentParser("Enjoy an RL agent trained using Stable Baselines")
  parser.add_argument('--algo', help='RL Algorithm (Soft Actor-Critic by default)', default='sac',
                      type=str, required=False, choices=['sac', 'td3'])
  parser.add_argument('--env', type=str, default='HalfCheetahBulletEnv-v0', help='environment ID')
  parser.add_argument('-n', '--n-episodes', help='Number of episodes', default=5, type=int)
  parser.add_argument('--no-render', action='store_true', default=False,
                      help='Do not render the environment')
  args = parser.parse_args()

  env_id = args.env
  # Create an env similar to the training env
  env = TimeFeatureWrapper(gym.make(env_id))
  # For PyBullet envs, render() must be called before the first reset() to create the GUI window
  if not args.no_render:
    env.render(mode='human')
  algo = {
      'sac': SAC,
      'td3': TD3
  }[args.algo]

  # We assume that the saved model is in the same folder
  save_path = '{}_{}.zip'.format(args.algo, env_id)
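  # e.g. 'sac_HalfCheetahBulletEnv-v0.zip' with the default arguments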
  # Load the saved model
  model = algo.load(save_path, env=env)
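  # Passing env= attaches the wrapped environment to the loaded model
  # (it could be omitted if the model were only used for prediction)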
  try:
    # Use deterministic actions for evaluation
    episode_rewards, episode_lengths = [], []
    for _ in range(args.n_episodes):
      obs = env.reset()
      done = False
      episode_reward = 0.0
      episode_length = 0
      while not done:
        action, _ = model.predict(obs, deterministic=True)
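        # deterministic=True picks the mean action instead of sampling
        # (mainly relevant for SAC; TD3's policy is already deterministic)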
        obs, reward, done, _info = env.step(action)
        episode_reward += reward
        episode_length += 1
        if not args.no_render:
          env.render(mode='human')
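          # Slow the visualization down; 1/240 s is PyBullet's default physics timestep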
          dt = 1./240.
          time.sleep(dt)
      episode_rewards.append(episode_reward)
      episode_lengths.append(episode_length)
      print("Episode {} reward={}, length={}".format(len(episode_rewards), episode_reward, episode_length))
    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)
    mean_len, std_len = np.mean(episode_lengths), np.std(episode_lengths)

    print("==== Results ====")
    print("Episode_reward={:.2f} +/- {:.2f}".format(mean_reward, std_reward))
    print("Episode_length={:.2f} +/- {:.2f}".format(mean_len, std_len))
  except KeyboardInterrupt:
    pass

  # Close the environment
  env.close()