Skip to content

Commit

Permalink
Add Monitor Wrapper (openai#79)
Browse files Browse the repository at this point in the history
* Revert "remove unused i parameter from WeakUnvectorize"

This reverts commit 1825ecf.

* Add todo

* Add Monitored wrapper

* Use MonitorManager

* Monitored -> Monitor

* Finish Monitored -> Monitor

* use gym 0.6.0

* Bump universe version

* Bump gym version

* bump gym

* Remove pyc files

* Move Monitor from vectorized to wrappers module

* Fix import

* Force go_vncdriver

* Clean up comments

* Move monitor start to configure
  • Loading branch information
nottombrown authored Dec 24, 2016
1 parent e2d005e commit f637b07
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 105 deletions.
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,17 @@ upload:
twine upload dist/*

test:
find . -name '*.pyc' -delete
docker build -f test.dockerfile -t quay.io/openai/universe:test .
docker run -v /usr/bin/docker:/usr/bin/docker -v /root/.docker:/root/.docker -v /var/run/docker.sock:/var/run/docker.sock --net=host quay.io/openai/universe:test

build:
find . -name '*.pyc' -delete
docker build -t quay.io/openai/universe .
docker build -f test.dockerfile -t quay.io/openai/universe:test .

push:
find . -name '*.pyc' -delete
docker build -t quay.io/openai/universe .
docker build -f test.dockerfile -t quay.io/openai/universe:test .

Expand Down
16 changes: 6 additions & 10 deletions example/diagnostic-agent/diagnostic-agent.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
#!/usr/bin/env python
import argparse
import logging
import numpy as np
import os
import six
import sys
import threading
import time
import traceback

import gym
import numpy as np
import universe
from universe import pyprofile, wrappers, spaces
from gym import wrappers as gym_wrappers

# if not os.getenv("PYPROFILE_FREQUENCY"):
# pyprofile.profile.print_frequency = 5
from universe import vectorized

logger = logging.getLogger()

Expand Down Expand Up @@ -102,6 +99,9 @@ def __call__(self, observation, reward, done):
env.seed([0])
env = wrappers.Logger(env)

if args.monitor:
env = wrappers.Monitor('/tmp/vnc_random_agent', force=True)(env)

env.configure(
fps=args.fps,
# print_frequency=None,
Expand All @@ -119,10 +119,6 @@ def __call__(self, observation, reward, done):
'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5,
},
)

if args.monitor:
env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)

if args.actions == 'random':
action_space = env.action_space
elif args.actions == 'noop':
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages

setup(name='universe',
version='0.20.4',
version='0.21.0',
description="Universe: a software platform for measuring and training an AI's general intelligence across the world's supply of games, websites and other applications.",
url='https://github.com/openai/universe',
author='OpenAI',
Expand All @@ -14,7 +14,7 @@
'docker-pycreds==0.2.1',
'fastzbarlight>=0.0.13',
'go-vncdriver>=0.4.8',
'gym>=0.5.7',
'gym>=0.6.0',
'Pillow',
'PyYAML',
'six',
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ skipsdist=True
passenv=DISPLAY DOCKER_USERNAME DOCKER_PASSWORD FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES TRAVIS*
deps =
pytest
gym[atari]>=0.2.12
gym[atari]
docker-py==1.10.3
Pillow
autobahn
Expand Down
21 changes: 9 additions & 12 deletions universe/envs/vnc_env.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
import getpass
import logging
import os
import socket
import time
import uuid

# The Go driver is the most supported one. So long as the Go driver
# turns out to be easy to install, we'll continue forcing the Go
# driver here.
import go_vncdriver

from gym import scoreboard
from gym.utils import reraise

import universe
from universe import error, remotes as remotes_module, pyprofile, rewarder, spaces, twisty, utils, vectorized, vncdriver
from gym.utils import reraise
from universe import error, pyprofile, rewarder, spaces, twisty, vectorized, vncdriver
from universe import remotes as remotes_module
from universe.envs import diagnostics
from universe.remotes import healthcheck
from universe.runtimes import registration
from universe.vncdriver import libvnc_session

# The Go driver is the most supported one. So long as the Go driver
# turns out to be easy to install, we'll continue forcing the Go
# driver here.
# noinspection PyUnresolvedReferences
import go_vncdriver

logger = logging.getLogger(__name__)
extra_logger = logging.getLogger('universe.extra.'+__name__)

Expand Down
24 changes: 0 additions & 24 deletions universe/vectorized/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,6 @@ class Env(gym.Env):
# Number of remotes. User should set this.
n = None

@property
def monitor(self):
if not self.metadata['runtime.vectorized']:
# Just delegate if we're not actually vectorized (like
# Unvectorize)
return super(Env, self).monitor

if not hasattr(self, '_monitor'):
# Not much we can do if we don't know how wide we'll
# be. This can happen when closing.
if self.n is None:
raise error.Error('You must call "configure()" before accesssing the monitor for {}'.format(self))

# Circular dependencies :(
from universe import wrappers
from universe.vectorized import monitoring
# We need to maintain pointers to these to avoid them being
# GC'd. They have a weak reference to us to avoid cycles.
self._unvectorized = [wrappers.WeakUnvectorize(self) for _ in range(self.n)]
# Store reference to avoid GC
# self._render_cached = monitoring.RenderCache(self)
self._monitor = monitoring.Monitor(self._unvectorized)
return self._monitor

class Wrapper(Env, gym.Wrapper):
"""Use this instead of gym.Wrapper iff you're wrapping a vectorized env,
(or a vanilla env you wish to be vectorized).
Expand Down
52 changes: 0 additions & 52 deletions universe/vectorized/monitoring.py

This file was deleted.

10 changes: 7 additions & 3 deletions universe/vectorized/tests/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
def test_multiprocessing_env_monitoring():
with helpers.tempdir() as temp:
env = wrappers.WrappedMultiprocessingEnv('Pong-v3')
env = wrappers.Monitor(temp)(env)
env.configure(n=2)
env.monitor.start(temp)
env.reset()
for i in range(2):
env.step([0, 0])
Expand All @@ -24,13 +24,17 @@ def test_vnc_monitoring():
with helpers.tempdir() as temp:
env = gym.make('gym-core.Pong-v3')
env = wrappers.GymCoreAction(env)
env = wrappers.Monitor(temp)(env)

env.configure(remotes=2)
env.monitor.start(temp, seed_n=[1, 2])
env.reset()
for i in range(2):
env.step([0, 0])
env.monitor.close()
env.close()

results = gym.monitoring.load_results(temp)
assert results['env_info']['env_id'] == 'gym-core.Pong-v3'

if __name__ == '__main__':
test_multiprocessing_env_monitoring()
test_vnc_monitoring()
1 change: 1 addition & 0 deletions universe/wrappers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from universe.wrappers.vectorize import Vectorize, Unvectorize, WeakUnvectorize
from universe.wrappers.vision import Vision
from universe.wrappers.recording import Recording
from universe.wrappers.monitoring import Monitor

def wrap(env):
return Timer(Render(Throttle(env)))
Expand Down
49 changes: 49 additions & 0 deletions universe/wrappers/monitoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import logging

from gym import monitoring
from universe.vectorized import core # Cannot import vectorized directly without inducing a cycle

logger = logging.getLogger(__name__)

def Monitor(directory, video_callable=None, force=False, resume=False,
            write_upon_reset=False, uid=None, mode=None):
    """Build a vectorized wrapper class that monitors the first sub-environment.

    The arguments are captured by the returned class and forwarded, in this
    order, to ``MonitorManager.start`` once the wrapped env is configured.

    Usage::

        env = Monitor('/tmp/results', force=True)(env)
        env.configure(...)  # monitoring starts here

    Returns:
        A ``core.Wrapper`` subclass; instantiate it with the env to wrap.
    """
    class Monitor(core.Wrapper):
        def _configure(self, **kwargs):
            """Configure the wrapped env, then start monitoring."""
            super(Monitor, self)._configure(**kwargs)

            # We have to wait until configure to start the monitor because we
            # need the number of instances (self.n) in a vectorized env.
            self._start_monitor()

        def _start_monitor(self):
            """Create one unvectorized view per index and monitor the first."""
            # Circular dependencies :(
            from universe import wrappers
            # We need to maintain pointers to these to avoid them being
            # GC'd. They have a weak reference to us to avoid cycles.
            self._unvectorized_envs = [wrappers.WeakUnvectorize(self, i) for i in range(self.n)]

            # For now we only monitor the first env
            self._monitor = monitoring.MonitorManager(self._unvectorized_envs[0])
            self._monitor.start(directory, video_callable, force, resume,
                                write_upon_reset, uid, mode)

        def _step(self, action_n):
            """Step every env, reporting only index 0 to the monitor."""
            self._monitor._before_step(action_n[0])
            observation_n, reward_n, done_n, info = self.env.step(action_n)
            # NOTE(review): the whole vectorized `info` is handed to the
            # monitor rather than a per-env slice -- confirm this is intended.
            done_n[0] = self._monitor._after_step(observation_n[0], reward_n[0], done_n[0], info)
            return observation_n, reward_n, done_n, info

        def _reset(self):
            """Reset every env, reporting only index 0 to the monitor."""
            self._monitor._before_reset()
            observation_n = self.env.reset()
            self._monitor._after_reset(observation_n[0])
            return observation_n

        def _close(self):
            """Close the wrapped env, then the monitor if it was ever started."""
            super(Monitor, self)._close()
            # The monitor only exists after configure(); an env that is closed
            # without having been configured must not fail here.
            monitor = getattr(self, '_monitor', None)
            if monitor is not None:
                monitor.close()

        def set_monitor_mode(self, mode):
            """Deprecated: forward ``mode`` to the underlying monitor."""
            logger.info("Setting the monitor mode is deprecated and will be removed soon")
            self._monitor._set_mode(mode)

    return Monitor
6 changes: 5 additions & 1 deletion universe/wrappers/vectorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,14 @@ def _seed(self, seed):
return self.env.seed([seed])[0]

class WeakUnvectorize(Unvectorize):
def __init__(self, env):
def __init__(self, env, i):
self._env_ref = weakref.ref(env)
super(WeakUnvectorize, self).__init__(env)
# WeakUnvectorize won't get configure called on it
self.i = i

def _check_for_duplicate_wrappers(self):
pass # Disable this check because we need to wrap vectorized envs in multiple unvectorize wrappers

@property
def env(self):
Expand Down

0 comments on commit f637b07

Please sign in to comment.