diff --git a/README.md b/README.md
index 43f49d76..73c32436 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@

-[![pypi](https://img.shields.io/badge/pypi%20package-v0.0.1-blue)](https://pypi.org/project/genrl/)
+[![pypi](https://img.shields.io/badge/pypi%20package-v0.0.2-blue)](https://pypi.org/project/genrl/)
 [![GitHub license](https://img.shields.io/github/license/SforAiDl/genrl)](https://github.com/SforAiDl/genrl/blob/master/LICENSE)
 [![Build Status](https://travis-ci.com/SforAiDl/genrl.svg?branch=master)](https://travis-ci.com/SforAiDl/genrl)
 [![Total alerts](https://img.shields.io/lgtm/alerts/g/SforAiDl/genrl.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/SforAiDl/genrl/alerts/)
@@ -12,7 +12,8 @@
 [![codecov](https://codecov.io/gh/SforAiDl/genrl/branch/master/graph/badge.svg)](https://codecov.io/gh/SforAiDl/genrl)
 [![Documentation Status](https://readthedocs.org/projects/genrl/badge/?version=latest)](https://genrl.readthedocs.io/en/latest/?badge=latest)
 [![Maintainability](https://api.codeclimate.com/v1/badges/c3f6e7d31c078528e0e1/maintainability)](https://codeclimate.com/github/SforAiDl/genrl/maintainability)
-![Lint, Test, Code Coverage](https://github.com/SforAiDl/genrl/workflows/Lint,%20Test,%20Code%20Coverage/badge.svg)
+![Lint, Test, Code Coverage](https://github.com/SforAiDl/genrl/workflows/Lint,%20Test,%20Code%20Coverage/badge.svg)
+[![Slack - Chat](https://img.shields.io/badge/Slack-Chat-blueviolet)](https://join.slack.com/t/genrlworkspace/shared_invite/zt-gwlgnymd-Pw3TYC~0XDLy6VQDml22zg)
 ---
@@ -20,18 +21,19 @@
 ---
-**GenRL is a PyTorch reinforcement learning library centered around reproducible and generalizable algorithm implementations.**
+**GenRL is a PyTorch reinforcement learning library centered around reproducible, generalizable algorithm implementations and improved accessibility in Reinforcement Learning.**
 Reinforcement learning research is moving faster than ever before. In order to keep up with the growing trend and ensure that RL research remains reproducible, GenRL aims to aid faster paper reproduction and benchmarking by providing the following main features:
 - **PyTorch-first**: Modular, Extensible and Idiomatic Python
+- **Tutorials and Examples**: 20+ tutorials from basic RL to SOTA Deep RL algorithms (with explanations)!
 - **Unified Trainer and Logging class**: code reusability and high-level UI
 - **Ready-made algorithm implementations**: ready-made implementations of popular RL algorithms.
 - **Faster Benchmarking**: automated hyperparameter tuning, environment implementations etc.
 By integrating these features into GenRL, we aim to eventually support **any new algorithm implementation in less than 100 lines**.
-**If you're interested in contributing, feel free to go through the issues and open PRs for code, docs, tests etc. In case of any questions, please check out the [Contributing Guidelines](https://github.com/SforAiDl/genrl/wiki/Contributing-Guidelines)**
+**If you're interested in contributing, feel free to go through the issues and open PRs for code, docs, tests etc. In case of any questions, please check out the [Contributing Guidelines](CONTRIBUTING.md).**
 ## Installation
@@ -55,10 +57,9 @@ To train a Soft Actor-Critic model from scratch on the `Pendulum-v0` gym environ
 ```python
 import gym
-from genrl import SAC, QLearning
-from genrl.classical.common import Trainer
-from genrl.deep.common import OffPolicyTrainer
+from genrl.agents import SAC, QLearning
 from genrl.environments import VectorEnv
+from genrl.trainers import ClassicalTrainer, OffPolicyTrainer
 env = VectorEnv("Pendulum-v0")
 agent = SAC('mlp', env)
@@ -69,13 +70,30 @@ trainer.train()
 To train a Tabular Dyna-Q model from scratch on the `FrozenLake-v0` gym environment and plot rewards:
 ```python
+
 env = gym.make("FrozenLake-v0")
 agent = QLearning(env)
-trainer = Trainer(agent, env, mode="dyna", model="tabular", n_episodes=10000)
+trainer = ClassicalTrainer(agent, env, mode="dyna", model="tabular", n_episodes=10000)
 episode_rewards = trainer.train()
 trainer.plot(episode_rewards)
 ```
+## Tutorials
+- [Multi Armed Bandits](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/bandit_overview.html)
+  - [Upper Confidence Bound](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/ucb.html)
+  - [Thompson Sampling](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/thompson_sampling.html)
+  - [Bayesian](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/bayesian.html)
+  - [Softmax Action Selection](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/gradients.html)
+- [Contextual Bandits](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/contextual_overview.html)
+  - [Linear Posterior Inference](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/linpos.html)
+  - [Variational Inference](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/variational.html)
+  - [Bootstrap](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/bootstrap.html)
+  - [Parameter Noise Sampling](https://genrl.readthedocs.io/en/latest/usage/tutorials/bandit/noise.html)
+- [Deep Reinforcement Learning Background](https://genrl.readthedocs.io/en/latest/usage/tutorials/Deep/Background.html)
+  - [Vanilla Policy Gradients](https://genrl.readthedocs.io/en/latest/usage/tutorials/Deep/VPG.html)
+  - [Advantage Actor Critic](https://genrl.readthedocs.io/en/latest/usage/tutorials/Deep/A2C.html)
+  - [Proximal Policy Optimization](https://genrl.readthedocs.io/en/latest/usage/tutorials/Deep/PPO.html)
+
 ## Algorithms
 ### Deep RL
diff --git a/docs/source/api/agents/genrl.agents.classical.sarsa.rst b/docs/source/api/agents/genrl.agents.classical.sarsa.rst
index 1b0a2eab..5646df57 100644
--- a/docs/source/api/agents/genrl.agents.classical.sarsa.rst
+++ b/docs/source/api/agents/genrl.agents.classical.sarsa.rst
@@ -8,4 +8,3 @@ genrl.agents.classical.sarsa.sarsa module
    :members:
    :undoc-members:
    :show-inheritance:
-
diff --git a/docs/source/api/agents/genrl.agents.deep.ppo1.rst b/docs/source/api/agents/genrl.agents.deep.ppo1.rst
index 1be2ba1b..51b60c93 100644
--- a/docs/source/api/agents/genrl.agents.deep.ppo1.rst
+++ b/docs/source/api/agents/genrl.agents.deep.ppo1.rst
@@ -8,4 +8,3 @@ genrl.agents.deep.ppo1.ppo1 module
    :members:
    :undoc-members:
    :show-inheritance:
-
diff --git a/docs/source/api/agents/genrl.agents.deep.sac.rst b/docs/source/api/agents/genrl.agents.deep.sac.rst
index 5cc78eb3..ff355e40 100644
--- a/docs/source/api/agents/genrl.agents.deep.sac.rst
+++ b/docs/source/api/agents/genrl.agents.deep.sac.rst
@@ -9,4 +9,3 @@ genrl.agents.deep.sac.sac module
    :members:
    :undoc-members:
    :show-inheritance:
-
diff --git a/docs/source/api/agents/genrl.agents.deep.td3.rst b/docs/source/api/agents/genrl.agents.deep.td3.rst
index 98457846..89bc1fcc 100644
--- a/docs/source/api/agents/genrl.agents.deep.td3.rst
+++ b/docs/source/api/agents/genrl.agents.deep.td3.rst
@@ -8,4 +8,3 @@ genrl.agents.deep.td3.td3 module
    :members:
    :undoc-members:
    :show-inheritance:
-
diff --git a/docs/source/api/agents/genrl.agents.deep.vpg.rst b/docs/source/api/agents/genrl.agents.deep.vpg.rst
index 9b38e73a..753b03bc 100644
--- a/docs/source/api/agents/genrl.agents.deep.vpg.rst
+++ b/docs/source/api/agents/genrl.agents.deep.vpg.rst
@@ -9,4 +9,3 @@ genrl.agents.deep.vpg.vpg module
    :members:
    :undoc-members:
    :show-inheritance:
-
diff --git a/docs/source/usage/tutorials/Classical/Q_Learning.rst b/docs/source/usage/tutorials/Classical/Q_Learning.rst
index 918b9996..52264366 100644
--- a/docs/source/usage/tutorials/Classical/Q_Learning.rst
+++ b/docs/source/usage/tutorials/Classical/Q_Learning.rst
@@ -80,4 +80,3 @@ Great so far so good! Now moving towards the training process it is just calling
 That's it! You have successfully trained a Q-Learning agent. You can now go ahead and play with your own environments using GenRL!
-
diff --git a/docs/source/usage/tutorials/Classical/Sarsa.rst b/docs/source/usage/tutorials/Classical/Sarsa.rst
index afa07a25..f1edc531 100644
--- a/docs/source/usage/tutorials/Classical/Sarsa.rst
+++ b/docs/source/usage/tutorials/Classical/Sarsa.rst
@@ -67,4 +67,4 @@ Great so far so good! Now moving towards the training process it is just calling
    trainer.train()
    trainer.evaluate()
-That's it! You have successfully trained a SARSA agent. You can now go ahead and play with your own environments using GenRL!
\ No newline at end of file
+That's it! You have successfully trained a SARSA agent. You can now go ahead and play with your own environments using GenRL!
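The README hunks above rename the quickstart imports (`genrl`, `genrl.classical.common`, `genrl.deep.common` become `genrl.agents` and `genrl.trainers`) and rename `Trainer` to `ClassicalTrainer`. Below is a minimal sketch of the two quickstarts under the new layout; the `OffPolicyTrainer(...)` construction line is unchanged context that the diff elides, so its keyword arguments (`log_mode`, `epochs`) are assumptions rather than the library's confirmed signature.

```python
# Sketch only: exercises the import paths introduced by this PR.
import gym

from genrl.agents import SAC, QLearning
from genrl.environments import VectorEnv
from genrl.trainers import ClassicalTrainer, OffPolicyTrainer

# Deep RL quickstart: Soft Actor-Critic on Pendulum-v0.
env = VectorEnv("Pendulum-v0")
agent = SAC("mlp", env)
# The argument names below are assumed; the diff does not show this line.
trainer = OffPolicyTrainer(agent, env, log_mode=["stdout"], epochs=5)
trainer.train()

# Classical quickstart: tabular Dyna-Q on FrozenLake-v0 (Trainer -> ClassicalTrainer).
env = gym.make("FrozenLake-v0")
agent = QLearning(env)
trainer = ClassicalTrainer(agent, env, mode="dyna", model="tabular", n_episodes=10000)
episode_rewards = trainer.train()
trainer.plot(episode_rewards)
```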
diff --git a/genrl/__init__.py b/genrl/__init__.py
index e69de29b..12266c65 100644
--- a/genrl/__init__.py
+++ b/genrl/__init__.py
@@ -0,0 +1 @@
+version = "0.0.2"
diff --git a/genrl/agents/bandits/contextual/common/base_model.py b/genrl/agents/bandits/contextual/common/base_model.py
index 3cd7e899..dc2ee860 100644
--- a/genrl/agents/bandits/contextual/common/base_model.py
+++ b/genrl/agents/bandits/contextual/common/base_model.py
@@ -2,8 +2,8 @@ from typing import Dict
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
+from torch import nn as nn
+from torch.nn import functional as F
 from genrl.agents.bandits.contextual.common.transition import TransitionDB
diff --git a/genrl/agents/bandits/contextual/common/bayesian.py b/genrl/agents/bandits/contextual/common/bayesian.py
index 24960828..02aaae83 100644
--- a/genrl/agents/bandits/contextual/common/bayesian.py
+++ b/genrl/agents/bandits/contextual/common/bayesian.py
@@ -1,8 +1,8 @@
 from typing import Dict, Optional, Tuple
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
+from torch import nn as nn
+from torch.nn import functional as F
 from genrl.agents.bandits.contextual.common.base_model import Model
 from genrl.agents.bandits.contextual.common.transition import TransitionDB
diff --git a/genrl/agents/bandits/contextual/common/neural.py b/genrl/agents/bandits/contextual/common/neural.py
index 176f4a49..b28332da 100644
--- a/genrl/agents/bandits/contextual/common/neural.py
+++ b/genrl/agents/bandits/contextual/common/neural.py
@@ -1,8 +1,8 @@
 from typing import Dict
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
+from torch import nn as nn
+from torch.nn import functional as F
 from genrl.agents.bandits.contextual.common.base_model import Model
 from genrl.agents.bandits.contextual.common.transition import TransitionDB
diff --git a/genrl/agents/deep/a2c/a2c.py b/genrl/agents/deep/a2c/a2c.py
index e651a26e..05311d85 100644
--- a/genrl/agents/deep/a2c/a2c.py
+++ b/genrl/agents/deep/a2c/a2c.py
@@ -3,8 +3,8 @@ import gym
 import numpy as np
 import torch
-import torch.nn.functional as F
-import torch.optim as opt
+from torch import optim as opt
+from torch.nn import functional as F
 from genrl.agents.deep.base import OnPolicyAgent
 from genrl.utils import get_env_properties, get_model, safe_mean
diff --git a/genrl/agents/deep/ddpg/ddpg.py b/genrl/agents/deep/ddpg/ddpg.py
index 24f004b6..5fb2d3e9 100644
--- a/genrl/agents/deep/ddpg/ddpg.py
+++ b/genrl/agents/deep/ddpg/ddpg.py
@@ -2,7 +2,7 @@ from typing import Any, Dict
 import numpy as np
-import torch.optim as opt
+from torch import optim as opt
 from genrl.agents import OffPolicyAgentAC
 from genrl.core import ActionNoise
diff --git a/genrl/agents/deep/dqn/base.py b/genrl/agents/deep/dqn/base.py
index 096953de..8b0e15cb 100644
--- a/genrl/agents/deep/dqn/base.py
+++ b/genrl/agents/deep/dqn/base.py
@@ -3,7 +3,7 @@ import numpy as np
 import torch
-import torch.optim as opt
+from torch import optim as opt
 from genrl.agents import OffPolicyAgent
 from genrl.utils import get_env_properties, get_model, safe_mean
diff --git a/genrl/agents/deep/ppo1/ppo1.py b/genrl/agents/deep/ppo1/ppo1.py
index 0987d078..b0a56f2e 100644
--- a/genrl/agents/deep/ppo1/ppo1.py
+++ b/genrl/agents/deep/ppo1/ppo1.py
@@ -3,8 +3,8 @@ import gym
 import numpy as np
 import torch
-import torch.nn as nn
-import torch.optim as opt
+from torch import nn as nn
+from torch import optim as opt
 from genrl.agents import OnPolicyAgent
 from genrl.utils import get_env_properties, get_model, safe_mean
diff --git a/genrl/agents/deep/sac/sac.py b/genrl/agents/deep/sac/sac.py
index b7a5572d..407e1c6e 100644
--- a/genrl/agents/deep/sac/sac.py
+++ b/genrl/agents/deep/sac/sac.py
@@ -3,7 +3,7 @@ import numpy as np
 import torch
-import torch.optim as opt
+from torch import optim as opt
 from genrl.agents import OffPolicyAgentAC
 from genrl.utils import get_env_properties, get_model, safe_mean
diff --git a/genrl/agents/deep/vpg/vpg.py b/genrl/agents/deep/vpg/vpg.py
index 88d9d1fa..0d238313 100644
--- a/genrl/agents/deep/vpg/vpg.py
+++ b/genrl/agents/deep/vpg/vpg.py
@@ -3,7 +3,7 @@ import gym
 import numpy as np
 import torch
-import torch.optim as opt
+from torch import optim as opt
 from genrl.agents import OnPolicyAgent
 from genrl.utils import get_env_properties, get_model, safe_mean
diff --git a/genrl/core/actor_critic.py b/genrl/core/actor_critic.py
index 3b02eafc..b0c32e61 100644
--- a/genrl/core/actor_critic.py
+++ b/genrl/core/actor_critic.py
@@ -2,8 +2,8 @@ import numpy as np
 import torch
-import torch.nn as nn
 from gym import spaces
+from torch import nn as nn
 from torch.distributions import Categorical, Normal
 from genrl.core.base import BaseActorCritic
diff --git a/genrl/core/bandit.py b/genrl/core/bandit.py
index c96031cc..1a37707e 100644
--- a/genrl/core/bandit.py
+++ b/genrl/core/bandit.py
@@ -2,7 +2,7 @@ from typing import List, Tuple, Union
 import torch
-import torch.nn.functional as F
+from torch.nn import functional as F
 class Bandit(ABC):
diff --git a/genrl/core/base.py b/genrl/core/base.py
index 7b495211..610c552e 100644
--- a/genrl/core/base.py
+++ b/genrl/core/base.py
@@ -1,7 +1,7 @@
 from typing import Optional, Tuple
 import torch
-import torch.nn as nn
+from torch import nn as nn
 from torch.distributions import Categorical, Normal
diff --git a/genrl/core/noise.py b/genrl/core/noise.py
index 528a0588..60432197 100644
--- a/genrl/core/noise.py
+++ b/genrl/core/noise.py
@@ -2,7 +2,7 @@ import numpy as np
 import torch
-import torch.nn as nn
+from torch import nn as nn
 class ActionNoise(ABC):
diff --git a/genrl/utils/utils.py b/genrl/utils/utils.py
index 8f2d9b71..27560c2a 100644
--- a/genrl/utils/utils.py
+++ b/genrl/utils/utils.py
@@ -4,7 +4,7 @@ import gym
 import numpy as np
 import torch
-import torch.nn as nn
+from torch import nn as nn
 from genrl.core.base import BaseActorCritic, BasePolicy, BaseValue
 from genrl.core.noise import NoisyLinear
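The hunks in `genrl/` above only restyle the torch imports (`import torch.nn as nn` becomes `from torch import nn as nn`, and similarly for `functional` and `optim`), presumably to satisfy isort-style import grouping; that motivation is an assumption, as the diff itself does not state it. The two spellings bind the same module objects, so the change is behavior-preserving, as this small check illustrates:

```python
import torch
import torch.nn as nn_old                  # style removed by this PR
import torch.nn.functional as F_old
from torch import nn as nn_new             # style introduced by this PR
from torch.nn import functional as F_new

# Both spellings import the very same module objects; nothing changes at runtime.
assert nn_old is nn_new
assert F_old is F_new
assert nn_new.Linear is torch.nn.Linear
print("torch import restyle is purely cosmetic")
```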