Merge pull request #36 from fortesenselabs/nt-backtest-environment
Add multi-agent environment sample and update README
Showing 101 changed files with 11,951 additions and 108 deletions.
```diff
@@ -276,4 +276,5 @@ rl_fts/notebooks/data/*
 # micell.
 research/*
 add_stage.sh
+catalog/
```
# What is this?

This is an implementation of my master's thesis. It relies on `tensortrade` for setting up financial RL environments and Ray `RLlib` for RL agent implementations. The goal of my master's thesis is to combine multiple trading RL algorithms, each trained on a different data type, into a single more sophisticated and better-performing algorithm. To achieve this goal, I will be creating trading strategies. Each trading strategy produces an RL algorithm, and strategies may incorporate previous strategies.
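As an illustration of the combining idea (not code from this repository), the discrete actions of several trained agents could be merged by majority vote. The agent signals and the `ensemble_action` helper below are assumptions for the sketch:

```python
# Hypothetical sketch: merge the discrete actions of several trained
# agents into one ensemble action by majority vote. The signal values
# and tie-breaking rule are illustrative assumptions.
from collections import Counter

BUY, HOLD, SELL = 1, 0, -1

def ensemble_action(actions):
    """Return the most common action; ambiguous ties fall back to HOLD."""
    counts = Counter(actions)
    action, votes = counts.most_common(1)[0]
    # If more than one action received the top vote count, there is no
    # clear majority, so stay flat.
    if sum(1 for c in counts.values() if c == votes) > 1:
        return HOLD
    return action

print(ensemble_action([BUY, BUY, SELL]))   # -> 1
print(ensemble_action([BUY, SELL, HOLD]))  # -> 0 (three-way tie)
```

A weighted vote (e.g. by each agent's validation performance) would be a natural refinement of this scheme.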

# Repository Structure

- **.vscode** contains opinionated VS Code settings for debugging and formatting.
- **environments** financial trading RL environments.
- **strategies** strategies implemented to train DRL agents on a specified environment.
- **logs** training logs for strategies.
- **rayExtensions** classes which extend the behaviour of the Ray framework.
- **tensortrade** extensions made to plug into the tensortrade framework.

## Strategies

If an environment represents a problem, a strategy represents a solution to that environment. Strategies apply deep reinforcement learning algorithms to environments. Strategies are separated by the domain of the environment they solve, and each environment domain has one or more strategies for solving it. Strategies may use custom models for their deep reinforcement learning algorithms.
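The environment/strategy split described above could be expressed as a minimal interface like the following sketch. The class and method names are assumptions for illustration, not the repository's actual API:

```python
# Illustrative sketch of a strategy interface: a strategy binds a DRL
# training procedure to one environment domain. Names are hypothetical.
from abc import ABC, abstractmethod

class Strategy(ABC):
    """A solution procedure for one environment domain."""

    def __init__(self, env_name: str):
        self.env_name = env_name

    @abstractmethod
    def train(self, iterations: int) -> dict:
        """Run training and return summary metrics."""

class BuyAndHoldBaseline(Strategy):
    """Trivial non-learning baseline, included only to show the interface."""

    def train(self, iterations: int) -> dict:
        return {"env": self.env_name, "iterations": iterations, "reward": 0.0}

print(BuyAndHoldBaseline("crypto-v0").train(10))
```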

# Install Instructions

1. Install Anaconda and create a new environment for this project's dependencies.
2. Attempt to install from the `environment.yml` for conda. If it works, great; if it doesn't, follow the next steps.
3. Install Ray RLlib following the official installation instructions.
4. Install tensortrade.

# Apple Silicon (M1) Install Instructions

1. Install Anaconda and create a new environment for this project's dependencies.
2. Attempt to install from the `environment.yml` for conda. If it works, great; if it doesn't, follow the next steps.
3. Install Ray RLlib, following the Apple silicon instructions from the official website.
4. Install tensortrade for Apple silicon.
5. Download the tensortrade repository, comment out tensortrade in the `requirements.txt`, and install all dependencies.
6. You're good to go.

# Viewing Experiments

Experiments can be viewed in TensorBoard by running the following:

`tensorboard --logdir logs/`

# Algorithm Information

## RL Algorithm

The model is PPO (Proximal Policy Optimization).
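For background (this is textbook PPO, not code from this repository), the core of PPO is the clipped surrogate objective from Schulman et al. (2017), which limits how far the policy ratio can move the update. A NumPy sketch:

```python
# Background sketch of the standard PPO clipped surrogate objective:
# L^CLIP = E[min(r * A, clip(r, 1 - eps, 1 + eps) * A)],
# where r is the probability ratio pi_new/pi_old and A the advantage.
# Not code from this repository.
import numpy as np

def ppo_clip_objective(ratio, advantage, eps=0.2):
    """Mean clipped surrogate objective over a batch."""
    unclipped = ratio * advantage
    clipped = np.clip(ratio, 1.0 - eps, 1.0 + eps) * advantage
    # Taking the minimum makes the bound pessimistic: large policy
    # moves cannot increase the objective beyond the clipped value.
    return np.mean(np.minimum(unclipped, clipped))

# A large ratio with positive advantage is capped at 1 + eps:
print(ppo_clip_objective(np.array([1.5]), np.array([1.0])))  # -> 1.2
```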

## Policy Model

# Notes:

- Update the batch size to something reasonable.
- Update the number of workers.
- Find out what an env spec should look like (specifically the ID).

# TODO:

## Other TODO

- Update the reward scheme from simple net-worth change to one that includes risk, position time, and other useful evaluators identified in my research.
- Update the network architecture to use an LSTM.
- Update the network architecture to use attention.
- Implement a strategy on stock data.
- Set up preprocessing:
  - Generate technical indicators.
  - Identify correlation and remove duplicate features.
  - Normalise the data set.
  - Implement a function to un-normalise the dataset for rendering.
- Implement a strategy on fundamental data.
- Implement a strategy which utilises multi-agent RL.
- Implement a strategy which utilises hierarchical RL for quantity selection and stock action.
- Implement a new strategy as an action-selection strategy which specifies an amount of stock to buy / sell / short.
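The preprocessing steps in the TODO list (correlation pruning, reversible normalisation) could be sketched as follows. The thresholds, column layout, and function names are illustrative assumptions, not the repository's implementation:

```python
# Hedged sketch of the planned preprocessing: drop highly correlated
# feature columns, then min-max normalise with enough state kept to
# un-normalise later for rendering. Assumes non-constant columns.
import numpy as np

def drop_correlated(X, threshold=0.95):
    """Keep a column only if its |corr| with every kept earlier column is below threshold."""
    corr = np.abs(np.corrcoef(X, rowvar=False))
    keep = []
    for j in range(X.shape[1]):
        if all(corr[j, k] < threshold for k in keep):
            keep.append(j)
    return X[:, keep], keep

def normalise(X):
    """Scale each column to [0, 1]; return the params needed to invert."""
    lo, hi = X.min(axis=0), X.max(axis=0)
    return (X - lo) / (hi - lo), (lo, hi)

def unnormalise(Xn, params):
    """Invert normalise() so rendered values are in original units."""
    lo, hi = params
    return Xn * (hi - lo) + lo

# Column 1 is an exact multiple of column 0, so it is pruned.
X = np.array([[1.0, 2.0, 10.0],
              [2.0, 4.0, 20.0],
              [3.0, 6.0, 15.0]])
Xr, kept = drop_correlated(X)
Xn, params = normalise(Xr)
assert np.allclose(unnormalise(Xn, params), Xr)
print(kept)  # -> [0, 2]
```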

**Future additions**

- Add a stock-picking RL agent trained to select the best stock over a specified period.
- Add this RL agent as a new level to the hierarchical RL agent.