From f6a96b1bd32b6281d17f6eff8f6f48b52ffc081d Mon Sep 17 00:00:00 2001
From: justinpolygon <123573436+justinpolygon@users.noreply.github.com>
Date: Mon, 4 Nov 2024 13:34:39 -0800
Subject: [PATCH 1/5] Add Hunting Anomalies in the Stock Market scripts
---
examples/tools/hunting-anomalies/README.md | 50 ++++
.../aggregates_day/README.md | 1 +
.../hunting-anomalies/build-lookup-table.py | 94 ++++++
.../hunting-anomalies/gui-lookup-table.py | 270 ++++++++++++++++++
.../hunting-anomalies/query-lookup-table.py | 63 ++++
5 files changed, 478 insertions(+)
create mode 100644 examples/tools/hunting-anomalies/README.md
create mode 100644 examples/tools/hunting-anomalies/aggregates_day/README.md
create mode 100644 examples/tools/hunting-anomalies/build-lookup-table.py
create mode 100644 examples/tools/hunting-anomalies/gui-lookup-table.py
create mode 100644 examples/tools/hunting-anomalies/query-lookup-table.py
diff --git a/examples/tools/hunting-anomalies/README.md b/examples/tools/hunting-anomalies/README.md
new file mode 100644
index 00000000..f7bc9aa2
--- /dev/null
+++ b/examples/tools/hunting-anomalies/README.md
@@ -0,0 +1,50 @@
+# Hunting Anomalies in the Stock Market
+
+This repository contains all the necessary scripts and data directories used in the [Hunting Anomalies in the Stock Market](https://polygon.io/blog/hunting-anomalies-in-stock-market/) tutorial, hosted on Polygon.io's blog. The tutorial demonstrates how to detect statistical anomalies in historical US stock market data through a comprehensive workflow that involves downloading data, building a lookup table, querying for anomalies, and visualizing them through a web interface.
+
+### Prerequisites
+
+- Python 3.8+
+- Access to Polygon.io's historical data via Flat Files
+- An active Polygon.io API key, obtainable by signing up for a Stocks paid plan
+
+### Repository Contents
+
+- `README.md`: This file, outlining setup and execution instructions.
+- `aggregates_day`: Directory where downloaded CSV data files are stored.
+- `build-lookup-table.py`: Python script to build a lookup table from the historical data.
+- `query-lookup-table.py`: Python script to query the lookup table for anomalies.
+- `gui-lookup-table.py`: Python script for a browser-based interface to explore anomalies visually.
+
+### Running the Tutorial
+
+1. **Ensure Python 3.8+ is installed:** Check your Python version and install the required third-party libraries, polygon-api-client and pandas (pickle and argparse ship with the Python standard library). A sample install command is shown after this list.
+
+2. **Set up your API key:** Make sure you have an active Polygon.io Stocks paid plan, which is required to access Flat Files. Set your API key in your environment (see the example after this list) or directly in the scripts where required.
+
+3. **Download Historical Data:** Use the MinIO client to download historical stock market data:
+ ```bash
+ mc alias set s3polygon https://files.polygon.io YOUR_ACCESS_KEY YOUR_SECRET_KEY
+ mc cp --recursive s3polygon/flatfiles/us_stocks_sip/day_aggs_v1/2024/08/ ./aggregates_day/
+ mc cp --recursive s3polygon/flatfiles/us_stocks_sip/day_aggs_v1/2024/09/ ./aggregates_day/
+ mc cp --recursive s3polygon/flatfiles/us_stocks_sip/day_aggs_v1/2024/10/ ./aggregates_day/
+ gunzip ./aggregates_day/*.gz
+ ```
+ Adjust the commands and paths based on the data you're interested in.
+
+4. **Build the Lookup Table:** This script processes the downloaded data and builds a lookup table, saving it as `lookup_table.pkl` (a JSON copy is also written to `lookup_table.json`).
+ ```bash
+ python build-lookup-table.py
+ ```
+
+5. **Query Anomalies:** Replace `2024-10-18` with the date you want to analyze for anomalies.
+ ```bash
+ python query-lookup-table.py 2024-10-18
+ ```
+
+6. **Run the GUI:** Start the server, then open `http://localhost:8888` in your browser to explore the anomalies visually.
+ ```bash
+ python gui-lookup-table.py
+ ```
+
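+As a quick reference for steps 1 and 2, a typical setup might look like this (a minimal sketch: the `pip` invocation and the `POLYGON_API_KEY` environment variable name are assumptions, so adjust them to your environment and to how you provide the key to the scripts):
+
+```bash
+# Install the third-party dependencies (pickle and argparse ship with Python)
+pip install polygon-api-client pandas
+
+# Make your API key available to the scripts
+export POLYGON_API_KEY="YOUR_API_KEY"
+```
+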
+For a complete step-by-step guide on each phase of the anomaly detection process, including additional configurations and troubleshooting, refer to the detailed [tutorial on our blog](https://polygon.io/blog/hunting-anomalies-in-stock-market).
diff --git a/examples/tools/hunting-anomalies/aggregates_day/README.md b/examples/tools/hunting-anomalies/aggregates_day/README.md
new file mode 100644
index 00000000..a0ade480
--- /dev/null
+++ b/examples/tools/hunting-anomalies/aggregates_day/README.md
@@ -0,0 +1 @@
+Download flat files into this directory. After running the `mc cp` and `gunzip` commands from the main README, it should contain one uncompressed CSV file of daily aggregates per trading day.
diff --git a/examples/tools/hunting-anomalies/build-lookup-table.py b/examples/tools/hunting-anomalies/build-lookup-table.py
new file mode 100644
index 00000000..c173d58d
--- /dev/null
+++ b/examples/tools/hunting-anomalies/build-lookup-table.py
@@ -0,0 +1,94 @@
+import os
+import pandas as pd
+from collections import defaultdict
+import pickle
+import json
+
+# Directory containing the daily CSV files
+data_dir = './aggregates_day/'
+
+# Initialize a dictionary to hold trades data
+trades_data = defaultdict(list)
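+# trades_data maps each ticker to a list of {'date', 'trades', 'close_price'}
+# records, one entry per trading day processed below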
+
+# List all CSV files in the directory
+files = sorted([f for f in os.listdir(data_dir) if f.endswith('.csv')])
+
+print("Starting to process files...")
+
+# Process each file (assuming files are named in order)
+for file in files:
+ print(f"Processing {file}")
+ file_path = os.path.join(data_dir, file)
+ df = pd.read_csv(file_path)
+ # For each stock, store the date and relevant data
+ for _, row in df.iterrows():
+ ticker = row['ticker']
+ date = pd.to_datetime(row['window_start'], unit='ns').date()
+ trades = row['transactions']
+ close_price = row['close'] # Ensure 'close' column exists in your CSV
+ trades_data[ticker].append({
+ 'date': date,
+ 'trades': trades,
+ 'close_price': close_price
+ })
+
+print("Finished processing files.")
+print("Building lookup table...")
+
+# Now, build the lookup table with rolling averages and percentage price change
+lookup_table = defaultdict(dict) # Nested dict: ticker -> date -> stats
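+# Example of the final shape (the ticker and date shown are illustrative):
+#   lookup_table['AAPL']['2024-10-18'] -> {'trades': ..., 'close_price': ...,
+#                                          'price_diff': ..., 'avg_trades': ..., 'std_trades': ...}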
+
+for ticker, records in trades_data.items():
+ # Convert records to DataFrame
+ df_ticker = pd.DataFrame(records)
+ # Sort records by date
+ df_ticker.sort_values('date', inplace=True)
+ df_ticker.set_index('date', inplace=True)
+
+ # Calculate the percentage change in close_price
+ df_ticker['price_diff'] = df_ticker['close_price'].pct_change() * 100 # Multiply by 100 for percentage
+
+ # Shift trades to exclude the current day from rolling calculations
+ df_ticker['trades_shifted'] = df_ticker['trades'].shift(1)
+ # Calculate rolling average and standard deviation over the previous 5 days
+ df_ticker['avg_trades'] = df_ticker['trades_shifted'].rolling(window=5).mean()
+ df_ticker['std_trades'] = df_ticker['trades_shifted'].rolling(window=5).std()
+ # Store the data in the lookup table
+ for date, row in df_ticker.iterrows():
+ # Convert date to string for JSON serialization
+ date_str = date.strftime('%Y-%m-%d')
+ # Ensure rolling stats are available
+ if pd.notnull(row['avg_trades']) and pd.notnull(row['std_trades']):
+ lookup_table[ticker][date_str] = {
+ 'trades': row['trades'],
+ 'close_price': row['close_price'],
+ 'price_diff': row['price_diff'],
+ 'avg_trades': row['avg_trades'],
+ 'std_trades': row['std_trades']
+ }
+ else:
+ # Store data without rolling stats if not enough data points
+ lookup_table[ticker][date_str] = {
+ 'trades': row['trades'],
+ 'close_price': row['close_price'],
+ 'price_diff': row['price_diff'],
+ 'avg_trades': None,
+ 'std_trades': None
+ }
+
+print("Lookup table built successfully.")
+
+# Convert defaultdict to regular dict for JSON serialization
+lookup_table = {k: v for k, v in lookup_table.items()}
+
+# Save the lookup table to a JSON file
+with open('lookup_table.json', 'w') as f:
+ json.dump(lookup_table, f, indent=4)
+
+print("Lookup table saved to 'lookup_table.json'.")
+
+# Save the lookup table to a file for later use
+with open('lookup_table.pkl', 'wb') as f:
+ pickle.dump(lookup_table, f)
+
+print("Lookup table saved to 'lookup_table.pkl'.")
diff --git a/examples/tools/hunting-anomalies/gui-lookup-table.py b/examples/tools/hunting-anomalies/gui-lookup-table.py
new file mode 100644
index 00000000..ee2fc43b
--- /dev/null
+++ b/examples/tools/hunting-anomalies/gui-lookup-table.py
@@ -0,0 +1,270 @@
+import os
+import pickle
+import json
+from datetime import datetime
+from polygon import RESTClient
+from polygon.rest.models import Agg
+import http.server
+import socketserver
+import traceback
+from urllib.parse import urlparse, parse_qs
+
+PORT = 8888
+
+# Load the lookup_table
+with open('lookup_table.pkl', 'rb') as f:
+ lookup_table = pickle.load(f)
+
+class handler(http.server.SimpleHTTPRequestHandler):
+ def do_GET(self):
+ # Parse the path and query parameters
+ parsed_path = urlparse(self.path)
+ path = parsed_path.path
+ query_params = parse_qs(parsed_path.query)
+
+ if path == '/':
+ # Handle the root path
+ # Get the date parameter if provided
+ date_param = query_params.get('date', [None])[0]
+
+ # Get all dates from the lookup table
+ all_dates = set()
+ for ticker_data in lookup_table.values():
+ all_dates.update(ticker_data.keys())
+ all_dates = sorted(all_dates)
+
+ # If date is None, get the latest date from the lookup table
+ if date_param is None:
+ if all_dates:
+ latest_date = max(all_dates)
+ else:
+ self.send_response(200)
+ self.send_header("Content-type", "text/html")
+ self.end_headers()
+                    html_content = '<html><body><h1>No data available.</h1></body></html>'
+ self.wfile.write(html_content.encode())
+ return
+ else:
+ latest_date = date_param
+
+ # Ensure latest_date is in all_dates
+ if latest_date not in all_dates:
+ # Handle the case where the provided date is invalid
+ self.send_response(400)
+ self.send_header("Content-type", "text/html")
+ self.end_headers()
+                error_html = f'<html><body><h1>Error: No data available for date {latest_date}</h1></body></html>'
+ self.wfile.write(error_html.encode())
+ return
+
+ # Now, get the anomalies for the latest_date
+ anomalies = []
+ for ticker, date_data in lookup_table.items():
+ if latest_date in date_data:
+ data = date_data[latest_date]
+ trades = data['trades']
+ avg_trades = data['avg_trades']
+ std_trades = data['std_trades']
+ if (
+ avg_trades is not None and
+ std_trades is not None and
+ std_trades > 0
+ ):
+ z_score = (trades - avg_trades) / std_trades
+ threshold_multiplier = 3 # Adjust as needed
+ if z_score > threshold_multiplier:
+ anomalies.append({
+ 'ticker': ticker,
+ 'date': latest_date,
+ 'trades': trades,
+ 'avg_trades': avg_trades,
+ 'std_trades': std_trades,
+ 'z_score': z_score,
+ 'close_price': data['close_price'],
+ 'price_diff': data['price_diff']
+ })
+ # Sort anomalies by trades in descending order
+ anomalies.sort(key=lambda x: x['trades'], reverse=True)
+ # Generate the HTML to display the anomalies
+ self.send_response(200)
+ self.send_header("Content-type", "text/html")
+ self.end_headers()
+ # Build the HTML content
+                html_content = '<html><body><h1>Anomalies for {}</h1>'.format(latest_date)
+                # (The table markup and the /chart detail handler are truncated in this
+                # patch excerpt; only the tail of that handler's error path survives below.)
+                error_html = '{}'.format(str(e))
+ self.wfile.write(error_html.encode())
+ else:
+ # Serve files from the current directory
+ super().do_GET()
+
+def run_server():
+ with socketserver.TCPServer(("", PORT), handler) as httpd:
+ print("serving at port", PORT)
+ try:
+ httpd.serve_forever()
+ except KeyboardInterrupt:
+ print("\nExiting gracefully...")
+ httpd.shutdown()
+ httpd.server_close()
+
+if __name__ == '__main__':
+ run_server()
diff --git a/examples/tools/hunting-anomalies/query-lookup-table.py b/examples/tools/hunting-anomalies/query-lookup-table.py
new file mode 100644
index 00000000..4037a031
--- /dev/null
+++ b/examples/tools/hunting-anomalies/query-lookup-table.py
@@ -0,0 +1,63 @@
+import pickle
+import argparse
+
+# Parse command-line arguments
+parser = argparse.ArgumentParser(description='Anomaly Detection Script')
+parser.add_argument('date', type=str, help='Target date in YYYY-MM-DD format')
+args = parser.parse_args()
+
+# Load the lookup_table
+with open('lookup_table.pkl', 'rb') as f:
+ lookup_table = pickle.load(f)
+
+# Threshold for considering an anomaly (e.g., 3 standard deviations)
+threshold_multiplier = 3
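+# A day is flagged when its trade count exceeds the trailing 5-day average
+# by more than threshold_multiplier standard deviations (i.e., z-score > 3)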
+
+# Date for which we want to find anomalies
+target_date_str = args.date
+
+# List to store anomalies
+anomalies = []
+
+# Iterate over all tickers in the lookup table
+for ticker, date_data in lookup_table.items():
+ if target_date_str in date_data:
+ data = date_data[target_date_str]
+ trades = data['trades']
+ avg_trades = data['avg_trades']
+ std_trades = data['std_trades']
+ if (
+ avg_trades is not None and
+ std_trades is not None and
+ std_trades > 0
+ ):
+ z_score = (trades - avg_trades) / std_trades
+ if z_score > threshold_multiplier:
+ anomalies.append({
+ 'ticker': ticker,
+ 'date': target_date_str,
+ 'trades': trades,
+ 'avg_trades': avg_trades,
+ 'std_trades': std_trades,
+ 'z_score': z_score,
+ 'close_price': data['close_price'],
+ 'price_diff': data['price_diff']
+ })
+
+# Sort anomalies by trades in descending order
+anomalies.sort(key=lambda x: x['trades'], reverse=True)
+
+# Print the anomalies with aligned columns
+print(f"\nAnomalies Found for {target_date_str}:\n")
+print(f"{'Ticker':<10}{'Trades':>10}{'Avg Trades':>15}{'Std Dev':>10}{'Z-score':>10}{'Close Price':>12}{'Price Diff':>12}")
+print("-" * 91)
+for anomaly in anomalies:
+ print(
+ f"{anomaly['ticker']:<10}"
+ f"{anomaly['trades']:>10.0f}"
+ f"{anomaly['avg_trades']:>15.2f}"
+ f"{anomaly['std_trades']:>10.2f}"
+ f"{anomaly['z_score']:>10.2f}"
+ f"{anomaly['close_price']:>12.2f}"
+ f"{anomaly['price_diff']:>12.2f}"
+ )
From ce3a0f1b601007c6cf8b3ea671acec3abd6a1e65 Mon Sep 17 00:00:00 2001
From: justinpolygon <123573436+justinpolygon@users.noreply.github.com>
Date: Mon, 4 Nov 2024 13:37:02 -0800
Subject: [PATCH 2/5] Fix lint
---
.../hunting-anomalies/build-lookup-table.py | 62 +++----
.../hunting-anomalies/gui-lookup-table.py | 162 +++++++++++-------
.../hunting-anomalies/query-lookup-table.py | 46 ++---
3 files changed, 151 insertions(+), 119 deletions(-)
diff --git a/examples/tools/hunting-anomalies/build-lookup-table.py b/examples/tools/hunting-anomalies/build-lookup-table.py
index c173d58d..a2de6ca8 100644
--- a/examples/tools/hunting-anomalies/build-lookup-table.py
+++ b/examples/tools/hunting-anomalies/build-lookup-table.py
@@ -5,13 +5,13 @@
import json
# Directory containing the daily CSV files
-data_dir = './aggregates_day/'
+data_dir = "./aggregates_day/"
# Initialize a dictionary to hold trades data
trades_data = defaultdict(list)
# List all CSV files in the directory
-files = sorted([f for f in os.listdir(data_dir) if f.endswith('.csv')])
+files = sorted([f for f in os.listdir(data_dir) if f.endswith(".csv")])
print("Starting to process files...")
@@ -22,15 +22,13 @@
df = pd.read_csv(file_path)
# For each stock, store the date and relevant data
for _, row in df.iterrows():
- ticker = row['ticker']
- date = pd.to_datetime(row['window_start'], unit='ns').date()
- trades = row['transactions']
- close_price = row['close'] # Ensure 'close' column exists in your CSV
- trades_data[ticker].append({
- 'date': date,
- 'trades': trades,
- 'close_price': close_price
- })
+ ticker = row["ticker"]
+ date = pd.to_datetime(row["window_start"], unit="ns").date()
+ trades = row["transactions"]
+ close_price = row["close"] # Ensure 'close' column exists in your CSV
+ trades_data[ticker].append(
+ {"date": date, "trades": trades, "close_price": close_price}
+ )
print("Finished processing files.")
print("Building lookup table...")
@@ -42,38 +40,40 @@
# Convert records to DataFrame
df_ticker = pd.DataFrame(records)
# Sort records by date
- df_ticker.sort_values('date', inplace=True)
- df_ticker.set_index('date', inplace=True)
+ df_ticker.sort_values("date", inplace=True)
+ df_ticker.set_index("date", inplace=True)
# Calculate the percentage change in close_price
- df_ticker['price_diff'] = df_ticker['close_price'].pct_change() * 100 # Multiply by 100 for percentage
+ df_ticker["price_diff"] = (
+ df_ticker["close_price"].pct_change() * 100
+ ) # Multiply by 100 for percentage
# Shift trades to exclude the current day from rolling calculations
- df_ticker['trades_shifted'] = df_ticker['trades'].shift(1)
+ df_ticker["trades_shifted"] = df_ticker["trades"].shift(1)
# Calculate rolling average and standard deviation over the previous 5 days
- df_ticker['avg_trades'] = df_ticker['trades_shifted'].rolling(window=5).mean()
- df_ticker['std_trades'] = df_ticker['trades_shifted'].rolling(window=5).std()
+ df_ticker["avg_trades"] = df_ticker["trades_shifted"].rolling(window=5).mean()
+ df_ticker["std_trades"] = df_ticker["trades_shifted"].rolling(window=5).std()
# Store the data in the lookup table
for date, row in df_ticker.iterrows():
# Convert date to string for JSON serialization
- date_str = date.strftime('%Y-%m-%d')
+ date_str = date.strftime("%Y-%m-%d")
# Ensure rolling stats are available
- if pd.notnull(row['avg_trades']) and pd.notnull(row['std_trades']):
+ if pd.notnull(row["avg_trades"]) and pd.notnull(row["std_trades"]):
lookup_table[ticker][date_str] = {
- 'trades': row['trades'],
- 'close_price': row['close_price'],
- 'price_diff': row['price_diff'],
- 'avg_trades': row['avg_trades'],
- 'std_trades': row['std_trades']
+ "trades": row["trades"],
+ "close_price": row["close_price"],
+ "price_diff": row["price_diff"],
+ "avg_trades": row["avg_trades"],
+ "std_trades": row["std_trades"],
}
else:
# Store data without rolling stats if not enough data points
lookup_table[ticker][date_str] = {
- 'trades': row['trades'],
- 'close_price': row['close_price'],
- 'price_diff': row['price_diff'],
- 'avg_trades': None,
- 'std_trades': None
+ "trades": row["trades"],
+ "close_price": row["close_price"],
+ "price_diff": row["price_diff"],
+ "avg_trades": None,
+ "std_trades": None,
}
print("Lookup table built successfully.")
@@ -82,13 +82,13 @@
lookup_table = {k: v for k, v in lookup_table.items()}
# Save the lookup table to a JSON file
-with open('lookup_table.json', 'w') as f:
+with open("lookup_table.json", "w") as f:
json.dump(lookup_table, f, indent=4)
print("Lookup table saved to 'lookup_table.json'.")
# Save the lookup table to a file for later use
-with open('lookup_table.pkl', 'wb') as f:
+with open("lookup_table.pkl", "wb") as f:
pickle.dump(lookup_table, f)
print("Lookup table saved to 'lookup_table.pkl'.")
diff --git a/examples/tools/hunting-anomalies/gui-lookup-table.py b/examples/tools/hunting-anomalies/gui-lookup-table.py
index ee2fc43b..df58746c 100644
--- a/examples/tools/hunting-anomalies/gui-lookup-table.py
+++ b/examples/tools/hunting-anomalies/gui-lookup-table.py
@@ -12,27 +12,28 @@
PORT = 8888
# Load the lookup_table
-with open('lookup_table.pkl', 'rb') as f:
+with open("lookup_table.pkl", "rb") as f:
lookup_table = pickle.load(f)
+
class handler(http.server.SimpleHTTPRequestHandler):
def do_GET(self):
# Parse the path and query parameters
parsed_path = urlparse(self.path)
path = parsed_path.path
query_params = parse_qs(parsed_path.query)
-
- if path == '/':
+
+ if path == "/":
# Handle the root path
# Get the date parameter if provided
- date_param = query_params.get('date', [None])[0]
-
+ date_param = query_params.get("date", [None])[0]
+
# Get all dates from the lookup table
all_dates = set()
for ticker_data in lookup_table.values():
all_dates.update(ticker_data.keys())
all_dates = sorted(all_dates)
-
+
# If date is None, get the latest date from the lookup table
if date_param is None:
if all_dates:
@@ -41,109 +42,131 @@ def do_GET(self):
self.send_response(200)
self.send_header("Content-type", "text/html")
self.end_headers()
-                    html_content = '<html><body><h1>No data available.</h1></body></html>'
+                    html_content = (
+                        "<html><body><h1>No data available.</h1></body></html>"
+                    )
self.wfile.write(html_content.encode())
return
else:
latest_date = date_param
-
+
# Ensure latest_date is in all_dates
if latest_date not in all_dates:
# Handle the case where the provided date is invalid
self.send_response(400)
self.send_header("Content-type", "text/html")
self.end_headers()
-                error_html = f'<html><body><h1>Error: No data available for date {latest_date}</h1></body></html>'
+                error_html = f"<html><body><h1>Error: No data available for date {latest_date}</h1></body></html>"
self.wfile.write(error_html.encode())
return
-
+
# Now, get the anomalies for the latest_date
anomalies = []
for ticker, date_data in lookup_table.items():
if latest_date in date_data:
data = date_data[latest_date]
- trades = data['trades']
- avg_trades = data['avg_trades']
- std_trades = data['std_trades']
+ trades = data["trades"]
+ avg_trades = data["avg_trades"]
+ std_trades = data["std_trades"]
if (
- avg_trades is not None and
- std_trades is not None and
- std_trades > 0
+ avg_trades is not None
+ and std_trades is not None
+ and std_trades > 0
):
z_score = (trades - avg_trades) / std_trades
threshold_multiplier = 3 # Adjust as needed
if z_score > threshold_multiplier:
- anomalies.append({
- 'ticker': ticker,
- 'date': latest_date,
- 'trades': trades,
- 'avg_trades': avg_trades,
- 'std_trades': std_trades,
- 'z_score': z_score,
- 'close_price': data['close_price'],
- 'price_diff': data['price_diff']
- })
+ anomalies.append(
+ {
+ "ticker": ticker,
+ "date": latest_date,
+ "trades": trades,
+ "avg_trades": avg_trades,
+ "std_trades": std_trades,
+ "z_score": z_score,
+ "close_price": data["close_price"],
+ "price_diff": data["price_diff"],
+ }
+ )
# Sort anomalies by trades in descending order
- anomalies.sort(key=lambda x: x['trades'], reverse=True)
+ anomalies.sort(key=lambda x: x["trades"], reverse=True)
# Generate the HTML to display the anomalies
self.send_response(200)
self.send_header("Content-type", "text/html")
self.end_headers()
# Build the HTML content
-                html_content = '<html><body><h1>Anomalies for {}</h1>'.format(latest_date)
- html_content += '