Commit ce3a0f1: Fix lint

justinpolygon committed Nov 4, 2024
1 parent f6a96b1

Showing 3 changed files with 151 additions and 119 deletions.
62 changes: 31 additions & 31 deletions examples/tools/hunting-anomalies/build-lookup-table.py
@@ -5,13 +5,13 @@
 import json
 
 # Directory containing the daily CSV files
-data_dir = './aggregates_day/'
+data_dir = "./aggregates_day/"
 
 # Initialize a dictionary to hold trades data
 trades_data = defaultdict(list)
 
 # List all CSV files in the directory
-files = sorted([f for f in os.listdir(data_dir) if f.endswith('.csv')])
+files = sorted([f for f in os.listdir(data_dir) if f.endswith(".csv")])
 
 print("Starting to process files...")
 
@@ -22,15 +22,13 @@
     df = pd.read_csv(file_path)
     # For each stock, store the date and relevant data
     for _, row in df.iterrows():
-        ticker = row['ticker']
-        date = pd.to_datetime(row['window_start'], unit='ns').date()
-        trades = row['transactions']
-        close_price = row['close']  # Ensure 'close' column exists in your CSV
-        trades_data[ticker].append({
-            'date': date,
-            'trades': trades,
-            'close_price': close_price
-        })
+        ticker = row["ticker"]
+        date = pd.to_datetime(row["window_start"], unit="ns").date()
+        trades = row["transactions"]
+        close_price = row["close"]  # Ensure 'close' column exists in your CSV
+        trades_data[ticker].append(
+            {"date": date, "trades": trades, "close_price": close_price}
+        )
 
 print("Finished processing files.")
 print("Building lookup table...")
@@ -42,38 +40,40 @@
     # Convert records to DataFrame
     df_ticker = pd.DataFrame(records)
     # Sort records by date
-    df_ticker.sort_values('date', inplace=True)
-    df_ticker.set_index('date', inplace=True)
+    df_ticker.sort_values("date", inplace=True)
+    df_ticker.set_index("date", inplace=True)
 
     # Calculate the percentage change in close_price
-    df_ticker['price_diff'] = df_ticker['close_price'].pct_change() * 100  # Multiply by 100 for percentage
+    df_ticker["price_diff"] = (
+        df_ticker["close_price"].pct_change() * 100
+    )  # Multiply by 100 for percentage
 
     # Shift trades to exclude the current day from rolling calculations
-    df_ticker['trades_shifted'] = df_ticker['trades'].shift(1)
+    df_ticker["trades_shifted"] = df_ticker["trades"].shift(1)
     # Calculate rolling average and standard deviation over the previous 5 days
-    df_ticker['avg_trades'] = df_ticker['trades_shifted'].rolling(window=5).mean()
-    df_ticker['std_trades'] = df_ticker['trades_shifted'].rolling(window=5).std()
+    df_ticker["avg_trades"] = df_ticker["trades_shifted"].rolling(window=5).mean()
+    df_ticker["std_trades"] = df_ticker["trades_shifted"].rolling(window=5).std()
     # Store the data in the lookup table
    for date, row in df_ticker.iterrows():
         # Convert date to string for JSON serialization
-        date_str = date.strftime('%Y-%m-%d')
+        date_str = date.strftime("%Y-%m-%d")
         # Ensure rolling stats are available
-        if pd.notnull(row['avg_trades']) and pd.notnull(row['std_trades']):
+        if pd.notnull(row["avg_trades"]) and pd.notnull(row["std_trades"]):
             lookup_table[ticker][date_str] = {
-                'trades': row['trades'],
-                'close_price': row['close_price'],
-                'price_diff': row['price_diff'],
-                'avg_trades': row['avg_trades'],
-                'std_trades': row['std_trades']
+                "trades": row["trades"],
+                "close_price": row["close_price"],
+                "price_diff": row["price_diff"],
+                "avg_trades": row["avg_trades"],
+                "std_trades": row["std_trades"],
             }
         else:
             # Store data without rolling stats if not enough data points
             lookup_table[ticker][date_str] = {
-                'trades': row['trades'],
-                'close_price': row['close_price'],
-                'price_diff': row['price_diff'],
-                'avg_trades': None,
-                'std_trades': None
+                "trades": row["trades"],
+                "close_price": row["close_price"],
+                "price_diff": row["price_diff"],
+                "avg_trades": None,
+                "std_trades": None,
             }
 
 print("Lookup table built successfully.")
@@ -82,13 +82,13 @@
 lookup_table = {k: v for k, v in lookup_table.items()}
 
 # Save the lookup table to a JSON file
-with open('lookup_table.json', 'w') as f:
+with open("lookup_table.json", "w") as f:
     json.dump(lookup_table, f, indent=4)
 
 print("Lookup table saved to 'lookup_table.json'.")
 
 # Save the lookup table to a file for later use
-with open('lookup_table.pkl', 'wb') as f:
+with open("lookup_table.pkl", "wb") as f:
     pickle.dump(lookup_table, f)
 
 print("Lookup table saved to 'lookup_table.pkl'.")
162 changes: 97 additions & 65 deletions examples/tools/hunting-anomalies/gui-lookup-table.py
@@ -12,27 +12,28 @@
 PORT = 8888
 
 # Load the lookup_table
-with open('lookup_table.pkl', 'rb') as f:
+with open("lookup_table.pkl", "rb") as f:
     lookup_table = pickle.load(f)
 
 
 class handler(http.server.SimpleHTTPRequestHandler):
     def do_GET(self):
         # Parse the path and query parameters
         parsed_path = urlparse(self.path)
         path = parsed_path.path
         query_params = parse_qs(parsed_path.query)
-        if path == '/':
+
+        if path == "/":
             # Handle the root path
             # Get the date parameter if provided
-            date_param = query_params.get('date', [None])[0]
+            date_param = query_params.get("date", [None])[0]
 
             # Get all dates from the lookup table
             all_dates = set()
             for ticker_data in lookup_table.values():
                 all_dates.update(ticker_data.keys())
             all_dates = sorted(all_dates)
 
             # If date is None, get the latest date from the lookup table
             if date_param is None:
                 if all_dates:
@@ -41,109 +42,131 @@ def do_GET(self):
                     self.send_response(200)
                     self.send_header("Content-type", "text/html")
                     self.end_headers()
-                    html_content = '<html><body><h1>No data available.</h1></body></html>'
+                    html_content = (
+                        "<html><body><h1>No data available.</h1></body></html>"
+                    )
                     self.wfile.write(html_content.encode())
                     return
             else:
                 latest_date = date_param
 
             # Ensure latest_date is in all_dates
             if latest_date not in all_dates:
                 # Handle the case where the provided date is invalid
                 self.send_response(400)
                 self.send_header("Content-type", "text/html")
                 self.end_headers()
-                error_html = f'<html><body><h1>Error: No data available for date {latest_date}</h1></body></html>'
+                error_html = f"<html><body><h1>Error: No data available for date {latest_date}</h1></body></html>"
                 self.wfile.write(error_html.encode())
                 return
 
             # Now, get the anomalies for the latest_date
             anomalies = []
             for ticker, date_data in lookup_table.items():
                 if latest_date in date_data:
                     data = date_data[latest_date]
-                    trades = data['trades']
-                    avg_trades = data['avg_trades']
-                    std_trades = data['std_trades']
+                    trades = data["trades"]
+                    avg_trades = data["avg_trades"]
+                    std_trades = data["std_trades"]
                     if (
-                        avg_trades is not None and
-                        std_trades is not None and
-                        std_trades > 0
+                        avg_trades is not None
+                        and std_trades is not None
+                        and std_trades > 0
                     ):
                         z_score = (trades - avg_trades) / std_trades
                         threshold_multiplier = 3  # Adjust as needed
                         if z_score > threshold_multiplier:
-                            anomalies.append({
-                                'ticker': ticker,
-                                'date': latest_date,
-                                'trades': trades,
-                                'avg_trades': avg_trades,
-                                'std_trades': std_trades,
-                                'z_score': z_score,
-                                'close_price': data['close_price'],
-                                'price_diff': data['price_diff']
-                            })
+                            anomalies.append(
+                                {
+                                    "ticker": ticker,
+                                    "date": latest_date,
+                                    "trades": trades,
+                                    "avg_trades": avg_trades,
+                                    "std_trades": std_trades,
+                                    "z_score": z_score,
+                                    "close_price": data["close_price"],
+                                    "price_diff": data["price_diff"],
+                                }
+                            )
             # Sort anomalies by trades in descending order
-            anomalies.sort(key=lambda x: x['trades'], reverse=True)
+            anomalies.sort(key=lambda x: x["trades"], reverse=True)
             # Generate the HTML to display the anomalies
             self.send_response(200)
             self.send_header("Content-type", "text/html")
             self.end_headers()
             # Build the HTML content
-            html_content = '<html><link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/tablesort/5.2.1/tablesort.min.js" integrity="sha512-F/gIMdDfda6OD2rnzt/Iyp2V9JLHlFQ+EUyixDg9+rkwjqgW1snpkpx7FD5FV1+gG2fmFj7I3r6ReQDUidHelA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/tablesort/5.2.1/sorts/tablesort.number.min.js" integrity="sha512-dRD755QRxlybm0h3LXXIGrFcjNakuxW3reZqnPtUkMv6YsSWoJf+slPjY5v4lZvx2ss+wBZQFegepmA7a2W9eA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><head><title>Anomalies for {}</title></head><body>'.format(latest_date)
-            html_content += '<div id="container" style="padding:4px;"><h1>Anomalies for {}</h1>'.format(latest_date)
+            html_content = '<html><link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/tablesort/5.2.1/tablesort.min.js" integrity="sha512-F/gIMdDfda6OD2rnzt/Iyp2V9JLHlFQ+EUyixDg9+rkwjqgW1snpkpx7FD5FV1+gG2fmFj7I3r6ReQDUidHelA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/tablesort/5.2.1/sorts/tablesort.number.min.js" integrity="sha512-dRD755QRxlybm0h3LXXIGrFcjNakuxW3reZqnPtUkMv6YsSWoJf+slPjY5v4lZvx2ss+wBZQFegepmA7a2W9eA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><head><title>Anomalies for {}</title></head><body>'.format(
+                latest_date
+            )
+            html_content += '<div id="container" style="padding:4px;"><h1>Anomalies for {}</h1>'.format(
+                latest_date
+            )
             # Add navigation links (prev and next dates)
             current_index = all_dates.index(latest_date)
             prev_date = all_dates[current_index - 1] if current_index > 0 else None
-            next_date = all_dates[current_index + 1] if current_index < len(all_dates) - 1 else None
-            html_content += '<p>'
+            next_date = (
+                all_dates[current_index + 1]
+                if current_index < len(all_dates) - 1
+                else None
+            )
+            html_content += "<p>"
             if prev_date:
-                html_content += '<a href="/?date={}">Previous Date</a> '.format(prev_date)
+                html_content += '<a href="/?date={}">Previous Date</a> '.format(
+                    prev_date
+                )
             if next_date:
                 html_content += '<a href="/?date={}">Next Date</a> '.format(next_date)
-            html_content += '</p>'
+            html_content += "</p>"
             # Display the anomalies in a table
-            html_content += '<table id="anomalies" class="table table-striped table-hover">'
-            html_content += '<thead><tr>'
-            html_content += '<th>Ticker</th>'
-            html_content += '<th>Trades</th>'
-            html_content += '<th>Avg Trades</th>'
-            html_content += '<th>Std Dev</th>'
-            html_content += '<th>Z-score</th>'
-            html_content += '<th>Close Price</th>'
-            html_content += '<th>Price Diff</th>'
-            html_content += '<th>Chart</th>'
-            html_content += '</tr></thead><tbody>'
+            html_content += (
+                '<table id="anomalies" class="table table-striped table-hover">'
+            )
+            html_content += "<thead><tr>"
+            html_content += "<th>Ticker</th>"
+            html_content += "<th>Trades</th>"
+            html_content += "<th>Avg Trades</th>"
+            html_content += "<th>Std Dev</th>"
+            html_content += "<th>Z-score</th>"
+            html_content += "<th>Close Price</th>"
+            html_content += "<th>Price Diff</th>"
+            html_content += "<th>Chart</th>"
+            html_content += "</tr></thead><tbody>"
             for anomaly in anomalies:
-                html_content += '<tr>'
-                html_content += '<td>{}</td>'.format(anomaly['ticker'])
-                html_content += '<td>{}</td>'.format(anomaly['trades'])
-                html_content += '<td>{:.2f}</td>'.format(anomaly['avg_trades'])
-                html_content += '<td>{:.2f}</td>'.format(anomaly['std_trades'])
-                html_content += '<td>{:.2f}</td>'.format(anomaly['z_score'])
-                html_content += '<td>{:.2f}</td>'.format(anomaly['close_price'])
-                html_content += '<td>{:.2f}</td>'.format(anomaly['price_diff'])
+                html_content += "<tr>"
+                html_content += "<td>{}</td>".format(anomaly["ticker"])
+                html_content += "<td>{}</td>".format(anomaly["trades"])
+                html_content += "<td>{:.2f}</td>".format(anomaly["avg_trades"])
+                html_content += "<td>{:.2f}</td>".format(anomaly["std_trades"])
+                html_content += "<td>{:.2f}</td>".format(anomaly["z_score"])
+                html_content += "<td>{:.2f}</td>".format(anomaly["close_price"])
+                html_content += "<td>{:.2f}</td>".format(anomaly["price_diff"])
                 # Add a link to the chart
-                html_content += '<td><a href="/chart?ticker={}&date={}">View Chart</a></td>'.format(anomaly['ticker'], latest_date)
-                html_content += '</tr>'
+                html_content += (
+                    '<td><a href="/chart?ticker={}&date={}">View Chart</a></td>'.format(
+                        anomaly["ticker"], latest_date
+                    )
+                )
+                html_content += "</tr>"
             html_content += '</tbody></table><script>new Tablesort(document.getElementById("anomalies"));</script>'
-            html_content += '</div></body></html>'
+            html_content += "</div></body></html>"
             self.wfile.write(html_content.encode())
-        elif path == '/chart':
+        elif path == "/chart":
             # Handle the chart page
             # Get 'ticker' and 'date' from query parameters
-            ticker = query_params.get('ticker', [None])[0]
-            date = query_params.get('date', [None])[0]
+            ticker = query_params.get("ticker", [None])[0]
+            date = query_params.get("date", [None])[0]
             if ticker is None or date is None:
                 # Return an error page
                 self.send_response(400)
                 self.send_header("Content-type", "text/html")
                 self.end_headers()
-                error_html = '<html><body><h1>Error: Missing ticker or date parameter</h1></body></html>'
+                error_html = "<html><body><h1>Error: Missing ticker or date parameter</h1></body></html>"
                 self.wfile.write(error_html.encode())
             else:
                 # Fetch minute aggregates for the ticker and date
-                client = RESTClient(trace=True)  # POLYGON_API_KEY environment variable is used
+                client = RESTClient(
+                    trace=True
+                )  # POLYGON_API_KEY environment variable is used
                 try:
                     aggs = []
                     date_from = date
@@ -166,7 +189,7 @@ def do_GET(self):
                             agg.open,
                             agg.high,
                             agg.low,
-                            agg.close
+                            agg.close,
                         ]
                         data.append(new_record)
                     # Generate the HTML for the chart page
@@ -239,23 +262,31 @@ def do_GET(self):
                     </div>
                     </body>
                     </html>
-                    """ % (json.dumps(data), ticker, date, ticker)
+                    """ % (
+                        json.dumps(data),
+                        ticker,
+                        date,
+                        ticker,
+                    )
                     self.send_response(200)
                     self.send_header("Content-type", "text/html")
-                    self.send_header('Access-Control-Allow-Origin', '*')
+                    self.send_header("Access-Control-Allow-Origin", "*")
                     self.end_headers()
                     self.wfile.write(chart_html.encode())
                 except Exception as e:
                     # Handle exceptions
                     self.send_response(500)
                     self.send_header("Content-type", "text/html")
                     self.end_headers()
-                    error_html = '<html><body><h1>Error fetching data: {}</h1></body></html>'.format(str(e))
+                    error_html = "<html><body><h1>Error fetching data: {}</h1></body></html>".format(
+                        str(e)
+                    )
                     self.wfile.write(error_html.encode())
         else:
             # Serve files from the current directory
             super().do_GET()
 
+
 def run_server():
     with socketserver.TCPServer(("", PORT), handler) as httpd:
         print("serving at port", PORT)
@@ -266,5 +297,6 @@ def run_server():
         httpd.shutdown()
         httpd.server_close()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
    run_server()
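To tie the two files together: gui-lookup-table.py loads the lookup_table.pkl written by build-lookup-table.py, serves the anomaly table at http://localhost:8888/ (optionally /?date=YYYY-MM-DD), and calls the Polygon REST API for the minute-bar chart page, so POLYGON_API_KEY must be set in the environment. Below is a minimal sketch of the same anomaly rule the server applies, run standalone against the pickle; the date value is hypothetical, so substitute one present in your data:

import pickle

with open("lookup_table.pkl", "rb") as f:
    lookup_table = pickle.load(f)

date = "2024-11-01"  # hypothetical date; use one that exists in your table
threshold_multiplier = 3  # same cutoff the server uses

for ticker, date_data in lookup_table.items():
    data = date_data.get(date)
    if data is None:
        continue
    avg_trades, std_trades = data["avg_trades"], data["std_trades"]
    if avg_trades is None or std_trades is None or std_trades <= 0:
        continue  # not enough history for rolling stats
    z_score = (data["trades"] - avg_trades) / std_trades
    if z_score > threshold_multiplier:
        print(f"{ticker}: trades={data['trades']}, z-score={z_score:.2f}")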