-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e476339
commit c14f5b7
Showing
13 changed files
with
4,539 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "fda13211-51f3-4192-aefc-846657dbace9", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Train shapes: (1551, 60, 768) (1551, 60) (1551,)\n", | ||
"Test shapes: (318, 60, 768) (318, 60) (318,)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import torch\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import torch.nn as nn\n", | ||
"from torch.utils.data import TensorDataset, DataLoader\n", | ||
"from utils import create_sequences\n", | ||
"\n", | ||
"# Load the DJIA table, keep only the closing price, and order rows by date.\n", | ||
"df = pd.read_csv('data/upload_DJIA_table.csv', parse_dates=['Date'], index_col='Date')\n", | ||
"df = df[['Close']]\n", | ||
"df = df.sort_index()\n", | ||
"\n", | ||
"# Chronological split: everything through 2014 trains, 2015 onward tests.\n", | ||
"train_data = df.loc[:'2014']\n", | ||
"test_data = df.loc['2015':]\n", | ||
"\n", | ||
"bert_embeddings = np.load('bert_embeddings.npy')\n", | ||
"\n", | ||
"# Split the embeddings at the same row offset as the price frame.\n", | ||
"# NOTE(review): this assumes the rows of bert_embeddings.npy are in the same\n", | ||
"# date-ascending order as the sorted df -- confirm against how the file was built.\n", | ||
"bert_embeddings_train = bert_embeddings[:train_data.shape[0]].reshape(-1, 768)\n", | ||
"bert_embeddings_test = bert_embeddings[train_data.shape[0]:].reshape(-1, 768)\n", | ||
"\n", | ||
"\n", | ||
"# 60 is the window length (it is the length-60 axis in the shapes printed below).\n", | ||
"X_train_emb, X_train_pr, y_train = create_sequences(train_data, 60, bert_embeddings_train)\n", | ||
"X_test_emb, X_test_pr, y_test = create_sequences(test_data, 60, bert_embeddings_test)\n", | ||
"\n", | ||
"\n", | ||
"print(\"Train shapes: \", X_train_emb.shape, X_train_pr.shape, y_train.shape)\n", | ||
"print(\"Test shapes: \", X_test_emb.shape, X_test_pr.shape, y_test.shape)\n", | ||
"\n", | ||
"# convert to torch dataset\n", | ||
"dataset_train = TensorDataset(\n", | ||
"    torch.from_numpy(X_train_emb).float(),\n", | ||
"    torch.from_numpy(X_train_pr).float(),\n", | ||
"    torch.from_numpy(y_train).float()\n", | ||
")\n", | ||
"dataset_test = TensorDataset(\n", | ||
"    torch.from_numpy(X_test_emb).float(),\n", | ||
"    torch.from_numpy(X_test_pr).float(),\n", | ||
"    torch.from_numpy(y_test).float()\n", | ||
")\n", | ||
"\n", | ||
"dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)\n", | ||
"# Evaluation gains nothing from shuffling; keep the test order deterministic.\n", | ||
"dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "2cc49abb-d4bd-422e-98a4-b10c91bfa35c", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from models import StockPredictor\n", | ||
"\n", | ||
"# Hyperparameters handed to StockPredictor\n", | ||
"embedding_dim = 768  # width of one BERT embedding vector\n", | ||
"price_dim = 1  # each stock price is a single number\n", | ||
"hidden_dim, num_layers = 128, 2\n", | ||
"\n", | ||
"model = StockPredictor(\n", | ||
"    embedding_dim,\n", | ||
"    price_dim,\n", | ||
"    hidden_dim,\n", | ||
"    num_layers,\n", | ||
")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "be347a74-7360-4836-9204-74f19db79dad", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/Users/ignatiocalvin/anaconda3/envs/dowJones/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", | ||
" from .autonotebook import tqdm as notebook_tqdm\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Epoch 1, Loss: 178201392.0, MAE: 13349.209414793073\n", | ||
"Epoch 2, Loss: 146288288.0, MAE: 12094.969532826446\n", | ||
"Epoch 3, Loss: 129430936.0, MAE: 11376.771774101826\n", | ||
"Epoch 4, Loss: 61263784.0, MAE: 7827.118499166855\n", | ||
"Epoch 5, Loss: 35276108.0, MAE: 5939.36932678883\n", | ||
"Epoch 6, Loss: 9780979.0, MAE: 3127.455675145533\n", | ||
"Epoch 7, Loss: 11902432.0, MAE: 3449.9901449134604\n", | ||
"Epoch 8, Loss: 5983936.0, MAE: 2446.2084947935246\n", | ||
"Epoch 9, Loss: 6555874.0, MAE: 2560.4441021041644\n", | ||
"Epoch 10, Loss: 6831317.5, MAE: 2613.6789206021463\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from torch.utils.data import DataLoader, TensorDataset\n", | ||
"import torch.optim as optim\n", | ||
"\n", | ||
"\n", | ||
"# Training parameters\n", | ||
"num_epochs = 10\n", | ||
"learning_rate = 0.001\n", | ||
"\n", | ||
"# Loss and optimizer\n", | ||
"criterion = nn.MSELoss()\n", | ||
"optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n", | ||
"\n", | ||
"# Training loop: one optimizer step per mini-batch, one summary line per epoch.\n", | ||
"model.train()\n", | ||
"for epoch in range(num_epochs):\n", | ||
"    epoch_sq_err = 0.0  # running sum of squared errors over the epoch\n", | ||
"    n_seen = 0  # number of samples seen this epoch\n", | ||
"    for batch_bert, batch_price, batch_y in dataloader_train:\n", | ||
"        optimizer.zero_grad()\n", | ||
"        outputs = model(batch_bert, batch_price.unsqueeze(-1))\n", | ||
"        loss = criterion(outputs.squeeze(), batch_y)\n", | ||
"        loss.backward()\n", | ||
"        optimizer.step()\n", | ||
"        # MSELoss averages within the batch, so weight by batch size to get a\n", | ||
"        # true epoch mean instead of reporting only the last batch's loss.\n", | ||
"        epoch_sq_err += loss.item() * batch_y.size(0)\n", | ||
"        n_seen += batch_y.size(0)\n", | ||
"    epoch_mse = epoch_sq_err / max(n_seen, 1)\n", | ||
"    # sqrt(MSE) is the root-mean-squared error, not the mean absolute error.\n", | ||
"    print(f\"Epoch {epoch+1}, Loss: {epoch_mse}, RMSE: {epoch_mse**0.5}\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "60fddd7f-00e0-44b5-89c3-831ac5e29aca", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Test MSE: tensor(23406206.)\n", | ||
"Test MAE: tensor(4837.9961)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import torchmetrics\n", | ||
"\n", | ||
"# Accumulate both metrics over the whole test set.\n", | ||
"mse = torchmetrics.MeanSquaredError()\n", | ||
"mae = torchmetrics.MeanAbsoluteError()\n", | ||
"model.eval()\n", | ||
"with torch.no_grad():\n", | ||
"    for batch_bert, batch_price, batch_y in dataloader_test:\n", | ||
"        preds = model(batch_bert, batch_price.unsqueeze(-1)).squeeze()\n", | ||
"        # update() only accumulates state; the per-batch value is not needed here.\n", | ||
"        mse.update(preds, batch_y)\n", | ||
"        mae.update(preds, batch_y)\n", | ||
"\n", | ||
"test_mse = mse.compute()\n", | ||
"print(\"Test MSE: \", test_mse)\n", | ||
"# sqrt(MSE) is the RMSE; the MAE comes from its own metric below.\n", | ||
"print(\"Test RMSE: \", test_mse**0.5)\n", | ||
"print(\"Test MAE: \", mae.compute())" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.18" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.