
Commit

initial commit
Ignatiocalvin committed Jun 7, 2024
1 parent e476339 commit c14f5b7
Showing 13 changed files with 4,539 additions and 0 deletions.
Binary file added .DS_Store
Binary file added __pycache__/models.cpython-39.pyc
Binary file added __pycache__/utils.cpython-39.pyc
Binary file added bert_embeddings.npy
188 changes: 188 additions & 0 deletions bert_rnn_sep.ipynb
@@ -0,0 +1,188 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "fda13211-51f3-4192-aefc-846657dbace9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train shapes: (1551, 60, 768) (1551, 60) (1551,)\n",
"Test shapes: (318, 60, 768) (318, 60) (318,)\n"
]
}
],
"source": [
"import torch\n",
"import numpy as np\n",
"import pandas as pd\n",
"import torch.nn as nn\n",
"from torch.utils.data import TensorDataset, DataLoader\n",
"from utils import create_sequences\n",
"\n",
"# import all data\n",
"df = pd.read_csv('data/upload_DJIA_table.csv', parse_dates=['Date'], index_col='Date')\n",
"df = df[['Close']]\n",
"df = df.sort_index()\n",
"\n",
"train_data = df[:'2014']\n",
"test_data = df['2015':]\n",
"\n",
"bert_embeddings = np.load('bert_embeddings.npy')\n",
"\n",
"bert_embeddings_train = bert_embeddings[:train_data.shape[0]].reshape(-1, 768)\n",
"bert_embeddings_test = bert_embeddings[train_data.shape[0]:].reshape(-1, 768)\n",
" \n",
"\n",
"X_train_emb, X_train_pr, y_train = create_sequences(train_data, 60, bert_embeddings_train)\n",
"X_test_emb, X_test_pr, y_test = create_sequences(test_data, 60, bert_embeddings_test)\n",
"\n",
"\n",
"print(\"Train shapes: \", X_train_emb.shape, X_train_pr.shape, y_train.shape)\n",
"print(\"Test shapes: \", X_test_emb.shape, X_test_pr.shape, y_test.shape)\n",
"\n",
"# convert to torch dataset\n",
"dataset_train = TensorDataset(\n",
" torch.from_numpy(X_train_emb).float(),\n",
" torch.from_numpy(X_train_pr).float(),\n",
" torch.from_numpy(y_train).float()\n",
")\n",
"dataset_test = TensorDataset(\n",
" torch.from_numpy(X_test_emb).float(),\n",
" torch.from_numpy(X_test_pr).float(),\n",
" torch.from_numpy(y_test).float()\n",
")\n",
"\n",
"dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)\n",
"dataloader_test = DataLoader(dataset_test, batch_size=32, shuffle=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2cc49abb-d4bd-422e-98a4-b10c91bfa35c",
"metadata": {},
"outputs": [],
"source": [
"from models import StockPredictor\n",
"\n",
"# Model parameters\n",
"embedding_dim = 768 # Size of BERT embeddings\n",
"price_dim = 1 # Each stock price is a single number\n",
"hidden_dim = 128\n",
"num_layers = 2\n",
"\n",
"model = StockPredictor(embedding_dim, price_dim, hidden_dim, num_layers)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "be347a74-7360-4836-9204-74f19db79dad",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ignatiocalvin/anaconda3/envs/dowJones/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1, Loss: 178201392.0, MAE: 13349.209414793073\n",
"Epoch 2, Loss: 146288288.0, MAE: 12094.969532826446\n",
"Epoch 3, Loss: 129430936.0, MAE: 11376.771774101826\n",
"Epoch 4, Loss: 61263784.0, MAE: 7827.118499166855\n",
"Epoch 5, Loss: 35276108.0, MAE: 5939.36932678883\n",
"Epoch 6, Loss: 9780979.0, MAE: 3127.455675145533\n",
"Epoch 7, Loss: 11902432.0, MAE: 3449.9901449134604\n",
"Epoch 8, Loss: 5983936.0, MAE: 2446.2084947935246\n",
"Epoch 9, Loss: 6555874.0, MAE: 2560.4441021041644\n",
"Epoch 10, Loss: 6831317.5, MAE: 2613.6789206021463\n"
]
}
],
"source": [
"from torch.utils.data import DataLoader, TensorDataset\n",
"import torch.optim as optim\n",
"\n",
"\n",
"# Training parameters\n",
"num_epochs = 10\n",
"learning_rate = 0.001\n",
"\n",
"# Loss and optimizer\n",
"criterion = nn.MSELoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n",
"\n",
"# Training loop\n",
"model.train()\n",
"for epoch in range(num_epochs):\n",
" for batch_bert, batch_price, batch_y in dataloader_train:\n",
" optimizer.zero_grad()\n",
" outputs = model(batch_bert, batch_price.unsqueeze(-1))\n",
" loss = criterion(outputs.squeeze(), batch_y)\n",
" loss.backward()\n",
" optimizer.step()\n",
" print(f\"Epoch {epoch+1}, Loss: {loss.item()}, MAE: {loss.item()**0.5}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "60fddd7f-00e0-44b5-89c3-831ac5e29aca",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test MSE: tensor(23406206.)\n",
"Test MAE: tensor(4837.9961)\n"
]
}
],
"source": [
"import torchmetrics\n",
"\n",
"mse = torchmetrics.MeanSquaredError()\n",
"model.eval()\n",
"with torch.no_grad():\n",
" for batch_bert, batch_price, batch_y in dataloader_test:\n",
" outputs = model(batch_bert, batch_price.unsqueeze(-1))\n",
" mse(outputs.squeeze(), batch_y)\n",
"\n",
"print(\"Test MSE: \", mse.compute())\n",
"print(\"Test MAE: \", mse.compute()**0.5) "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
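The notebook relies on two helpers whose source is not rendered in this view: utils.create_sequences and models.StockPredictor. Below is a minimal sketch of what create_sequences might look like, inferred only from its call sites and the printed shapes; the function name and signature come from the notebook, the body is an assumption and the real implementation in utils.py may differ.

# Hedged sketch of utils.create_sequences, inferred from the call sites above.
import numpy as np

def create_sequences(price_df, seq_len, embeddings):
    """Slide a window of `seq_len` days over the closing prices.

    Returns (X_emb, X_price, y) with shapes
    (N, seq_len, 768), (N, seq_len), (N,), where N = len(price_df) - seq_len.
    """
    prices = price_df['Close'].values
    X_emb, X_price, y = [], [], []
    for i in range(len(prices) - seq_len):
        X_emb.append(embeddings[i:i + seq_len])   # one 768-dim BERT vector per day in the window
        X_price.append(prices[i:i + seq_len])     # the seq_len preceding closes
        y.append(prices[i + seq_len])             # target: the next day's close
    return np.array(X_emb), np.array(X_price), np.array(y)

A windowing of this form is consistent with the 1551 training and 318 test windows printed in the first cell (60 days are consumed at the start of each split).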
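Likewise, a minimal sketch of the StockPredictor interface used above, assuming an LSTM over the concatenated per-day features; the constructor arguments (embedding_dim, price_dim, hidden_dim, num_layers) and the two-input forward signature come from the notebook, while the layer choice is an assumption about models.py.

# Hedged sketch of models.StockPredictor; the real model in models.py may differ.
import torch
import torch.nn as nn

class StockPredictor(nn.Module):
    def __init__(self, embedding_dim, price_dim, hidden_dim, num_layers):
        super().__init__()
        # Recurrent stack over per-day features: 768-dim BERT vector concatenated with the price
        self.rnn = nn.LSTM(embedding_dim + price_dim, hidden_dim,
                           num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)  # regress the next day's close

    def forward(self, bert_seq, price_seq):
        # bert_seq: (batch, 60, embedding_dim), price_seq: (batch, 60, price_dim)
        x = torch.cat([bert_seq, price_seq], dim=-1)
        out, _ = self.rnn(x)
        return self.fc(out[:, -1])  # predict from the last time step

Note that the notebook regresses raw index levels, which is why the MSE values run into the hundreds of millions; scaling the prices before training would be a natural refinement, but is not done here.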