Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 0 additions & 43 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,44 +1 @@

.git
.gitignore
.github
.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.egg-info
.pytest_cache
.ruff_cache
dist/
build/
*.egg
node_modules
frontend/node_modules
frontend/dist
frontend/.vite
*.npm-cache
.npmignore
*.o
*.a
*.so
*.dylib
quadtrix.exe
quadtrix
build/
cmake-build-*/
.vscode
*.bin
*.pt
*.gguf
*.safetensors
engine/best_model.pt
engine/logs/
engine/fineweb_30mb.txt
data/input.txt
.DS_Store
Thumbs.db
*.swp
*.swo
.idea
docker-compose.override.yml
43 changes: 43 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,46 @@ engine/fine-tune/input.txt
*best_model.pt
*.pt
*exe
# NOTE: .git, .gitignore and .github are deliberately NOT listed here. They are
# Docker build-context exclusions; in a .gitignore they would keep newly added
# ignore files and workflow files out of version control.

# Python environments, bytecode and tool caches
.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.egg-info
*.egg
.pytest_cache
.ruff_cache
dist/
build/

# Frontend dependencies and build output
node_modules
frontend/node_modules
frontend/dist
frontend/.vite
*.npm-cache
.npmignore

# Native build artifacts
*.o
*.a
*.so
*.dylib
quadtrix.exe
quadtrix
cmake-build-*/

# Model weights, logs and datasets
*.bin
*.gguf
*.safetensors
engine/best_model.pt
engine/logs/
engine/fineweb_30mb.txt
data/input.txt

# Editor and OS files
.vscode
.idea
.DS_Store
Thumbs.db
*.swp
*.swo

# Local Docker overrides
docker-compose.override.yml
17 changes: 9 additions & 8 deletions config/config.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
#pragma once
#include <string>

// Project-wide configuration constants.
// The code that consumes these values is not visible from this header, so the
// notes below state only what the names and literals themselves establish.
// Each constant is defined exactly once; the arithmetic ones are constexpr so
// they can also be used in constant expressions.

// Default input data path, and the name of an environment variable that
// presumably overrides it (NOTE(review): confirm against the data loader).
static const std::string DEFAULT_CLEANED_PATH = "data/input.txt";
static const std::string DATA_PATH_ENV_VAR = "GPT_DATA_PATH";

// Random seed value.
static constexpr unsigned int SEED = 1337;

// Data split and batching.
static constexpr double TRAIN_SPLIT = 0.9;  // 90% train, 10% val
static constexpr int BATCH_SIZE = 16;
static constexpr int BLOCK_SIZE = 64;  // Context length

// Training schedule.
static constexpr int MAX_ITERS = 5000;
static constexpr int EVAL_INTERVAL = 250;
static constexpr float LEARNING_RATE = 5e-4f;
static constexpr int EVAL_ITERS = 100;

// Model dimensions.
static constexpr int N_EMBD = 128;
static constexpr int N_HEAD = 4;
static constexpr int N_LAYER = 4;
static constexpr float DROPOUT = 0.05f;

// Default model file path, and the name of an environment variable that
// presumably overrides it (NOTE(review): confirm against the save/load code).
static const std::string BEST_MODEL_PATH = "best_model.bin";
static const std::string MODEL_PATH_ENV_VAR = "GPT_MODEL_PATH";
Loading