diff --git a/.dockerignore b/.dockerignore
index 603874e..6c7b69a 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,44 +1 @@
-
-.git
 .gitignore
-.github
-.venv
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-*.egg-info
-.pytest_cache
-.ruff_cache
-dist/
-build/
-*.egg
-node_modules
-frontend/node_modules
-frontend/dist
-frontend/.vite
-*.npm-cache
-.npmignore
-*.o
-*.a
-*.so
-*.dylib
-quadtrix.exe
-quadtrix
-build/
-cmake-build-*/
-.vscode
-*.bin
-*.pt
-*.gguf
-*.safetensors
-engine/best_model.pt
-engine/logs/
-engine/fineweb_30mb.txt
-data/input.txt
-.DS_Store
-Thumbs.db
-*.swp
-*.swo
-.idea
-docker-compose.override.yml
diff --git a/.gitignore b/.gitignore
index 82e644e..0dfdee5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,46 @@ engine/fine-tune/input.txt
 *best_model.pt
 *.pt
 *exe
+.git
+.gitignore
+.github
+.venv
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+*.egg-info
+.pytest_cache
+.ruff_cache
+dist/
+build/
+*.egg
+node_modules
+frontend/node_modules
+frontend/dist
+frontend/.vite
+*.npm-cache
+.npmignore
+*.o
+*.a
+*.so
+*.dylib
+quadtrix.exe
+quadtrix
+build/
+cmake-build-*/
+.vscode
+*.bin
+*.pt
+*.gguf
+*.safetensors
+engine/best_model.pt
+engine/logs/
+engine/fineweb_30mb.txt
+data/input.txt
+.DS_Store
+Thumbs.db
+*.swp
+*.swo
+.idea
+docker-compose.override.yml
diff --git a/config/config.h b/config/config.h
index 844efeb..e917c38 100644
--- a/config/config.h
+++ b/config/config.h
@@ -1,18 +1,19 @@
 #pragma once
 #include <string>
+
 static const std::string DEFAULT_CLEANED_PATH = "data/input.txt";
 static const std::string DATA_PATH_ENV_VAR = "GPT_DATA_PATH";
 static const unsigned int SEED = 1337;
-static const double TRAIN_SPLIT = 0.9; // 90 % train, 10 % val
-static const int BATCH_SIZE = 4;
-static const int BLOCK_SIZE = 64; // context length
-static const int MAX_ITERS = 10000;
-static const int EVAL_INTERVAL = 20;
-static const float LEARNING_RATE = 3e-4f;
-static const int EVAL_ITERS = 1;
+static const double TRAIN_SPLIT = 0.9; // 90% train, 10% val
+static const int BATCH_SIZE = 16;
+static const int BLOCK_SIZE = 64; // Context length
+static const int MAX_ITERS = 5000;
+static const int EVAL_INTERVAL = 250;
+static const float LEARNING_RATE = 5e-4f;
+static const int EVAL_ITERS = 100;
 static const int N_EMBD = 128;
 static const int N_HEAD = 4;
 static const int N_LAYER = 4;
-static const float DROPOUT = 0.2f; // applied during training only
+static const float DROPOUT = 0.05f;
 static const std::string BEST_MODEL_PATH = "best_model.bin";
 static const std::string MODEL_PATH_ENV_VAR = "GPT_MODEL_PATH";