From 2769809db8bc702d962530f8e91b3e36f163c320 Mon Sep 17 00:00:00 2001
From: Eamon Sippy
Date: Mon, 8 Jun 2026 18:09:08 +0530
Subject: [PATCH] Modify training configuration parameters

Updated training parameters including batch size, iterations, evaluation interval, learning rate, and dropout rate.
---
 config/config.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/config/config.h b/config/config.h
index 844efeb..e917c38 100644
--- a/config/config.h
+++ b/config/config.h
@@ -1,18 +1,19 @@
 #pragma once
 #include <string>
+
 static const std::string DEFAULT_CLEANED_PATH = "data/input.txt";
 static const std::string DATA_PATH_ENV_VAR = "GPT_DATA_PATH";
 static const unsigned int SEED = 1337;
-static const double TRAIN_SPLIT = 0.9; // 90 % train, 10 % val
-static const int BATCH_SIZE = 4;
-static const int BLOCK_SIZE = 64; // context length
-static const int MAX_ITERS = 10000;
-static const int EVAL_INTERVAL = 20;
-static const float LEARNING_RATE = 3e-4f;
-static const int EVAL_ITERS = 1;
+static const double TRAIN_SPLIT = 0.9; // 90% train, 10% val
+static const int BATCH_SIZE = 16;
+static const int BLOCK_SIZE = 64; // Context length
+static const int MAX_ITERS = 5000;
+static const int EVAL_INTERVAL = 250;
+static const float LEARNING_RATE = 5e-4f;
+static const int EVAL_ITERS = 100;
 static const int N_EMBD = 128;
 static const int N_HEAD = 4;
 static const int N_LAYER = 4;
-static const float DROPOUT = 0.2f; // applied during training only
+static const float DROPOUT = 0.05f;
 static const std::string BEST_MODEL_PATH = "best_model.bin";
 static const std::string MODEL_PATH_ENV_VAR = "GPT_MODEL_PATH";