Commit

update memory wording
rasbt committed Nov 18, 2024
1 parent 7292733 commit 5860057
Showing 3 changed files with 12 additions and 12 deletions.
8 changes: 4 additions & 4 deletions ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb
@@ -381,7 +381,7 @@
"id": "qcD8LSHNhBRW"
},
"source": [
"- Note that we also added a `dtype=cfg[\"dtype\"]` setting above, which will allow us to load the model directly in lower precision formats later to save memory (versus instantiating it in the original 32-bit precision format and then converting it)\n",
"- Note that we also added a `dtype=cfg[\"dtype\"]` setting above, which will allow us to load the model directly in lower precision formats later to reduce memory usage (versus instantiating it in the original 32-bit precision format and then converting it)\n",
"- We also set `bias=False` since Llama doesn't use any bias units"
]
},
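As a quick illustration of the note above, here is a minimal sketch (the toy `cfg` dict below with only `emb_dim` and `dtype` is a stand-in for the notebook's full configuration) of why passing `dtype` at construction time reduces memory usage compared with converting afterwards:

```python
import torch
import torch.nn as nn

# Toy stand-in for the notebook's cfg; only the fields needed here.
cfg = {"emb_dim": 4096, "dtype": torch.bfloat16}

# Instantiating directly in bf16: the 32-bit weights are never allocated,
# so peak memory stays at roughly half of the convert-afterwards route.
layer_direct = nn.Linear(cfg["emb_dim"], cfg["emb_dim"], bias=False, dtype=cfg["dtype"])

# Convert-afterwards route: the layer is first created in 32-bit precision
# (PyTorch's default) and only then cast down, so the full-size fp32 weights
# exist in memory at least temporarily.
layer_cast = nn.Linear(cfg["emb_dim"], cfg["emb_dim"], bias=False).to(torch.bfloat16)

print(layer_direct.weight.dtype)  # torch.bfloat16
print(layer_cast.weight.dtype)    # torch.bfloat16
```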
@@ -648,7 +648,7 @@
"\n",
"mha(example_batch)\n",
"\n",
"del mha # delete to save memory"
"del mha # delete to free up memory"
]
},
{
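A hedged aside on the `del mha` line above: `del` only drops the Python reference, and on a GPU the freed blocks go back to PyTorch's caching allocator rather than to the operating system. A common follow-up pattern when the memory is needed right away (an assumption about the reader's setup, not something this diff shows) looks like the sketch below; the `mha` object here is a dummy stand-in for the notebook's attention module.

```python
import gc
import torch
import torch.nn as nn

# Stand-in for the notebook's multi-head attention module.
mha = nn.Linear(128, 128)

del mha                       # drop the Python reference (as in the notebook cell)
gc.collect()                  # let Python reclaim the now-unreferenced module
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # release unused cached CUDA memory back to the driver
```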
@@ -890,7 +890,7 @@
" \"n_heads\": 32, # Number of attention heads\n",
" \"n_layers\": 32, # Number of layers\n",
" \"hidden_dim\": 11008, # NEW: Size of the intermediate dimension in FeedForward\n",
" \"dtype\": torch.bfloat16 # NEW: Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16 # NEW: Lower-precision dtype to reduce memory usage\n",
"}"
]
},
@@ -1691,7 +1691,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.11.4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
12 changes: 6 additions & 6 deletions ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
@@ -886,7 +886,7 @@
" \"n_heads\": 32, # Number of attention heads\n",
" \"n_layers\": 32, # Number of layers\n",
" \"hidden_dim\": 11_008, # Size of the intermediate dimension in FeedForward\n",
" \"dtype\": torch.bfloat16 # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16 # Lower-precision dtype to reduce memory usage\n",
"}"
]
},
@@ -909,7 +909,7 @@
" \"n_kv_groups\": 8, # NEW: Key-Value groups for grouped-query attention\n",
" \"rope_base\": 500_000.0, # NEW: The base in RoPE's \"theta\" was increased to 500_000\n",
" \"rope_freq\": None, # NEW: Additional configuration for adjusting the RoPE frequencies\n",
" \"dtype\": torch.bfloat16 # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16 # Lower-precision dtype to reduce memory usage\n",
"}"
]
},
@@ -2062,7 +2062,7 @@
" \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
" \"rope_base\": 500_000.0, # The base in RoPE's \"theta\"\n",
" \"rope_freq\": None, # Additional configuration for adjusting the RoPE frequencies\n",
" \"dtype\": torch.bfloat16 # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16 # Lower-precision dtype to reduce memory usage\n",
"}\n",
"\n",
"LLAMA31_CONFIG_8B = {\n",
@@ -2074,7 +2074,7 @@
" \"hidden_dim\": 14_336, # Size of the intermediate dimension in FeedForward\n",
" \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
" \"rope_base\": 500_000.0, # The base in RoPE's \"theta\"\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to reduce memory usage\n",
" \"rope_freq\": { # NEW: RoPE frequency scaling\n",
" \"factor\": 8.0,\n",
" \"low_freq_factor\": 1.0,\n",
@@ -2448,7 +2448,7 @@
" \"hidden_dim\": 14_336, # Size of the intermediate dimension in FeedForward\n",
" \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
" \"rope_base\": 500_000.0, # The base in RoPE's \"theta\"\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to reduce memory usagey\n",
" \"rope_freq\": { # NEW: RoPE frequency scaling\n",
" \"factor\": 8.0,\n",
" \"low_freq_factor\": 1.0,\n",
@@ -2467,7 +2467,7 @@
" \"hidden_dim\": 8192, # NEW: Almost half the size of the intermediate dimension in FeedForward\n",
" \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
" \"rope_base\": 500_000.0, # The base in RoPE's \"theta\"\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to reduce memory usage\n",
" \"rope_freq\": { # RoPE frequency scaling\n",
" \"factor\": 32.0, # NEW: Adjustment of the rescaling factor\n",
" \"low_freq_factor\": 1.0,\n",
4 changes: 2 additions & 2 deletions ch05/07_gpt_to_llama/standalone-llama32.ipynb
@@ -438,7 +438,7 @@
" \"hidden_dim\": 8192, # Size of the intermediate dimension in FeedForward\n",
" \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
" \"rope_base\": 500_000.0, # The base in RoPE's \"theta\"\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to save memory\n",
" \"dtype\": torch.bfloat16, # Lower-precision dtype to reduce memory usage\n",
" \"rope_freq\": { # RoPE frequency scaling\n",
" \"factor\": 32.0,\n",
" \"low_freq_factor\": 1.0,\n",
@@ -458,7 +458,7 @@
"# \"hidden_dim\": 8192, # Size of the intermediate dimension in FeedForward\n",
"# \"n_kv_groups\": 8, # Key-Value groups for grouped-query attention\n",
"# \"rope_base\": 500_000.0, # The base in RoPE's \"theta\"\n",
"# \"dtype\": torch.bfloat16, # Lower-precision dtype to save memory\n",
"# \"dtype\": torch.bfloat16, # Lower-precision dtype to reduce memory usage\n",
"# \"rope_freq\": { # RoPE frequency scaling\n",
"# \"factor\": 32.0,\n",
"# \"low_freq_factor\": 1.0,\n",
