-
Notifications
You must be signed in to change notification settings - Fork 13.7k
Implement SparseK Attention mechanism — new GGML operator with CPU backend (GPU planned next) #16817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Implement SparseK Attention mechanism — new GGML operator with CPU backend (GPU planned next) #16817
Changes from 17 commits
efd9ad4
8db1307
68ab48c
ce761f8
9d07172
af711f8
3933069
0c2dd04
c6a5db4
a6784f0
f9bd873
de64151
08e359d
49a8a81
ea21d8f
161e7cd
b9a960f
b7315fc
35180a1
2fd25a8
5c3c65c
5798c33
48ccccd
a365437
db3e875
194f6a3
60c75e7
88ac1d9
e6b0b10
46e192f
a9d2015
060ee50
729973b
205fded
6e36508
ed9ed7e
212d47f
087ecf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | @@ -795,14 +795,16 @@ def set_gguf_parameters(self): |
| | | self.gguf_writer.add_feed_forward_length(n_ff) |
| | | logger.info(f"gguf: feed forward length = {n_ff}") |
| | | |
| | | if (n_head := self.find_hparam(["num_attention_heads", "n_head", "n_heads"], optional=True)) is not None: |
| | | self.gguf_writer.add_head_count(n_head) |
| | | logger.info(f"gguf: head count = {n_head}") |
| | | |
| | | if (n_head_kv := self.find_hparam(["num_key_value_heads", "n_kv_heads"], optional=True)) is not None: |
| | | self.gguf_writer.add_head_count_kv(n_head_kv) |
| | | logger.info(f"gguf: key-value head count = {n_head_kv}") |
| | | |
| | | # === SparseK dynamic attention metadata === |
| | | self.gguf_writer.add_key("llama.sparsek.enable", int(self.hparams.get("sparsek_enable", 0))) |
| | | self.gguf_writer.add_key("llama.sparsek.top_k", int(self.hparams.get("sparsek_topk", 0))) |
| | | self.gguf_writer.add_key("llama.sparsek.window", int(self.hparams.get("sparsek_window", 0))) |
| | | self.gguf_writer.add_key("llama.sparsek.stride", int(self.hparams.get("sparsek_stride", 0))) |
| | | # ============================================ |
| | | |
| | | if (rope_theta := self.hparams.get("rope_theta")) is not None: |
| | | self.gguf_writer.add_rope_freq_base(rope_theta) |
| | | logger.info(f"gguf: rope theta = {rope_theta}") |
| | | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Restore this please.