@@ -704,6 +704,15 @@ llm:
704704 - attention
705705 - ssm
706706 tag : nips25
707+ - title : ' Mamba-3: Improved Sequence Modeling using State Space Principles'
708+ date : ' 2026-03-01'
709+ url : /papers/llm/algorithm/architecture/attention/ssm/2026/03/01/mamba-3-improved-sequence-modeling-using-state-space-principles.html
710+ categories :
711+ - llm
712+ - algorithm
713+ - architecture
714+ - attention
715+ - ssm
707716 - title : ' GATED DELTA NETWORKS: IMPROVING MAMBA2 WITH DELTA RULE'
708717 date : ' 2024-12-01'
709718 url : /papers/llm/algorithm/architecture/attention/ssm/2024/12/01/gated-delta-networks-improving-mamba2-with-delta-rule.html
@@ -774,6 +783,15 @@ llm:
774783 - attention
775784 - sparsity
776785 tag : iclr25
786+ - title : Mixture-of-Depths Attention
787+ date : ' 2026-03-01'
788+ url : /papers/llm/algorithm/architecture/attention/sparsity/2026/03/01/mixture-of-depths-attention.html
789+ categories :
790+ - llm
791+ - algorithm
792+ - architecture
793+ - attention
794+ - sparsity
777795 - title : ' MOBA: MIXTURE OF BLOCK ATTENTION FOR LONG-CONTEXT LLMS'
778796 date : ' 2025-02-01'
779797 url : /papers/llm/algorithm/architecture/attention/sparsity/2025/02/01/moba-mixture-of-block-attention-for-long-context-llms.html
@@ -1047,6 +1065,13 @@ llm:
10471065 - llm
10481066 - algorithm
10491067 - pretrain_sft
1068+ - title : Scaling up Muon for Large-Scale Language Model Training
1069+ date : ' 2025-02-01'
1070+ url : /papers/llm/algorithm/pretrain_sft/2025/02/01/scaling-up-muon-for-large-scale-language-model-training.html
1071+ categories :
1072+ - llm
1073+ - algorithm
1074+ - pretrain_sft
10501075 - title : ' Fusechat: Knowledge Fusion of Chat Models'
10511076 date : ' 2024-08-01'
10521077 url : /papers/llm/algorithm/pretrain_sft/2024/08/01/fusechat-knowledge-fusion-of-chat-models.html
@@ -4337,6 +4362,13 @@ mlsys:
43374362 - networking
43384363 - nccl
43394364 tag : blog
4365+ - title : ' NCCL EP: Towards a Unified Expert Parallel Communication API for NCCL'
4366+ date : ' 2026-03-01'
4367+ url : /papers/mlsys/networking/nccl/2026/03/01/nccl-ep-towards-a-unified-expert-parallel-communication-api-for-nccl.html
4368+ categories :
4369+ - mlsys
4370+ - networking
4371+ - nccl
43404372 - title : New Scaling Algorithm and Initialization with NVIDIA Collective Communications
43414373 Library 2.23
43424374 date : ' 2025-01-01'
0 commit comments