Commit d20311b
authored
Update cudnn frontend to v1.16.0 (NVIDIA#2362)
Signed-off-by: Kirthi Shankar Sivamani <[email protected]>
1 parent 5978f1d commit d20311b
1 file changed, +1 -1 lines changed
Submodule cudnn-frontend updated 87 files
- .github/ISSUE_TEMPLATE/bug_report.md+1-1
- CMakeLists.txt+1-1
- benchmark/sdpa_benchmark/benchmark_flash_attention.py+4-4
- benchmark/sdpa_benchmark_training/README.md+4-4
- benchmark/sdpa_benchmark_training/benchmark_single_sdpa.py+53-96
- dlpack_version.txt+1-1
- include/cudnn_frontend/cudnn_interface.h+1-1
- include/cudnn_frontend/graph_interface.h+39
- include/cudnn_frontend/graph_properties.h+51-4
- include/cudnn_frontend/node/moe_grouped_matmul.h+196
- include/cudnn_frontend/node/scaled_dot_product_flash_attention.h+90-5
- include/cudnn_frontend/node/sdpa_support_surface.h+38-17
- include/cudnn_frontend/node_interface.h+11
- include/cudnn_frontend/utils/serialize.h+14
- include/cudnn_frontend_EngineConfigGenerator.h+4-2
- include/cudnn_frontend_get_plan.h+10-4
- include/cudnn_frontend_utils.h+92-2
- include/cudnn_frontend_version.h+1-1
- pyproject.toml+9-2
- python/cudnn/README.md+41
- python/cudnn/__init__.py+51-1
- python/cudnn/api_base.py+238
- python/cudnn/datatypes.py+75
- python/cudnn/gemm_amax/__init__.py+9
- python/cudnn/gemm_amax/api.py+842
- python/cudnn/gemm_amax/dense_blockscaled_gemm_persistent_amax.py+1.9k
- python/cudnn/gemm_swiglu/__init__.py+9
- python/cudnn/gemm_swiglu/api.py+626
- python/cudnn/gemm_swiglu/dense_gemm_persistent_swiglu.py+1.7k
- python/cudnn/wrapper.py+50-18
- python/properties.cpp+15-1
- python/pygraph/pygraph.cpp+88
- python/pygraph/pygraph.h+21
- python/pygraph/sdpa.cpp+11-1
- samples/cpp/CMakeLists.txt+3
- samples/cpp/sdpa/fp16_bwd_with_flexible_graphs.cpp+6
- samples/cpp/sdpa/fp16_fwd_with_block_mask.cpp+267
- samples/cpp/sdpa/fp8_bwd.cpp+10
- samples/cpp/sdpa/fp8_fwd.cpp+5
- samples/cpp/sdpa/fp8_fwd_bottom_right_causal_mask.cpp+5
- samples/cpp/sdpa/fp8_fwd_current_scaling.cpp+165
- samples/cpp/utils/helpers.h+6
- samples/legacy_samples/fp8_sample.cpp+2-4
- samples/python/00_introduction.ipynb+1-1
- samples/python/01_matmul_bias.ipynb-230
- samples/python/02_low_level_api.ipynb+2-2
- samples/python/02_sdpa_graph_serialization.ipynb-296
- samples/python/03_mixed_precision_matmul.ipynb-234
- samples/python/20_layernorm.ipynb-374
- samples/python/24_rmsnorm.ipynb-481
- samples/python/25_batchnorm.ipynb-321
- samples/python/25_layernorm_forward_training_and_backward_with_relu_bitmask.ipynb+2-2
- samples/python/27_instancenorm.ipynb-326
- samples/python/28_instancenorm_fusion.ipynb-357
- samples/python/29_layernorm_zero_centered_gamma_forward_training_and_backward.ipynb-587
- samples/python/30_layernorm_zero_centered_gamma_inference.ipynb-407
- samples/python/31_adaptive_layernorm_forward_training_and_backward.ipynb-541
- samples/python/32_adaptive_layernorm_inference.ipynb-494
- samples/python/33_layernorm_forward_training_and_backward_with_relu_bitmask.ipynb-583
- samples/python/50_scaled_dot_product_attention.ipynb-258
- samples/python/50_sdpa_forward.ipynb+1-1
- samples/python/51_scaled_dot_product_attention_backward.ipynb-370
- samples/python/51_sdpa_backward.ipynb+1-1
- samples/python/52_scaled_dot_product_attention_with_paged_caches.ipynb-517
- samples/python/52_sdpa_with_paged_caches.ipynb+3-1
- samples/python/53_scaled_dot_product_attention_decode_with_paged_caches.ipynb-486
- samples/python/53_sdpa_decode_with_paged_caches.ipynb+3-1
- setup.py+1-1
- test/python/conftest.py+59-51
- test/python/fe_api/test_gemm_amax.py+189
- test/python/fe_api/test_gemm_amax_utils.py+316
- test/python/fe_api/test_gemm_swiglu.py+184
- test/python/fe_api/test_gemm_swiglu_utils.py+231
- test/python/test_batchnorm.py+24-6
- test/python/test_block_scale_quantize.py+353
- test/python/test_conv_genstats.py+8-2
- test/python/test_flexible_sdpa.py+33-3
- test/python/test_instancenorm.py+8-2
- test/python/test_kernel_cache.py+11-1
- test/python/test_layernorm.py+14-2
- test/python/test_low_precision_matmul.py+34
- test/python/test_matmul_bias_relu.py+22-2
- test/python/test_mhas.py+25-9
- test/python/test_rmsnorm.py+8-2
- test/python/test_sdpa_with_caching.py+1-1
- test/python/test_silu_and_mul.py+12-3
- test/python/test_slice.py+11-1
0 commit comments