-
Notifications
You must be signed in to change notification settings - Fork 112
AIMIGRAPHX-351 Update quantization to support dynamic shapes with single dynamic dimension #4467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
8cfd942
f7f69d4
bd2a6a7
f30135c
2076d29
18a7db7
dc0bdf8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,8 @@ | |
| #include <migraphx/simplify_reshapes.hpp> | ||
| #include <migraphx/simplify_qdq.hpp> | ||
| #include <migraphx/eliminate_common_subexpression.hpp> | ||
| #include <migraphx/split_single_dyn_dim.hpp> | ||
| #include <migraphx/simplify_dyn_ops.hpp> | ||
| #include <migraphx/optimize_module.hpp> | ||
| #include <migraphx/dead_code_elimination.hpp> | ||
| #include <migraphx/program.hpp> | ||
|
|
@@ -68,7 +70,11 @@ static tracer quant_tracer() | |
| void quantize_fp16(program& prog, const std::vector<std::string>& ins_names) | ||
| { | ||
| run_passes(prog, | ||
| {normalize_ops{}, | ||
| {split_single_dyn_dim{}, | ||
| dead_code_elimination{}, | ||
| simplify_dyn_ops{}, | ||
| dead_code_elimination{}, | ||
| normalize_ops{}, | ||
| optimize_module{{"quantizelinear", "dequantizelinear"}}, | ||
| truncate_float_pass{ins_names, shape::half_type}, | ||
| optimize_module{{"quantizelinear", "dequantizelinear"}}}, | ||
|
Comment on lines
+73
to
80
|
||
|
|
@@ -78,7 +84,11 @@ void quantize_fp16(program& prog, const std::vector<std::string>& ins_names) | |
| void quantize_bf16(program& prog, const std::vector<std::string>& ins_names) | ||
| { | ||
| run_passes(prog, | ||
| {normalize_ops{}, | ||
| {split_single_dyn_dim{}, | ||
| dead_code_elimination{}, | ||
| simplify_dyn_ops{}, | ||
| dead_code_elimination{}, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do these passes need to be run?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems we should be able to quantize without needing to convert dynamic shapes, which is a decision that should be made by the backend target.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that should be the case, but currently |
||
| normalize_ops{}, | ||
| optimize_module{{"quantizelinear", "dequantizelinear"}}, | ||
| truncate_float_pass{ins_names, shape::bf16_type}, | ||
| optimize_module{{"quantizelinear", "dequantizelinear"}}}, | ||
|
|
@@ -93,7 +103,16 @@ static void quantize_8bits(program& prog, | |
| { | ||
| // Run optimize_module() before converting to int8/fp8 to const eval and fold in FP32 to | ||
| // avoid loss of precision. | ||
| run_passes(prog, {rewrite_rnn{}, normalize_ops{}, optimize_module{}}, quant_tracer()); | ||
| run_passes(prog, | ||
| {split_single_dyn_dim{}, | ||
| dead_code_elimination{}, | ||
| simplify_dyn_ops{}, | ||
| dead_code_elimination{}, | ||
| rewrite_rnn{}, | ||
| dead_code_elimination{}, | ||
| normalize_ops{}, | ||
| optimize_module{}}, | ||
| quant_tracer()); | ||
|
|
||
| std::shared_ptr<std::vector<std::pair<float, float>>> quant_8bit_params = | ||
| std::make_shared<std::vector<std::pair<float, float>>>(); | ||
|
|
@@ -188,7 +207,15 @@ void quantize_int8(program& prog, | |
|
|
||
| void quantize_int4_weights(program& prog) | ||
| { | ||
| run_passes(prog, {normalize_ops{}, optimize_module{}, quantize_int4_pass{}}, quant_tracer()); | ||
| run_passes(prog, | ||
| {split_single_dyn_dim{}, | ||
| dead_code_elimination{}, | ||
| simplify_dyn_ops{}, | ||
| dead_code_elimination{}, | ||
| normalize_ops{}, | ||
| optimize_module{}, | ||
| quantize_int4_pass{}}, | ||
| quant_tracer()); | ||
| } | ||
|
|
||
| void quantize_fp8(program& prog, const target& t, const std::vector<parameter_map>& calibration) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,6 +95,11 @@ | |
| */ | ||
| static bool any_sm_next(const_module_ref mm, const std::vector<dynamic_dimensions_check>& ddcs) | ||
| { | ||
| // skip main module that contains select_module (meaning this pass already ran) | ||
| if(any_of(mm->begin(), mm->end(), [](auto ins) { return ins.name() == "select_module"; })) | ||
| { | ||
| return true; | ||
| } | ||
|
Comment on lines
+98
to
+102
|
||
| for(const auto& ddc : ddcs) | ||
| { | ||
| auto p_outputs = mm->get_parameter(ddc.dyn_param_str)->outputs(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,7 +1,7 @@ | ||||||||||||||||||||||||||||||||||
| /* | ||||||||||||||||||||||||||||||||||
| * The MIT License (MIT) | ||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||
| * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. | ||||||||||||||||||||||||||||||||||
| * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. | ||||||||||||||||||||||||||||||||||
| * | ||||||||||||||||||||||||||||||||||
| * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||||||||||||||||||||||||||||||||
| * of this software and associated documentation files (the "Software"), to deal | ||||||||||||||||||||||||||||||||||
|
|
@@ -38,6 +38,11 @@ | |||||||||||||||||||||||||||||||||
| static void | ||||||||||||||||||||||||||||||||||
| quantize_module(module& m, const std::vector<std::string>& ins_names, shape::type_t float_type) | ||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||
| // skip main module that contains select_module | ||||||||||||||||||||||||||||||||||
| if(any_of(m.begin(), m.end(), [](auto ins) { return ins.name() == "select_module"; })) | ||||||||||||||||||||||||||||||||||
|
Comment on lines
38
to
+42
|
||||||||||||||||||||||||||||||||||
| static void | |
| quantize_module(module& m, const std::vector<std::string>& ins_names, shape::type_t float_type) | |
| { | |
| // skip main module that contains select_module | |
| if(any_of(m.begin(), m.end(), [](auto ins) { return ins.name() == "select_module"; })) | |
| // Helper function to check for select_module | |
| static inline bool module_has_select_module(const module& m) | |
| { | |
| return any_of(m.begin(), m.end(), [](auto ins) { return ins.name() == "select_module"; }); | |
| } | |
| static void | |
| quantize_module(module& m, const std::vector<std::string>& ins_names, shape::type_t float_type) | |
| { | |
| // skip main module that contains select_module | |
| if(module_has_select_module(m)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The pass sequence
{split_single_dyn_dim{}, dead_code_elimination{}, simplify_dyn_ops{}, dead_code_elimination{}} is repeated four times in this file (lines 73-76, 87-90, 107-110, 211-214). Consider extracting this into a named constant or helper function to improve maintainability. This would make the code DRYer and ensure consistent preprocessing across all quantization functions.