Qualcomm AI Engine Direct - Flags for CI (#9536)

winskuo-quic · web-flow · commit f1ef702ff35f · 2025-05-13T11:10:44.000-07:00
### Summary
Previously, `--compile_only` used a single random input, which was intended for CI purposes.
This change introduces a `--ci` flag that controls the random-input behavior instead, so users
who pass `--compile_only` without `--ci` no longer get models with poor accuracy.
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
@@ -201,7 +201,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
diff --git a/backends/qualcomm/_passes/annotate_quant_attrs.py b/backends/qualcomm/_passes/annotate_quant_attrs.py
@@ -31,15 +31,12 @@
 class AnnotateQuantAttrs(ExportPass):
     """
     Add "quant_attrs" to graph nodes' meta from the QDQ information
-    generated after quatization process.
+    generated after quantization process.
     """
 
-    def __init__(
-        self, edge_program: torch.export.ExportedProgram, skip_advanced_requat: bool
-    ):
+    def __init__(self, edge_program: torch.export.ExportedProgram):
         super(AnnotateQuantAttrs, self).__init__()
         self.edge_program = edge_program
-        self.skip_advanced_requant = skip_advanced_requat
 
     def _annotate_source_nodes(
         self, quant_node: torch.fx.Node, quant_attrs: Dict[str, Any]
@@ -88,30 +85,21 @@ def _annotate_requant(self, n):
                 dq_attrs = get_quant_attrs(self.edge_program, dq_node)
                 # TODO: Store multiple pairs of requantize attributes when we have an op builder
                 # that has multiple outputs that requires quant attributes.
-                if self.skip_advanced_requant:
-                    if q_attrs[QCOM_DTYPE] != dq_attrs[QCOM_DTYPE]:
-                        dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
-                        user_node = list(dq_node.users)[0]
-                        n.args[0].meta.setdefault(QCOM_REQUANTIZE, {})
-                        n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs
-                else:
-                    # When dtype is the same but other specs such as scale and offset are different,
-                    # insert requant to improve accuracy.
-                    # Users can turn this feature off if any inference speed drop is observed.
-                    if any(
-                        q_attrs[attr] != dq_attrs[attr]
-                        for attr in [
-                            QCOM_SCALE,
-                            QCOM_ZERO_POINT,
-                            QCOM_QUANT_MIN,
-                            QCOM_QUANT_MAX,
-                            QCOM_DTYPE,
-                        ]
-                    ):
-                        dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
-                        user_node = list(dq_node.users)[0]
-                        n.args[0].meta.setdefault(QCOM_REQUANTIZE, {})
-                        n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs
+
+                if any(
+                    q_attrs[attr] != dq_attrs[attr]
+                    for attr in [
+                        QCOM_SCALE,
+                        QCOM_ZERO_POINT,
+                        QCOM_QUANT_MIN,
+                        QCOM_QUANT_MAX,
+                        QCOM_DTYPE,
+                    ]
+                ):
+                    dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
+                    user_node = list(dq_node.users)[0]
+                    n.args[0].meta.setdefault(QCOM_REQUANTIZE, {})
+                    n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs
 
     # Dequant all the fold_quant parameters back to fp32.
     # If an operation is not supported by QNN and got fallback, it will expect a fp32 param.
diff --git a/examples/qualcomm/scripts/deeplab_v3.py b/examples/qualcomm/scripts/deeplab_v3.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 import random
 import re
@@ -74,8 +75,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_dataset(
             data_size=data_num, dataset_dir=args.artifact, download=args.download
diff --git a/examples/qualcomm/scripts/edsr.py b/examples/qualcomm/scripts/edsr.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 import re
 from multiprocessing.connection import Client
@@ -113,8 +114,11 @@ def main(args):
         )
 
     instance = EdsrModel()
-    if args.compile_only:
+    if args.ci:
         inputs = instance.get_example_inputs()
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         dataset = get_dataset(
             args.hr_ref_dir, args.lr_dir, args.default_dataset, args.artifact
diff --git a/examples/qualcomm/scripts/inception_v3.py b/examples/qualcomm/scripts/inception_v3.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -37,8 +38,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/inception_v4.py b/examples/qualcomm/scripts/inception_v4.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -37,8 +38,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
-        inputs = [(torch.rand(1, 3, 299, 299),)]
+    if args.ci:
+        inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/mobilenet_v2.py b/examples/qualcomm/scripts/mobilenet_v2.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -37,8 +38,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/mobilenet_v3.py b/examples/qualcomm/scripts/mobilenet_v3.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -36,8 +37,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/torchvision_vit.py b/examples/qualcomm/scripts/torchvision_vit.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -28,8 +29,11 @@ def main(args):
     os.makedirs(args.artifact, exist_ok=True)
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/wav2letter.py b/examples/qualcomm/scripts/wav2letter.py
@@ -134,10 +134,10 @@ def main(args):
 
     # retrieve dataset, will take some time to download
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 1, 700, 1),)]
         logging.warning(
-            "With compile_only, accuracy will be bad due to insufficient datasets for quantization."
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
         )
     else:
         inputs, targets, input_list = get_dataset(
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
@@ -585,6 +585,13 @@ def setup_common_args_and_variables():
         action="store_true",
     )
 
+    parser.add_argument(
+        "--ci",
+        help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases. It will use random inputs instead of real inputs.",
+        action="store_true",
+        default=False,
+    )
+
     # QNN_SDK_ROOT might also be an argument, but it is used in various places.
     # So maybe it's fine to just use the environment.
     if "QNN_SDK_ROOT" not in os.environ:

Original file line number	Diff line number	Diff line change
`@@ -201,7 +201,7 @@ test_model_with_qnn() {`
`201`	`201`	`# TODO(guangyang): Make QNN chipset matches the target device`
`202`	`202`	`QNN_CHIPSET=SM8450`
`203`	`203`
`204`		`- "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS`
	`204`	`+ "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS`
`205`	`205`	`EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)`
`206`	`206`	`}`
`207`	`207`