Skip to content

Commit cd036ed

Browse files
committed
revert unintended change
Signed-off-by: weimingc <[email protected]>
1 parent e8a009b commit cd036ed

File tree

1 file changed: +1 -3 lines changed

1 file changed: +1 -3 lines changed

examples/vllm_serve/vllm_serve_fakequant.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,7 @@ def disable_compilation(model):
 97  97    quant_config: dict[str, Any] = {
 98  98    "quant_dataset": "cnn_dailymail",
 99  99    "quant_num_samples": 512,
100      - # "quant_format": "NVFP4_DEFAULT_CFG",
101      - "quant_format": "NVFP4_AWQ_LITE_CFG",
    100  + "quant_format": "NVFP4_DEFAULT_CFG",
102 101    "amax_file_path": None, # Optional: path to pre-computed amax values (e.g., "/path/to/amax.pt")
103 102    }
104 103

@@ -177,7 +176,6 @@ def calibrate_loop(model: Any = None) -> None:
177 176
178 177    quant_cfg = getattr(mtq, quant_config["quant_format"])
179 178
180      - print(f"Quantizing model with {quant_config['quant_format']} format")
181 179    with disable_compilation(self.model):
182 180    mtq.quantize(self.model, quant_cfg, forward_loop=calibrate_loop)
183 181

0 commit comments

Comments
 (0)