@@ -95,11 +95,6 @@ def _maybe_dequantize_weight_for_expanded_lora(model, module):
         weight_on_cpu = True
 
     if is_bnb_4bit_quantized:
-        if module.weight.quant_state.dtype != model.dtype:
-            raise ValueError(
-                f"Model is in {model.dtype} dtype while the current module weight will be dequantized to {module.weight.quant_state.dtype} dtype. "
-                f"Please pass {module.weight.quant_state.dtype} as `torch_dtype` in `from_pretrained()`."
-            )
         module_weight = dequantize_bnb_weight(
             module.weight.cuda() if weight_on_cpu else module.weight,
             state=module.weight.quant_state,
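The hunk above removes the hard dtype check that previously required the model's `torch_dtype` to match the 4-bit `quant_state` dtype before dequantization. The sketch below only illustrates a generic dequantize-then-cast pattern; it does not use the real bitsandbytes `quant_state` machinery, and the helper name `dequantize_and_cast` plus the `weight.float()` stand-in are assumptions, not code from this PR:

```python
import torch

def dequantize_and_cast(weight: torch.Tensor, target_dtype: torch.dtype) -> torch.Tensor:
    """Illustrative only: 'dequantize' a weight, then cast it to the target dtype."""
    # A real 4-bit weight would be expanded via bitsandbytes using module.weight.quant_state;
    # a plain float() upcast stands in for that step here.
    dequantized = weight.float()
    # Assumption: any dtype mismatch is reconciled after dequantization (e.g. by a cast like
    # this one) rather than rejected up front, which is what the removed ValueError did.
    return dequantized.to(target_dtype)

w = torch.randn(8, 8, dtype=torch.float16)  # stand-in for a module weight
print(dequantize_and_cast(w, torch.bfloat16).dtype)  # torch.bfloat16
```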
@@ -2372,14 +2367,14 @@ def _maybe_expand_transformer_param_shape_or_error_(
             # TODO: consider if this layer needs to be a quantized layer as well if `is_quantized` is True.
             with torch.device("meta"):
                 expanded_module = torch.nn.Linear(
-                    in_features, out_features, bias=bias, dtype=module_weight.dtype
+                    in_features, out_features, bias=bias, dtype=transformer.dtype
                 )
             # Only weights are expanded and biases are not. This is because only the input dimensions
             # are changed while the output dimensions remain the same. The shape of the weight tensor
             # is (out_features, in_features), while the shape of bias tensor is (out_features,), which
             # explains the reason why only weights are expanded.
             new_weight = torch.zeros_like(
-                expanded_module.weight.data, device=module_weight.device, dtype=module_weight.dtype
+                expanded_module.weight.data, device=module_weight.device, dtype=transformer.dtype
             )
             slices = tuple(slice(0, dim) for dim in module_weight_shape)
             new_weight[slices] = module_weight
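This hunk switches the expanded layer's dtype from the original module weight's dtype to `transformer.dtype` (presumably so a quantized or offloaded weight does not dictate the dtype of the new layer). The expansion pattern itself is unchanged: only `in_features` grows, so the old `(out_features, in_features)` weight is copied into a zero-initialized expanded weight while the bias keeps its `(out_features,)` shape. Below is a minimal, self-contained sketch of that pattern; the names `old_linear` and `new_in_features` are illustrative, not from the PR:

```python
import torch

old_linear = torch.nn.Linear(4, 8, bias=True, dtype=torch.float32)
new_in_features = 6  # expanded input dimension; out_features stays at 8

# Allocate the expanded layer on the meta device so no real memory is used yet.
with torch.device("meta"):
    expanded = torch.nn.Linear(new_in_features, 8, bias=True, dtype=torch.float32)

# Zero-initialize the expanded weight on the original weight's device, then copy the
# old (out_features, in_features) weight into the matching slice.
new_weight = torch.zeros_like(expanded.weight.data, device=old_linear.weight.device)
slices = tuple(slice(0, dim) for dim in old_linear.weight.shape)
new_weight[slices] = old_linear.weight.data  # bias is untouched: its shape is (out_features,)
print(new_weight.shape)  # torch.Size([8, 6])
```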