@@ -95,11 +95,6 @@ def _maybe_dequantize_weight_for_expanded_lora(model, module):
         weight_on_cpu = True
 
     if is_bnb_4bit_quantized:
-        if module.weight.quant_state.dtype != model.dtype:
-            raise ValueError(
-                f"Model is in {model.dtype} dtype while the current module weight will be dequantized to {module.weight.quant_state.dtype} dtype. "
-                f"Please pass {module.weight.quant_state.dtype} as `torch_dtype` in `from_pretrained()`."
-            )
         module_weight = dequantize_bnb_weight(
             module.weight.cuda() if weight_on_cpu else module.weight,
             state=module.weight.quant_state,
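The hunk above removes the hard dtype check that previously required the model's `torch_dtype` to match the 4-bit `quant_state` dtype before dequantization. The sketch below only illustrates a generic dequantize-then-cast pattern; it does not use the real bitsandbytes `quant_state` machinery, and the helper name `dequantize_and_cast` plus the `weight.float()` stand-in are assumptions, not code from this PR:

```python
import torch

def dequantize_and_cast(weight: torch.Tensor, target_dtype: torch.dtype) -> torch.Tensor:
    """Illustrative only: 'dequantize' a weight, then cast it to the target dtype."""
    # A real 4-bit weight would be expanded via bitsandbytes using module.weight.quant_state;
    # a plain float() upcast stands in for that step here.
    dequantized = weight.float()
    # Assumption: any dtype mismatch is reconciled after dequantization (e.g. by a cast like
    # this one) rather than rejected up front, which is what the removed ValueError did.
    return dequantized.to(target_dtype)

w = torch.randn(8, 8, dtype=torch.float16)  # stand-in for a module weight
print(dequantize_and_cast(w, torch.bfloat16).dtype)  # torch.bfloat16
```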
@@ -2372,14 +2367,14 @@ def _maybe_expand_transformer_param_shape_or_error_(
             # TODO: consider if this layer needs to be a quantized layer as well if `is_quantized` is True.
             with torch.device("meta"):
                 expanded_module = torch.nn.Linear(
-                    in_features, out_features, bias=bias, dtype=module_weight.dtype
+                    in_features, out_features, bias=bias, dtype=transformer.dtype
                 )
             # Only weights are expanded and biases are not. This is because only the input dimensions
             # are changed while the output dimensions remain the same. The shape of the weight tensor
             # is (out_features, in_features), while the shape of bias tensor is (out_features,), which
             # explains the reason why only weights are expanded.
             new_weight = torch.zeros_like(
-                expanded_module.weight.data, device=module_weight.device, dtype=module_weight.dtype
+                expanded_module.weight.data, device=module_weight.device, dtype=transformer.dtype
             )
             slices = tuple(slice(0, dim) for dim in module_weight_shape)
             new_weight[slices] = module_weight
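This hunk switches the expanded layer's dtype from the original module weight's dtype to `transformer.dtype` (presumably so a quantized or offloaded weight does not dictate the dtype of the new layer). The expansion pattern itself is unchanged: only `in_features` grows, so the old `(out_features, in_features)` weight is copied into a zero-initialized expanded weight while the bias keeps its `(out_features,)` shape. Below is a minimal, self-contained sketch of that pattern; the names `old_linear` and `new_in_features` are illustrative, not from the PR:

```python
import torch

old_linear = torch.nn.Linear(4, 8, bias=True, dtype=torch.float32)
new_in_features = 6  # expanded input dimension; out_features stays at 8

# Allocate the expanded layer on the meta device so no real memory is used yet.
with torch.device("meta"):
    expanded = torch.nn.Linear(new_in_features, 8, bias=True, dtype=torch.float32)

# Zero-initialize the expanded weight on the original weight's device, then copy the
# old (out_features, in_features) weight into the matching slice.
new_weight = torch.zeros_like(expanded.weight.data, device=old_linear.weight.device)
slices = tuple(slice(0, dim) for dim in old_linear.weight.shape)
new_weight[slices] = old_linear.weight.data  # bias is untouched: its shape is (out_features,)
print(new_weight.shape)  # torch.Size([8, 6])
```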