Skip to content

Commit 8009d1a

Browse files
ochougule, platero97
authored and committed
added retrying downloads logic for stability (quic#370)
Signed-off-by: Onkar Chougule <[email protected]>
1 parent 91eeb68 commit 8009d1a

File tree

4 files changed

+11
-11
lines changed

4 files changed

+11
-11
lines changed

QEfficient/base/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from transformers import AutoConfig
1919

2020
from QEfficient.base.modeling_qeff import QEFFBaseModel
21-
from QEfficient.transformers.modeling_utils import MODEL_CLASS_MAPPING
21+
from QEfficient.transformers.models.modeling_auto import QEFFAutoModelForCausalLM
2222
from QEfficient.utils import login_and_download_hf_lm
2323

2424

scripts/replicate_kv_head/replicate_kv_heads.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from QEfficient.transformers.quantizers.auto import replace_transformers_quantizers, undo_transformers_quantizers
1515
from QEfficient.transformers.quantizers.awq import WQLinear_GEMM
1616
from QEfficient.transformers.quantizers.gptq import QuantLinearGPTQ
17+
from QEfficient.transformers.quantizers.quantizer_compressed_tensors import FP8DeQuantLinear
1718

1819

1920
def duplicate_weights_for_linear_layer(
@@ -78,7 +79,6 @@ def main(args):
7879
model_kwargs = {"attn_implementation": "eager"}
7980
if args.num_hidden_layers:
8081
model_kwargs["num_hidden_layers"] = args.num_hidden_layers
81-
8282
model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
8383

8484
# Undo the effect of replace_transformers_quantizers

tests/transformers/spd/test_pld_inference.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
145145
"""
146146
num_chunks = -(input_len // -prefill_seq_len) # ceil divide without float
147147
input_len_padded = num_chunks * prefill_seq_len # Convert input_len to a multiple of prefill_seq_len
148-
assert input_len_padded <= ctx_len, (
149-
"input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
150-
)
148+
assert (
149+
input_len_padded <= ctx_len
150+
), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
151151
return input_len_padded
152152

153153

tests/transformers/spd/test_spd_inference.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
8686
"""
8787
num_chunks = -(input_len // -prefill_seq_len) # ceil divide without float
8888
input_len_padded = num_chunks * prefill_seq_len # Convert input_len to a multiple of prefill_seq_len
89-
assert input_len_padded <= ctx_len, (
90-
"input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
91-
)
89+
assert (
90+
input_len_padded <= ctx_len
91+
), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
9292
return input_len_padded
9393

9494

@@ -335,9 +335,9 @@ def test_spec_decode_inference(
335335
for prompt, generation in zip(prompts, batch_decode):
336336
print(f"{prompt=} {generation=}")
337337
# validation check
338-
assert mean_num_accepted_tokens == float(num_speculative_tokens + 1), (
339-
f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
340-
)
338+
assert mean_num_accepted_tokens == float(
339+
num_speculative_tokens + 1
340+
), f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
341341
del target_model_session
342342
del draft_model_session
343343
generated_ids = np.asarray(generated_ids[0]).flatten()

0 commit comments

Comments (0)