Refit debug patch #3620

Status: Open. Wants to merge 3 commits into main.
py/torch_tensorrt/dynamo/_refit.py (1 addition & 1 deletion)

@@ -300,7 +300,7 @@ def refit_module_weights(
     # Check the number of supported operations in the graph
     num_supported_ops, total_ops = partitioning.get_graph_converter_support(
-        new_gm, settings.debug, settings.torch_executed_ops
+        new_gm, settings.torch_executed_ops
     )

     if num_supported_ops == 0 or (
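Note on the hunk above: the debug flag is no longer threaded through this call, so get_graph_converter_support is assumed to now take just the graph module and the ops pinned to Torch. A minimal sketch of the updated call site (new_gm and settings come from refit_module_weights as in the diff; the print is illustrative, not part of the patch):

from torch_tensorrt.dynamo import partitioning

# Count how many ops in the refitted graph have TensorRT converters,
# excluding any ops the user explicitly forced to run in eager Torch.
num_supported_ops, total_ops = partitioning.get_graph_converter_support(
    new_gm, settings.torch_executed_ops
)
print(f"TensorRT supports {num_supported_ops}/{total_ops} graph ops")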
py/torch_tensorrt/dynamo/backend/backends.py (5 additions & 1 deletion)

@@ -10,7 +10,6 @@
 from torch._dynamo.backends.common import aot_autograd
 from torch._dynamo.utils import detect_fake_mode
 from torch._functorch.aot_autograd import aot_export_joint_simple
-from torch.distributed.tensor import DTensor
 from torch_tensorrt.dynamo import CompilationSettings
 from torch_tensorrt.dynamo._compiler import compile_module
 from torch_tensorrt.dynamo.lowering import (

@@ -89,6 +88,11 @@ def aot_torch_tensorrt_aten_backend(
         logger.warning(
             "It is recommended to run the model with use_distributed_mode_trace = True since there are distributed tensors in the input which is not supported in aot_export_joint_simple"
         )
+
+    if settings.offload_module_to_cpu:
+        logger.warning(
+            "The offload_module_to_cpu option is set, but it is being ignored since the torch_compile backend does not support this feature"
+        )
     return _pretraced_backend(gm, sample_inputs, settings, engine_cache)
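To see the new warning in practice: offload_module_to_cpu is a dynamo-path feature, and under the torch_compile backend it is now ignored with an explicit warning rather than silently. A hedged usage sketch (model and shapes are placeholders; the backend name and options dict follow the usual Torch-TensorRT torch.compile integration):

import torch
import torch_tensorrt  # noqa: F401  # registers the "torch_tensorrt" backend
import torchvision.models as models

model = models.resnet18().eval().cuda()
x = torch.randn(1, 3, 224, 224, device="cuda")

# The option is accepted but has no effect on this path; compilation now
# logs the warning added above instead of dropping the flag silently.
compiled = torch.compile(
    model,
    backend="torch_tensorrt",
    options={"offload_module_to_cpu": True},
)
out = compiled(x)  # first call triggers compilation and emits the warning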
tests/py/dynamo/models/test_export_serde.py (6 additions & 5 deletions)

@@ -320,11 +320,12 @@ def test_resnet18_cpu_offload(ir):
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    assertions.assertTrue(
-        get_model_device(model).type == "cpu",
-        msg="Model should be offloaded to CPU",
-    )
-    model.cuda()
+    if ir == "dynamo":
+        assertions.assertTrue(
+            get_model_device(model).type == "cpu",
+            msg="Model should be offloaded to CPU",
+        )
+        model.cuda()
     torchtrt.save(trt_module, trt_ep_path)

     deser_trt_module = torchtrt.load(trt_ep_path).module()
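Why the guard: only the dynamo path offloads the original weights to CPU (the backends.py change above makes the torch_compile backend explicitly ignore the option), so for other IRs the model stays on the GPU and the device assertion would fail. A hypothetical helper capturing the repeated pattern (assert_offloaded_if_dynamo is our name, not from this PR; the get_model_device import path is assumed from how the tests use it):

from torch_tensorrt.dynamo.utils import get_model_device

def assert_offloaded_if_dynamo(model, ir, assertions):
    # Only the dynamo path moves the original weights to CPU once the
    # TensorRT engine holds its own copy; other IRs leave them on the GPU.
    if ir == "dynamo":
        assertions.assertTrue(
            get_model_device(model).type == "cpu",
            msg="Model should be offloaded to CPU",
        )
        model.cuda()  # restore weights so Torch and TRT outputs can be compared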
tests/py/dynamo/models/test_models.py (12 additions & 10 deletions)

@@ -79,11 +79,12 @@ def test_resnet18_cpu_offload(ir):
     }

     trt_mod = torchtrt.compile(model, **compile_spec)
-    assertions.assertTrue(
-        get_model_device(model).type == "cpu",
-        msg="Model should be offloaded to CPU",
-    )
-    model.cuda()
+    if ir == "dynamo":
+        assertions.assertTrue(
+            get_model_device(model).type == "cpu",
+            msg="Model should be offloaded to CPU",
+        )
+        model.cuda()
Review comment on lines +82 to +87
Collaborator: is this ir check needed anywhere else in your offload_module_to_cpu tests?
Author: Yeah, I added the check at all other places.
     cos_sim = cosine_similarity(model(input), trt_mod(input))
     assertions.assertTrue(
         cos_sim > COSINE_THRESHOLD,

@@ -286,11 +287,12 @@ def test_bert_base_uncased_cpu_offload(ir):
         "offload_module_to_cpu": True,
     }
     trt_mod = torchtrt.compile(model, **compile_spec)
-    assertions.assertTrue(
-        get_model_device(model).type == "cpu",
-        msg="Model should be offloaded to CPU",
-    )
-    model.cuda()
+    if ir == "dynamo":
+        assertions.assertTrue(
+            get_model_device(model).type == "cpu",
+            msg="Model should be offloaded to CPU",
+        )
+        model.cuda()

     model_outputs = model(input, input2)
     trt_model_outputs = trt_mod(input, input2)
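For context on where ir comes from: the tests appear to receive it from the suite's IR parametrization. A self-contained sketch of the pattern these tests exercise, assuming plain pytest parametrization and standard torchvision/Torch-TensorRT APIs (shapes and tolerances are illustrative):

import pytest
import torch
import torch_tensorrt as torchtrt
import torchvision.models as models

@pytest.mark.parametrize("ir", ["dynamo", "torch_compile"])
def test_resnet18_cpu_offload_sketch(ir):
    model = models.resnet18().eval().cuda()
    input = torch.randn((1, 3, 224, 224)).cuda()

    trt_mod = torchtrt.compile(
        model,
        ir=ir,
        inputs=[input],
        offload_module_to_cpu=True,  # honored only on the dynamo path
    )
    if ir == "dynamo":
        # Original weights should have been moved to CPU during compilation.
        assert next(model.parameters()).device.type == "cpu"
        model.cuda()
    torch.testing.assert_close(model(input), trt_mod(input), rtol=5e-3, atol=5e-3)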