Commit eb73f34

silverguo (Yuhan Guo) and Yuhan Guo authored
Move tokenizer info into pte when exporting to et (#1510)
Co-authored-by: Yuhan Guo <[email protected]>
1 parent 3c7e839

File tree: 3 files changed (+48 -14 lines)


runner/run.cpp (+23 -1)
@@ -53,6 +53,9 @@ using executorch::extension::TensorPtr;
 using torch::executor::EValue;
 using torch::executor::Module;
 using torch::executor::Result;
+using executorch::runtime::MemoryManager;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::Error;
 #endif
 
 using tokenizers::SPTokenizer;
@@ -867,7 +870,26 @@ int main(int argc, char *argv[]) {
       : torch::Device(torch::kCUDA);
   ModelType model_type = get_model_type(std::stoi(aoti_metadata["tokenizer_type"]));
 #else // __ET_MODEL__
-  ModelType model_type = get_model_type(llama_ver);
+  Error load_status = transformer.runner->load();
+  ET_CHECK_MSG(
+      load_status == torch::executor::Error::Ok,
+      "program::load() failed with status 0x%" PRIx32,
+      static_cast<uint32_t>(load_status));
+
+  static std::array<uint8_t, 4 * 1024U * 1024U> method_allocator_pool; // 4MB
+  MemoryAllocator method_allocator{MemoryAllocator(
+      sizeof(method_allocator_pool), method_allocator_pool.data())};
+  MemoryManager memory_manager(&method_allocator, nullptr);
+  auto tokenizer_method = transformer.runner->program()->load_method("tokenizer_type", &memory_manager);
+
+  Error execute_status = tokenizer_method->execute();
+  ET_CHECK_MSG(
+      execute_status == torch::executor::Error::Ok,
+      "method::execute() failed with status 0x%" PRIx32,
+      static_cast<uint32_t>(execute_status));
+
+  auto tokenizer_type = tokenizer_method->get_output(0).toInt();
+  ModelType model_type = get_model_type(tokenizer_type);
 #endif
 
   if (model_type == UNKNOWN_MODEL) {
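
For reference, the integer returned by the "tokenizer_type" method feeds get_model_type(): 0 means no tokenizer info, 2 corresponds to a llama2 (SentencePiece) tokenizer, and 3 to llama3, matching the values written by torchchat/export.py below. A minimal Python sketch of that lookup, for illustration only (the ModelType member names are hypothetical stand-ins for the runner's C++ enum):

from enum import Enum

class ModelType(Enum):
    # Values mirror the tokenizer_type strings written at export time.
    UNKNOWN_MODEL = 0
    LLAMA2_MODEL = 2  # SentencePiece tokenizer
    LLAMA3_MODEL = 3

def get_model_type(tokenizer_type: int) -> ModelType:
    # Map the integer read back from the .pte onto a model type;
    # unrecognized values fall back to UNKNOWN_MODEL.
    try:
        return ModelType(tokenizer_type)
    except ValueError:
        return ModelType.UNKNOWN_MODEL

assert get_model_type(3) is ModelType.LLAMA3_MODEL
assert get_model_type(7) is ModelType.UNKNOWN_MODEL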

torchchat/export.py (+24 -12)
@@ -313,7 +313,7 @@ def export_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config, verbose=verbose
     )
 
-def export_for_et(model, device, output_path) -> str:
+def export_for_et(model, device, output_path, edge_constant_methods) -> str:
 
     input = (
         torch.tensor([[1]], dtype=torch.long, device=device),
@@ -344,12 +344,15 @@ def export_for_et(model, device, output_path) -> str:
     with torch.nn.attention.sdpa_kernel(
         [torch.nn.attention.SDPBackend.MATH]
     ), torch.no_grad():
-        m = export_for_training(model, input, dynamic_shapes=dynamic_shapes).module()
+        m = export_for_training(
+            model, input, dynamic_shapes=dynamic_shapes
+        ).module()
 
         edge_manager = export_to_edge(
             m,
             input,
             dynamic_shapes=dynamic_shapes,
+            edge_constant_methods=edge_constant_methods,
             edge_compile_config=edge_config,
         )
         edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())
@@ -365,6 +368,7 @@ def export_for_et(model, device, output_path) -> str:
     )
 
     print("The methods are: ", export_program.methods)
+    print("The config methods are: ", export_program.config_methods)
     with open(output_path, "wb") as f:
         export_program.write_to_file(f)
 
@@ -407,7 +411,9 @@ def main(args):
            f"Warning! ExecuTorch export target is controlled by export recipe, not device setting. Ignoring device={builder_args.device} setting."
        )
        builder_args.device = "cpu"
-    elif (output_pte_path or output_dso_path or output_aoti_package_path) and "mps" in builder_args.device:
+    elif (
+        output_pte_path or output_dso_path or output_aoti_package_path
+    ) and "mps" in builder_args.device:
        print("Warning! Device MPS not supported for export. Exporting for device CPU.")
        builder_args.device = "cpu"
 
@@ -473,13 +479,26 @@ def main(args):
            support_tensor_subclass=False,
        )
        _unset_gguf_kwargs(builder_args)
-
+
+    if tokenizer_args is None:
+        tokenizer_type = "0"
+    elif tokenizer_args.is_sentencepiece:
+        tokenizer_type = "2"  # Corresponding to llama2
+    else:
+        tokenizer_type = "3"  # Corresponding to llama3
+
    with torch.no_grad():
        if output_pte_path:
            output_pte_path = str(os.path.abspath(output_pte_path))
            if executorch_export_available:
                print(f"Exporting model using ExecuTorch to {output_pte_path}")
-                export_for_et(model_to_pte, builder_args.device, args.output_pte_path)
+                print(f"Tokenizer type is {tokenizer_type}")
+                export_for_et(
+                    model_to_pte,
+                    builder_args.device,
+                    args.output_pte_path,
+                    {"tokenizer_type": int(tokenizer_type)},
+                )
            else:
                print(
                    "Export with executorch requested but ExecuTorch could not be loaded"
@@ -503,13 +522,6 @@
        if output_aoti_package_path:
            output_aoti_package_path = str(os.path.abspath(output_aoti_package_path))
 
-            if tokenizer_args is None:
-                tokenizer_type = "0"
-            elif tokenizer_args.is_sentencepiece:
-                tokenizer_type = "2"  # Corresponding to llama2
-            else:
-                tokenizer_type = "3"  # Corresponding to llama3
-
            metadata = {"tokenizer_type": tokenizer_type}
            print(
                "Exporting model using AOT Inductor to " f"{output_aoti_package_path}."

torchchat/utils/scripts/build_native.sh (+1 -1)
@@ -100,4 +100,4 @@ else
 fi
 cmake --build ./cmake-out --target "${TARGET}"_run
 
-printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -l <llama version (2 or 3)> -i <prompt>\n"
+printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model > -i <prompt>\n"
