|
17 | 17 |
|
18 | 18 | #include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h> |
19 | 19 | #include <pytorch/tokenizers/llama2c_tokenizer.h> |
| 20 | +#include <pytorch/tokenizers/hf_tokenizer.h> |
20 | 21 |
|
21 | 22 | namespace example { |
22 | 23 |
|
@@ -75,24 +76,35 @@ Error Runner::load() { |
75 | 76 | return Error::Ok; |
76 | 77 | } |
77 | 78 | ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward")); |
78 | | - // load tokenizer. Assuming tiktoken is the default tokenizer |
| 79 | + // Load tokenizer. |
79 | 80 | tokenizer_ = nullptr; |
80 | | - tokenizer_ = get_tiktoken_for_llama(); |
81 | | - ::tokenizers::Error err = tokenizer_->load(tokenizer_path_); |
82 | | - // Rely on tiktoken to throw error if the artifact is incompatible. Then we |
83 | | - // fallback to BPE tokenizer. |
84 | | - if (err != ::tokenizers::Error::Ok) { |
| 81 | + // Check if tokenizer_path_ ends with ".json". |
| 82 | + if (tokenizer_path_.size() >= 5 && |
| 83 | + tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) { |
| 84 | + tokenizer_ = std::make_unique<::tokenizers::HFTokenizer>(); |
| 85 | + ::tokenizers::Error err = tokenizer_->load(tokenizer_path_); |
| 86 | + ET_CHECK_TK_OK_OR_RETURN_ERROR( |
| 87 | + err, "Failed to load %s as an HF tokenizer artifact", tokenizer_path_.c_str()); |
85 | 88 | ET_LOG( |
86 | | - Info, |
87 | | - "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer", |
88 | | - tokenizer_path_.c_str()); |
89 | | - tokenizer_.reset(); |
90 | | - tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>(); |
91 | | - err = tokenizer_->load(tokenizer_path_); |
92 | | - ET_CHECK_TK_OK_OR_RETURN_ERROR( |
93 | | - err, |
94 | | - "Failed to load %s as a llama2.c tokenizer artifact", |
95 | | - tokenizer_path_.c_str()); |
| 89 | + Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str()); |
| 90 | + } else { |
| 91 | + tokenizer_ = get_tiktoken_for_llama(); |
| 92 | + ::tokenizers::Error err = tokenizer_->load(tokenizer_path_); |
| 93 | + // Rely on tiktoken to report an error if the artifact is incompatible; then |
| 94 | + // we fall back to the BPE tokenizer. |
| 95 | + if (err != ::tokenizers::Error::Ok) { |
| 96 | + ET_LOG( |
| 97 | + Info, |
| 98 | + "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer", |
| 99 | + tokenizer_path_.c_str()); |
| 100 | + tokenizer_.reset(); |
| 101 | + tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>(); |
| 102 | + err = tokenizer_->load(tokenizer_path_); |
| 103 | + ET_CHECK_TK_OK_OR_RETURN_ERROR( |
| 104 | + err, |
| 105 | + "Failed to load %s as a llama2.c tokenizer artifact", |
| 106 | + tokenizer_path_.c_str()); |
| 107 | + } |
96 | 108 | } |
97 | 109 |
|
98 | 110 | ET_LOG(Info, "Reading metadata from model"); |
|
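For reference, a minimal standalone sketch of the ".json" suffix dispatch this patch introduces. The has_suffix helper and the main driver are illustrative only and not part of the runner; the runner inlines the compare() call directly. Under C++20 the same test is simply tokenizer_path_.ends_with(".json").

#include <iostream>
#include <string>

// Hypothetical helper for illustration; the runner inlines this check.
static bool has_suffix(const std::string& s, const std::string& suffix) {
  return s.size() >= suffix.size() &&
      s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main() {
  // ".json" artifacts select the HF tokenizer; everything else goes down
  // the tiktoken -> llama2.c (BPE) fallback chain, as in the patch above.
  for (const char* path :
       {"tokenizer.json", "tokenizer.model", "tokenizer.bin"}) {
    std::cout << path << " -> "
              << (has_suffix(path, ".json") ? "HFTokenizer" : "tiktoken/BPE")
              << "\n";
  }
  return 0;
}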