Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added test clean up and customization #8

Merged
merged 2 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-spyre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ jobs:
export MASTER_ADDR=localhost && \
export DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding && \
cd vllm-spyre && \
python -m pytest tests -v
python -m pytest tests -v -k eager
'''
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import torch
from vllm.connections import global_http_connection
from vllm.distributed import cleanup_dist_env_and_memory

Expand Down Expand Up @@ -42,3 +43,9 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
yield
if should_do_global_cleanup_after_test:
cleanup_dist_env_and_memory()


@pytest.fixture(autouse=True)
def dynamo_reset():
    """Autouse fixture: after every test, clear torch dynamo state.

    Yields control to the test body first; once pytest resumes the
    generator during teardown, ``torch._dynamo.reset()`` is called so
    compilation state from one test cannot leak into the next.
    """
    yield
    torch._dynamo.reset()
40 changes: 39 additions & 1 deletion tests/spyre_util.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is ok to merge this PR as it is, but it is worth trying this suggestion in the future: IBM/vllm#77 (review)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @sducouedic, Agree, will try that in next pass and create a new PR once fixtures are working

Original file line number Diff line number Diff line change
Expand Up @@ -277,4 +277,42 @@ def compare_embedding_results(model: str, prompts: List[str],
sim = util.pytorch_cos_sim(hf_result["embeddings"],
vllm_result["embeddings"])

assert math.isclose(sim, 1.0, rel_tol=0.05)
assert math.isclose(sim, 1.0, rel_tol=0.05)


# get model directory path from env, if not set then default to "/models".
def get_spyre_model_dir_path():
    """Return the base directory holding the test models.

    Reads the ``VLLM_SPYRE_TEST_MODEL_DIR`` environment variable and
    falls back to ``"/models"`` when it is unset.
    """
    return os.environ.get("VLLM_SPYRE_TEST_MODEL_DIR", "/models")


# get backend list from env; if not set, default to all known backends.
# For multiple values:
# export VLLM_SPYRE_TEST_BACKEND_LIST="eager, inductor, sendnn_decoder"
def get_spyre_backend_list():
    """Return the list of backends the tests should be parametrized over.

    Reads the comma-separated ``VLLM_SPYRE_TEST_BACKEND_LIST``
    environment variable, e.g.::

        export VLLM_SPYRE_TEST_BACKEND_LIST="eager, inductor, sendnn_decoder"

    Defaults to every known backend when the variable is unset.
    Surrounding whitespace is stripped from each entry, and empty
    entries (e.g. from a trailing comma) are dropped so they cannot
    produce bogus test parametrizations.
    """
    user_backend_list = os.environ.get("VLLM_SPYRE_TEST_BACKEND_LIST",
                                       "eager,inductor,sendnn_decoder,sendnn")
    return [
        backend.strip() for backend in user_backend_list.split(",")
        if backend.strip()
    ]


# get model names from env, if not set then default to "llama-194m"
# For multiple values:
# export VLLM_SPYRE_TEST_MODEL_LIST="llama-194m,all-roberta-large-v1"
def get_spyre_model_list(isEmbeddings=False):
    """Return full paths of the models the tests should run against.

    Model names come from the comma-separated
    ``VLLM_SPYRE_TEST_MODEL_LIST`` environment variable; each name is
    joined onto the directory returned by ``get_spyre_model_dir_path()``.
    When the variable is unset, defaults to ``"llama-194m"`` for decoder
    tests and ``"all-roberta-large-v1"`` when ``isEmbeddings`` is True.
    Whitespace around each name is stripped and empty entries (e.g. from
    a trailing comma) are ignored.
    """
    spyre_model_dir_path = get_spyre_model_dir_path()
    # Choose the default model family once, instead of querying the
    # environment a second time for the embeddings case.
    default_model_list = ("all-roberta-large-v1"
                          if isEmbeddings else "llama-194m")
    user_test_model_list = os.environ.get("VLLM_SPYRE_TEST_MODEL_LIST",
                                          default_model_list)
    return [
        f"{spyre_model_dir_path}/{model.strip()}"
        for model in user_test_model_list.split(",") if model.strip()
    ]
8 changes: 4 additions & 4 deletions tests/test_spyre_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [[
"Provide a list of instructions for preparing"
" chicken soup for a family of four.", "Hello",
Expand All @@ -20,8 +21,7 @@
@pytest.mark.parametrize("warmup_shape", [(64, 20, 4), (64, 20, 8),
(128, 20, 4), (128, 20, 8)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
from typing import List, Tuple

import pytest
from spyre_util import (compare_embedding_results, spyre_vllm_embeddings,
from spyre_util import (compare_embedding_results, get_spyre_backend_list,
get_spyre_model_list, spyre_vllm_embeddings,
st_embeddings)


@pytest.mark.parametrize("model", ["/models/all-roberta-large-v1"])
@pytest.mark.parametrize("model", get_spyre_model_list(isEmbeddings=True))
@pytest.mark.parametrize("prompts", [[
"The capital of France is Paris."
"Provide a list of instructions for preparing"
Expand All @@ -20,8 +21,7 @@
@pytest.mark.parametrize("warmup_shape",
[(64, 4), (64, 8), (128, 4),
(128, 8)]) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_max_new_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams

template = (
Expand All @@ -20,15 +21,14 @@
"chicken soup for a family of four.")


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [[prompt1, prompt2, prompt2, prompt2],
[prompt2, prompt2, prompt2, prompt1],
[prompt2, prompt2, prompt2, prompt2]])
@pytest.mark.parametrize("stop_last", [True, False])
@pytest.mark.parametrize("warmup_shape", [(64, 10, 4)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_max_prompt_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from transformers import AutoTokenizer
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [
7 * [
"Hello",
Expand All @@ -27,8 +28,7 @@
@pytest.mark.parametrize("warmup_shapes",
[[(64, 20, 4)], [(64, 20, 4), (128, 20, 4)]]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from typing import Tuple

import pytest
from spyre_util import generate_spyre_vllm_output
from spyre_util import (generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompt", [
"Provide a list of instructions for preparing"
" chicken soup for a family of four."
Expand All @@ -21,8 +22,7 @@
@pytest.mark.parametrize("warmup_shape", [(64, 20, 4), (64, 20, 8),
(128, 20, 4), (128, 20, 8)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_seed(
model: str,
prompt: str,
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_tensor_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [[
"Provide a list of instructions for preparing"
" chicken soup for a family of four.", "Hello",
Expand All @@ -21,8 +22,7 @@
) #,[(64,20,8)],[(128,20,4)],[(128,20,8)]])
# (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("tp_size", [2])
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_warmup_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [
7 * [
"Hello",
Expand All @@ -25,8 +26,7 @@
])
@pytest.mark.parametrize("warmup_shapes", [[(64, 20, 8), (128, 20, 4)]]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down