Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added test clean up and customization #8

Merged
merged 2 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-spyre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ jobs:
export MASTER_ADDR=localhost && \
export DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding && \
cd vllm-spyre && \
python -m pytest tests -v
python -m pytest tests -v -k eager
'''
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import torch
from vllm.connections import global_http_connection
from vllm.distributed import cleanup_dist_env_and_memory

Expand Down Expand Up @@ -42,3 +43,9 @@ def cleanup_fixture(should_do_global_cleanup_after_test: bool):
yield
if should_do_global_cleanup_after_test:
cleanup_dist_env_and_memory()


@pytest.fixture(autouse=True)
def dynamo_reset():
    """Autouse fixture: after every test, clear torch dynamo state.

    Yields control to the test body first; once pytest resumes the
    generator during teardown, ``torch._dynamo.reset()`` is called so
    compilation state from one test cannot leak into the next.
    """
    yield
    torch._dynamo.reset()
40 changes: 39 additions & 1 deletion tests/spyre_util.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is ok to merge this PR as it is, but it is worth trying this suggestion in the future: IBM/vllm#77 (review)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @sducouedic, Agree, will try that in next pass and create a new PR once fixtures are working

Original file line number Diff line number Diff line change
Expand Up @@ -277,4 +277,42 @@ def compare_embedding_results(model: str, prompts: List[str],
sim = util.pytorch_cos_sim(hf_result["embeddings"],
vllm_result["embeddings"])

assert math.isclose(sim, 1.0, rel_tol=0.05)
assert math.isclose(sim, 1.0, rel_tol=0.05)


# get model directory path from env, if not set then default to "/models".
def get_spyre_model_dir_path():
    """Return the base directory holding the test models.

    Reads the ``VLLM_SPYRE_TEST_MODEL_DIR`` environment variable and
    falls back to ``"/models"`` when it is unset.
    """
    return os.environ.get("VLLM_SPYRE_TEST_MODEL_DIR", "/models")


# get backend list from env; if not set, default to all known backends.
# For multiple values:
# export VLLM_SPYRE_TEST_BACKEND_LIST="eager, inductor, sendnn_decoder"
def get_spyre_backend_list():
    """Return the list of backends the tests should be parametrized over.

    Reads the comma-separated ``VLLM_SPYRE_TEST_BACKEND_LIST``
    environment variable, e.g.::

        export VLLM_SPYRE_TEST_BACKEND_LIST="eager, inductor, sendnn_decoder"

    Defaults to every known backend when the variable is unset.
    Surrounding whitespace is stripped from each entry, and empty
    entries (e.g. from a trailing comma) are dropped so they cannot
    produce bogus test parametrizations.
    """
    user_backend_list = os.environ.get("VLLM_SPYRE_TEST_BACKEND_LIST",
                                       "eager,inductor,sendnn_decoder,sendnn")
    return [
        backend.strip() for backend in user_backend_list.split(",")
        if backend.strip()
    ]


# get model names from env, if not set then default to "llama-194m"
# For multiple values:
# export VLLM_SPYRE_TEST_MODEL_LIST="llama-194m,all-roberta-large-v1"
def get_spyre_model_list(isEmbeddings=False):
    """Return full paths of the models the tests should run against.

    Model names come from the comma-separated
    ``VLLM_SPYRE_TEST_MODEL_LIST`` environment variable; each name is
    joined onto the directory returned by ``get_spyre_model_dir_path()``.
    When the variable is unset, defaults to ``"llama-194m"`` for decoder
    tests and ``"all-roberta-large-v1"`` when ``isEmbeddings`` is True.
    Whitespace around each name is stripped and empty entries (e.g. from
    a trailing comma) are ignored.
    """
    spyre_model_dir_path = get_spyre_model_dir_path()
    # Choose the default model family once, instead of querying the
    # environment a second time for the embeddings case.
    default_model_list = ("all-roberta-large-v1"
                          if isEmbeddings else "llama-194m")
    user_test_model_list = os.environ.get("VLLM_SPYRE_TEST_MODEL_LIST",
                                          default_model_list)
    return [
        f"{spyre_model_dir_path}/{model.strip()}"
        for model in user_test_model_list.split(",") if model.strip()
    ]
8 changes: 4 additions & 4 deletions tests/test_spyre_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [[
"Provide a list of instructions for preparing"
" chicken soup for a family of four.", "Hello",
Expand All @@ -20,8 +21,7 @@
@pytest.mark.parametrize("warmup_shape", [(64, 20, 4), (64, 20, 8),
(128, 20, 4), (128, 20, 8)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
from typing import List, Tuple

import pytest
from spyre_util import (compare_embedding_results, spyre_vllm_embeddings,
from spyre_util import (compare_embedding_results, get_spyre_backend_list,
get_spyre_model_list, spyre_vllm_embeddings,
st_embeddings)


@pytest.mark.parametrize("model", ["/models/all-roberta-large-v1"])
@pytest.mark.parametrize("model", get_spyre_model_list(isEmbeddings=True))
@pytest.mark.parametrize("prompts", [[
"The capital of France is Paris."
"Provide a list of instructions for preparing"
Expand All @@ -20,8 +21,7 @@
@pytest.mark.parametrize("warmup_shape",
[(64, 4), (64, 8), (128, 4),
(128, 8)]) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_max_new_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams

template = (
Expand All @@ -20,15 +21,14 @@
"chicken soup for a family of four.")


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [[prompt1, prompt2, prompt2, prompt2],
[prompt2, prompt2, prompt2, prompt1],
[prompt2, prompt2, prompt2, prompt2]])
@pytest.mark.parametrize("stop_last", [True, False])
@pytest.mark.parametrize("warmup_shape", [(64, 10, 4)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_max_prompt_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from transformers import AutoTokenizer
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [
7 * [
"Hello",
Expand All @@ -27,8 +28,7 @@
@pytest.mark.parametrize("warmup_shapes",
[[(64, 20, 4)], [(64, 20, 4), (128, 20, 4)]]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from typing import Tuple

import pytest
from spyre_util import generate_spyre_vllm_output
from spyre_util import (generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompt", [
"Provide a list of instructions for preparing"
" chicken soup for a family of four."
Expand All @@ -21,8 +22,7 @@
@pytest.mark.parametrize("warmup_shape", [(64, 20, 4), (64, 20, 8),
(128, 20, 4), (128, 20, 8)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_seed(
model: str,
prompt: str,
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_tensor_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [[
"Provide a list of instructions for preparing"
" chicken soup for a family of four.", "Hello",
Expand All @@ -21,8 +22,7 @@
) #,[(64,20,8)],[(128,20,4)],[(128,20,8)]])
# (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("tp_size", [2])
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down
8 changes: 4 additions & 4 deletions tests/test_spyre_warmup_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)
from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [
7 * [
"Hello",
Expand All @@ -25,8 +26,7 @@
])
@pytest.mark.parametrize("warmup_shapes", [[(64, 20, 8), (128, 20, 4)]]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
Expand Down