Dpatel enable tests on spyre #77

Open · wants to merge 8 commits into base: main
37 changes: 37 additions & 0 deletions tests/spyre/spyre_util.py
@@ -281,3 +281,40 @@ def compare_embedding_results(model: str, prompts: List[str],
vllm_result["embeddings"])

assert math.isclose(sim, 1.0, rel_tol=0.05)


# Get the model directory path from the environment; default to "/models" if unset.
def get_spyre_model_dir_path():
    model_dir_path = os.environ.get("SPYRE_TEST_MODEL_DIR", "/models")
    return model_dir_path


# Get the list of backends from the environment; default to "eager" if unset.
# For multiple values:
# export SPYRE_TEST_BACKEND_LIST="eager, inductor, sendnn_decoder"
def get_spyre_backend_list():
    test_backend_list = []
    user_backend_list = os.environ.get("SPYRE_TEST_BACKEND_LIST", "eager")

    for backend in user_backend_list.split(","):
        test_backend_list.append(backend.strip())
    return test_backend_list


# Get the list of model names from the environment; default to "llama-194m"
# (or to "all-roberta-large-v1" when testing embeddings).
# For multiple values:
# export SPYRE_TEST_MODEL_LIST="llama-194m,all-roberta-large-v1"
def get_spyre_model_list(isEmbeddings=False):
    spyre_model_dir_path = get_spyre_model_dir_path()
    test_model_list = []
    user_test_model_list = os.environ.get("SPYRE_TEST_MODEL_LIST",
                                          "llama-194m")

    # Default to an embedding model when testing embeddings.
    if isEmbeddings:
        user_test_model_list = os.environ.get("SPYRE_TEST_MODEL_LIST",
                                              "all-roberta-large-v1")

    for model in user_test_model_list.split(","):
        test_model_list.append(f"{spyre_model_dir_path}/{model.strip()}")
    return test_model_list
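
A minimal sketch of how these helpers resolve their environment variables (the values below are illustrative, and it assumes tests/spyre is on the import path so spyre_util can be imported):

import os

os.environ["SPYRE_TEST_MODEL_DIR"] = "/models"
os.environ["SPYRE_TEST_BACKEND_LIST"] = "eager, inductor"
os.environ["SPYRE_TEST_MODEL_LIST"] = "llama-194m, all-roberta-large-v1"

from spyre_util import get_spyre_backend_list, get_spyre_model_list

print(get_spyre_backend_list())
# ['eager', 'inductor']
print(get_spyre_model_list())
# ['/models/llama-194m', '/models/all-roberta-large-v1']
print(get_spyre_model_list(isEmbeddings=True))
# Same as above: an explicit SPYRE_TEST_MODEL_LIST overrides the embeddings default.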
45 changes: 27 additions & 18 deletions tests/spyre/test_spyre_basic.py
@@ -7,22 +7,27 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)

from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("prompts", [[
"Provide a list of instructions for preparing"
" chicken soup for a family of four.", "Hello",
"What is the weather today like?", "Who are you?"
]])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize(
"prompts",
[[
"Provide a list of instructions for preparing"
" chicken soup for a family of four.",
"Hello",
"What is the weather today like?",
"Who are you?",
]],
)
@pytest.mark.parametrize("warmup_shape", [(64, 20, 4), (64, 20, 8),
(128, 20, 4), (128, 20, 8)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
@@ -48,7 +53,8 @@ def test_output(
max_tokens=max_new_tokens,
temperature=0,
logprobs=0, # return logprobs of generated tokens only
ignore_eos=True)
ignore_eos=True,
)

vllm_results = generate_spyre_vllm_output(
model=model,
@@ -58,16 +64,19 @@
block_size=2048,
sampling_params=vllm_sampling_params,
tensor_parallel_size=1,
backend=backend)
backend=backend,
)

hf_results = generate_hf_output(model=model,
prompts=prompts,
max_new_tokens=max_new_tokens)

compare_results(model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results)
compare_results(
model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results,
)
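
With the hardcoded model and backend replaced by the env-driven helpers, the parametrize stack above expands multiplicatively at collection time. A rough count under the defaults (purely illustrative; pytest computes this cross-product itself):

num_models = 1         # get_spyre_model_list() default: llama-194m
num_prompt_sets = 1    # the single parametrized prompt list
num_warmup_shapes = 4  # (64, 20, 4), (64, 20, 8), (128, 20, 4), (128, 20, 8)
num_backends = 1       # get_spyre_backend_list() default: eager

print(num_models * num_prompt_sets * num_warmup_shapes * num_backends)  # 4
# Exporting SPYRE_TEST_BACKEND_LIST="eager,inductor,sendnn_decoder" triples this to 12.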
61 changes: 35 additions & 26 deletions tests/spyre/test_spyre_embeddings.py
@@ -7,21 +7,26 @@

import pytest
from spyre_util import (compare_embedding_results, spyre_vllm_embeddings,
st_embeddings)


@pytest.mark.parametrize("model", ["/models/all-roberta-large-v1"])
@pytest.mark.parametrize("prompts", [[
"The capital of France is Paris."
"Provide a list of instructions for preparing"
" chicken soup for a family of four.", "Hello",
"What is the weather today like?", "Who are you?"
]])
st_embeddings, get_spyre_backend_list,
get_spyre_model_list)


@pytest.mark.parametrize("model", get_spyre_model_list(isEmbeddings=True))
@pytest.mark.parametrize(
"prompts",
[[
"The capital of France is Paris."
"Provide a list of instructions for preparing"
" chicken soup for a family of four.",
"Hello",
"What is the weather today like?",
"Who are you?",
]],
)
@pytest.mark.parametrize("warmup_shape",
[(64, 4), (64, 8), (128, 4),
(128, 8)]) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
@@ -35,20 +40,24 @@ def test_output(
are verified to be identical for vLLM and SentenceTransformers.
'''

vllm_results = spyre_vllm_embeddings(model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
max_model_len=256,
block_size=256,
tensor_parallel_size=1,
backend=backend)
vllm_results = spyre_vllm_embeddings(
model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
max_model_len=256,
block_size=256,
tensor_parallel_size=1,
backend=backend,
)

hf_results = st_embeddings(model=model, prompts=prompts)

compare_embedding_results(model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results)
compare_embedding_results(
model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results,
)
45 changes: 29 additions & 16 deletions tests/spyre/test_spyre_max_new_tokens.py
@@ -6,8 +6,11 @@
from typing import List, Tuple

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
from spyre_util import (
compare_results,
generate_hf_output,
generate_spyre_vllm_output,
)

from vllm import SamplingParams

@@ -22,14 +25,19 @@


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("prompts", [[prompt1, prompt2, prompt2, prompt2],
[prompt2, prompt2, prompt2, prompt1],
[prompt2, prompt2, prompt2, prompt2]])
@pytest.mark.parametrize(
"prompts",
[
[prompt1, prompt2, prompt2, prompt2],
[prompt2, prompt2, prompt2, prompt1],
[prompt2, prompt2, prompt2, prompt2],
],
)
@pytest.mark.parametrize("stop_last", [True, False])
@pytest.mark.parametrize("warmup_shape", [(64, 10, 4)]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
["eager"]) # , "inductor", "sendnn_decoder"])
def test_output(
model: str,
prompts: List[str],
@@ -57,13 +65,15 @@ def test_output(
max_tokens=max_new_tokens_warmup,
temperature=0,
logprobs=0, # return logprobs of generated tokens only
ignore_eos=False)
ignore_eos=False,
)

vllm_sampling_params_early_stop = SamplingParams(
max_tokens=max_new_tokens_early_stop,
temperature=0,
logprobs=0, # return logprobs of generated tokens only
ignore_eos=False)
ignore_eos=False,
)

vllm_sampling_params = [vllm_sampling_params_normal] * 3
hf_max_new_tokens = [max_new_tokens_warmup] * 3
@@ -87,16 +97,19 @@
block_size=2048,
sampling_params=vllm_sampling_params,
tensor_parallel_size=1,
backend=backend)
backend=backend,
)

hf_results = generate_hf_output(model=model,
prompts=prompts,
max_new_tokens=hf_max_new_tokens)

compare_results(model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results)
compare_results(
model=model,
prompts=prompts,
warmup_shapes=[warmup_shape],
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results,
)
44 changes: 25 additions & 19 deletions tests/spyre/test_spyre_max_prompt_length.py
@@ -7,13 +7,15 @@

import pytest
from spyre_util import (compare_results, generate_hf_output,
generate_spyre_vllm_output)
generate_spyre_vllm_output, get_spyre_backend_list,
get_spyre_model_list)

from transformers import AutoTokenizer

from vllm import SamplingParams


@pytest.mark.parametrize("model", ["/models/llama-194m"])
@pytest.mark.parametrize("model", get_spyre_model_list())
@pytest.mark.parametrize("prompts", [
7 * [
"Hello",
@@ -22,14 +24,14 @@
" to the user. Provide a list of instructions for preparing chicken "
"soup for a family of four. Indicate if the weather forecast looks "
"good for today. Explain in a brief summary comprised of at most 50"
" words what you are."
" words what you are.",
]
])
],
)
@pytest.mark.parametrize("warmup_shapes",
[[(64, 20, 4)], [(64, 20, 4), (128, 20, 4)]]
) # (prompt_length/new_tokens/batch_size)
@pytest.mark.parametrize("backend",
["eager"]) #, "inductor", "sendnn_decoder"])
@pytest.mark.parametrize("backend", get_spyre_backend_list())
def test_output(
model: str,
prompts: List[str],
@@ -62,7 +64,8 @@ def test_output(
max_tokens=max_new_tokens,
temperature=0,
logprobs=0, # return logprobs of generated tokens only
ignore_eos=True)
ignore_eos=True,
)

vllm_results = generate_spyre_vllm_output(
model=model,
Expand All @@ -72,7 +75,8 @@ def test_output(
block_size=2048,
sampling_params=vllm_sampling_params,
tensor_parallel_size=1,
backend=backend)
backend=backend,
)

hf_results = generate_hf_output(model=model,
prompts=prompts,
@@ -86,16 +90,18 @@
hf_input_tokens = hf_tokenizer(prompt, return_tensors="pt").input_ids
if len(hf_input_tokens[0]) > max_prompt_length:
hf_results[prompt_index] = {
'text': '',
'token_ids': (),
'tokens': (),
'logprobs': ()
"text": "",
"token_ids": (),
"tokens": (),
"logprobs": (),
}

compare_results(model=model,
prompts=prompts,
warmup_shapes=warmup_shapes,
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results)
compare_results(
model=model,
prompts=prompts,
warmup_shapes=warmup_shapes,
tensor_parallel_size=1,
backend=backend,
vllm_results=vllm_results,
hf_results=hf_results,
)