Debug: don't switch from pytest.skip to pytest.xfail #3201

Draft · wants to merge 1 commit into main
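For context on why the swap matters: `pytest.skip` reports a test as SKIPPED, meaning the case simply does not apply to the current configuration, while an imperative `pytest.xfail` ends the test and reports it as an expected failure, which implies a known bug rather than an unsupported configuration. The sketch below is not part of this PR; the test names and reason strings are made up purely to contrast the two calls.

```python
# Standalone sketch (not from this PR) contrasting the two pytest calls the
# diff below swaps; test names and reason strings here are hypothetical.
import pytest


def test_unsupported_platform():
    # Reported as SKIPPED: the case is not applicable here, nothing is
    # considered broken, and the rest of the test body never runs.
    pytest.skip("float64 not supported on this hardware")


def test_known_defect():
    # Reported as XFAIL ("expected failure"): signals a known bug that is
    # still being tracked. Like skip, the imperative call stops the test
    # immediately, so nothing after this line executes.
    pytest.xfail("known accuracy issue on this backend")
```

Keeping `xfail` reserved for genuine known failures and using `skip` for configuration guards is presumably the motivation for reverting these guards to `pytest.skip`.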
python/test/regression/test_cast_matmul.py — 1 addition, 1 deletion
@@ -89,7 +89,7 @@ def matmul_kernel(A, B, C, M, N, K, #
                          for o in out_dtypes])
 def test_cast_matmul(M, K, N, BLOCK_K, BLOCK_M, w_dtype, x_dtype, out_dtype, device):
     if x_dtype == w_dtype:
-        pytest.xfail("skip the same input dtype")
+        pytest.skip("skip the same input dtype")
     if device == "xpu" and "float64" in (w_dtype,
                                          x_dtype) and not tr.driver.active.get_current_target().arch['has_fp64']:
         pytest.xfail("float64 not supported on current xpu hardware")

python/test/unit/language/test_block_pointer.py — 1 addition, 1 deletion
@@ -45,7 +45,7 @@ def test_block_copy(dtypes_str, n, padding_option, boundary_check, device):
     check_type_supported(dst_dtype, device)
     if src_dtype_str in ("bool", "int16", "int32"):
         if padding_option == "nan":
-            pytest.xfail("Padding with NaN is not supported for integer types")
+            pytest.skip("Padding with NaN is not supported for integer types")
         a = torch.randint(0, 2, (n, ), device=device, dtype=src_dtype)
     else:
         a = torch.randn((n, ), device=device, dtype=src_dtype)

python/test/unit/language/test_conversions.py — 1 addition, 1 deletion
@@ -374,7 +374,7 @@ def test_typeconvert_downcast(src_dtype, dst_dtype, rounding, max_repr, device):
         pytest.skip(f"{dst_dtype} downcast with RTNE rounding tests only supported on NVGPU with compute capability 9.0+")

     if dst_dtype in ('float8e5b16', 'float8e4b8') and rounding == 'rtne' and (is_cuda() or not is_hip_mi300()):
-        pytest.xfail(f"{dst_dtype} downcast with RTNE rounding tests only supported on AMDGPU MI300")
+        pytest.skip(f"{dst_dtype} downcast with RTNE rounding tests only supported on AMDGPU MI300")

     if dst_dtype == 'float8e4nv' and is_hip():
         pytest.skip(f"{dst_dtype} downcast not supported in HIP")

python/test/unit/language/test_core.py — 25 additions, 25 deletions
@@ -114,7 +114,7 @@ def check_cuda_or_hip(device):
     # CUDA and HIP both use pytorch device 'cuda'. Other backends like Intel
     # GPU do not.
     if device not in ['cuda']:
-        pytest.xfail("Only for cuda or HIP")
+        pytest.skip("Only for cuda or HIP")


 def check_type_supported(dtype, device):
@@ -129,7 +129,7 @@ def check_type_supported(dtype, device):
         pytest.skip("float8e4nv is only supported on NVGPU with cc >= 90")
     if is_interpreter():
         if dtype in [tl.bfloat16, "bfloat16", torch.bfloat16]:
-            pytest.xfail("bfloat16 is not supported in the interpreter")
+            pytest.skip("bfloat16 is not supported in the interpreter")
     elif device in ['xpu']:
         if dtype in [torch.float64, "float64"] and not xpu_has_fp64():
             pytest.xfail("float64 not supported on current xpu hardware")
@@ -1536,7 +1536,7 @@ def test_atomic_rmw(op, dtype_x_str, mode, sem, device):
     check_type_supported(dtype_x_str, device)
     if is_interpreter():
         if dtype_x_str == 'float16':
-            pytest.xfail("Only test atomic float16 ops on GPU")
+            pytest.skip("Only test atomic float16 ops on GPU")

     n_programs = 5

@@ -2508,9 +2508,9 @@ def test_scan2d(op, dtype_str, shape, axis, reverse, num_warps, device):
     check_type_supported(dtype_str, device)
     if dtype_str == 'bfloat16':
         if op == 'cummax':
-            pytest.xfail("bfloat16 compare not supported before sm90")
+            pytest.skip("bfloat16 compare not supported before sm90")
         if op == 'linear_recurrence':
-            pytest.xfail("Skipping linear_recurrence scan on bfloat16 due to accuracy issues")
+            pytest.skip("Skipping linear_recurrence scan on bfloat16 due to accuracy issues")
     numpy_dtype_str = 'float32' if dtype_str == 'bfloat16' else dtype_str

     # triton kernel
@@ -2853,7 +2853,7 @@ def test_reduce_layouts(M, N, src_layout, axis, epilogue_kind, dtype_str, add_ov
     if is_hip() and isinstance(src_layout, MfmaLayout) and ((M, N) == (128, 128)):
         pytest.skip("Skipping test because it runs out of shared memory")
     if reduce_op == "sum" and dtype_str == "float16" and M * N > 1024:
-        pytest.xfail("Skipping sum reduction on float16 due to accuracy issues")
+        pytest.skip("Skipping sum reduction on float16 due to accuracy issues")
     if is_hip() and isinstance(src_layout, LinearLayout):
         pytest.skip("FIXME: LinearLayout not supported on HIP")
     if is_xpu() and isinstance(src_layout, LinearLayout) and epilogue_kind == "reduce1d":
@@ -3429,7 +3429,7 @@ def test_dot(M, N, K, num_warps, col_a, col_b, epilogue, input_precision, in_dty
              num_ctas, device):
     if is_interpreter():
         if in_dtype == 'bfloat16':
-            pytest.xfail("bfloat16 is not supported in the interpreter")
+            pytest.skip("bfloat16 is not supported in the interpreter")
     else:
         if is_xpu():
             if (M < 8 or N < 16 or (K < 16 and in_dtype == 'float16') or (K < 8 and in_dtype == 'float32')):
@@ -3461,14 +3461,14 @@ def test_dot(M, N, K, num_warps, col_a, col_b, epilogue, input_precision, in_dty
     if is_hip() and (kpack == 2 and in_dtype == 'int8' and K < 64):
         pytest.skip("kpack too large for K")
     if not is_hip() and kpack == 2:
-        pytest.xfail("Skip duplicated tests on nv path")
+        pytest.skip("Skip duplicated tests on nv path")

     if is_cuda():
         torch.backends.cuda.matmul.allow_tf32 = input_precision == "tf32"

     if num_ctas > 1 and in_dtype == 'int8':
         # FIXME: mma v2 with num_ctas > 1 does not work
-        pytest.xfail()
+        pytest.skip()

     # triton kernel
     @triton.jit
@@ -5025,7 +5025,7 @@ def kernel(X, Y, BLOCK: tl.constexpr):
 @pytest.mark.parametrize("num_ctas", num_ctas_list)
 def test_inline_asm(num_ctas, device):
     if not is_cuda():
-        pytest.xfail("test_inline_asm is only supported in CUDA")
+        pytest.skip("test_inline_asm is only supported in CUDA")

     @triton.jit
     def kernel(X, Y, Z, n: tl.constexpr, BLOCK: tl.constexpr):
@@ -5053,7 +5053,7 @@ def kernel(X, Y, Z, n: tl.constexpr, BLOCK: tl.constexpr):
 @pytest.mark.parametrize("num_ctas", num_ctas_list)
 def test_inline_asm_packed(num_ctas, device):
     if not is_cuda():
-        pytest.xfail("test_inline_asm is only supported in CUDA")
+        pytest.skip("test_inline_asm is only supported in CUDA")

     @triton.jit
     def kernel(X, Y, BLOCK: tl.constexpr):
@@ -5080,7 +5080,7 @@ def kernel(X, Y, BLOCK: tl.constexpr):
 @pytest.mark.parametrize('num_ctas', num_ctas_list)
 def test_inline_asm_with_pointers(num_ctas, device):
     if not is_cuda():
-        pytest.xfail('test_inline_asm is only supported in CUDA')
+        pytest.skip('test_inline_asm is only supported in CUDA')

     @triton.jit
     def kernel(X, Y, BLOCK: tl.constexpr):
@@ -5105,7 +5105,7 @@ def kernel(X, Y, BLOCK: tl.constexpr):

 def test_inline_asm_multiple_outputs(device):
     if not is_cuda():
-        pytest.xfail('test_inline_asm is only supported in CUDA')
+        pytest.skip('test_inline_asm is only supported in CUDA')

     @triton.jit
     def kernel(A, B, C, D, BLOCK: tl.constexpr):
@@ -5151,7 +5151,7 @@ def kernel(A, B, C, D, BLOCK: tl.constexpr):

 def test_inline_asm_packed_multiple_outputs(device):
     if not is_cuda():
-        pytest.xfail('test_inline_asm is only supported in CUDA')
+        pytest.skip('test_inline_asm is only supported in CUDA')

     @triton.jit
     def kernel(A, B, C, D, BLOCK: tl.constexpr):
@@ -5657,11 +5657,11 @@ def compute_scratch_buffer_shape(src_layout, dst_layout, shape):
 @pytest.mark.parametrize("dst_layout", filter_layouts(layouts))
 def test_convert2d(M, N, src_layout, interm_layout, dst_layout, dtype, device, tmp_path: pathlib.Path):
     if str(src_layout) == str(dst_layout):
-        pytest.xfail("Do not convert same layout")
+        pytest.skip("Do not convert same layout")
     if (isinstance(src_layout, DotOperandLayout)
             and isinstance(interm_layout, SharedLayout)) or (isinstance(dst_layout, DotOperandLayout)
                                                              and isinstance(interm_layout, SharedLayout)):
-        pytest.xfail("DotOperandLayout <-> SharedLayout conversion is not completely supported")
+        pytest.skip("DotOperandLayout <-> SharedLayout conversion is not completely supported")
     if is_hip() or is_xpu():
         try:
             scratch_shape = compute_scratch_buffer_shape(src_layout, dst_layout, (M, N))
@@ -6083,15 +6083,15 @@ def do_test(src_layout, dst_layout):
 @pytest.mark.parametrize("dst_layout", single_warp_layouts)
 def test_convert_warp_local(M, N, src_layout, dst_layout, dtype, device, tmp_path: pathlib.Path):
     if str(src_layout) == str(dst_layout):
-        pytest.xfail()
+        pytest.skip()
     if np.prod(src_layout.threads_per_warp) == 0 or np.prod(dst_layout.threads_per_warp) == 0:
-        pytest.xfail()
+        pytest.skip()

     # Test layout pairs that are likely to codegen warp shuffles.
     a, b = list(np.array(src_layout.threads_per_warp) // np.array(dst_layout.threads_per_warp))
     c = a if a != 0 else b
     if c > 2:
-        pytest.xfail()
+        pytest.skip()

     layouts = f"""
     #src = {src_layout}
@@ -6333,7 +6333,7 @@ def mul_add(data):
 @pytest.mark.parametrize("env_var_override", [False, True])
 def test_override_arch(arch, env_var_override, device):
     if not is_cuda():
-        pytest.xfail('arch only for CUDA')
+        pytest.skip('arch only for CUDA')

     @triton.jit
     def simple(data, out):
@@ -6530,7 +6530,7 @@ def maxnreg_noinline2(X):
 @pytest.mark.interpreter
 def test_maxnreg(device):
     if not is_cuda():
-        pytest.xfail('maxnreg only works on CUDA')
+        pytest.skip('maxnreg only works on CUDA')

     # triton kernel
     @triton.jit
@@ -6614,7 +6614,7 @@ def kernel(input):
 @pytest.mark.parametrize("dtype_str", ['float32', 'float64'])
 def test_math_extern(dtype_str, device):
     if is_interpreter():
-        pytest.xfail('math_extern does not work in the interpreter mode')
+        pytest.skip('math_extern does not work in the interpreter mode')

     @triton.jit
     def kernel(
@@ -6680,7 +6680,7 @@ def sanitize_add(a, b):

 def test_side_effectful_reduction(device):
     if device != "cuda":
-        pytest.xfail()
+        pytest.skip()

     @triton.jit(debug=True)
     def sanitize_sum_kernel(Z, X, BLOCK: tl.constexpr):
@@ -6701,7 +6701,7 @@ def sanitize_sum_kernel(Z, X, BLOCK: tl.constexpr):
 @pytest.mark.parametrize("reduce_dim", [0, 1])
 def test_side_effectful_reduction_2d(device, reduce_dim):
     if device != "cuda":
-        pytest.xfail()
+        pytest.skip()

     @triton.jit(debug=True)
     def sanitize_sum_2d_kernel(Z, X, BLOCK_0: tl.constexpr, BLOCK_1: tl.constexpr, reduce_dim: tl.constexpr,
@@ -6737,7 +6737,7 @@ def kernel(X):

 def test_side_effectful_scan(device):
     if device != "cuda":
-        pytest.xfail()
+        pytest.skip()

     @triton.jit(debug=True)
     def sanitize_cumsum_kernel(Z, X, BLOCK: tl.constexpr):