diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 119d98c7276a..7e3bbf4a12b8 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -423,7 +423,6 @@ impl WastTest { "misc_testsuite/simd/almost-extmul.wast", "misc_testsuite/simd/canonicalize-nan.wast", "misc_testsuite/simd/issue_3327_bnot_lowering.wast", - "spec_testsuite/simd_bit_shift.wast", "spec_testsuite/simd_boolean.wast", "spec_testsuite/simd_f32x4.wast", "spec_testsuite/simd_f32x4_arith.wast", @@ -433,25 +432,19 @@ impl WastTest { "spec_testsuite/simd_f64x2_arith.wast", "spec_testsuite/simd_f64x2_pmin_pmax.wast", "spec_testsuite/simd_f64x2_rounding.wast", - "spec_testsuite/simd_i16x8_arith.wast", "spec_testsuite/simd_i16x8_arith2.wast", "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast", "spec_testsuite/simd_i16x8_extmul_i8x16.wast", "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast", - "spec_testsuite/simd_i16x8_sat_arith.wast", - "spec_testsuite/simd_i32x4_arith.wast", "spec_testsuite/simd_i32x4_arith2.wast", "spec_testsuite/simd_i32x4_dot_i16x8.wast", "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", "spec_testsuite/simd_i32x4_extmul_i16x8.wast", "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", - "spec_testsuite/simd_i64x2_arith.wast", "spec_testsuite/simd_i64x2_arith2.wast", "spec_testsuite/simd_i64x2_extmul_i32x4.wast", - "spec_testsuite/simd_i8x16_arith.wast", "spec_testsuite/simd_i8x16_arith2.wast", - "spec_testsuite/simd_i8x16_sat_arith.wast", "spec_testsuite/simd_lane.wast", "spec_testsuite/simd_load.wast", "spec_testsuite/simd_load_zero.wast", @@ -499,6 +492,13 @@ impl WastTest { "multi-memory/simd_memory-multi.wast", "misc_testsuite/simd/issue4807.wast", "spec_testsuite/simd_const.wast", + "spec_testsuite/simd_i8x16_sat_arith.wast", + "spec_testsuite/simd_i64x2_arith.wast", + "spec_testsuite/simd_i16x8_arith.wast", + "spec_testsuite/simd_i32x4_arith.wast", + "spec_testsuite/simd_i16x8_sat_arith.wast", + "spec_testsuite/simd_i8x16_arith.wast", + "spec_testsuite/simd_bit_shift.wast", ]; if unsupported.iter().any(|part| self.path.ends_with(part)) { diff --git a/tests/disas/winch/x64/i16x8/neg/neg.wat b/tests/disas/winch/x64/i16x8/neg/neg.wat new file mode 100644 index 000000000000..c24ef451d87b --- /dev/null +++ b/tests/disas/winch/x64/i16x8/neg/neg.wat @@ -0,0 +1,33 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i16x8.neg (v128.const i64x2 0xFFFFFFFFFFFFFFFF 42) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x43 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; vpxor %xmm15, %xmm15, %xmm15 +;; vpsubw %xmm0, %xmm15, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 43: ud2 +;; 45: addb %al, (%rax) +;; 47: addb %al, (%rax) +;; 49: addb %al, (%rax) +;; 4b: addb %al, (%rax) +;; 4d: addb %al, (%rax) +;; 4f: addb %bh, %bh diff --git a/tests/disas/winch/x64/i16x8/shift/shl.wat b/tests/disas/winch/x64/i16x8/shift/shl.wat new file mode 100644 index 000000000000..e087685d165f --- /dev/null +++ b/tests/disas/winch/x64/i16x8/shift/shl.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i16x8.shl (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0xf, %eax +;; vmovd %eax, %xmm15 +;; vpsllw %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8/shift/shr_s.wat b/tests/disas/winch/x64/i16x8/shift/shr_s.wat new file mode 100644 index 000000000000..c18823f977f5 --- /dev/null +++ b/tests/disas/winch/x64/i16x8/shift/shr_s.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i16x8.shr_s (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0xf, %eax +;; vmovd %eax, %xmm15 +;; vpsraw %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i16x8/shift/shr_u.wat b/tests/disas/winch/x64/i16x8/shift/shr_u.wat new file mode 100644 index 000000000000..b154bc918758 --- /dev/null +++ b/tests/disas/winch/x64/i16x8/shift/shr_u.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i16x8.shr_u (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0xf, %eax +;; vmovd %eax, %xmm15 +;; vpsrlw %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4/neg/neg.wat b/tests/disas/winch/x64/i32x4/neg/neg.wat new file mode 100644 index 000000000000..9d55702fa383 --- /dev/null +++ b/tests/disas/winch/x64/i32x4/neg/neg.wat @@ -0,0 +1,33 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i32x4.neg (v128.const i64x2 0xFFFFFFFFFFFFFFFF 42) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x43 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; vpxor %xmm15, %xmm15, %xmm15 +;; vpsubd %xmm0, %xmm15, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 43: ud2 +;; 45: addb %al, (%rax) +;; 47: addb %al, (%rax) +;; 49: addb %al, (%rax) +;; 4b: addb %al, (%rax) +;; 4d: addb %al, (%rax) +;; 4f: addb %bh, %bh diff --git a/tests/disas/winch/x64/i32x4/shift/shl.wat b/tests/disas/winch/x64/i32x4/shift/shl.wat new file mode 100644 index 000000000000..72e64363fd5c --- /dev/null +++ b/tests/disas/winch/x64/i32x4/shift/shl.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i32x4.shl (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0x1f, %eax +;; vmovd %eax, %xmm15 +;; vpslld %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4/shift/shr_s.wat b/tests/disas/winch/x64/i32x4/shift/shr_s.wat new file mode 100644 index 000000000000..5c87156bb713 --- /dev/null +++ b/tests/disas/winch/x64/i32x4/shift/shr_s.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i32x4.shr_s (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0x1f, %eax +;; vmovd %eax, %xmm15 +;; vpsrad %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i32x4/shift/shr_u.wat b/tests/disas/winch/x64/i32x4/shift/shr_u.wat new file mode 100644 index 000000000000..a12e6cfab9f7 --- /dev/null +++ b/tests/disas/winch/x64/i32x4/shift/shr_u.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i32x4.shr_u (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0x1f, %eax +;; vmovd %eax, %xmm15 +;; vpsrld %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2/neg/neg.wat b/tests/disas/winch/x64/i64x2/neg/neg.wat new file mode 100644 index 000000000000..72e02f862dff --- /dev/null +++ b/tests/disas/winch/x64/i64x2/neg/neg.wat @@ -0,0 +1,33 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i64x2.neg (v128.const i64x2 0xFFFFFFFFFFFFFFFF 42) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x43 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; vpxor %xmm15, %xmm15, %xmm15 +;; vpsubq %xmm0, %xmm15, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 43: ud2 +;; 45: addb %al, (%rax) +;; 47: addb %al, (%rax) +;; 49: addb %al, (%rax) +;; 4b: addb %al, (%rax) +;; 4d: addb %al, (%rax) +;; 4f: addb %bh, %bh diff --git a/tests/disas/winch/x64/i64x2/shift/shl.wat b/tests/disas/winch/x64/i64x2/shift/shl.wat new file mode 100644 index 000000000000..6db216c0514a --- /dev/null +++ b/tests/disas/winch/x64/i64x2/shift/shl.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i64x2.shl (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0x3f, %eax +;; vmovd %eax, %xmm15 +;; vpsllq %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2/shift/shr_s.wat b/tests/disas/winch/x64/i64x2/shift/shr_s.wat new file mode 100644 index 000000000000..e70e35e90562 --- /dev/null +++ b/tests/disas/winch/x64/i64x2/shift/shr_s.wat @@ -0,0 +1,54 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i64x2.shr_s (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x60 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x37(%rip), %xmm0 +;; andl $0x3f, %eax +;; vmovd %eax, %xmm15 +;; vmovdqu 0x38(%rip), %xmm1 +;; vpsrlq %xmm15, %xmm1, %xmm1 +;; vpsrlq %xmm15, %xmm0, %xmm0 +;; vpxor %xmm1, %xmm0, %xmm0 +;; vpsubq %xmm1, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 60: ud2 +;; 62: addb %al, (%rax) +;; 64: addb %al, (%rax) +;; 66: addb %al, (%rax) +;; 68: addb %al, (%rax) +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) +;; 70: addl %eax, (%rax) +;; 72: addb %al, (%rax) +;; 74: addb %al, (%rax) +;; 76: addb %al, (%rax) +;; 78: addb (%rax), %al +;; 7a: addb %al, (%rax) +;; 7c: addb %al, (%rax) +;; 7e: addb %al, (%rax) +;; 80: addb %al, (%rax) +;; 82: addb %al, (%rax) +;; 84: addb %al, (%rax) +;; 86: addb %al, (%rax) +;; 8c: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i64x2/shift/shr_u.wat b/tests/disas/winch/x64/i64x2/shift/shr_u.wat new file mode 100644 index 000000000000..a9ab3e496966 --- /dev/null +++ b/tests/disas/winch/x64/i64x2/shift/shr_u.wat @@ -0,0 +1,39 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i64x2.shr_u (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4b +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x17(%rip), %xmm0 +;; andl $0x3f, %eax +;; vmovd %eax, %xmm15 +;; vpsrlq %xmm15, %xmm0, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4b: ud2 +;; 4d: addb %al, (%rax) +;; 4f: addb %al, (%rcx) +;; 51: addb %al, (%rax) +;; 53: addb %al, (%rax) +;; 55: addb %al, (%rax) +;; 57: addb %al, (%rdx) +;; 59: addb %al, (%rax) +;; 5b: addb %al, (%rax) +;; 5d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/neg/neg.wat b/tests/disas/winch/x64/i8x16/neg/neg.wat new file mode 100644 index 000000000000..c89173dfb3f9 --- /dev/null +++ b/tests/disas/winch/x64/i8x16/neg/neg.wat @@ -0,0 +1,33 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i8x16.neg (v128.const i64x2 0xFFFFFFFFFFFFFFFF 42) + ))) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x43 +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; vpxor %xmm15, %xmm15, %xmm15 +;; vpsubb %xmm0, %xmm15, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 43: ud2 +;; 45: addb %al, (%rax) +;; 47: addb %al, (%rax) +;; 49: addb %al, (%rax) +;; 4b: addb %al, (%rax) +;; 4d: addb %al, (%rax) +;; 4f: addb %bh, %bh diff --git a/tests/disas/winch/x64/i8x16/shift/shl.wat b/tests/disas/winch/x64/i8x16/shift/shl.wat new file mode 100644 index 000000000000..3e62dee7e68c --- /dev/null +++ b/tests/disas/winch/x64/i8x16/shift/shl.wat @@ -0,0 +1,50 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i8x16.shl (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x5f +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x37(%rip), %xmm0 +;; andl $7, %eax +;; vmovd %eax, %xmm15 +;; vpsllw %xmm15, %xmm0, %xmm0 +;; leaq 0x34(%rip), %r11 +;; shll $4, %eax +;; vmovdqu (%r11, %rax), %xmm15 +;; vpand %xmm0, %xmm15, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 5f: ud2 +;; 61: addb %al, (%rax) +;; 63: addb %al, (%rax) +;; 65: addb %al, (%rax) +;; 67: addb %al, (%rax) +;; 69: addb %al, (%rax) +;; 6b: addb %al, (%rax) +;; 6d: addb %al, (%rax) +;; 6f: addb %al, (%rcx) +;; 71: addb %al, (%rax) +;; 73: addb %al, (%rax) +;; 75: addb %al, (%rax) +;; 77: addb %al, (%rdx) +;; 79: addb %al, (%rax) +;; 7b: addb %al, (%rax) +;; 7d: addb %al, (%rax) +;; 7f: addb %bh, %bh diff --git a/tests/disas/winch/x64/i8x16/shift/shr_s.wat b/tests/disas/winch/x64/i8x16/shift/shr_s.wat new file mode 100644 index 000000000000..c9db7668b5e3 --- /dev/null +++ b/tests/disas/winch/x64/i8x16/shift/shr_s.wat @@ -0,0 +1,50 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i8x16.shr_s (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x5f +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x37(%rip), %xmm0 +;; andl $7, %eax +;; addl $8, %eax +;; vmovd %eax, %xmm15 +;; vpunpcklbw %xmm0, %xmm0, %xmm1 +;; vpunpckhbw %xmm0, %xmm0, %xmm2 +;; vpsraw %xmm15, %xmm1, %xmm1 +;; vpsraw %xmm15, %xmm2, %xmm2 +;; vpacksswb %xmm2, %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 5f: ud2 +;; 61: addb %al, (%rax) +;; 63: addb %al, (%rax) +;; 65: addb %al, (%rax) +;; 67: addb %al, (%rax) +;; 69: addb %al, (%rax) +;; 6b: addb %al, (%rax) +;; 6d: addb %al, (%rax) +;; 6f: addb %al, (%rcx) +;; 71: addb %al, (%rax) +;; 73: addb %al, (%rax) +;; 75: addb %al, (%rax) +;; 77: addb %al, (%rdx) +;; 79: addb %al, (%rax) +;; 7b: addb %al, (%rax) +;; 7d: addb %al, (%rax) diff --git a/tests/disas/winch/x64/i8x16/shift/shr_u.wat b/tests/disas/winch/x64/i8x16/shift/shr_u.wat new file mode 100644 index 000000000000..50aa63c9fde9 --- /dev/null +++ b/tests/disas/winch/x64/i8x16/shift/shr_u.wat @@ -0,0 +1,50 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i8x16.shr_u (v128.const i64x2 1 2) (i32.const 3)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x5f +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movl $3, %eax +;; movdqu 0x37(%rip), %xmm0 +;; andl $7, %eax +;; vmovd %eax, %xmm15 +;; vpsrlw %xmm15, %xmm0, %xmm0 +;; leaq 0x34(%rip), %r11 +;; shll $4, %eax +;; vmovdqu (%r11, %rax), %xmm15 +;; vpand %xmm0, %xmm15, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 5f: ud2 +;; 61: addb %al, (%rax) +;; 63: addb %al, (%rax) +;; 65: addb %al, (%rax) +;; 67: addb %al, (%rax) +;; 69: addb %al, (%rax) +;; 6b: addb %al, (%rax) +;; 6d: addb %al, (%rax) +;; 6f: addb %al, (%rcx) +;; 71: addb %al, (%rax) +;; 73: addb %al, (%rax) +;; 75: addb %al, (%rax) +;; 77: addb %al, (%rdx) +;; 79: addb %al, (%rax) +;; 7b: addb %al, (%rax) +;; 7d: addb %al, (%rax) +;; 7f: addb %bh, %bh diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index f4e9484fe7e0..737521760543 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1157,6 +1157,19 @@ impl Masm for MacroAssembler { ) -> Result<()> { Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) } + + fn v128_neg(&mut self, _op: WritableReg, _size: OperandSize) -> Result<()> { + Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) + } + + fn v128_shift( + &mut self, + _context: &mut CodeGenContext, + _lane_width: OperandSize, + _shift_kind: ShiftKind, + ) -> Result<()> { + Err(anyhow!(CodeGenError::unimplemented_masm_instruction())) + } } impl MacroAssembler { diff --git a/winch/codegen/src/isa/x64/address.rs b/winch/codegen/src/isa/x64/address.rs index 47229ae37448..e7ce8d3c5ea0 100644 --- a/winch/codegen/src/isa/x64/address.rs +++ b/winch/codegen/src/isa/x64/address.rs @@ -10,6 +10,13 @@ pub(crate) enum Address { Offset { base: Reg, offset: u32 }, /// Address to identify a constant. Const(Constant), + /// Address at `(base + index * 2^shift) + simm32` + ImmRegRegShift { + simm32: i32, + base: Reg, + index: Reg, + shift: u8, + }, } impl Address { diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index ed9547c92f16..12e086b66a12 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -323,9 +323,9 @@ impl Assembler { buffer: &mut MachBuffer, memflags: MemFlags, ) -> SyntheticAmode { - match addr { + match *addr { Address::Offset { base, offset } => { - let amode = Amode::imm_reg(*offset as i32, (*base).into()).with_flags(memflags); + let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags); SyntheticAmode::real(amode) } Address::Const(c) => { @@ -333,18 +333,30 @@ impl Assembler { // `SyntheticAmode::ConstantOffset` addressing mode // until the address is referenced by an actual // instruction. - let constant_data = pool.get(*c); - let data = VCodeConstantData::Pool(*c, constant_data.clone()); + let constant_data = pool.get(c); + let data = VCodeConstantData::Pool(c, constant_data.clone()); // If the constant data is not marked as used, it will be // inserted, therefore, it needs to be registered. 
let needs_registration = !constants.pool_uses(&data); - let constant = constants.insert(VCodeConstantData::Pool(*c, constant_data.clone())); + let constant = constants.insert(VCodeConstantData::Pool(c, constant_data.clone())); if needs_registration { buffer.register_constant(&constant, &data); } SyntheticAmode::ConstantOffset(constant) } + Address::ImmRegRegShift { + simm32, + base, + index, + shift, + } => SyntheticAmode::Real(Amode::ImmRegRegShift { + simm32, + base: base.into(), + index: index.into(), + shift, + flags: memflags, + }), } } @@ -1937,6 +1949,38 @@ impl Assembler { }); } + /// Move unaligned packed integer values from address `src` to `dst`. + pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) { + let src = Self::to_synthetic_amode( + src, + &mut self.pool, + &mut self.constants, + &mut self.buffer, + flags, + ); + self.emit(Inst::XmmUnaryRmRVex { + op: AvxOpcode::Vmovdqu, + src: XmmMem::unwrap_new(RegMem::mem(src)), + dst: dst.map(Into::into), + }); + } + + /// Move integer from `src` to xmm register `dst` using an AVX instruction. + pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) { + let op = match size { + OperandSize::S32 => AvxOpcode::Vmovd, + OperandSize::S64 => AvxOpcode::Vmovq, + _ => unreachable!(), + }; + + self.emit(Inst::GprToXmmVex { + op, + src: src.into(), + dst: dst.map(Into::into), + src_size: size.into(), + }) + } + /// The `vpinsr` opcode to use. fn vpinsr_opcode(size: OperandSize) -> AvxOpcode { match size { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 8bd8358647b9..d76d4ec65716 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -44,6 +44,38 @@ use cranelift_codegen::{ use wasmtime_cranelift::TRAP_UNREACHABLE; use wasmtime_environ::{PtrSize, WasmValType}; +// Taken from `cranelift/codegen/src/isa/x64/lower/isle.rs` +// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we +// need to fix up the bits that migrate from one half of the lane to the +// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift +// right by 0 (no movement), we want to retain all the bits so we mask with +// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so +// we mask with `0x7f`; etc. + +#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row. +const I8X16_ISHL_MASKS: [u8; 128] = [ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, + 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, + 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +]; + +#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row. 
+const I8X16_USHR_MASKS: [u8; 128] = [ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, +]; + /// x64 MacroAssembler. pub(crate) struct MacroAssembler { /// Stack pointer offset. @@ -2198,6 +2230,202 @@ impl Masm for MacroAssembler { Ok(()) } + + fn v128_neg(&mut self, op: WritableReg, size: OperandSize) -> Result<()> { + let tmp = regs::scratch_xmm(); + self.v128_xor(tmp, tmp, writable!(tmp))?; + self.v128_sub(tmp, op.to_reg(), op, size, HandleOverflowKind::None)?; + Ok(()) + } + + fn v128_shift( + &mut self, + context: &mut CodeGenContext, + lane_width: OperandSize, + kind: ShiftKind, + ) -> Result<()> { + self.ensure_has_avx()?; + let shift_amount = context.pop_to_reg(self, None)?.reg; + let operand = context.pop_to_reg(self, None)?.reg; + + let tmp_xmm = regs::scratch_xmm(); + let tmp = regs::scratch(); + let amount_mask = lane_width.num_bits() - 1; + self.and( + writable!(shift_amount), + shift_amount, + RegImm::i32(amount_mask as i32), + OperandSize::S32, + )?; + + let shl_normal = |this: &mut Self, op: AvxOpcode| { + this.asm + .avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32); + this.asm + .xmm_vex_rr(op, operand, tmp_xmm, writable!(operand)); + }; + + let shift_i8x16 = |this: &mut Self, masks: &'static [u8], op: AvxOpcode| { + // The case for i8x16 is a little bit trickier because x64 doesn't provide a 8bit + // shift instruction. Instead, we shift as 16bits, and then mask the bits in the + // 8bits lane, for example (with 2 8bits lanes): + // - Before shifting: + // 01001101 11101110 + // - shifting by 2 left: + // 00110111 10111000 + // ^^_ these bits come from the previous byte, and need to be masked. + // - The mask: + // 11111100 11111111 + // - After masking: + // 00110100 10111000 + // + // The mask is loaded from a well known memory, depending on the shift amount. + + this.asm + .avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32); + + // perform 16 bit shift + this.asm + .xmm_vex_rr(op, operand, tmp_xmm, writable!(operand)); + + // get a handle to the masks array constant. + let masks_addr = this.asm.add_constant(masks); + + // Load the masks array effective address into the tmp register. + this.asm.lea(&masks_addr, writable!(tmp), OperandSize::S64); + + // Compute the offset of the mask that we need to use. This is shift_amount * 16 == + // shift_amount << 4. + this.asm + .shift_ir(4, writable!(shift_amount), ShiftKind::Shl, OperandSize::S32); + + // Load the mask to tmp_xmm. + this.asm.xmm_vmovdqu_mr( + &Address::ImmRegRegShift { + simm32: 0, + base: tmp, + index: shift_amount, + shift: 0, + }, + writable!(tmp_xmm), + MemFlags::trusted(), + ); + + // Mask unwanted bits from operand. 
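+            // (The mask row indexed by the scaled `shift_amount` is now in `tmp_xmm`;
+            // the `vpand` below clears exactly the bits that leaked across a byte
+            // boundary during the 16-bit shift.)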
+ this.asm + .xmm_vex_rr(AvxOpcode::Vpand, tmp_xmm, operand, writable!(operand)); + }; + + let i64x2_shr_s = |this: &mut Self, context: &mut CodeGenContext| -> Result<()> { + const SIGN_MASK: u128 = 0x8000000000000000_8000000000000000; + + // AVX doesn't have an instruction for i64x2 signed right shift. Instead we use the + // following formula (from hacker's delight 2-7), where x is the value and n the shift + // amount, for each lane: + // t = (1 << 63) >> n; ((x >> n) ^ t) - t + + // we need an extra scratch register + let tmp_xmm2 = context.any_fpr(this)?; + + this.asm + .avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32); + + let cst = this.asm.add_constant(&SIGN_MASK.to_le_bytes()); + + this.asm + .xmm_vmovdqu_mr(&cst, writable!(tmp_xmm2), MemFlags::trusted()); + this.asm + .xmm_vex_rr(AvxOpcode::Vpsrlq, tmp_xmm2, tmp_xmm, writable!(tmp_xmm2)); + this.asm + .xmm_vex_rr(AvxOpcode::Vpsrlq, operand, tmp_xmm, writable!(operand)); + this.asm + .xmm_vex_rr(AvxOpcode::Vpxor, operand, tmp_xmm2, writable!(operand)); + this.asm + .xmm_vex_rr(AvxOpcode::Vpsubq, operand, tmp_xmm2, writable!(operand)); + + context.free_reg(tmp_xmm2); + + Ok(()) + }; + + let i8x16_shr_s = |this: &mut Self, context: &mut CodeGenContext| -> Result<()> { + // Since the x86 instruction set does not have an 8x16 shift instruction and the + // approach used for `ishl` and `ushr` cannot be easily used (the masks do not + // preserve the sign), we use a different approach here: separate the low and + // high lanes, shift them separately, and merge them into the final result. + // + // Visually, this looks like the following, where `src.i8x16 = [s0, s1, ..., + // s15]: + // + // lo.i16x8 = [(s0, s0), (s1, s1), ..., (s7, s7)] + // shifted_lo.i16x8 = shift each lane of `low` + // hi.i16x8 = [(s8, s8), (s9, s9), ..., (s15, s15)] + // shifted_hi.i16x8 = shift each lane of `high` + // result = [s0'', s1'', ..., s15''] + + // In order for `packsswb` later to only use the high byte of each + // 16x8 lane, we shift right an extra 8 bits, relying on `psraw` to + // fill in the upper bits appropriately. + this.asm + .add_ir(8, writable!(shift_amount), OperandSize::S32); + this.asm + .avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32); + + let tmp_lo = context.any_fpr(this)?; + let tmp_hi = context.any_fpr(this)?; + + // Extract lower and upper bytes. + this.asm + .xmm_vex_rr(AvxOpcode::Vpunpcklbw, operand, operand, writable!(tmp_lo)); + this.asm + .xmm_vex_rr(AvxOpcode::Vpunpckhbw, operand, operand, writable!(tmp_hi)); + + // Perform 16bit right shift of upper and lower bytes. + this.asm + .xmm_vex_rr(AvxOpcode::Vpsraw, tmp_lo, tmp_xmm, writable!(tmp_lo)); + this.asm + .xmm_vex_rr(AvxOpcode::Vpsraw, tmp_hi, tmp_xmm, writable!(tmp_hi)); + + // Merge lower and upper bytes back. 
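+            // Thanks to the extra 8-bit shift above, every 16-bit lane already holds
+            // a value that fits in an i8, so the saturating pack below is lossless
+            // and effectively keeps the low byte of each lane.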
+ this.asm + .xmm_vex_rr(AvxOpcode::Vpacksswb, tmp_lo, tmp_hi, writable!(operand)); + + context.free_reg(tmp_lo); + context.free_reg(tmp_hi); + + Ok(()) + }; + + match (lane_width, kind) { + // shl + (OperandSize::S8, ShiftKind::Shl) => { + shift_i8x16(self, &I8X16_ISHL_MASKS, AvxOpcode::Vpsllw) + } + (OperandSize::S16, ShiftKind::Shl) => shl_normal(self, AvxOpcode::Vpsllw), + (OperandSize::S32, ShiftKind::Shl) => shl_normal(self, AvxOpcode::Vpslld), + (OperandSize::S64, ShiftKind::Shl) => shl_normal(self, AvxOpcode::Vpsllq), + // shr_u + (OperandSize::S8, ShiftKind::ShrU) => { + shift_i8x16(self, &I8X16_USHR_MASKS, AvxOpcode::Vpsrlw) + } + (OperandSize::S16, ShiftKind::ShrU) => shl_normal(self, AvxOpcode::Vpsrlw), + (OperandSize::S32, ShiftKind::ShrU) => shl_normal(self, AvxOpcode::Vpsrld), + (OperandSize::S64, ShiftKind::ShrU) => shl_normal(self, AvxOpcode::Vpsrlq), + // shr_s + (OperandSize::S8, ShiftKind::ShrS) => i8x16_shr_s(self, context)?, + (OperandSize::S16, ShiftKind::ShrS) => shl_normal(self, AvxOpcode::Vpsraw), + (OperandSize::S32, ShiftKind::ShrS) => shl_normal(self, AvxOpcode::Vpsrad), + (OperandSize::S64, ShiftKind::ShrS) => i64x2_shr_s(self, context)?, + + _ => bail!(CodeGenError::invalid_operand_combination()), + } + + context.free_reg(shift_amount); + context + .stack + .push(TypedReg::new(WasmValType::V128, operand).into()); + Ok(()) + } } impl MacroAssembler { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 1af99b406688..299bb144a1fa 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1817,4 +1817,19 @@ pub(crate) trait MacroAssembler { context: &mut CodeGenContext, lane_width: OperandSize, ) -> Result<()>; + + /// Vectorized negate of the content of `op`, with lanes of size `size`. + fn v128_neg(&mut self, op: WritableReg, size: OperandSize) -> Result<()>; + + /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit + /// integer at the top the the stack, on the 128-bit vector specified by the second value + /// from the top of the stack, interpreted as packed integers of size `lane_width`. + /// + /// The shift amount is taken modulo `lane_width`. + fn v128_shift( + &mut self, + context: &mut CodeGenContext, + lane_width: OperandSize, + kind: ShiftKind, + ) -> Result<()>; } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 6bdce5e1a819..12ea71c97be9 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -460,6 +460,22 @@ macro_rules! 
def_unsupported { (emit I16x8SubSatS $($rest:tt)*) => {}; (emit I8x16SubSatU $($rest:tt)*) => {}; (emit I16x8SubSatU $($rest:tt)*) => {}; + (emit I8x16Neg $($rest:tt)*) => {}; + (emit I16x8Neg $($rest:tt)*) => {}; + (emit I32x4Neg $($rest:tt)*) => {}; + (emit I64x2Neg $($rest:tt)*) => {}; + (emit I8x16Shl $($rest:tt)*) => {}; + (emit I16x8Shl $($rest:tt)*) => {}; + (emit I32x4Shl $($rest:tt)*) => {}; + (emit I64x2Shl $($rest:tt)*) => {}; + (emit I8x16ShrU $($rest:tt)*) => {}; + (emit I16x8ShrU $($rest:tt)*) => {}; + (emit I32x4ShrU $($rest:tt)*) => {}; + (emit I64x2ShrU $($rest:tt)*) => {}; + (emit I8x16ShrS $($rest:tt)*) => {}; + (emit I16x8ShrS $($rest:tt)*) => {}; + (emit I32x4ShrS $($rest:tt)*) => {}; + (emit I64x2ShrS $($rest:tt)*) => {}; (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } @@ -3908,6 +3924,94 @@ where }) } + fn visit_i8x16_neg(&mut self) -> Self::Output { + self.context.unop(self.masm, |masm, op| { + masm.v128_neg(writable!(op), OperandSize::S8)?; + Ok(TypedReg::new(WasmValType::V128, op)) + }) + } + + fn visit_i16x8_neg(&mut self) -> Self::Output { + self.context.unop(self.masm, |masm, op| { + masm.v128_neg(writable!(op), OperandSize::S16)?; + Ok(TypedReg::new(WasmValType::V128, op)) + }) + } + + fn visit_i32x4_neg(&mut self) -> Self::Output { + self.context.unop(self.masm, |masm, op| { + masm.v128_neg(writable!(op), OperandSize::S32)?; + Ok(TypedReg::new(WasmValType::V128, op)) + }) + } + + fn visit_i64x2_neg(&mut self) -> Self::Output { + self.context.unop(self.masm, |masm, op| { + masm.v128_neg(writable!(op), OperandSize::S64)?; + Ok(TypedReg::new(WasmValType::V128, op)) + }) + } + + fn visit_i8x16_shl(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S8, ShiftKind::Shl) + } + + fn visit_i16x8_shl(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S16, ShiftKind::Shl) + } + + fn visit_i32x4_shl(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S32, ShiftKind::Shl) + } + + fn visit_i64x2_shl(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S64, ShiftKind::Shl) + } + + fn visit_i8x16_shr_u(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S8, ShiftKind::ShrU) + } + + fn visit_i16x8_shr_u(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S16, ShiftKind::ShrU) + } + + fn visit_i32x4_shr_u(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S32, ShiftKind::ShrU) + } + + fn visit_i64x2_shr_u(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S64, ShiftKind::ShrU) + } + + fn visit_i8x16_shr_s(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S8, ShiftKind::ShrS) + } + + fn visit_i16x8_shr_s(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S16, ShiftKind::ShrS) + } + + fn visit_i32x4_shr_s(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S32, ShiftKind::ShrS) + } + + fn visit_i64x2_shr_s(&mut self) -> Self::Output { + self.masm + .v128_shift(&mut self.context, OperandSize::S64, ShiftKind::ShrS) + } + wasmparser::for_each_visit_simd_operator!(def_unsupported); }
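
The byte-lane shifts and the 64-bit-lane arithmetic right shift in `v128_shift` rely on identities rather than dedicated instructions. A minimal scalar sketch of those identities (plain Rust, not part of the patch; function names and the tiny test harness are illustrative only):

    /// i8 lanes shifted through a 16-bit shift, as `vpsllw` + `vpand` do above:
    /// two byte lanes are packed into one u16, shifted together, and the bits
    /// that crossed the byte boundary are cleared with row `n` of the mask table
    /// (`I8X16_ISHL_MASKS`; `shr_u` is symmetric with `0xff >> n`).
    fn i8x2_shl_via_i16(lo: u8, hi: u8, n: u32) -> (u8, u8) {
        let n = n & 7;                                     // shift amount mod 8
        let wide = (((hi as u16) << 8) | lo as u16) << n;  // one 16-bit lane shift
        let mask = 0xffu8 << n;                            // row `n` of I8X16_ISHL_MASKS
        ((wide as u8) & mask, ((wide >> 8) as u8) & mask)
    }

    /// Arithmetic right shift built from logical shifts (Hacker's Delight 2-7),
    /// the formula used for `i64x2.shr_s` above:
    ///   t = (1 << 63) >>_u n;  result = ((x >>_u n) ^ t) - t
    fn sar64_via_logical(x: i64, n: u32) -> i64 {
        let n = n & 63;
        let t = ((1u64 << 63) >> n) as i64;      // isolated, logically shifted sign bit
        let logical = ((x as u64) >> n) as i64;  // zero-filled shift of x
        (logical ^ t).wrapping_sub(t)            // flipping and subtracting sign-extends
    }

    fn main() {
        // The worked example from the `shift_i8x16` comment: lanes 11101110 and
        // 01001101 shifted left by 2, then masked with 11111100.
        assert_eq!(
            i8x2_shl_via_i16(0b1110_1110, 0b0100_1101, 2),
            (0b1011_1000, 0b0011_0100)
        );
        assert_eq!(sar64_via_logical(-1i64 << 40, 3), -1i64 << 37);
        assert_eq!(sar64_via_logical(42, 3), 5);
    }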