diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index fadcef08e961..cd770e67826f 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -433,7 +433,6 @@ impl WastTest { "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast", "spec_testsuite/simd_i16x8_extmul_i8x16.wast", "spec_testsuite/simd_i32x4_arith2.wast", - "spec_testsuite/simd_i32x4_dot_i16x8.wast", "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", "spec_testsuite/simd_i32x4_extmul_i16x8.wast", "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", @@ -493,9 +492,10 @@ impl WastTest { "spec_testsuite/simd_i8x16_sat_arith.wast", "spec_testsuite/simd_i64x2_arith.wast", "spec_testsuite/simd_i16x8_arith.wast", - "spec_testsuite/simd_i32x4_arith.wast", "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast", "spec_testsuite/simd_i16x8_sat_arith.wast", + "spec_testsuite/simd_i32x4_arith.wast", + "spec_testsuite/simd_i32x4_dot_i16x8.wast", "spec_testsuite/simd_i8x16_arith.wast", "spec_testsuite/simd_bit_shift.wast", "spec_testsuite/simd_lane.wast", diff --git a/tests/disas/winch/x64/i32x4_dot_i16x8_s/const_avx.wat b/tests/disas/winch/x64/i32x4_dot_i16x8_s/const_avx.wat new file mode 100644 index 000000000000..299983e4a540 --- /dev/null +++ b/tests/disas/winch/x64/i32x4_dot_i16x8_s/const_avx.wat @@ -0,0 +1,47 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = [ "-Ccranelift-has-avx" ] + +(module + (func (result v128) + (i32x4.dot_i16x8_s (v128.const i32x4 0 1 2 3) (v128.const i32x4 3 2 1 0)) + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x10(%r11), %r11 +;; addq $0x10, %r11 +;; cmpq %rsp, %r11 +;; ja 0x4a +;; 1c: movq %rdi, %r14 +;; subq $0x10, %rsp +;; movq %rdi, 8(%rsp) +;; movq %rsi, (%rsp) +;; movdqu 0x1c(%rip), %xmm0 +;; movdqu 0x24(%rip), %xmm1 +;; vpmaddwd %xmm0, %xmm1, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x10, %rsp +;; popq %rbp +;; retq +;; 4a: ud2 +;; 4c: addb %al, (%rax) +;; 4e: addb %al, (%rax) +;; 50: addl (%rax), %eax +;; 52: addb %al, (%rax) +;; 54: addb (%rax), %al +;; 56: addb %al, (%rax) +;; 58: addl %eax, (%rax) +;; 5a: addb %al, (%rax) +;; 5c: addb %al, (%rax) +;; 5e: addb %al, (%rax) +;; 60: addb %al, (%rax) +;; 62: addb %al, (%rax) +;; 64: addl %eax, (%rax) +;; 66: addb %al, (%rax) +;; 68: addb (%rax), %al +;; 6a: addb %al, (%rax) +;; 6c: addl (%rax), %eax +;; 6e: addb %al, (%rax) diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 2e9fcc2aefec..147ba23e2207 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1193,6 +1193,10 @@ impl Masm for MacroAssembler { fn v128_bitmask(&mut self, _src: Reg, _dst: WritableReg, _size: OperandSize) -> Result<()> { bail!(CodeGenError::unimplemented_masm_instruction()) } + + fn v128_dot(&mut self, _lhs: Reg, _rhs: Reg, _dst: WritableReg) -> Result<()> { + bail!(CodeGenError::unimplemented_masm_instruction()) + } } impl MacroAssembler { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index fc4e04e2a2a5..2da64b161969 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -2542,6 +2542,12 @@ impl Masm for MacroAssembler { } Ok(()) } + + fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()> { + self.ensure_has_avx()?; + self.asm.xmm_vex_rr(AvxOpcode::Vpmaddwd, lhs, rhs, dst); + Ok(()) + } } impl MacroAssembler { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 478e3a257f4f..61bb586ea130 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1881,4 +1881,8 @@ pub(crate) trait MacroAssembler { /// Extracts the high bit of each lane in `src` and produces a scalar mask /// with all bits concatenated in `dst`. fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>; + + /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add + /// adjacent pairs of the 32-bit results. + fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>; } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index a85e9ec71b16..656211416995 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -491,6 +491,7 @@ macro_rules! def_unsupported { (emit I16x8Bitmask $($rest:tt)*) => {}; (emit I32x4Bitmask $($rest:tt)*) => {}; (emit I64x2Bitmask $($rest:tt)*) => {}; + (emit I32x4DotI16x8S $($rest:tt)*) => {}; (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } @@ -4125,6 +4126,14 @@ where }) } + fn visit_i32x4_dot_i16x8_s(&mut self) -> Self::Output { + self.context + .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { + masm.v128_dot(dst, src, writable!(dst))?; + Ok(TypedReg::v128(dst)) + }) + } + wasmparser::for_each_visit_simd_operator!(def_unsupported); }