bytecodealliance · saulecabrera · Feb 12, 2025 · Feb 11, 2025 · Feb 12, 2025
@@ -433,7 +433,6 @@ impl WastTest {
                 "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
                 "spec_testsuite/simd_i16x8_extmul_i8x16.wast",
                 "spec_testsuite/simd_i32x4_arith2.wast",
-                "spec_testsuite/simd_i32x4_dot_i16x8.wast",
                 "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast",
                 "spec_testsuite/simd_i32x4_extmul_i16x8.wast",
                 "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
@@ -493,9 +492,10 @@ impl WastTest {
                     "spec_testsuite/simd_i8x16_sat_arith.wast",
                     "spec_testsuite/simd_i64x2_arith.wast",
                     "spec_testsuite/simd_i16x8_arith.wast",
-                    "spec_testsuite/simd_i32x4_arith.wast",
                     "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
                     "spec_testsuite/simd_i16x8_sat_arith.wast",
+                    "spec_testsuite/simd_i32x4_arith.wast",
+                    "spec_testsuite/simd_i32x4_dot_i16x8.wast",
                     "spec_testsuite/simd_i8x16_arith.wast",
                     "spec_testsuite/simd_bit_shift.wast",
                     "spec_testsuite/simd_lane.wast",

@@ -0,0 +1,47 @@
+;;! target = "x86_64"
+;;! test = "winch"
+;;! flags = [ "-Ccranelift-has-avx" ]
+
+(module
+    ;; Exercises `i32x4.dot_i16x8_s` on two constant vectors. The operands are
+    ;; written as `i32x4` constants; the instruction reads them as 16-bit lanes.
+    (func (result v128)
+        (i32x4.dot_i16x8_s (v128.const i32x4 0 1 2 3) (v128.const i32x4 3 2 1 0))
+    )
+)
+;; wasm[0]::function[0]:
+;;       pushq   %rbp
+;;       movq    %rsp, %rbp
+;;       movq    8(%rdi), %r11
+;;       movq    0x10(%r11), %r11
+;;       addq    $0x10, %r11
+;;       cmpq    %rsp, %r11
+;;       ja      0x4a
+;;   1c: movq    %rdi, %r14
+;;       subq    $0x10, %rsp
+;;       movq    %rdi, 8(%rsp)
+;;       movq    %rsi, (%rsp)
+;;       movdqu  0x1c(%rip), %xmm0
+;;       movdqu  0x24(%rip), %xmm1
+;;       vpmaddwd %xmm0, %xmm1, %xmm1
+;;       movdqa  %xmm1, %xmm0
+;;       addq    $0x10, %rsp
+;;       popq    %rbp
+;;       retq
+;;   4a: ud2
+;;   4c: addb    %al, (%rax)
+;;   4e: addb    %al, (%rax)
+;;   50: addl    (%rax), %eax
+;;   52: addb    %al, (%rax)
+;;   54: addb    (%rax), %al
+;;   56: addb    %al, (%rax)
+;;   58: addl    %eax, (%rax)
+;;   5a: addb    %al, (%rax)
+;;   5c: addb    %al, (%rax)
+;;   5e: addb    %al, (%rax)
+;;   60: addb    %al, (%rax)
+;;   62: addb    %al, (%rax)
+;;   64: addl    %eax, (%rax)
+;;   66: addb    %al, (%rax)
+;;   68: addb    (%rax), %al
+;;   6a: addb    %al, (%rax)
+;;   6c: addl    (%rax), %eax
+;;   6e: addb    %al, (%rax)
@@ -1193,6 +1193,10 @@ impl Masm for MacroAssembler {
     fn v128_bitmask(&mut self, _src: Reg, _dst: WritableReg, _size: OperandSize) -> Result<()> {
         bail!(CodeGenError::unimplemented_masm_instruction())
     }
+
+    /// Not implemented by this `MacroAssembler`: no code is emitted and the
+    /// call always bails with `CodeGenError::unimplemented_masm_instruction()`.
+    /// All three operands are therefore unused.
+    fn v128_dot(&mut self, _lhs: Reg, _rhs: Reg, _dst: WritableReg) -> Result<()> {
+        bail!(CodeGenError::unimplemented_masm_instruction())
+    }
 }
 
 impl MacroAssembler {

@@ -2542,6 +2542,12 @@ impl Masm for MacroAssembler {
         }
         Ok(())
     }
+
+    /// Emits `vpmaddwd` through `xmm_vex_rr`, with `lhs` and `rhs` as the
+    /// sources and `dst` as the destination.
+    ///
+    /// Returns the error from `ensure_has_avx` without emitting anything when
+    /// that check fails.
+    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()> {
+        // The check must come first so that nothing is emitted on failure.
+        self.ensure_has_avx()?;
+        self.asm.xmm_vex_rr(AvxOpcode::Vpmaddwd, lhs, rhs, dst);
+        Ok(())
+    }
 }
 
 impl MacroAssembler {

@@ -1881,4 +1881,8 @@ pub(crate) trait MacroAssembler {
     /// Extracts the high bit of each lane in `src` and produces a scalar mask
     /// with all bits concatenated in `dst`.
     fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
+
+    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
+    /// adjacent pairs of the 32-bit results, writing the sums to `dst`.
+    ///
+    /// Implementations may return an error instead of emitting code, for
+    /// example when the operation is not implemented for the target.
+    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
 }
@@ -491,6 +491,7 @@ macro_rules! def_unsupported {
     (emit I16x8Bitmask $($rest:tt)*) => {};
     (emit I32x4Bitmask $($rest:tt)*) => {};
     (emit I64x2Bitmask $($rest:tt)*) => {};
+    (emit I32x4DotI16x8S $($rest:tt)*) => {};
 
     (emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
 }
@@ -4125,6 +4126,14 @@ where
         })
     }
 
+    /// Lowers `i32x4.dot_i16x8_s` as a binary operation backed by
+    /// `v128_dot`.
+    ///
+    /// The register that `binop` hands over as the destination doubles as the
+    /// left-hand input, and the result is reported as a `v128` value living in
+    /// that same register.
+    fn visit_i32x4_dot_i16x8_s(&mut self) -> Self::Output {
+        self.context
+            .binop(self.masm, OperandSize::S32, |masm, lhs, rhs, _| {
+                // Write the result over the left-hand operand.
+                let result = writable!(lhs);
+                masm.v128_dot(lhs, rhs, result)?;
+                Ok(TypedReg::v128(lhs))
+            })
+    }
+
     wasmparser::for_each_visit_simd_operator!(def_unsupported);
 }