oxcaml · TheNumbat · Jun 13, 2025 · May 22, 2025 · May 23, 2025 · May 27, 2025
diff --git a/.github/workflows/80ch.sh b/.github/workflows/80ch.sh
@@ -44,8 +44,11 @@ do
     # Don't check tests for long lines
     # (a more sophisticated script would instead remove %%expect blocks)
     testsuite/tests/*) ;&
+    oxcaml/tests/*) ;&
     # Don't check files generated by menhir either
-    middle_end/flambda2/parser/flambda_parser.ml)
+    middle_end/flambda2/parser/flambda_parser.ml) ;&
+    # Don't check generated simd instruction defs
+    tools/simdgen/amd64_simd_instrs.ml)
       continue ;;
   esac
 

diff --git a/backend/afl_instrument.ml b/backend/afl_instrument.ml
@@ -101,7 +101,7 @@ and instrument = function
 
   (* these are base cases and have no logging *)
   | Cconst_int _ | Cconst_natint _ | Cconst_float32 _ | Cconst_float _
-  | Cconst_vec128 _ | Cconst_symbol _
+  | Cconst_vec128 _ | Cconst_vec256 _ | Cconst_vec512 _ | Cconst_symbol _
   | Cvar _ as c -> c
 
 let instrument_function c dbg =

diff --git a/backend/amd64/CSE.ml b/backend/amd64/CSE.ml
@@ -52,7 +52,7 @@ let class_of_operation (op : Operation.t)
   | Csel _
   | Reinterpret_cast _ | Static_cast _
   | Const_int _ | Const_float32 _ | Const_float _
-  | Const_symbol _ | Const_vec128 _
+  | Const_symbol _ | Const_vec128 _ | Const_vec256 _ | Const_vec512 _
   | Stackoffset _ | Load _ | Store _ | Alloc _
   | Intop _ | Intop_imm _ | Intop_atomic _
   | Name_for_debugger _ | Probe_is_enabled _ | Opaque

diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml
@@ -30,6 +30,9 @@ module Extension = struct
       | LZCNT
       | BMI
       | BMI2
+      | AVX
+      | AVX2
+      | AVX512F
 
     let rank = function
       | POPCNT -> 0
@@ -43,6 +46,9 @@ module Extension = struct
       | LZCNT -> 8
       | BMI -> 9
       | BMI2 -> 10
+      | AVX -> 11
+      | AVX2 -> 12
+      | AVX512F -> 13
 
     let compare left right = Int.compare (rank left) (rank right)
   end
@@ -62,6 +68,9 @@ module Extension = struct
     | LZCNT -> "LZCNT"
     | BMI -> "BMI"
     | BMI2 -> "BMI2"
+    | AVX -> "AVX"
+    | AVX2 -> "AVX2"
+    | AVX512F -> "AVX512F"
 
   let generation = function
     | POPCNT -> "Nehalem+"
@@ -75,18 +84,36 @@ module Extension = struct
     | LZCNT -> "Haswell+"
     | BMI -> "Haswell+"
     | BMI2 -> "Haswell+"
+    | AVX -> "Sandybridge+"
+    | AVX2 -> "Haswell+"
+    | AVX512F -> "SkylakeXeon+"
 
   let enabled_by_default = function
     | SSE3 | SSSE3 | SSE4_1 | SSE4_2
-    | POPCNT | CLMUL | LZCNT | BMI | BMI2 -> true
-    | PREFETCHW | PREFETCHWT1 -> false
+    | POPCNT | CLMUL | LZCNT | BMI | BMI2 | AVX | AVX2 -> true
+    | PREFETCHW | PREFETCHWT1 | AVX512F -> false (* off in initial [config] *)
+
+  let all = (* superset that [config] is filtered from *)
+    [ POPCNT; PREFETCHW; PREFETCHWT1; SSE3; SSSE3; SSE4_1; SSE4_2; CLMUL;
+      LZCNT; BMI; BMI2; AVX; AVX2; AVX512F ]
+    |> Set.of_list
 
-  let all = Set.of_list [ POPCNT; PREFETCHW; PREFETCHWT1; SSE3; SSSE3; SSE4_1; SSE4_2; CLMUL; LZCNT; BMI; BMI2 ]
   let config = ref (Set.filter enabled_by_default all)
 
   let enabled t = Set.mem t !config
   let disabled t = not (enabled t)
 
+  let allow_vec256 () = enabled AVX || enabled AVX2 || enabled AVX512F
+  let allow_vec512 () = enabled AVX512F (* only AVX512F qualifies *)
+
+  let require_vec256 () = (* guard: 256-bit vectors must be allowed *)
+    if allow_vec256 () then ()
+    else Misc.fatal_error "AVX or AVX512 is required for 256-bit vectors"
+
+  let require_vec512 () = (* guard: 512-bit vectors must be allowed *)
+    if allow_vec512 () then ()
+    else Misc.fatal_error "AVX512 is required for 512-bit vectors"
+
   let args =
     let y t = "-f" ^ (name t |> String.lowercase_ascii) in
     let n t = "-fno-" ^ (name t |> String.lowercase_ascii) in
@@ -206,6 +233,8 @@ let size_int = 8
 let size_float = 8
 
 let size_vec128 = 16
+let size_vec256 = 32 (* 256 bits / 8 *)
+let size_vec512 = 64 (* 512 bits / 8 *)
 
 let allow_unaligned_access = true
 

diff --git a/backend/amd64/arch.mli b/backend/amd64/arch.mli
@@ -31,11 +31,19 @@ module Extension : sig
              to Haswell, i.e. they do not cause an illegal instruction fault.
              That means code using LZCNT/TZCNT will silently produce wrong results. *)
     | BMI2
+    | AVX
+    | AVX2
+    | AVX512F
 
   val name : t -> string
 
   val enabled : t -> bool
   val available : unit -> t list
+
+  val allow_vec256 : unit -> bool (* AVX, AVX2 or AVX512F is enabled *)
+  val allow_vec512 : unit -> bool (* AVX512F is enabled *)
+  val require_vec256 : unit -> unit (* Misc.fatal_error unless allowed *)
+  val require_vec512 : unit -> unit (* Misc.fatal_error unless allowed *)
 end
 
 val trap_notes : bool ref
@@ -116,6 +124,10 @@ val size_float : int
 
 val size_vec128 : int
 
+val size_vec256 : int
+
+val size_vec512 : int
+
 val allow_unaligned_access : bool
 
 val division_crashes_on_overflow : bool

diff --git a/backend/amd64/cfg_selection.ml b/backend/amd64/cfg_selection.ml
@@ -95,8 +95,6 @@ let rcx = phys_reg Int 5
 
 let rdx = phys_reg Int 4
 
-let _xmm0v () = phys_reg Vec128 100
-
 let select_locality (l : Cmm.prefetch_temporal_locality_hint) :
     Arch.prefetch_temporal_locality_hint =
   match l with
@@ -213,8 +211,8 @@ let pseudoregs_for_operation op arg res =
       | Ioffset_loc (_, _)
       | Irdtsc | Icldemote _ | Iprefetch _ )
   | Move | Spill | Reload | Reinterpret_cast _ | Static_cast _ | Const_int _
-  | Const_float32 _ | Const_float _ | Const_vec128 _ | Const_symbol _
-  | Stackoffset _ | Load _
+  | Const_float32 _ | Const_float _ | Const_vec128 _ | Const_vec256 _
+  | Const_vec512 _ | Const_symbol _ | Stackoffset _ | Load _
   | Store (_, _, _)
   | Alloc _ | Name_for_debugger _ | Probe_is_enabled _ | Opaque | Begin_region
   | End_region | Poll | Dls_get ->
@@ -271,7 +269,7 @@ let select_store ~is_assign addr (exp : Cmm.expression) :
       (Specific (Istore_int (Nativeint.of_int n, addr, is_assign)), Ctuple [])
   | Cconst_natint (n, _dbg) when is_immediate_natint n ->
     Rewritten (Specific (Istore_int (n, addr, is_assign)), Ctuple [])
-  | Cconst_int _ | Cconst_vec128 _
+  | Cconst_int _ | Cconst_vec128 _ | Cconst_vec256 _ | Cconst_vec512 _
   | Cconst_natint (_, _)
   | Cconst_float32 (_, _)
   | Cconst_float (_, _)