Skip to content

Implement vec256/512 #3966

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/afl_instrument.ml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ and instrument = function

(* these are base cases and have no logging *)
| Cconst_int _ | Cconst_natint _ | Cconst_float32 _ | Cconst_float _
| Cconst_vec128 _ | Cconst_symbol _
| Cconst_vec128 _ | Cconst_vec256 _ | Cconst_vec512 _ | Cconst_symbol _
| Cvar _ as c -> c

let instrument_function c dbg =
Expand Down
2 changes: 1 addition & 1 deletion backend/amd64/CSE.ml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ let class_of_operation (op : Operation.t)
| Csel _
| Reinterpret_cast _ | Static_cast _
| Const_int _ | Const_float32 _ | Const_float _
| Const_symbol _ | Const_vec128 _
| Const_symbol _ | Const_vec128 _ | Const_vec256 _ | Const_vec512 _
| Stackoffset _ | Load _ | Store _ | Alloc _
| Intop _ | Intop_imm _ | Intop_atomic _
| Name_for_debugger _ | Probe_is_enabled _ | Opaque
Expand Down
28 changes: 25 additions & 3 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ module Extension = struct
| LZCNT
| BMI
| BMI2
| AVX
| AVX2
| AVX512F

let rank = function
| POPCNT -> 0
Expand All @@ -43,6 +46,9 @@ module Extension = struct
| LZCNT -> 8
| BMI -> 9
| BMI2 -> 10
| AVX -> 11
| AVX2 -> 12
| AVX512F -> 13

let compare left right = Int.compare (rank left) (rank right)
end
Expand All @@ -62,6 +68,9 @@ module Extension = struct
| LZCNT -> "LZCNT"
| BMI -> "BMI"
| BMI2 -> "BMI2"
| AVX -> "AVX"
| AVX2 -> "AVX2"
| AVX512F -> "AVX512F"

let generation = function
| POPCNT -> "Nehalem+"
Expand All @@ -75,18 +84,29 @@ module Extension = struct
| LZCNT -> "Haswell+"
| BMI -> "Haswell+"
| BMI2 -> "Haswell+"
| AVX -> "Sandybridge+"
| AVX2 -> "Haswell+"
| AVX512F -> "SkylakeXeon+"

let enabled_by_default = function
| SSE3 | SSSE3 | SSE4_1 | SSE4_2
| POPCNT | CLMUL | LZCNT | BMI | BMI2 -> true
| PREFETCHW | PREFETCHWT1 -> false
| POPCNT | CLMUL | LZCNT | BMI | BMI2 | AVX | AVX2 -> true
| PREFETCHW | PREFETCHWT1 | AVX512F -> false

let all = Set.of_list [ POPCNT; PREFETCHW; PREFETCHWT1; SSE3; SSSE3; SSE4_1; SSE4_2; CLMUL; LZCNT; BMI; BMI2 ]
let all = Set.of_list [ POPCNT; PREFETCHW; PREFETCHWT1; SSE3; SSSE3; SSE4_1; SSE4_2; CLMUL; LZCNT; BMI; BMI2; AVX; AVX2; AVX512F ]
(* Mutable set of currently enabled extensions; initialised to the members of
   [all] for which [enabled_by_default] returns [true]. *)
let config = ref (Set.filter enabled_by_default all)

(* [enabled t] is [true] iff [t] is a member of the current [config] set. *)
let enabled t = Set.mem t !config
(* [disabled t] is the negation of [enabled t]. *)
let disabled t = not (enabled t)

(* 256-bit vectors are allowed as soon as any one of AVX, AVX2 or AVX512F is
   enabled. The extensions are checked in that order. *)
let allow_vec256 () = enabled AVX || enabled AVX2 || enabled AVX512F
(* 512-bit vectors are allowed only when AVX512F is enabled. *)
let allow_vec512 () = enabled AVX512F

(* Does nothing when [allow_vec256 ()] holds; otherwise reports the problem
   through [Misc.fatal_error]. *)
let require_vec256 () =
  match allow_vec256 () with
  | true -> ()
  | false -> Misc.fatal_error "AVX or AVX512 is required for 256-bit vectors"
(* Does nothing when [allow_vec512 ()] holds; otherwise reports the problem
   through [Misc.fatal_error]. *)
let require_vec512 () =
  match allow_vec512 () with
  | true -> ()
  | false -> Misc.fatal_error "AVX512 is required for 512-bit vectors"

let args =
let y t = "-f" ^ (name t |> String.lowercase_ascii) in
let n t = "-fno-" ^ (name t |> String.lowercase_ascii) in
Expand Down Expand Up @@ -206,6 +226,8 @@ let size_int = 8
let size_float = 8

(* Sizes of the 128-, 256- and 512-bit vector types.
   NOTE(review): presumably expressed in bytes (16 * 8 = 128 bits, and so on)
   - confirm against the other [size_*] constants. *)
let size_vec128 = 16
let size_vec256 = 32
let size_vec512 = 64

let allow_unaligned_access = true

Expand Down
12 changes: 12 additions & 0 deletions backend/amd64/arch.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,19 @@ module Extension : sig
to Haswell, i.e. they do not cause an illegal instruction fault.
That means code using LZCNT/TZCNT will silently produce wrong results. *)
| BMI2
| AVX
| AVX2
| AVX512F

val name : t -> string

val enabled : t -> bool
val available : unit -> t list

(** [allow_vec256 ()] is [true] when at least one of [AVX], [AVX2] or
    [AVX512F] is enabled. *)
val allow_vec256 : unit -> bool
(** [allow_vec512 ()] is [true] when [AVX512F] is enabled. *)
val allow_vec512 : unit -> bool
(** [require_vec256 ()] returns unit when [allow_vec256 ()] holds and reports a
    fatal error otherwise. *)
val require_vec256 : unit -> unit
(** [require_vec512 ()] returns unit when [allow_vec512 ()] holds and reports a
    fatal error otherwise. *)
val require_vec512 : unit -> unit
end

val trap_notes : bool ref
Expand Down Expand Up @@ -116,6 +124,10 @@ val size_float : int

val size_vec128 : int

val size_vec256 : int

val size_vec512 : int

val allow_unaligned_access : bool

val division_crashes_on_overflow : bool
Expand Down
6 changes: 3 additions & 3 deletions backend/amd64/cfg_selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ let pseudoregs_for_operation op arg res =
| Ioffset_loc (_, _)
| Irdtsc | Icldemote _ | Iprefetch _ )
| Move | Spill | Reload | Reinterpret_cast _ | Static_cast _ | Const_int _
| Const_float32 _ | Const_float _ | Const_vec128 _ | Const_symbol _
| Stackoffset _ | Load _
| Const_float32 _ | Const_float _ | Const_vec128 _ | Const_vec256 _
| Const_vec512 _ | Const_symbol _ | Stackoffset _ | Load _
| Store (_, _, _)
| Alloc _ | Name_for_debugger _ | Probe_is_enabled _ | Opaque | Begin_region
| End_region | Poll | Dls_get ->
Expand Down Expand Up @@ -271,7 +271,7 @@ let select_store ~is_assign addr (exp : Cmm.expression) :
(Specific (Istore_int (Nativeint.of_int n, addr, is_assign)), Ctuple [])
| Cconst_natint (n, _dbg) when is_immediate_natint n ->
Rewritten (Specific (Istore_int (n, addr, is_assign)), Ctuple [])
| Cconst_int _ | Cconst_vec128 _
| Cconst_int _ | Cconst_vec128 _ | Cconst_vec256 _ | Cconst_vec512 _
| Cconst_natint (_, _)
| Cconst_float32 (_, _)
| Cconst_float (_, _)
Expand Down
Loading
Loading