@@ -107,17 +107,32 @@ let hard_vec256_reg = Array.map (fun r -> {r with Reg.typ = Vec256}) hard_float_
107
107
(* The registers of [hard_float_reg], each copied with its [Reg.typ] field set
   to [Vec512]. *)
let hard_vec512_reg =
  hard_float_reg |> Array.map (fun reg -> { reg with Reg.typ = Vec512 })
108
108
(* The registers of [hard_float_reg], each copied with its [Reg.typ] field set
   to [Float32]. *)
let hard_float32_reg =
  hard_float_reg |> Array.map (fun reg -> { reg with Reg.typ = Float32 })
109
109
110
(* [add_hard_vec256_regs list ~f] returns [f hard_vec256_reg :: list] when
   [Arch.Extension.allow_vec256 ()] is true, and [list] unchanged otherwise.
   Note that the new element is prepended, so it comes before every element
   already in [list]. [f] is only called when the extension is allowed. *)
let add_hard_vec256_regs list ~f =
  match Arch.Extension.allow_vec256 () with
  | true -> f hard_vec256_reg :: list
  | false -> list
113
+
114
(* [add_hard_vec512_regs list ~f] returns [f hard_vec512_reg :: list] when
   [Arch.Extension.allow_vec512 ()] is true, and [list] unchanged otherwise.
   Note that the new element is prepended, so it comes before every element
   already in [list]. [f] is only called when the extension is allowed. *)
let add_hard_vec512_regs list ~f =
  match Arch.Extension.allow_vec512 () with
  | true -> f hard_vec512_reg :: list
  | false -> list
117
+
110
118
(* All hard register banks flattened into one array. The int, float, float32
   and vec128 banks are always present; the vec256 and vec512 banks are added
   through [add_hard_vec256_regs] and [add_hard_vec512_regs], which decide
   whether the bank is included. *)
let all_phys_regs =
  let keep_bank bank = bank in
  let always_present =
    [hard_int_reg; hard_float_reg; hard_float32_reg; hard_vec128_reg]
  in
  let with_vec256 = add_hard_vec256_regs always_present ~f:keep_bank in
  let with_vec512 = add_hard_vec512_regs with_vec256 ~f:keep_bank in
  Array.concat with_vec512
112
123
113
124
(* [phys_reg ty n] is the hard register numbered [n] in the bank that matches
   the machine type [ty].

   Integer-like types index [hard_int_reg] directly with [n]. Every other
   type indexes its bank with [n - 100]. For [Vec256] and [Vec512] the
   corresponding [Arch.Extension.require_*] check runs before the lookup. *)
let phys_reg (ty : machtype_component) n =
  (* Non-integer banks are addressed with register numbers offset by 100. *)
  let from_bank bank = bank.(n - 100) in
  match ty with
  | Int | Addr | Val -> hard_int_reg.(n)
  | Float -> from_bank hard_float_reg
  | Float32 -> from_bank hard_float32_reg
  | Vec128 | Valx2 -> from_bank hard_vec128_reg
  | Vec256 ->
    Arch.Extension.require_vec256 ();
    from_bank hard_vec256_reg
  | Vec512 ->
    Arch.Extension.require_vec512 ();
    from_bank hard_vec512_reg
121
136
122
137
(* Integer hard register number 0. *)
let rax = phys_reg (Int : machtype_component) 0

(* Integer hard register number 2. *)
let rdi = phys_reg (Int : machtype_component) 2
@@ -128,9 +143,14 @@ let r11 = phys_reg Int 11
128
143
(* Integer hard register number 12. *)
let rbp = phys_reg (Int : machtype_component) 12
129
144
130
145
(* CSE needs to know that all versions of xmm15 are destroyed. *)
131
(* [destroy_xmm n] is an array holding one register per machine type that can
   live in float register [n]: [phys_reg t (100 + n)] for each such type [t].

   The list of types is computed once, when this definition is evaluated:
   [Float], [Float32] and [Vec128] are always included, while [Vec256] and
   [Vec512] are added through [add_hard_vec256_regs] and
   [add_hard_vec512_regs]. Each call returns a fresh array. *)
let destroy_xmm =
  let scalar_and_vec128 : machtype_component list =
    [Float; Float32; Vec128]
  in
  let with_vec256 =
    add_hard_vec256_regs scalar_and_vec128
      ~f:(fun _ -> (Vec256 : machtype_component))
  in
  let with_vec512 =
    add_hard_vec512_regs with_vec256
      ~f:(fun _ -> (Vec512 : machtype_component))
  in
  let components = Array.of_list with_vec512 in
  fun n ->
    let reg_number = 100 + n in
    Array.map (fun component -> phys_reg component reg_number) components
134
154
135
155
let destroyed_by_plt_stub =
136
156
if not X86_proc. use_plt then [| |] else [| r10; r11 |]
@@ -189,6 +209,7 @@ let calling_conventions
189
209
ofs := ! ofs + size_vec128
190
210
end
191
211
| Vec256 ->
212
+ Arch.Extension. require_vec256 () ;
192
213
if ! float < = last_float then begin
193
214
loc.(i) < - phys_reg Vec256 ! float ;
194
215
incr float
@@ -198,6 +219,7 @@ let calling_conventions
198
219
ofs := ! ofs + size_vec256
199
220
end
200
221
| Vec512 ->
222
+ Arch.Extension. require_vec512 () ;
201
223
if ! float < = last_float then begin
202
224
loc.(i) < - phys_reg Vec512 ! float ;
203
225
incr float
@@ -390,21 +412,23 @@ let int_regs_destroyed_at_c_call =
390
412
391
413
(* Registers clobbered by a C call under the Win64 calling convention.

   The result contains:
   - the integer registers listed in [int_regs_destroyed_at_c_call_win64];
   - the first six registers of the float, float32 and vec128 banks
     (xmm6-xmm15 are preserved by the callee);
   - when [add_hard_vec256_regs] / [add_hard_vec512_regs] include them, the
     whole vec256 and vec512 banks.

   The wide banks are destroyed in full because a Win64 callee only preserves
   the low 128 bits of xmm6-xmm15. The upper parts of the matching ymm and zmm
   registers are volatile, so a 256-bit or 512-bit value cannot be kept in any
   of them across a call. *)
let destroyed_at_c_call_win64 =
  (* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *)
  [ Array.map (phys_reg Int) int_regs_destroyed_at_c_call_win64;
    Array.sub hard_float_reg 0 6;
    Array.sub hard_float32_reg 0 6;
    Array.sub hard_vec128_reg 0 6 ]
  (* Upper halves of ymm/zmm are caller-saved: no wide register survives. *)
  |> add_hard_vec256_regs ~f:(fun regs -> regs)
  |> add_hard_vec512_regs ~f:(fun regs -> regs)
  |> Array.concat
399
422
400
423
(* Registers clobbered by a C call under the Unix calling convention: the
   integer registers listed in [int_regs_destroyed_at_c_call], plus the whole
   float, float32 and vec128 banks. The whole vec256 and vec512 banks are
   added through [add_hard_vec256_regs] and [add_hard_vec512_regs]. *)
let destroyed_at_c_call_unix =
  (* Unix: rbx, rbp, r12-r15 preserved *)
  let whole_bank bank = bank in
  let int_regs = Array.map (phys_reg Int) int_regs_destroyed_at_c_call in
  let base = [int_regs; hard_float_reg; hard_float32_reg; hard_vec128_reg] in
  Array.concat
    (add_hard_vec512_regs ~f:whole_bank
       (add_hard_vec256_regs ~f:whole_bank base))
408
432
409
433
let destroyed_at_c_call =
410
434
(* C calling conventions preserve rbx, but it is clobbered
0 commit comments