Skip to content

(arm64) fix save/restore for vec128 #3979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 12, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 60 additions & 60 deletions runtime/arm64.S
Original file line number Diff line number Diff line change
Expand Up @@ -280,18 +280,18 @@ G(name):
str x25, [TMP, 192]
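/* Save caller-save floating-point registers
(callee-saves are preserved by C functions).
NOTE(review): AAPCS64 only guarantees the low 64 bits of v8-v15
across a C call; confirm no live 128-bit value can sit in q8-q15 here. */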
stp d0, d1, [TMP, 208]
stp d2, d3, [TMP, 224]
stp d4, d5, [TMP, 240]
stp d6, d7, [TMP, 256]
stp d16, d17, [TMP, 272]
stp d18, d19, [TMP, 288]
stp d20, d21, [TMP, 304]
stp d22, d23, [TMP, 320]
stp d24, d25, [TMP, 336]
stp d26, d27, [TMP, 352]
stp d28, d29, [TMP, 368]
stp d30, d31, [TMP, 384]
stp q0, q1, [TMP, 208]
stp q2, q3, [TMP, 240]
stp q4, q5, [TMP, 272]
stp q6, q7, [TMP, 304]
stp q16, q17, [TMP, 336]
stp q18, q19, [TMP, 368]
stp q20, q21, [TMP, 400]
stp q22, q23, [TMP, 432]
stp q24, q25, [TMP, 464]
stp q26, q27, [TMP, 496]
stp q28, q29, [TMP, 528]
stp q30, q31, [TMP, 560]
add TMP, TMP, #16
str TMP, Caml_state(gc_regs)
.endm
Expand All @@ -315,18 +315,18 @@ G(name):
ldp x21, x22, [TMP, 160]
ldp x23, x24, [TMP, 176]
ldr x25, [TMP, 192]
ldp d0, d1, [TMP, 208]
ldp d2, d3, [TMP, 224]
ldp d4, d5, [TMP, 240]
ldp d6, d7, [TMP, 256]
ldp d16, d17, [TMP, 272]
ldp d18, d19, [TMP, 288]
ldp d20, d21, [TMP, 304]
ldp d22, d23, [TMP, 320]
ldp d24, d25, [TMP, 336]
ldp d26, d27, [TMP, 352]
ldp d28, d29, [TMP, 368]
ldp d30, d31, [TMP, 384]
ldp q0, q1, [TMP, 208]
ldp q2, q3, [TMP, 240]
ldp q4, q5, [TMP, 272]
ldp q6, q7, [TMP, 304]
ldp q16, q17, [TMP, 336]
ldp q18, q19, [TMP, 368]
ldp q20, q21, [TMP, 400]
ldp q22, q23, [TMP, 432]
ldp q24, q25, [TMP, 464]
ldp q26, q27, [TMP, 496]
ldp q28, q29, [TMP, 528]
ldp q30, q31, [TMP, 560]
/* Put gc_regs struct back in bucket linked list */
ldr TMP2, Caml_state(gc_regs_buckets)
str TMP2, [TMP, 0] /* next ptr */
Expand Down Expand Up @@ -392,18 +392,18 @@ G(name):
stp x12, x13, [TMP, 112]
stp x14, x15, [TMP, 128]
/* Save caller-save floating-point registers */
stp d0, d1, [TMP, 208]
stp d2, d3, [TMP, 224]
stp d4, d5, [TMP, 240]
stp d6, d7, [TMP, 256]
stp d16, d17, [TMP, 272]
stp d18, d19, [TMP, 288]
stp d20, d21, [TMP, 304]
stp d22, d23, [TMP, 320]
stp d24, d25, [TMP, 336]
stp d26, d27, [TMP, 352]
stp d28, d29, [TMP, 368]
stp d30, d31, [TMP, 384]
stp q0, q1, [TMP, 208]
stp q2, q3, [TMP, 240]
stp q4, q5, [TMP, 272]
stp q6, q7, [TMP, 304]
stp q16, q17, [TMP, 336]
stp q18, q19, [TMP, 368]
stp q20, q21, [TMP, 400]
stp q22, q23, [TMP, 432]
stp q24, q25, [TMP, 464]
stp q26, q27, [TMP, 496]
stp q28, q29, [TMP, 528]
stp q30, q31, [TMP, 560]
add TMP, TMP, #16
str TMP, Caml_state(gc_regs)
.endm
Expand All @@ -423,18 +423,18 @@ G(name):
ldp x10, x11, [TMP, 96]
ldp x12, x13, [TMP, 112]
ldp x14, x15, [TMP, 128]
ldp d0, d1, [TMP, 208]
ldp d2, d3, [TMP, 224]
ldp d4, d5, [TMP, 240]
ldp d6, d7, [TMP, 256]
ldp d16, d17, [TMP, 272]
ldp d18, d19, [TMP, 288]
ldp d20, d21, [TMP, 304]
ldp d22, d23, [TMP, 320]
ldp d24, d25, [TMP, 336]
ldp d26, d27, [TMP, 352]
ldp d28, d29, [TMP, 368]
ldp d30, d31, [TMP, 384]
ldp q0, q1, [TMP, 208]
ldp q2, q3, [TMP, 240]
ldp q4, q5, [TMP, 272]
ldp q6, q7, [TMP, 304]
ldp q16, q17, [TMP, 336]
ldp q18, q19, [TMP, 368]
ldp q20, q21, [TMP, 400]
ldp q22, q23, [TMP, 432]
ldp q24, q25, [TMP, 464]
ldp q26, q27, [TMP, 496]
ldp q28, q29, [TMP, 528]
ldp q30, q31, [TMP, 560]
/* Put gc_regs struct back in bucket linked list */
ldr TMP2, Caml_state(gc_regs_buckets)
str TMP2, [TMP, 0] /* next ptr */
Expand Down Expand Up @@ -732,8 +732,8 @@ FUNCTION(caml_start_program)

L(jump_to_caml):
/* Set up stack frame and save callee-save registers */
stp x29, x30, [sp, -160]!
CFI_ADJUST(160)
stp x29, x30, [sp, -224]!
CFI_ADJUST(224)
CFI_OFFSET(29, 0)
CFI_OFFSET(30, 8)
add x29, sp, #0
Expand All @@ -742,10 +742,10 @@ L(jump_to_caml):
stp x23, x24, [sp, 48]
stp x25, x26, [sp, 64]
stp x27, x28, [sp, 80]
stp d8, d9, [sp, 96]
stp d10, d11, [sp, 112]
stp d12, d13, [sp, 128]
stp d14, d15, [sp, 144]
stp q8, q9, [sp, 96]
stp q10, q11, [sp, 128]
stp q12, q13, [sp, 160]
stp q14, q15, [sp, 192]
/* Load domain state pointer from argument */
mov DOMAIN_STATE_PTR, TMP
/* Reload allocation pointer */
Expand Down Expand Up @@ -841,12 +841,12 @@ L(return_result):
ldp x23, x24, [sp, 48]
ldp x25, x26, [sp, 64]
ldp x27, x28, [sp, 80]
ldp d8, d9, [sp, 96]
ldp d10, d11, [sp, 112]
ldp d12, d13, [sp, 128]
ldp d14, d15, [sp, 144]
ldp x29, x30, [sp], 160
CFI_ADJUST(-160)
ldp q8, q9, [sp, 96]
ldp q10, q11, [sp, 128]
ldp q12, q13, [sp, 160]
ldp q14, q15, [sp, 192]
ldp x29, x30, [sp], 224
CFI_ADJUST(-224)
/* Return to C caller */
ret
CFI_ENDPROC
Expand Down
148 changes: 74 additions & 74 deletions runtime4/arm64.S
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,11 @@ L(caml_call_gc):
mov TMP, sp
str TMP, Caml_state(bottom_of_stack)
/* Set up stack space, saving return address and frame pointer */
/* (2 regs RA/GP, 24 allocatable int regs, 24 caller-save float regs) * 8 */
CFI_OFFSET(29, -400)
CFI_OFFSET(30, -392)
stp x29, x30, [sp, -400]!
CFI_ADJUST(400)
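/* (2 regs RA/FP + 24 allocatable int regs) * 8 + (24 caller-save float regs) * 16 = 592 */
/* NOTE(review): 592 is outside the -512..504 immediate range of pre/post-indexed
stp/ldp on x registers; confirm this assembles, or adjust sp with a separate sub/add */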
CFI_OFFSET(29, -592)
CFI_OFFSET(30, -584)
stp x29, x30, [sp, -592]!
CFI_ADJUST(592)
add x29, sp, #0
/* Save allocatable integer registers on the stack, in the order
given in proc.ml */
Expand All @@ -186,18 +186,18 @@ L(caml_call_gc):
str x25, [sp, 192]
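/* Save caller-save floating-point registers on the stack
(callee-saves are preserved by caml_garbage_collection).
NOTE(review): AAPCS64 only guarantees the low 64 bits of v8-v15
across a C call; confirm no live 128-bit value can sit in q8-q15 here. */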
stp d0, d1, [sp, 208]
stp d2, d3, [sp, 224]
stp d4, d5, [sp, 240]
stp d6, d7, [sp, 256]
stp d16, d17, [sp, 272]
stp d18, d19, [sp, 288]
stp d20, d21, [sp, 304]
stp d22, d23, [sp, 320]
stp d24, d25, [sp, 336]
stp d26, d27, [sp, 352]
stp d28, d29, [sp, 368]
stp d30, d31, [sp, 384]
stp q0, q1, [sp, 208]
stp q2, q3, [sp, 240]
stp q4, q5, [sp, 272]
stp q6, q7, [sp, 304]
stp q16, q17, [sp, 336]
stp q18, q19, [sp, 368]
stp q20, q21, [sp, 400]
stp q22, q23, [sp, 432]
stp q24, q25, [sp, 464]
stp q26, q27, [sp, 496]
stp q28, q29, [sp, 528]
stp q30, q31, [sp, 560]
/* Store pointer to saved integer registers in Caml_state->gc_regs */
add TMP, sp, #16
str TMP, Caml_state(gc_regs)
Expand All @@ -220,22 +220,22 @@ L(caml_call_gc):
ldp x21, x22, [sp, 160]
ldp x23, x24, [sp, 176]
ldr x25, [sp, 192]
ldp d0, d1, [sp, 208]
ldp d2, d3, [sp, 224]
ldp d4, d5, [sp, 240]
ldp d6, d7, [sp, 256]
ldp d16, d17, [sp, 272]
ldp d18, d19, [sp, 288]
ldp d20, d21, [sp, 304]
ldp d22, d23, [sp, 320]
ldp d24, d25, [sp, 336]
ldp d26, d27, [sp, 352]
ldp d28, d29, [sp, 368]
ldp d30, d31, [sp, 384]
ldp q0, q1, [sp, 208]
ldp q2, q3, [sp, 240]
ldp q4, q5, [sp, 272]
ldp q6, q7, [sp, 304]
ldp q16, q17, [sp, 336]
ldp q18, q19, [sp, 368]
ldp q20, q21, [sp, 400]
ldp q22, q23, [sp, 432]
ldp q24, q25, [sp, 464]
ldp q26, q27, [sp, 496]
ldp q28, q29, [sp, 528]
ldp q30, q31, [sp, 560]
/* Reload new allocation pointer */
ldr ALLOC_PTR, Caml_state(young_ptr)
/* Free stack space and return to caller */
ldp x29, x30, [sp], 400
ldp x29, x30, [sp], 592
ret
CFI_ENDPROC
END_FUNCTION(caml_call_gc)
Expand Down Expand Up @@ -285,11 +285,11 @@ L(caml_call_local_realloc):
CFI_STARTPROC
/* Set up stack space, saving return address and frame pointer */
/* Store return address and frame pointer */
/* (2 RA/GP, 24 allocatable int regs, 24 caller-saved float regs) * 8 */
CFI_OFFSET(29,-400)
CFI_OFFSET(30,-392)
stp x29, x30, [sp,-400]! /* pre-indexing stp */
CFI_ADJUST(400)
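/* (2 regs RA/FP + 24 allocatable int regs) * 8 + (24 caller-saved float regs) * 16 = 592 */
/* NOTE(review): 592 is outside the -512..504 immediate range of pre/post-indexed
stp/ldp on x registers; confirm this assembles, or adjust sp with a separate sub/add */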
CFI_OFFSET(29,-592)
CFI_OFFSET(30,-584)
stp x29, x30, [sp,-592]! /* pre-indexing stp */
CFI_ADJUST(592)
add x29, sp, #0

/* Save allocatable integer registers on the stack, using order in proc.ml */
Expand All @@ -307,18 +307,18 @@ L(caml_call_local_realloc):
str x25, [sp, 192]

/* Save caller saved floating-point registers on the stack */
stp d0, d1, [sp, 208]
stp d2, d3, [sp, 224]
stp d4, d5, [sp, 240]
stp d6, d7, [sp, 256]
stp d16, d17, [sp, 272]
stp d18, d19, [sp, 288]
stp d20, d21, [sp, 304]
stp d22, d23, [sp, 320]
stp d24, d25, [sp, 336]
stp d26, d27, [sp, 352]
stp d28, d29, [sp, 368]
stp d30, d31, [sp, 384]
stp q0, q1, [sp, 208]
stp q2, q3, [sp, 240]
stp q4, q5, [sp, 272]
stp q6, q7, [sp, 304]
stp q16, q17, [sp, 336]
stp q18, q19, [sp, 368]
stp q20, q21, [sp, 400]
stp q22, q23, [sp, 432]
stp q24, q25, [sp, 464]
stp q26, q27, [sp, 496]
stp q28, q29, [sp, 528]
stp q30, q31, [sp, 560]

/* Store pointer to saved integer registers in Caml_state->gc_regs */
add TMP, sp, #16
Expand All @@ -343,24 +343,24 @@ L(caml_call_local_realloc):
ldp x21, x22, [sp, 160]
ldp x23, x24, [sp, 176]
ldr x25, [sp, 192]
ldp d0, d1, [sp, 208]
ldp d2, d3, [sp, 224]
ldp d4, d5, [sp, 240]
ldp d6, d7, [sp, 256]
ldp d16, d17, [sp, 272]
ldp d18, d19, [sp, 288]
ldp d20, d21, [sp, 304]
ldp d22, d23, [sp, 320]
ldp d24, d25, [sp, 336]
ldp d26, d27, [sp, 352]
ldp d28, d29, [sp, 368]
ldp d30, d31, [sp, 384]
ldp q0, q1, [sp, 208]
ldp q2, q3, [sp, 240]
ldp q4, q5, [sp, 272]
ldp q6, q7, [sp, 304]
ldp q16, q17, [sp, 336]
ldp q18, q19, [sp, 368]
ldp q20, q21, [sp, 400]
ldp q22, q23, [sp, 432]
ldp q24, q25, [sp, 464]
ldp q26, q27, [sp, 496]
ldp q28, q29, [sp, 528]
ldp q30, q31, [sp, 560]

/* Reload new allocation pointer */
ldr ALLOC_PTR, Caml_state(young_ptr)

/* Free stack space and return to caller */
ldp x29, x30, [sp], 400
ldp x29, x30, [sp], 592
ret
CFI_ENDPROC
END_FUNCTION(caml_call_local_realloc)
Expand Down Expand Up @@ -403,20 +403,20 @@ FUNCTION(caml_start_program)

L(jump_to_caml):
/* Set up stack frame and save callee-save registers */
CFI_OFFSET(29, -160)
CFI_OFFSET(30, -152)
stp x29, x30, [sp, -160]!
CFI_ADJUST(160)
CFI_OFFSET(29, -224)
CFI_OFFSET(30, -216)
stp x29, x30, [sp, -224]!
CFI_ADJUST(224)
add x29, sp, #0
stp x19, x20, [sp, 16]
stp x21, x22, [sp, 32]
stp x23, x24, [sp, 48]
stp x25, x26, [sp, 64]
stp x27, x28, [sp, 80]
stp d8, d9, [sp, 96]
stp d10, d11, [sp, 112]
stp d12, d13, [sp, 128]
stp d14, d15, [sp, 144]
stp q8, q9, [sp, 96]
stp q10, q11, [sp, 128]
stp q12, q13, [sp, 160]
stp q14, q15, [sp, 192]
/* Load domain state pointer from argument */
mov DOMAIN_STATE_PTR, TMP
/* Setup a callback link on the stack */
Expand Down Expand Up @@ -463,12 +463,12 @@ L(return_result):
ldp x23, x24, [sp, 48]
ldp x25, x26, [sp, 64]
ldp x27, x28, [sp, 80]
ldp d8, d9, [sp, 96]
ldp d10, d11, [sp, 112]
ldp d12, d13, [sp, 128]
ldp d14, d15, [sp, 144]
ldp x29, x30, [sp], 160
CFI_ADJUST(-160)
ldp q8, q9, [sp, 96]
ldp q10, q11, [sp, 128]
ldp q12, q13, [sp, 160]
ldp q14, q15, [sp, 192]
ldp x29, x30, [sp], 224
CFI_ADJUST(-224)
/* Return to C caller */
ret
CFI_ENDPROC
Expand Down
Loading