diff --git a/.github/workflows/build-and-test-macos.yaml b/.github/workflows/build-and-test-macos.yaml index 57e9ac424c..ccac90a4dd 100644 --- a/.github/workflows/build-and-test-macos.yaml +++ b/.github/workflows/build-and-test-macos.yaml @@ -47,6 +47,14 @@ jobs: otp: "28" cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + - os: "macos-14" + otp: "28" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + + - os: "macos-15" + otp: "28" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + steps: # Setup - name: "Checkout repo" diff --git a/CMakeLists.txt b/CMakeLists.txt index 17eb2a02be..0c21bc78b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,10 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64|aarch64$") + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "aarch64") + endif() else() if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 8fe0d6bde1..427fa40aec 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -21,6 +21,7 @@ -define(JIT_FORMAT_VERSION, 1). -define(JIT_ARCH_X86_64, 1). +-define(JIT_ARCH_AARCH64, 2). -define(JIT_VARIANT_PIC, 1). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index bc97ea6903..a5810feff9 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -27,6 +27,8 @@ set(ERLANG_MODULES jit_precompile jit_stream_binary jit_stream_mmap + jit_aarch64 + jit_aarch64_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl new file mode 100644 index 0000000000..d0ffdd892d --- /dev/null +++ b/libs/jit/src/jit_aarch64.erl @@ -0,0 +1,2227 @@ +% +% This file is part of AtomVM. 
+% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +%-define(ASSERT(Expr), true = Expr). +-define(ASSERT(_Expr), ok). + +%% AArch64 ABI: r0-r7 are used for argument passing and return value. 
+%% r8 is the indirect result location register (platform-specific), +%% r9-r15 are caller-saved scratch registers (used by JIT), +%% r16-r17 are intra-procedure-call scratch registers, +%% r18 is platform register (reserved), +%% r19-r28 are callee-saved, +%% r29 is frame pointer, r30 is link register, r31 is stack pointer/zero. +%% d0-d7 are used for FP argument passing and return value. +%% d8-d15 are callee-saved FP registers. +%% +%% https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst +%% +%% Registers used by the JIT backend: +%% - Scratch GPRs: r9-r15 +%% - Argument/return: r0-r7, d0-d7 +%% - Stack pointer: r31 (sp) +%% - Frame pointer: r29 +%% - Link register: r30 +%% - Indirect result: r8 +%% +%% Note: r18 is reserved for platform use and must not be used. + +-type aarch64_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15. + +-define(IS_GPR(Reg), + (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse + Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7 orelse Reg =:= r8 orelse Reg =:= r9 orelse + Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse + Reg =:= r15) +). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [aarch64_register()], + used_regs :: [aarch64_register()], + labels :: [{integer() | reference(), integer()}] +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). +-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, aarch64_register()}. +-type value() :: immediate() | vm_register() | aarch64_register() | {ptr, aarch64_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()}. 
+ +-type maybe_free_aarch64_register() :: + {free, aarch64_register()} | aarch64_register(). + +-type condition() :: + {aarch64_register(), '<', integer()} + | {maybe_free_aarch64_register(), '<', aarch64_register()} + | {maybe_free_aarch64_register(), '==', integer()} + | {maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} + | {'(int)', maybe_free_aarch64_register(), '==', integer()} + | {'(int)', maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} + | {'(bool)', maybe_free_aarch64_register(), '==', false} + | {'(bool)', maybe_free_aarch64_register(), '!=', false} + | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}. + +% ctx->e is 0x28 +% ctx->x is 0x30 +-define(WORD_SIZE, 8). +-define(CTX_REG, r0). +-define(JITSTATE_REG, r1). +-define(NATIVE_INTERFACE_REG, r2). +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#30 + (N * ?WORD_SIZE)}). +-define(CP, {?CTX_REG, 16#B8}). +-define(FP_REGS, {?CTX_REG, 16#C0}). +-define(BS, {?CTX_REG, 16#C8}). +-define(BS_OFFSET, {?CTX_REG, 16#D0}). +-define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). +-define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#8}). +-define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#10}). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +% aarch64 ABI specific +-define(LR_REG, r30). +-define(IP0_REG, r16). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). + +-define(AVAILABLE_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). +-define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). +-define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). 
+ +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> ?WORD_SIZE. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream. +%% @end +%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC) +%% @param StreamModule module to stream instructions +%% @param Stream stream state +%% @return New backend state +%%----------------------------------------------------------------------------- +-spec new(any(), module(), stream()) -> state(). +new(_Variant, StreamModule, Stream) -> + #state{ + stream_module = StreamModule, + stream = Stream, + branches = [], + offset = StreamModule:offset(Stream), + available_regs = ?AVAILABLE_REGS, + used_regs = [], + labels = [] + }. + +%%----------------------------------------------------------------------------- +%% @doc Access the stream object. +%% @end +%% @param State current backend state +%% @return The stream object +%%----------------------------------------------------------------------------- +-spec stream(state()) -> stream(). +stream(#state{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @doc Get the current offset in the stream +%% @end +%% @param State current backend state +%% @return The current offset +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). 
+offset(#state{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @doc Emit a debugger of breakpoint instruction. This is used for debugging +%% and not in production. +%% @end +%% @param State current backend state +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec debugger(state()) -> state(). +debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:brk(0)), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [aarch64_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [aarch64_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. 
+%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). +free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/2. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. 
+%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount). + +-spec jump_table0(state(), non_neg_integer(), pos_integer()) -> state(). +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + N, + LabelsCount +) -> + Offset = StreamModule:offset(Stream0), + BranchInstr = jit_aarch64_asm:b(0), + Reloc = {N, Offset, b}, + Stream1 = StreamModule:append(Stream0, BranchInstr), + jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); + {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); + b -> jit_aarch64_asm:b(Rel) + end, + Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + update_branches(State#state{stream = Stream1, branches = BranchesT}). + +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. 
It also saves scratch registers we need to preserve. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), aarch64_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Primitive, + Args +) -> + PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{stream = Stream1}, + call_func_ptr(StateCall, {free, ?IP0_REG}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive_last(state(), non_neg_integer(), [arg()]) -> state(). 
+call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_aarch64_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + State1 = set_args( + State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + Args + ), + #state{stream = Stream2} = State1, + Call = jit_aarch64_asm:br(Temp), + Stream3 = StreamModule:append(Stream2, Call), + State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a return of a value if it's not equal to ctx. +%% This logic is used to break out to the scheduler, typically after signal +%% messages have been processed. +%% @end +%% @param State current backend state +%% @param Reg register to compare to (should be {free, Reg} as it's always freed) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec return_if_not_equal_to_ctx(state(), {free, aarch64_register()}) -> state(). 
+return_if_not_equal_to_ctx( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + {free, Reg} +) -> + I1 = jit_aarch64_asm:cmp(Reg, ?CTX_REG), + I3 = + case Reg of + % Return value is already in r0 + r0 -> <<>>; + % Move to r0 (return register) + _ -> jit_aarch64_asm:orr(r0, xzr, Reg) + end, + I4 = jit_aarch64_asm:ret(), + I2 = jit_aarch64_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), + Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, Reg), + State#state{ + stream = Stream1, + available_regs = AvailableRegs1, + used_regs = UsedRegs1 + }. + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump to a label. The offset of the relocation is saved and will +%% be updated with `update_branches/2`. +%% @end +%% @param State current backend state +%% @param Label to jump to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_to_label(state(), integer() | reference()) -> state(). +jump_to_label( + #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches, labels = Labels} = + State, + Label +) -> + Offset = StreamModule:offset(Stream0), + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch without relocation + Rel = LabelOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_aarch64_asm:b(0), + Reloc = {Label, Offset, b}, + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1, branches = [Reloc | AccBranches]} + end. 
+ +%% @private +-spec rewrite_branch_instruction( + jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() +) -> binary(). +rewrite_branch_instruction({cbnz, Reg}, Offset) -> + jit_aarch64_asm:cbnz(Reg, Offset); +rewrite_branch_instruction({cbnz_w, Reg}, Offset) -> + jit_aarch64_asm:cbnz_w(Reg, Offset); +rewrite_branch_instruction({tbz, Reg, Bit}, Offset) -> + jit_aarch64_asm:tbz(Reg, Bit, Offset); +rewrite_branch_instruction({tbnz, Reg, Bit}, Offset) -> + jit_aarch64_asm:tbnz(Reg, Bit, Offset); +rewrite_branch_instruction(CC, Offset) when is_atom(CC) -> + jit_aarch64_asm:bcc(CC, Offset). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if block, i.e. emit a test of a condition and conditionnally +%% execute a block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). 
+if_block( + #state{stream_module = StreamModule} = State0, + {'and', CondList}, + BlockFn +) -> + {Replacements, State1} = lists:foldl( + fun(Cond, {AccReplacements, AccState}) -> + Offset = StreamModule:offset(AccState#state.stream), + {NewAccState, CC, ReplaceDelta} = if_block_cond(AccState, Cond), + {[{Offset + ReplaceDelta, CC} | AccReplacements], NewAccState} + end, + {[], State0}, + CondList + ), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + Stream3 = lists:foldl( + fun({ReplacementOffset, CC}, AccStream) -> + BranchOffset = OffsetAfter - ReplacementOffset, + NewBranchInstr = jit_aarch64_asm:bcc(CC, BranchOffset), + StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr) + end, + Stream2, + Replacements + ), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs); +if_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch the conditional branch instruction to jump to the end of the block + BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = rewrite_branch_instruction(CC, BranchOffset), + Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if else block, i.e. emit a test of a condition and +%% conditionnally execute a block or another block. 
+%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockTrueFn function to emit the block that is executed if condition is true +%% @param BlockFalseFn function to emit the block that is executed if condition is false +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> + state(). +if_else_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockTrueFn, + BlockFalseFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockTrueFn(State1), + Stream2 = State2#state.stream, + %% Emit unconditional branch to skip the else block (will be replaced) + ElseJumpOffset = StreamModule:offset(Stream2), + ElseJumpInstr = jit_aarch64_asm:b(0), + Stream3 = StreamModule:append(Stream2, ElseJumpInstr), + %% Else block starts here. + OffsetAfter = StreamModule:offset(Stream3), + %% Patch the conditional branch to jump to the else block + ElseBranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = rewrite_branch_instruction(CC, ElseBranchOffset), + Stream4 = StreamModule:replace(Stream3, Offset + BranchInstrOffset, NewBranchInstr), + %% Build the else block + StateElse = State2#state{ + stream = Stream4, + used_regs = State1#state.used_regs, + available_regs = State1#state.available_regs + }, + State3 = BlockFalseFn(StateElse), + Stream5 = State3#state.stream, + OffsetFinal = StreamModule:offset(Stream5), + %% Patch the unconditional branch to jump to the end + FinalJumpOffset = OffsetFinal - ElseJumpOffset, + NewElseJumpInstr = jit_aarch64_asm:b(FinalJumpOffset), + Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), + merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). 
+ +%% @private +-spec if_block_cond(state(), condition()) -> + { + state(), + jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, + non_neg_integer() + }. +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> + I = jit_aarch64_asm:tbz(Reg, 63, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = State0#state{stream = Stream1}, + {State1, {tbz, Reg, 63}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val) -> + I1 = jit_aarch64_asm:cmp(Reg, Val), + % ge = greater than or equal + I2 = jit_aarch64_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '<', RegB} +) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, RegB), + % ge = greater than or equal + I2 = jit_aarch64_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I = jit_aarch64_asm:cbnz(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I = 
jit_aarch64_asm:cbnz_w(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz_w, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp_w(Reg, Val), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, Val), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '!=', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp_w(Reg, Val), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + 
I1 = jit_aarch64_asm:cmp(Reg, Val), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(bool)', RegOrTuple, '==', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test lowest bit + I = jit_aarch64_asm:tbnz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbnz, Reg, 0}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(bool)', RegOrTuple, '!=', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test lowest bit + I = jit_aarch64_asm:tbz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbz, Reg, 0}, 0}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {RegOrTuple, '&', Val, '!=', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test bits + TestCode = + try + jit_aarch64_asm:tst(Reg, Val) + catch + error:{unencodable_immediate, Val} -> + TestCode0 = jit_aarch64_asm:mov(Temp, Val), + TestCode1 = jit_aarch64_asm:tst(Reg, Temp), + <<TestCode0/binary, TestCode1/binary>> + end, + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + TestCode/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(TestCode)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp 
| _] + } = State0, + {Reg, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + OffsetBefore = StreamModule:offset(Stream0), + State1 = op_imm(State0, and_, Temp, Reg, Mask), + Stream1 = State1#state.stream, + % Compare with value + I2 = jit_aarch64_asm:cmp(Temp, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), + I3 = jit_aarch64_asm:bcc(eq, 0), + Stream3 = StreamModule:append(Stream2, I3), + State2 = State1#state{stream = Stream3}, + {State2, eq, OffsetAfter - OffsetBefore}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {{free, Reg} = RegTuple, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + OffsetBefore = StreamModule:offset(Stream0), + State1 = and_(State0, Reg, Mask), + Stream1 = State1#state.stream, + % Compare with value + I2 = jit_aarch64_asm:cmp(Reg, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), + I3 = jit_aarch64_asm:bcc(eq, 0), + Stream3 = StreamModule:append(Stream2, I3), + State3 = State1#state{stream = Stream3}, + State4 = if_block_free_reg(RegTuple, State3), + {State4, eq, OffsetAfter - OffsetBefore}. + +%% @private +-spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). +if_block_free_reg({free, Reg}, State0) -> + #state{available_regs = AvR0, used_regs = UR0} = State0, + {AvR1, UR1} = free_reg(AvR0, UR0, Reg), + State0#state{ + available_regs = AvR1, + used_regs = UR1 + }; +if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> + State0. + +%% @private +-spec merge_used_regs(state(), [aarch64_register()]) -> state(). 
+merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ + Reg | T +]) -> + case lists:member(Reg, UR0) of + true -> + merge_used_regs(State, T); + false -> + AvR1 = lists:delete(Reg, AvR0), + UR1 = [Reg | UR0], + merge_used_regs( + State#state{used_regs = UR1, available_regs = AvR1}, T + ) + end; +merge_used_regs(State, []) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register right by a fixed number of bits, effectively +%% dividing it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +-spec shift_right(state(), aarch64_register(), non_neg_integer()) -> state(). +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_aarch64_asm:lsr(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register left by a fixed number of bits, effectively +%% multiplying it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +-spec shift_left(state(), aarch64_register(), non_neg_integer()) -> state(). +shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + is_atom(Reg) +-> + I = jit_aarch64_asm:lsl(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call to a function pointer with arguments. 
This function converts +%% arguments and passes them following the backend ABI convention. +%% @end +%% @param State current backend state +%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex} +%% @param Args arguments to pass to the function +%% @return Updated backend state and return register +%%----------------------------------------------------------------------------- +-spec call_func_ptr(state(), {free, aarch64_register()} | {primitive, non_neg_integer()}, [arg()]) -> + {state(), aarch64_register()}. +call_func_ptr( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State0, + FuncPtrTuple, + Args +) -> + FreeRegs = lists:flatmap( + fun + ({free, ?IP0_REG}) -> []; + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + [FuncPtrTuple | Args] + ), + UsedRegs1 = UsedRegs0 -- FreeRegs, + SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + {SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), + + % Set up arguments following AArch64 calling convention + State1 = set_args(State0#state{stream = Stream1}, Args), + #state{stream = Stream2} = State1, + + {FuncPtrReg, Stream3} = + case FuncPtrTuple of + {free, Reg} -> + {Reg, Stream2}; + {primitive, Primitive} -> + % We use r16 for the address. 
+ PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}) + end, + {?IP0_REG, StreamModule:append(Stream2, PrepCall)} + end, + + % Call the function pointer (using BLR for call with return) + Call = jit_aarch64_asm:blr(FuncPtrReg), + Stream4 = StreamModule:append(Stream3, Call), + + % If r0 is in used regs, save it to another temporary register + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + {Stream5, ResultReg} = + case lists:member(r0, SavedRegs) of + true -> + [Temp | _] = AvailableRegs1, + {StreamModule:append(Stream4, jit_aarch64_asm:mov(Temp, r0)), Temp}; + false -> + {Stream4, r0} + end, + + Stream6 = pop_registers(SavedRegsOdd, lists:reverse(SavedRegs), StreamModule, Stream5), + + AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), + AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), + UsedRegs2 = [ResultReg | UsedRegs1], + { + State1#state{ + stream = Stream6, + available_regs = AvailableRegs3, + used_regs = UsedRegs2 + }, + ResultReg + }. + +%% @private +-spec push_registers([aarch64_register()], module(), stream()) -> {boolean(), stream()}. +push_registers([RegA, RegB | Tail], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:stp(RegA, RegB, {sp, -16}, '!')), + push_registers(Tail, StreamModule, Stream1); +push_registers([], _StreamModule, Stream0) -> + {false, Stream0}; +push_registers([RegA], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:str(RegA, {sp, -16}, '!')), + {true, Stream1}. + +%% @private +-spec pop_registers(boolean(), [aarch64_register()], module(), stream()) -> stream(). 
+pop_registers(true, [Reg | Tail], StreamModule, Stream0) -> + % Odd number of registers, pop the last one first + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldr(Reg, {sp}, 16)), + pop_registers(false, Tail, StreamModule, Stream1); +pop_registers(false, [], _StreamModule, Stream0) -> + Stream0; +pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldp(RegA, RegB, {sp}, 16)), + pop_registers(false, Tail, StreamModule, Stream1). + +%% @private +-spec set_args(state(), [arg()]) -> state(). +set_args( + #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args +) -> + ParamRegs = parameter_regs(Args), + ArgsRegs = args_regs(Args), + AvailableScratchGP = + ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, + Offset = StreamModule:offset(Stream0), + Args1 = [ + case Arg of + offset -> Offset; + _ -> Arg + end + || Arg <- Args + ], + SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), + Stream1 = StreamModule:append(Stream0, SetArgsCode), + NewUsedRegs = lists:foldl( + fun + ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + (_, AccUsed) -> AccUsed + end, + UsedRegs, + Args + ), + State0#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, + used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + }. + +%% @private +-spec parameter_regs([arg()]) -> [aarch64_register()]. +parameter_regs(Args) -> + parameter_regs0(Args, ?PARAMETER_REGS, []). + +%% @private +-spec parameter_regs0([arg()], [aarch64_register()], [aarch64_register()]) -> [aarch64_register()]. 
+parameter_regs0([], _, Acc) -> + lists:reverse(Acc); +parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when + Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset +-> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{free, Free} | T], GPRegs, Acc) -> + parameter_regs0([Free | T], GPRegs, Acc); +parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Reg | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]). + +%% @private +-spec replace_reg([arg()], aarch64_register(), aarch64_register()) -> [arg()]. +replace_reg(Args, Reg1, Reg2) -> + replace_reg0(Args, Reg1, Reg2, []). + +%% @private +-spec replace_reg0([arg()], aarch64_register(), aarch64_register(), [arg()]) -> [arg()]. +replace_reg0([Reg | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([Other | T], Reg, Replacement, Acc) -> + replace_reg0(T, Reg, Replacement, [Other | Acc]). + +%% @private +-spec set_args0([arg()], [aarch64_register() | imm], [aarch64_register()], [aarch64_register()], [ + binary() +]) -> binary(). 
+set_args0([], [], [], _AvailGP, Acc) -> + list_to_binary(lists:reverse(Acc)); +set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> + set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0( + [jit_state | ArgsT], + [?JITSTATE_REG | ArgsRegs], + [?JITSTATE_REG | ParamRegs], + AvailGP, + Acc +) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0( + [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, Acc +) -> + false = lists:member(ParamReg, ArgsRegs), + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [ + jit_aarch64_asm:mov(ParamReg, ?JITSTATE_REG) | Acc + ]); +% ctx is special as we need it to access x_reg/y_reg/fp_reg +set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> + false = lists:member(?CTX_REG, ArgsRegs), + J = set_args1(Arg, ?CTX_REG), + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); +set_args0( + [Arg | ArgsT], + [_ArgReg | ArgsRegs], + [ParamReg | ParamRegs], + [Avail | AvailGPT] = AvailGP, + Acc +) -> + J = set_args1(Arg, ParamReg), + case lists:member(ParamReg, ArgsRegs) of + false -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); + true -> + I = jit_aarch64_asm:mov(Avail, ParamReg), + NewArgsT = replace_reg(ArgsT, ParamReg, Avail), + set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, [J, I | Acc]) + end. + +%% @private +-spec set_args1(arg(), aarch64_register()) -> binary() | [binary()]. 
+set_args1(Reg, Reg) -> + []; +set_args1({x_reg, extra}, Reg) -> + jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)); +set_args1({x_reg, X}, Reg) -> + jit_aarch64_asm:ldr(Reg, ?X_REG(X)); +set_args1({ptr, Source}, Reg) -> + jit_aarch64_asm:ldr(Reg, {Source, 0}); +set_args1({y_reg, X}, Reg) -> + [ + jit_aarch64_asm:ldr(Reg, ?Y_REGS), + jit_aarch64_asm:ldr(Reg, {Reg, X * ?WORD_SIZE}) + ]; +set_args1(ArgReg, Reg) when ?IS_GPR(ArgReg) -> + jit_aarch64_asm:mov(Reg, ArgReg); +set_args1(Arg, Reg) when is_integer(Arg) -> + jit_aarch64_asm:mov(Reg, Arg). + +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. +%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register + (state(), Src :: value() | vm_register(), Dest :: vm_register()) -> state(); + (state(), Src :: {free, {ptr, aarch64_register(), 1}}, Dest :: {fp_reg, non_neg_integer()}) -> + state(). 
+% Native register to VM register
+move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
+    I1 = jit_aarch64_asm:str(Src, ?X_REG(?MAX_REG)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
+    I1 = jit_aarch64_asm:str(Src, ?X_REG(X)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
+    I1 = jit_aarch64_asm:str(Src, {Reg, 0}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(#state{available_regs = [Temp | _]} = State0, Src, {y_reg, Y}) when
+    is_atom(Src)
+->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:str(Src, {Temp, Y * ?WORD_SIZE}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, I2/binary>>),
+    State0#state{stream = Stream1};
+% Source is an integer
+move_to_vm_register(State, 0, Dest) ->
+    move_to_vm_register(State, xzr, Dest);
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N)
+->
+    I1 = jit_aarch64_asm:mov(Temp, N),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% Source is a VM register
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(?MAX_REG)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, {Reg, 0}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * ?WORD_SIZE}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, I2/binary>>),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% term_to_float
+move_to_vm_register(
+    #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State0,
+    {free, {ptr, Reg, 1}},
+    {fp_reg, F}
+) ->
+    I1 = jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}),
+    I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS),
+    I3 = jit_aarch64_asm:str(Reg, {Temp, F * ?WORD_SIZE}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State1 = free_native_register(State0, Reg),
+    State1#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @param Dest vm or native register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_array_element(
+    state(),
+    aarch64_register(),
+    non_neg_integer() | aarch64_register(),
+    vm_register() | aarch64_register()
+) -> state().
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}),
+    I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {ptr, Dest}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}),
+    I2 = jit_aarch64_asm:str(Temp, {Dest, 0}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} =
+        State,
+    Reg,
+    Index,
+    {y_reg, Y}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(Temp2, {Reg, Index * ?WORD_SIZE}),
+    I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * ?WORD_SIZE}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} =
+        State,
+    {free, Reg},
+    Index,
+    {y_reg, Y}
+) when is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(Reg, {Reg, Index * ?WORD_SIZE}),
+    I3 = jit_aarch64_asm:str(Reg, {Temp, Y * ?WORD_SIZE}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
+) when is_atom(Dest) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Dest, {Reg, Index * ?WORD_SIZE}),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(IndexReg) ->
+    I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}),
+    I2 = jit_aarch64_asm:str(IndexReg, ?X_REG(X)),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {ptr, PtrReg}
+) when is_atom(IndexReg) ->
+    I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}),
+    I2 = jit_aarch64_asm:str(IndexReg, {PtrReg, 0}),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _] = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {y_reg, Y}
+) when ?IS_GPR(IndexReg) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}),
+    I3 = jit_aarch64_asm:str(IndexReg, {Temp, Y * ?WORD_SIZE}),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(
+        Stream0, <<I1/binary, I2/binary, I3/binary>>
+    ),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of an array element (reg[x]) to a new native register.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec get_array_element(
+    state(), aarch64_register() | {free, aarch64_register()}, non_neg_integer()
+) ->
+    {state(), aarch64_register()}.
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    {free, Reg},
+    Index
+) ->
+    I1 = jit_aarch64_asm:ldr(Reg, {Reg, Index * ?WORD_SIZE}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
+    {State#state{stream = Stream1}, Reg};
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [ElemReg | AvailableT],
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    Index
+) ->
+    I1 = jit_aarch64_asm:ldr(ElemReg, {Reg, Index * ?WORD_SIZE}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
+    {
+        State#state{
+            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
+        },
+        ElemReg
+    }. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a move of a value (integer, vm register or native register) to an +%% array element (reg[x]) +%% @end +%% @param State current backend state +%% @param Value value to move +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_array_element( + state(), integer() | vm_register() | aarch64_register(), aarch64_register(), non_neg_integer() +) -> state(). +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + I1 = jit_aarch64_asm:str(ValueReg, {Reg, Index * ?WORD_SIZE}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + IndexReg +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + I1 = jit_aarch64_asm:str(ValueReg, {Reg, IndexReg, lsl, 3}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + Reg, + Index +) -> + {State1, Temp} = copy_to_native_register(State0, Value), + State2 = move_to_array_element(State1, Temp, Reg, Index), + free_native_register(State2, Temp). 
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of a value (integer, vm register or native register) to an
+%% array element (reg[x+offset])
+%% @end
+%% @param State current backend state
+%% @param Value value to move
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @param Offset additional offset
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_to_array_element(
+    state(), value(), aarch64_register(), aarch64_register() | non_neg_integer(), integer()
+) -> state().
+move_to_array_element(
+    State,
+    Value,
+    BaseReg,
+    IndexVal,
+    Offset
+) when is_integer(IndexVal) andalso is_integer(Offset) ->
+    move_to_array_element(State, Value, BaseReg, IndexVal + Offset);
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    ValueReg,
+    BaseReg,
+    IndexReg,
+    Offset
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
+    I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset),
+    I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_to_array_element(
+    State0,
+    Value,
+    BaseReg,
+    IndexReg,
+    Offset
+) ->
+    {State1, ValueReg} = copy_to_native_register(State0, Value),
+    [Temp | _] = State1#state.available_regs,
+    I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset),
+    I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}),
+    Stream1 = (State1#state.stream_module):append(State1#state.stream, <<I1/binary, I2/binary>>),
+    State2 = State1#state{stream = Stream1},
+    free_native_register(State2, ValueReg).
+
+%%-----------------------------------------------------------------------------
+%% @doc Move a value (integer, vm register, pointer or native register) to a
+%% native register. 
This allocates a new native register from the available +%% pool if needed. +%% @end +%% @param State current backend state +%% @param Value value to move (can be an immediate, vm register, pointer, or native register) +%% @return Tuple of {Updated backend state, Native register containing the value} +%%----------------------------------------------------------------------------- +-spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}. +move_to_native_register(State, Reg) when is_atom(Reg) -> + {State, Reg}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_aarch64_asm:ldr(Reg, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + Imm +) when + is_integer(Imm) +-> + I1 = jit_aarch64_asm:mov(Reg, Imm), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, extra} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, X} +) when + X < ?MAX_REG +-> + I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + 
available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {y_reg, Y} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}. + +%%----------------------------------------------------------------------------- +%% @doc Move a value (integer, vm register, pointer or native register) to a +%% specific native register. +%% @end +%% @param State current backend state +%% @param Value value to move (can be an immediate, vm register, pointer, or native register) +%% @param TargetReg the specific native register to move the value to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_native_register(state(), value(), aarch64_register()) -> state(). +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst +) when is_atom(RegSrc) orelse is_integer(RegSrc) -> + I = jit_aarch64_asm:mov(RegDst, RegSrc), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst +) when ?IS_GPR(Reg) -> + I1 = jit_aarch64_asm:ldr(RegDst, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst +) -> + I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst +) when + X < ?MAX_REG +-> + I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; 
+move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst +) -> + I1 = jit_aarch64_asm:ldr(RegDst, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(RegDst, {RegDst, Y * ?WORD_SIZE}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Copy a value to a native register, allocating a new register from the +%% available pool. Unlike move_to_native_register, this always allocates a new +%% register and copies the value (preserving the source if it's a register). +%% @end +%% @param State current backend state +%% @param Value value to copy (can be an immediate, vm register, pointer, or native register) +%% @return Tuple of {Updated backend state, Native register containing the copied value} +%%----------------------------------------------------------------------------- +-spec copy_to_native_register(state(), value()) -> {state(), aarch64_register()}. +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + Reg +) when is_atom(Reg) -> + I1 = jit_aarch64_asm:mov(SaveReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_aarch64_asm:ldr(SaveReg, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register(State, Reg) -> + move_to_native_register(State, Reg). 
+ +%%----------------------------------------------------------------------------- +%% @doc Move a VM register value to the continuation pointer (CP). +%% @end +%% @param State current backend state +%% @param VMReg VM register to move to CP +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_cp(state(), vm_register()) -> state(). +move_to_cp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + {y_reg, Y} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), + I3 = jit_aarch64_asm:str(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Increment the stack pointer (SP) by a given offset. +%% @end +%% @param State current backend state +%% @param Offset offset to add to SP (in words, will be multiplied by 8) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec increment_sp(state(), integer()) -> state(). +increment_sp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + Offset +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:add(Reg, Reg, Offset * ?WORD_SIZE), + I3 = jit_aarch64_asm:str(Reg, ?Y_REGS), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Set the continuation address to point to a specific label. The actual +%% address will be resolved during branch update. 
+%% @end +%% @param State current backend state +%% @param Label label to set as continuation target +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec set_continuation_to_label(state(), integer() | reference()) -> state(). +set_continuation_to_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State, + Label +) -> + Offset = StreamModule:offset(Stream0), + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]}. + +%%----------------------------------------------------------------------------- +%% @doc Set the continuation address to the current offset, creating a +%% reference for later resolution. Returns a reference that can be used +%% to add the label at the target location. +%% @end +%% @param State current backend state +%% @return Tuple of {Updated backend state, Reference for the continuation offset} +%%----------------------------------------------------------------------------- +-spec set_continuation_to_offset(state()) -> {state(), reference()}. +set_continuation_to_offset( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State +) -> + OffsetRef = make_ref(), + Offset = StreamModule:offset(Stream0), + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {OffsetRef, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. + +%%----------------------------------------------------------------------------- +%% @doc Implement a continuation entry point. 
On AArch64 this is a nop +%% as we don't need to save any register. +%% @end +%% @param State current backend state +%% @return Updated backend state (unchanged on AArch64) +%%----------------------------------------------------------------------------- +-spec continuation_entry_point(#state{}) -> #state{}. +continuation_entry_point(State) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Get the module index from the JIT state and load it into a native +%% register. +%% @end +%% @param State current backend state +%% @return Tuple of {Updated backend state, Native register containing module index} +%%----------------------------------------------------------------------------- +-spec get_module_index(state()) -> {state(), aarch64_register()}. +get_module_index( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailableT], + used_regs = UsedRegs0 + } = State +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?JITSTATE_MODULE), + I2 = jit_aarch64_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + { + State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + Reg + }. + +%% @private +-spec op_imm(state(), atom(), aarch64_register(), aarch64_register(), integer()) -> state(). 
+% Emit Op (an arithmetic/logic instruction from jit_aarch64_asm) with an
+% immediate operand, falling back to materializing the immediate in a
+% temporary register when the instruction cannot encode it directly.
+op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, Reg, Val) ->
+    Stream1 =
+        try
+            I = jit_aarch64_asm:Op(Reg, Reg, Val),
+            StreamModule:append(Stream0, I)
+        catch
+            error:{unencodable_immediate, Val} ->
+                % Immediate does not fit the encoding: load it into a temp
+                % register and use the register form of the instruction.
+                [Temp | _] = State#state.available_regs,
+                I1 = jit_aarch64_asm:mov(Temp, Val),
+                I2 = jit_aarch64_asm:Op(Reg, Reg, Temp),
+                StreamModule:append(Stream0, <<I1/binary, I2/binary>>)
+        end,
+    State#state{stream = Stream1};
+op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, RegB, Val) ->
+    Stream1 =
+        try
+            I = jit_aarch64_asm:Op(RegA, RegB, Val),
+            StreamModule:append(Stream0, I)
+        catch
+            error:{unencodable_immediate, Val} ->
+                % RegA is distinct from RegB here, so it can hold the immediate.
+                MoveI = jit_aarch64_asm:mov(RegA, Val),
+                AndI = jit_aarch64_asm:Op(RegA, RegB, RegA),
+                StreamModule:append(Stream0, <<MoveI/binary, AndI/binary>>)
+        end,
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Perform bitwise AND of a register with an immediate value.
+%% @end
+%% @param State current backend state
+%% @param Reg register to AND with value
+%% @param Val immediate value to AND
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec and_(state(), aarch64_register(), integer()) -> state().
+and_(State, Reg, Val) ->
+    op_imm(State, and_, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Perform bitwise OR of a register with an immediate value.
+%% @end
+%% @param State current backend state
+%% @param Reg register to OR with value
+%% @param Val immediate value to OR
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec or_(state(), aarch64_register(), integer()) -> state().
+or_(State, Reg, Val) ->
+    op_imm(State, orr, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add an immediate value to a register.
+%% @end
+%% @param State current backend state
+%% @param Reg register to add to
+%% @param Val immediate value to add
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add(state(), aarch64_register(), integer()) -> state().
+add(State, Reg, Val) ->
+    op_imm(State, add, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Subtract an immediate value from a register.
+%% @end
+%% @param State current backend state
+%% @param Reg register to subtract from
+%% @param Val immediate value to subtract
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec sub(state(), aarch64_register(), integer()) -> state().
+sub(State, Reg, Val) ->
+    % Route through op_imm, consistently with add/3, and_/3 and or_/3, so that
+    % immediates which do not fit the SUB (immediate) encoding fall back to a
+    % temporary register instead of crashing with unencodable_immediate.
+    op_imm(State, sub, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Multiply a register by a constant value. Uses optimized instruction
+%% sequences for common multipliers (powers of 2, small values).
+%% @end
+%% @param State current backend state
+%% @param Reg register to multiply
+%% @param Val constant multiplier (non-negative integer)
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec mul(state(), aarch64_register(), non_neg_integer()) -> state().
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
+    % Reg * 3 = (Reg << 1) + Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 1),
+    I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
+    % Reg * 5 = (Reg << 2) + Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 2),
+    I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 6) ->
+    State1 = mul(State0, Reg, 3),
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
+    % Reg * 7 = (Reg << 3) - Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 3),
+    I2 = jit_aarch64_asm:sub(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
+    % Reg * 9 = (Reg << 3) + Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 3),
+    I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 10) ->
+    State1 = mul(State0, Reg, 5),
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
+    % Reg * 15 = (Reg << 4) - Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 4),
+    I2 = jit_aarch64_asm:sub(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Val
+) ->
+    % general case: load the constant into a temporary register and emit MUL
+    I1 = jit_aarch64_asm:mov(Temp, Val),
+    I2 = jit_aarch64_asm:mul(Reg, Reg, Temp),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Decrement the reduction count and schedule the next process if it
+%% reaches zero. If reductions remain, execution continues; otherwise, the
+%% continuation is set and the scheduler is invoked.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+decrement_reductions_and_maybe_schedule_next(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
+) ->
+    % Load reduction count
+    I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    % Decrement reduction count
+    I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
+    % Store back the decremented value
+    I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch if reduction count is not zero (placeholder offset, rewritten below)
+    I4 = jit_aarch64_asm:bcc(ne, 0),
+    % Set continuation to the next instruction
+    ADROffset = BNEOffset + byte_size(I4),
+    I5 = jit_aarch64_asm:adr(Temp, 0),
+    I6 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION),
+    % Append the instructions to the stream
+    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
+    State1 = State0#state{stream = Stream2},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    % Rewrite the branch and adr instructions now that the target offset is known
+    #state{stream = Stream3} = State2,
+    NewOffset = StreamModule:offset(Stream3),
+    NewI4 = jit_aarch64_asm:bcc(ne, NewOffset - BNEOffset),
+    NewI5 = jit_aarch64_asm:adr(Temp, NewOffset - ADROffset),
+    Stream4 = StreamModule:replace(
+        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
+    ),
+    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a label with automatic scheduling. Decrements reductions
+%% and calls the label if reductions remain, otherwise schedules the next
+%% process. Sets the continuation pointer before the call.
+%% @end
+%% @param State current backend state
+%% @param Label label to call
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State0, Label) ->
+    % Reserve a CP slot first, emit the (tail-)call sequence, then patch the
+    % recorded CP offset once the return point is known.
+    {StateWithCP, CPRewriteOffset, CPRewriteSize} = set_cp(State0),
+    StateAfterCall = call_only_or_schedule_next(StateWithCP, Label),
+    rewrite_cp_offset(StateAfterCall, CPRewriteOffset, CPRewriteSize).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a tail call to a label with automatic scheduling. Decrements
+%% reductions and jumps to the label if reductions remain, otherwise schedules
+%% the next process. Does not set a new continuation pointer (tail call).
+%% @end
+%% @param State current backend state
+%% @param Label label to jump to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_only_or_schedule_next(state(), non_neg_integer()) -> state().
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        branches = Branches,
+        available_regs = [Temp | _]
+    } = State0,
+    Label
+) ->
+    % Load reduction count
+    I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    % Decrement reduction count
+    I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
+    % Store back the decremented value
+    I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch to label if reduction count is not zero (patched at branch update)
+    I4 = jit_aarch64_asm:bcc(ne, 0),
+    Reloc1 = {Label, BNEOffset, {bcc, ne}},
+    Stream2 = StreamModule:append(Stream1, I4),
+    State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]},
+    % Out of reductions: resume at Label after the scheduler runs
+    State2 = set_continuation_to_label(State1, Label),
+    call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a primitive with continuation pointer setup. This is
+%% used for primitives that may not return directly (e.g., those that can
+%% trap or reschedule). Sets CP before calling the primitive.
+%% @end
+%% @param State current backend state
+%% @param Primitive index of the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_primitive_with_cp(state(), non_neg_integer(), [arg()]) -> state().
+call_primitive_with_cp(State0, Primitive, Args) ->
+    {State1, RewriteOffset, RewriteSize} = set_cp(State0),
+    State2 = call_primitive_last(State1, Primitive, Args),
+    rewrite_cp_offset(State2, RewriteOffset, RewriteSize).
+
+%% @private
+-spec set_cp(state()) -> {state(), non_neg_integer(), 4 | 8}.
+set_cp(State0) ->
+    % get module index (dynamically)
+    {#state{stream_module = StreamModule, stream = Stream0} = State1, Reg} = get_module_index(
+        State0
+    ),
+    Offset = StreamModule:offset(Stream0),
+    % build cp with module_index << 24
+    I1 = jit_aarch64_asm:lsl(Reg, Reg, 24),
+    % Reserve space for the mov that rewrite_cp_offset/3 patches in later:
+    % one nop suffices for small code offsets, two for larger ones.
+    if
+        Offset >= 16250 ->
+            I2 = jit_aarch64_asm:nop(),
+            I3 = jit_aarch64_asm:nop(),
+            RewriteSize = 8;
+        true ->
+            I2 = jit_aarch64_asm:nop(),
+            I3 = <<>>,
+            RewriteSize = 4
+    end,
+    MOVOffset = Offset + byte_size(I1),
+    I4 = jit_aarch64_asm:orr(Reg, Reg, ?IP0_REG),
+    I5 = jit_aarch64_asm:str(Reg, ?CP),
+    Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State2 = State1#state{stream = Stream1},
+    State3 = free_native_register(State2, Reg),
+    {State3, MOVOffset, RewriteSize}.
+
+%% @private
+-spec rewrite_cp_offset(state(), non_neg_integer(), 4 | 8) -> state().
+rewrite_cp_offset(
+    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    _RewriteSize
+) ->
+    NewOffset = StreamModule:offset(Stream0) - CodeOffset,
+    % Replace the reserved nop(s) with the actual return offset (shifted by 2).
+    NewMoveInstr = jit_aarch64_asm:mov(?IP0_REG, NewOffset bsl 2),
+    ?ASSERT(byte_size(NewMoveInstr) =< _RewriteSize),
+    Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Set the binary state (BS) register to point to a term and reset the
+%% BS offset to zero. Used for binary matching operations.
+%% @end
+%% @param State current backend state
+%% @param TermReg register containing the term to set as binary state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec set_bs(state(), aarch64_register()) -> state().
+set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) ->
+    % Store the term into ctx->bs and zero ctx->bs_offset (xzr is the zero register).
+    I1 = jit_aarch64_asm:str(TermReg, ?BS),
+    I2 = jit_aarch64_asm:str(xzr, ?BS_OFFSET),
+    % NOTE(review): the binary below appears garbled by extraction; it is
+    % presumably <<I1/binary, I2/binary>> -- confirm against upstream.
+    Stream1 = StreamModule:append(Stream0, <>),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build labels and line tables and encode a function that returns it.
+%% In this case, the function returns the effective address of what immediately
+%% follows.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+-spec return_labels_and_lines(state(), [{non_neg_integer(), non_neg_integer()}]) -> state().
+return_labels_and_lines(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        labels = Labels
+    } = State,
+    SortedLines
+) ->
+    % Only integer labels go into the table (references are internal), sorted by offset.
+    SortedLabels = lists:keysort(2, [
+        {Label, LabelOffset}
+     || {Label, LabelOffset} <- Labels, is_integer(Label)
+    ]),
+
+    % adr r0, 8 returns the address of the data following the ret instruction.
+    I1 = jit_aarch64_asm:adr(r0, 8),
+    I2 = jit_aarch64_asm:ret(),
+    % NOTE(review): the table entry constructions below are garbled by
+    % extraction; the field widths of Label/Offset/Line entries cannot be
+    % recovered from this view -- confirm against upstream sources.
+    LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<> || {Line, Offset} <- SortedLines>>,
+    Stream1 = StreamModule:append(
+        Stream0,
+        <>
+    ),
+    State#state{stream = Stream1}.
+
+%% @private
+%% Release Reg: reinsert it into the available list and drop it from used.
+-spec free_reg([aarch64_register()], [aarch64_register()], aarch64_register()) ->
+    {[aarch64_register()], [aarch64_register()]}.
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    % Reg must have been in use; crash loudly otherwise.
+    true = lists:member(Reg, UsedRegs0),
+    UsedRegs1 = lists:delete(Reg, UsedRegs0),
+    {AvailableRegs1, UsedRegs1}.
+
+%% @private
+%% Reinsert Reg into the available list, preserving the canonical register
+%% order given by the first argument (?AVAILABLE_REGS).
+-spec free_reg0([aarch64_register()], [aarch64_register()], aarch64_register(), [aarch64_register()]) ->
+    [aarch64_register()].
+free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) ->
+    % Found Reg's canonical position: splice it back in.
+    lists:reverse(Acc, [Reg | PrevRegs0]);
+free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) ->
+    % This canonical register is currently available: keep it before Reg.
+    free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]);
+free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) ->
+    % This canonical register is in use elsewhere: skip it.
+    free_reg0(SortedT, PrevRegs, Reg, Acc).
+
+%% @private
+%% Map call arguments to the native registers they occupy (imm for values
+%% that are materialized as immediates). VM register arguments are read
+%% through the context register.
+-spec args_regs([arg()]) -> [aarch64_register() | imm].
+args_regs(Args) ->
+    lists:map(
+        fun
+            ({free, {ptr, Reg}}) -> Reg;
+            ({free, Reg}) when is_atom(Reg) -> Reg;
+            ({free, Imm}) when is_integer(Imm) -> imm;
+            (offset) -> imm;
+            (ctx) -> ?CTX_REG;
+            (jit_state) -> ?JITSTATE_REG;
+            (Reg) when is_atom(Reg) -> Reg;
+            (Imm) when is_integer(Imm) -> imm;
+            ({ptr, Reg}) -> Reg;
+            ({x_reg, _}) -> ?CTX_REG;
+            ({y_reg, _}) -> ?CTX_REG;
+            ({fp_reg, _}) -> ?CTX_REG;
+            ({free, {x_reg, _}}) -> ?CTX_REG;
+            ({free, {y_reg, _}}) -> ?CTX_REG;
+            ({free, {fp_reg, _}}) -> ?CTX_REG
+        end,
+        Args
+    ).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) ->
+    Offset = StreamModule:offset(Stream),
+    add_label(State, Label, Offset).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Labels} = State, Label, Offset) ->
+    State#state{labels = [{Label, Offset} | Labels]}.
diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl
new file mode 100644
index 0000000000..66e46b3255
--- /dev/null
+++ b/libs/jit/src/jit_aarch64_asm.erl
@@ -0,0 +1,982 @@
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+
+% AArch64 (A64) instruction encoder: each function returns the little-endian
+% 32-bit instruction word(s) as a binary.
+-module(jit_aarch64_asm).
+
+-export([
+    add/3,
+    add/4,
+    sub/3,
+    sub/4,
+    mul/3,
+    madd/4,
+    b/1,
+    bcc/2,
+    blr/1,
+    br/1,
+    brk/1,
+    cbnz/2,
+    cbnz_w/2,
+    tbz/3,
+    tbnz/3,
+    cmp/2,
+    cmp_w/2,
+    and_/3,
+    ldr/2,
+    ldr_w/2,
+    ldr/3,
+    lsl/3,
+    lsr/3,
+    mov/2,
+    movk/3,
+    movz/3,
+    orr/3,
+    ret/0,
+    nop/0,
+    str/2,
+    str_w/2,
+    str/3,
+    tst/2,
+    tst_w/2,
+    stp/4,
+    ldp/4,
+    subs/3,
+    adr/2
+]).
+
+-export_type([
+    cc/0
+]).
+
+% General-purpose registers; xzr is the zero register, sp the stack pointer.
+-type aarch64_gpr_register() ::
+    r0
+    | r1
+    | r2
+    | r3
+    | r4
+    | r5
+    | r6
+    | r7
+    | r8
+    | r9
+    | r10
+    | r11
+    | r12
+    | r13
+    | r14
+    | r15
+    | r16
+    | r17
+    | r18
+    | r19
+    | r20
+    | r21
+    | r22
+    | r23
+    | r24
+    | r25
+    | r26
+    | r27
+    | r28
+    | r29
+    | r30
+    | sp
+    | xzr.
+
+% A64 condition codes as used by conditional branches (bcc/2).
+-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv.
+
+%% Emit an ADD instruction (AArch64 encoding).
+%% ADD Rd, Rn, #imm adds a 12-bit unsigned immediate to Rn;
+%% ADD Rd, Rn, Rm adds two registers (delegates to add/4 with no shift).
+-spec add(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
+add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
+    %% ADD (immediate), 64-bit: base opcode 0x91000000, imm12 at bit 10,
+    %% Rn at bit 5, Rd at bit 0. The guard guarantees Imm fits in imm12.
+    Word = 16#91000000 bor (Imm bsl 10) bor (reg_to_num(Rn) bsl 5) bor reg_to_num(Rd),
+    <<Word:32/little>>;
+add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
+    %% Immediate does not fit imm12: report it so callers can fall back to a
+    %% temporary register.
+    error({unencodable_immediate, Imm});
+add(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
+    add(Rd, Rn, Rm, {lsl, 0}).
+
+%% Emit an ADD (shifted register) instruction: ADD Rd, Rn, Rm, LSL #Amount.
+-spec add(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) ->
+    binary().
+add(Rd, Rn, Rm, {lsl, Amount}) when
+    is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63
+->
+    %% ADD (shifted register), 64-bit: base opcode 0x8B000000, Rm at bit 16,
+    %% imm6 shift amount at bit 10, Rn at bit 5, Rd at bit 0.
+    Word =
+        16#8B000000 bor (reg_to_num(Rm) bsl 16) bor (Amount bsl 10) bor
+            (reg_to_num(Rn) bsl 5) bor reg_to_num(Rd),
+    <<Word:32/little>>.
+
+%% Emit an unconditional branch (B) to a byte offset relative to this
+%% instruction; the offset is encoded as a signed 26-bit word offset.
+-spec b(integer()) -> binary().
+b(Offset) when is_integer(Offset) ->
+    %% B: opcode 0b000101 in the top bits, imm26 = Offset/4.
+    Imm26 = (Offset div 4) band 16#3FFFFFF,
+    <<(16#14000000 bor Imm26):32/little>>.
+
+%% Emit a breakpoint (BRK) instruction carrying a 16-bit immediate
+%% (usually 0 for debuggers).
+-spec brk(integer()) -> binary().
+brk(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FFFF ->
+    %% BRK: base opcode 0xd4200000, imm16 at bit 5.
+    <<(16#D4200000 bor (Imm bsl 5)):32/little>>.
+
+%% Emit a branch with link to register (BLR) instruction.
+-spec blr(aarch64_gpr_register()) -> binary().
+blr(Reg) when is_atom(Reg) ->
+    %% BLR: base opcode 0xd63f0000, Rn at bit 5.
+    <<(16#D63F0000 bor (reg_to_num(Reg) bsl 5)):32/little>>.
+
+%% Emit a branch to register (BR) instruction.
+-spec br(aarch64_gpr_register()) -> binary().
+br(Reg) when is_atom(Reg) ->
+    %% BR: base opcode 0xd61f0000, Rn at bit 5.
+    <<(16#D61F0000 bor (reg_to_num(Reg) bsl 5)):32/little>>.
+
+%% Emit a load register (LDR) instruction for 64-bit load from memory (AArch64 encoding)
+%% Dst is destination register atom, Src is {BaseReg, Offset} tuple
+-spec ldr(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
+ldr(Dst, {BaseReg, Offset}) when
+    is_atom(Dst),
+    is_atom(BaseReg),
+    is_integer(Offset),
+    Offset >= 0,
+    Offset =< 32760,
+    (Offset rem 8) =:= 0
+->
+    % LDR (unsigned immediate): the 12-bit field holds Offset/8 (scaled).
+    DstNum = reg_to_num(Dst),
+    BaseRegNum = reg_to_num(BaseReg),
+    %% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt
+    %% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst
+    <<
+        (16#F9400000 bor ((Offset div 8) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little
+    >>;
+ldr(Xt, {Xn, Xm}) when
+    is_atom(Xt),
+    is_atom(Xn),
+    is_atom(Xm)
+->
+    % Register-offset form without shift delegates to the shifted form.
+    ldr(Xt, {Xn, Xm, lsl, 0});
+ldr(Xt, {Xn, Xm, lsl, Amount}) when
+    is_atom(Xt),
+    is_atom(Xn),
+    is_atom(Xm),
+    Amount =:= 0 orelse Amount =:= 3
+->
+    % LDR (register offset), 64-bit: Xm optionally shifted left by 3 (S bit).
+    XtNum = reg_to_num(Xt),
+    XnNum = reg_to_num(Xn),
+    XmNum = reg_to_num(Xm),
+    % S = 1 selects LSL #3 (scale by 8); S = 0 no shift.
+    S = Amount div 3,
+    <<
+        (16#F8606800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little
+    >>.
+
+%% Emit a load register (LDR) instruction for 32-bit load from memory (AArch64 encoding)
+%% Dst is destination register atom, Src is {BaseReg, Offset} tuple
+-spec ldr_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
+ldr_w(Dst, {BaseReg, Offset}) when
+    is_atom(Dst),
+    is_atom(BaseReg),
+    is_integer(Offset),
+    Offset >= 0,
+    Offset =< 16380,
+    (Offset rem 4) =:= 0
+->
+    % LDR (unsigned immediate), 32-bit variant: offset scaled by 4.
+    DstNum = reg_to_num(Dst),
+    BaseRegNum = reg_to_num(BaseReg),
+    <<
+        (16#B9400000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little
+    >>.
+
+%% Emit a move immediate (MOV) instruction for various immediate sizes (AArch64 encoding)
+%% Dst is destination register atom, Imm is immediate value
+%% Returns a binary that may contain multiple instructions for complex immediates
+-spec mov(aarch64_gpr_register(), integer() | aarch64_gpr_register()) -> binary().
+mov(Dst, Imm) when is_atom(Dst), is_integer(Imm) ->
+    mov_immediate(Dst, Imm);
+mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) ->
+    % MOV Rd, Rm is the ORR Rd, XZR, Rm alias.
+    orr(Rd, xzr, Rm).
+
+%% Helper function to encode immediate values using optimal instruction sequence
+-spec mov_immediate(aarch64_gpr_register(), integer()) -> binary().
+mov_immediate(Dst, Imm) when Imm >= 0, Imm =< 16#FFFF ->
+    %% Simple 16-bit positive immediate
+    movz(Dst, Imm, 0);
+mov_immediate(Dst, Imm) when Imm < 0, (-Imm - 1) =< 16#FFFF ->
+    %% Simple 16-bit negative immediate using MOVN
+    %% MOVN encodes ~immediate, so we can use it when ~Imm fits in 16 bits
+    DstNum = reg_to_num(Dst),
+    <<(16#92800000 bor (((-Imm - 1) band 16#FFFF) bsl 5) bor DstNum):32/little>>;
+mov_immediate(Dst, Imm) when Imm >= 0 ->
+    %% Complex positive immediate - build with MOVZ + MOVK sequence
+    build_positive_immediate(Dst, <<Imm:64>>);
+mov_immediate(Dst, Imm) when Imm < 0 ->
+    %% Complex negative immediate - try MOVN approach first
+    %% (<<Imm:64>> captures the two's complement bit pattern)
+    build_negative_immediate(Dst, <<Imm:64>>).
+
+%% Build positive immediate using MOVZ + MOVK sequence
+-spec build_positive_immediate(aarch64_gpr_register(), binary()) -> binary().
+build_positive_immediate(Dst, <<Imm4:16, Imm3:16, Imm2:16, Imm1:16>> = ImmB) ->
+    %% First try simple MOVZ/MOVK sequence for values with few non-zero chunks
+    %% Chunks are ordered least-significant first (Imm1 = bits 15..0), matching
+    %% the Index * 16 shifts used below.
+    Chunks = [
+        Imm1,
+        Imm2,
+        Imm3,
+        Imm4
+    ],
+    NonZeroChunks = length([C || C <- Chunks, C =/= 0]),
+
+    if
+        NonZeroChunks =< 2 ->
+            %% Use simple MOVZ/MOVK sequence for 1-2 chunks
+            build_immediate_sequence(Dst, Chunks);
+        true ->
+            %% For complex values, try bitmask immediate first
+            case encode_bitmask_immediate(ImmB) of
+                {ok, N, Immr, Imms} ->
+                    %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm)
+                    orr_immediate(Dst, N, Immr, Imms);
+                error ->
+                    %% Fallback to multi-instruction sequence
+                    build_immediate_sequence(Dst, Chunks)
+            end
+    end.
+
+%% Build negative immediate using MOVN or fallback to positive approach
+-spec build_negative_immediate(aarch64_gpr_register(), binary()) -> binary().
+build_negative_immediate(Dst, ImmB) ->
+    %% First try to encode as bitmask immediate with ORR
+    case encode_bitmask_immediate(ImmB) of
+        {ok, N, Immr, Imms} ->
+            %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm)
+            orr_immediate(Dst, N, Immr, Imms);
+        error ->
+            %% Fallback to multi-instruction sequence
+            build_positive_immediate(Dst, ImmB)
+    end.
+
+%% Build instruction sequence from chunks
+-spec build_immediate_sequence(aarch64_gpr_register(), [integer()]) -> binary().
+build_immediate_sequence(Dst, [C0, C1, C2, C3]) ->
+    %% Find the first non-zero chunk to start with MOVZ
+    {Index, Value} = find_first_nonzero_chunk([C0, C1, C2, C3]),
+    First = movz(Dst, Value, Index * 16),
+    Rest = build_movk_sequence(Dst, [C0, C1, C2, C3], Index),
+    <<First/binary, Rest/binary>>.
+
+%% Find the first non-zero chunk
+%% Note: only called with at least one non-zero chunk (zero is handled by the
+%% movz fast path in mov_immediate/2), so the none case never occurs here.
+-spec find_first_nonzero_chunk([integer()]) -> {integer(), integer()} | none.
+find_first_nonzero_chunk(Chunks) ->
+    find_first_nonzero_chunk(Chunks, 0).
+
+find_first_nonzero_chunk([0 | Rest], Index) -> find_first_nonzero_chunk(Rest, Index + 1);
+find_first_nonzero_chunk([Chunk | _], Index) -> {Index, Chunk}.
+
+%% Build MOVK sequence for remaining non-zero chunks
+-spec build_movk_sequence(aarch64_gpr_register(), [integer()], integer()) -> binary().
+build_movk_sequence(Dst, Chunks, SkipIndex) ->
+    build_movk_sequence(Dst, Chunks, SkipIndex, 0, <<>>).
+
+build_movk_sequence(_, [], _, _, Acc) ->
+    Acc;
+build_movk_sequence(Dst, [Chunk | Rest], SkipIndex, CurrentIndex, Acc) ->
+    NewAcc =
+        if
+            CurrentIndex =:= SkipIndex orelse Chunk =:= 0 ->
+                %% Chunk already set by MOVZ, or zero (MOVZ cleared it)
+                Acc;
+            true ->
+                MovkInstr = movk(Dst, Chunk, CurrentIndex * 16),
+                <<Acc/binary, MovkInstr/binary>>
+        end,
+    build_movk_sequence(Dst, Rest, SkipIndex, CurrentIndex + 1, NewAcc).
+
+%% Emit a MOVZ instruction (move with zero)
+-spec movz(aarch64_gpr_register(), integer(), integer()) -> binary().
+movz(Dst, Imm, Shift) when
+    is_atom(Dst),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 16#FFFF,
+    Shift rem 16 =:= 0,
+    Shift >= 0,
+    Shift =< 48
+->
+    DstNum = reg_to_num(Dst),
+    % hw field selects which 16-bit half-word (0..3) the immediate targets.
+    Hw = Shift div 16,
+    %% AArch64 MOVZ encoding: 1101001000hwiiiiiiiiiiiiiiiiibbbbb
+    <<(16#D2800000 bor (Hw bsl 21) bor ((Imm band 16#FFFF) bsl 5) bor DstNum):32/little>>.
+
+%% Emit a MOVK instruction (move with keep)
+-spec movk(aarch64_gpr_register(), integer(), integer()) -> binary().
+movk(Dst, Imm, Shift) when
+    is_atom(Dst),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 16#FFFF,
+    Shift rem 16 =:= 0,
+    Shift >= 0,
+    Shift =< 48
+->
+    DstNum = reg_to_num(Dst),
+    Hw = Shift div 16,
+    %% AArch64 MOVK encoding: 1111001000hwiiiiiiiiiiiiiiiiibbbbb
+    <<(16#F2800000 bor (Hw bsl 21) bor ((Imm band 16#FFFF) bsl 5) bor DstNum):32/little>>.
+
+%% Emit an ORR immediate instruction (used for MOV with bitmask immediates)
+-spec orr_immediate(aarch64_gpr_register(), integer(), integer(), integer()) -> binary().
+orr_immediate(Dst, N, Immr, Imms) when
+    is_atom(Dst),
+    N >= 0,
+    N =< 1,
+    Immr >= 0,
+    Immr =< 63,
+    Imms >= 0,
+    Imms =< 63
+->
+    DstNum = reg_to_num(Dst),
+    %% AArch64 ORR (immediate) encoding: sf 01 100100 N immr imms Rn Rd
+    %% For MOV Rd, #imm: ORR Rd, XZR, #imm (Rn = 31)
+
+    % 64-bit operation (note: bit 31 is already set in 16#B2000000, so the
+    % explicit Sf bor is redundant but harmless)
+    Sf = 1,
+    <<
+        ((Sf bsl 31) bor (16#B2000000) bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor
+            (31 bsl 5) bor DstNum):32/little
+    >>.
+
+%% Encode a value as AArch64 bitmask immediate
+%% Returns {ok, N, Immr, Imms} if encodable, error otherwise
+-spec encode_bitmask_immediate(binary()) -> {ok, 0..1, integer(), integer()} | error.
+encode_bitmask_immediate(Value) when byte_size(Value) =:= 8 ->
+    %% Try different pattern sizes (64, 32, 16, 8, 4, 2)
+    PatternSizes = [64, 32, 16, 8, 4, 2],
+    try_pattern_sizes(Value, PatternSizes).
+
+%% Encode a value as AArch64 bitmask immediate for 32 bits values
+%% Returns {ok, Immr, Imms} if encodable, error otherwise
+-spec encode_bitmask_immediate_w(binary()) -> {ok, integer(), integer()} | error.
+encode_bitmask_immediate_w(Value) when byte_size(Value) =:= 4 ->
+    %% Try different pattern sizes (32, 16, 8, 4, 2)
+    PatternSizes = [32, 16, 8, 4, 2],
+    % 32-bit bitmask immediates always have N = 0; strip it from the result.
+    case try_pattern_sizes(Value, PatternSizes) of
+        {ok, 0, Immr, Imms} -> {ok, Immr, Imms};
+        error -> error
+    end.
+
+%% Try encoding with different pattern sizes
+-spec try_pattern_sizes(binary(), [integer()]) -> {ok, integer(), integer(), integer()} | error.
+try_pattern_sizes(_, []) ->
+    error;
+try_pattern_sizes(Value, [Size | Rest]) ->
+    case try_encode_pattern_size(Value, Size) of
+        {ok, N, Immr, Imms} -> {ok, N, Immr, Imms};
+        error -> try_pattern_sizes(Value, Rest)
+    end.
+
+%% Try to encode value with a specific pattern size
+-spec try_encode_pattern_size(binary(), integer()) ->
+    {ok, integer(), integer(), integer()} | error.
+try_encode_pattern_size(Value, Size) ->
+    % NOTE(review): both bit-syntax expressions below are garbled by
+    % extraction. The first presumably extracts the top Size bits as Pattern;
+    % the second checks that Value is Pattern replicated to the full width.
+    % Confirm both against upstream sources.
+    <> = Value,
+    if
+        Value =:= <> ->
+            try_encode_single_pattern(Pattern, Size);
+        true ->
+            error
+    end.
+
+%% Try to encode a single pattern as bitmask immediate
+-spec try_encode_single_pattern(integer(), integer()) ->
+    {ok, integer(), integer(), integer()} | error.
+try_encode_single_pattern(Pattern, Size) ->
+    %% Find runs of consecutive 1s and 0s
+    case find_single_run_of_ones(Pattern, Size) of
+        {ok, OnesCount, _StartPos} when OnesCount =:= Size ->
+            %% All-ones patterns are reserved (not encodable as bitmask
+            %% immediates); reject rather than emit an invalid encoding.
+            error;
+        {ok, OnesCount, StartPos} ->
+            %% Calculate N, Immr, Imms
+            N =
+                case Size of
+                    64 -> 1;
+                    32 -> 0;
+                    16 -> 0;
+                    8 -> 0;
+                    4 -> 0;
+                    2 -> 0
+                end,
+
+            %% For N=0 patterns, we need to encode the size in imms
+            Imms =
+                case Size of
+                    64 -> OnesCount - 1;
+                    32 -> OnesCount - 1;
+                    16 -> 2#100000 bor (OnesCount - 1);
+                    8 -> 2#110000 bor (OnesCount - 1);
+                    4 -> 2#111000 bor (OnesCount - 1);
+                    2 -> 2#111100 bor (OnesCount - 1)
+                end,
+            %% immr is the rotation amount (negate of start position)
+            Immr = (-StartPos) band (Size - 1),
+
+            {ok, N, Immr, Imms};
+        error ->
+            error
+    end.
+
+%% Find a single run of consecutive 1s in the pattern
+-spec find_single_run_of_ones(integer(), integer()) -> {ok, integer(), integer()} | error.
+find_single_run_of_ones(Pattern, Size) ->
+    %% Convert to a list of bits, least-significant first, for easier analysis
+    Bits = [(Pattern bsr I) band 1 || I <- lists:seq(0, Size - 1)],
+    find_ones_run(Bits, 0, 0, 0, none).
+
+find_ones_run([], _, OnesCount, StartPos, in_ones) when OnesCount > 0 ->
+    %% Reached end while in ones run
+    {ok, OnesCount, StartPos};
+find_ones_run([], _Pos, _OnesCount, _StartPos, none) ->
+    %% No ones at all (pattern is 0): not encodable as a bitmask immediate.
+    %% Previously this case had no clause and crashed with function_clause.
+    error;
+find_ones_run([1 | Rest], Pos, 0, _, none) ->
+    %% Start of ones run
+    find_ones_run(Rest, Pos + 1, 1, Pos, in_ones);
+find_ones_run([1 | Rest], Pos, OnesCount, StartPos, in_ones) ->
+    %% Continue ones run
+    find_ones_run(Rest, Pos + 1, OnesCount + 1, StartPos, in_ones);
+find_ones_run([0 | Rest], _Pos, OnesCount, StartPos, in_ones) ->
+    %% End of ones run - make sure rest are zeros (single run only)
+    case lists:all(fun(X) -> X =:= 0 end, Rest) of
+        true -> {ok, OnesCount, StartPos};
+        %% Multiple runs not supported in simple encoding
+        false -> error
+    end;
+find_ones_run([0 | Rest], Pos, OnesCount, StartPos, none) ->
+    %% Still looking for start of ones run
+    find_ones_run(Rest, Pos + 1, OnesCount, StartPos, none).
%% Emit an ORR instruction (AArch64 encoding)
%% ORR Rd, Rn, Rm - performs bitwise OR of Rn and Rm, storing result in Rd
%% Special cases: ORR Rd, XZR, Rm is equivalent to MOV Rd, Rm
%% The third operand may also be an integer immediate, in which case it must
%% be encodable as a bitmask immediate.
-spec orr(
    aarch64_gpr_register(),
    aarch64_gpr_register() | xzr,
    aarch64_gpr_register() | integer()
) -> binary().
orr(DstReg, xzr, SrcReg) when is_atom(DstReg), is_atom(SrcReg) ->
    %% ORR Rd, XZR, Rm - equivalent to MOV Rd, Rm
    SrcNum = reg_to_num(SrcReg),
    DstNum = reg_to_num(DstReg),
    %% AArch64 ORR (shifted register) encoding with Rn = XZR:
    %% 0xaa000000 | Rm << 16 | 31 << 5 | Rd  (16#3E0 below is 31 << 5)
    <<(16#AA0003E0 bor (SrcNum bsl 16) bor DstNum):32/little>>;
orr(DstReg, Rn, Rm) when is_atom(DstReg), is_atom(Rn), is_atom(Rm) ->
    %% General ORR Rd, Rn, Rm
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    DstNum = reg_to_num(DstReg),
    %% AArch64 ORR (shifted register) encoding (64-bit):
    %% 0xaa000000 | Rm << 16 | Rn << 5 | Rd
    <<
        (16#AA000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor DstNum):32/little
    >>;
orr(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% Encode the (two's-complement truncated) 64-bit immediate as a bitmask
    case encode_bitmask_immediate(<<Imm:64>>) of
        {ok, N, Immr, Imms} ->
            % OR immediate encoding: sf=1(64b) 01(op) 100100 N immr imms Rn Rd
            Opcode = 16#B2000000,
            Instr =
                Opcode bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5) bor
                    RdNum,
            <<Instr:32/little>>;
        error ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a store register (STR) instruction for 64-bit store to memory.
%% Accepts either a base + scaled unsigned immediate offset, or a base +
%% register offset with optional LSL #3 (to match the clauses below).
-spec str(
    aarch64_gpr_register(),
    {aarch64_gpr_register(), integer()}
    | {aarch64_gpr_register(), aarch64_gpr_register(), lsl, 0 | 3}
) -> binary().
%% STR (immediate, unsigned offset), 64-bit: the offset is scaled by 8 in the
%% encoding, so it must be a multiple of 8 in 0..32760.
str(SrcReg, {BaseReg, Offset}) when
    is_atom(SrcReg),
    is_atom(BaseReg),
    is_integer(Offset),
    Offset >= 0,
    Offset =< 32760,
    (Offset rem 8) =:= 0
->
    SrcNum = reg_to_num(SrcReg),
    BaseNum = reg_to_num(BaseReg),
    %% AArch64 STR (immediate) encoding for 64-bit: 11111001000iiiiiiiiiiibbbbbttttt
    %% 0xf9000000 | (Offset div 8) << 10 | BaseReg << 5 | SrcReg
    <<
        (16#F9000000 bor ((Offset div 8) bsl 10) bor (BaseNum bsl 5) bor SrcNum):32/little
    >>;
%% STR (register offset), 64-bit: STR Xt, [Xn, Xm {, LSL #Amount}]
%% Only LSL #0 or LSL #3 are valid for a 64-bit store.
str(Xt, {Xn, Xm, lsl, Amount}) when
    is_atom(Xt),
    is_atom(Xn),
    is_atom(Xm),
    Amount =:= 0 orelse Amount =:= 3
->
    XtNum = reg_to_num(Xt),
    XnNum = reg_to_num(Xn),
    XmNum = reg_to_num(Xm),
    %% S bit: 1 when the index is shifted by the access size (LSL #3), else 0
    S = Amount div 3,
    <<
        (16#F8206800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little
    >>.

%% Emit a store register (STR) instruction for 64-bit store to memory, with store-update (writeback)
-spec str
    (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary();
    (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary().
%% STR (pre-index): STR Xt, [Xn, #Imm]!  -- base updated before the store.
%% Imm is a signed 9-bit offset (-256..255), masked into the imm9 field.
str(Reg, {Base, Imm}, '!') when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8000C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>;
%% STR (post-index): STR Xt, [Xn], #Imm  -- base updated after the store.
str(Reg, {Base}, Imm) when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>.

%% Emit a store register (STR) instruction for 32-bit store to memory
-spec str_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
%% STR (immediate, unsigned offset), 32-bit: the offset is scaled by 4 in the
%% encoding, so it must be a multiple of 4 in 0..16380.
str_w(Src, {BaseReg, Offset}) when
    is_atom(Src),
    is_atom(BaseReg),
    is_integer(Offset),
    Offset >= 0,
    Offset =< 16380,
    (Offset rem 4) =:= 0
->
    SrcNum = reg_to_num(Src),
    BaseRegNum = reg_to_num(BaseReg),
    <<
        (16#B9000000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor SrcNum):32/little
    >>.

%% Emit a load register (LDR) instruction for 64-bit load from memory, with writeback
-spec ldr
    (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary();
    (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary().
%% LDR (pre-index): LDR Xt, [Xn, #Imm]!  -- base updated before the load.
%% Imm is a signed 9-bit offset (-256..255), masked into the imm9 field.
ldr(Reg, {Base, Imm}, '!') when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8400C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>;
%% LDR (post-index): LDR Xt, [Xn], #Imm  -- base updated after the load.
ldr(Reg, {Base}, Imm) when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8400400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>.

%% Emit a store pair (STP) instruction for 64-bit registers
%% stp(Rn, Rm, {Base}, Imm) -> binary()
%% stp(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update)
-spec stp(
    aarch64_gpr_register(),
    aarch64_gpr_register(),
    {aarch64_gpr_register()} | {aarch64_gpr_register(), integer()},
    integer() | '!'
) -> binary().
%% STP (post-index): STP Rn, Rm, [Base], #Imm
%% Imm is a signed offset, a multiple of 8 in -512..504, encoded scaled by 8
%% into the 7-bit imm7 field.
stp(Rn, Rm, {Base}, Imm) when
    is_atom(Rn),
    is_atom(Rm),
    is_atom(Base),
    is_integer(Imm),
    Imm >= -512,
    Imm =< 504,
    (Imm rem 8) =:= 0
->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    BaseNum = reg_to_num(Base),
    %% STP (post-index) encoding: 1010100010|imm7|Rt2|Base|Rt
    %% 0xa8800000 | ((Imm div 8) band 0x7f) << 15 | Rm << 10 | Base << 5 | Rn
    %% The band 16#7F mask is required: without it a negative offset
    %% (e.g. -16) sign-extends over and corrupts the opcode bits.
    <<
        (16#A8800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor
            (RmNum bsl 10) bor RnNum):32/little
    >>;
%% STP (pre-index): STP Rn, Rm, [Base, #Imm]!
stp(Rn, Rm, {Base, Imm}, '!') when
    is_atom(Rn),
    is_atom(Rm),
    is_atom(Base),
    is_integer(Imm),
    Imm >= -512,
    Imm =< 504,
    (Imm rem 8) =:= 0
->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    BaseNum = reg_to_num(Base),
    <<
        (16#A9800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor
            RnNum):32/little
    >>.

%% Emit a load pair (LDP) instruction for 64-bit registers
%% ldp(Rn, Rm, {Base}, Imm) -> binary()
-spec ldp(aarch64_gpr_register(), aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) ->
    binary().
%% LDP (post-index): LDP Rn, Rm, [Base], #Imm
ldp(Rn, Rm, {Base}, Imm) when
    is_atom(Rn),
    is_atom(Rm),
    is_atom(Base),
    is_integer(Imm),
    Imm >= -512,
    Imm =< 504,
    (Imm rem 8) =:= 0
->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    BaseNum = reg_to_num(Base),
    %% LDP (post-index) encoding: 1010100011|imm7|Rt2|Base|Rt
    <<
        (16#A8C00000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor
            RnNum):32/little
    >>.
%%-----------------------------------------------------------------------------
%% Helper functions
%%-----------------------------------------------------------------------------

%% Convert register atoms to register numbers for assembly generation
%% for r0 to r30
reg_to_num(r0) -> 0;
reg_to_num(r1) -> 1;
reg_to_num(r2) -> 2;
reg_to_num(r3) -> 3;
reg_to_num(r4) -> 4;
reg_to_num(r5) -> 5;
reg_to_num(r6) -> 6;
reg_to_num(r7) -> 7;
reg_to_num(r8) -> 8;
reg_to_num(r9) -> 9;
reg_to_num(r10) -> 10;
reg_to_num(r11) -> 11;
reg_to_num(r12) -> 12;
reg_to_num(r13) -> 13;
reg_to_num(r14) -> 14;
reg_to_num(r15) -> 15;
reg_to_num(r16) -> 16;
reg_to_num(r17) -> 17;
reg_to_num(r18) -> 18;
reg_to_num(r19) -> 19;
reg_to_num(r20) -> 20;
reg_to_num(r21) -> 21;
reg_to_num(r22) -> 22;
reg_to_num(r23) -> 23;
reg_to_num(r24) -> 24;
reg_to_num(r25) -> 25;
reg_to_num(r26) -> 26;
reg_to_num(r27) -> 27;
reg_to_num(r28) -> 28;
reg_to_num(r29) -> 29;
reg_to_num(r30) -> 30;
%% Stack pointer (SP) is r31
reg_to_num(sp) -> 31;
%% Zero register (XZR) is also r31
%% (whether encoding 31 means SP or XZR depends on the instruction;
%% callers pick the atom that matches the intended semantics)
reg_to_num(xzr) -> 31.

%% Emit a conditional branch instruction (B.cond).
%% Offset is a byte offset relative to this instruction; it is encoded as a
%% word (offset/4) in the signed 19-bit imm19 field.
-spec bcc(cc(), integer()) -> binary().
bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) ->
    %% Condition code field (bits 0..3) per the AArch64 condition encoding
    CondNum =
        case Cond of
            % Equal (Z set)
            eq -> 0;
            % Not equal (Z clear)
            ne -> 1;
            % Carry set
            cs -> 2;
            % Carry clear
            cc -> 3;
            % Minus (N set)
            mi -> 4;
            % Plus (N clear)
            pl -> 5;
            % Overflow set
            vs -> 6;
            % Overflow clear
            vc -> 7;
            % Higher (unsigned)
            hi -> 8;
            % Lower or same (unsigned)
            ls -> 9;
            % Greater than or equal (signed)
            ge -> 10;
            % Less than (signed)
            lt -> 11;
            % Greater than (signed)
            gt -> 12;
            % Less than or equal (signed)
            le -> 13;
            % Always
            al -> 14;
            % Never
            nv -> 15
        end,
    Offset19 = Offset div 4,
    <<(16#54000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor CondNum):32/little>>.
%% Emit a compare and branch on non-zero
%% Offset is a byte offset relative to this instruction (imm19, scaled by 4).
-spec cbnz(aarch64_gpr_register(), integer()) -> binary().
cbnz(Rt, Offset) when is_integer(Offset) ->
    RtNum = reg_to_num(Rt),
    Offset19 = Offset div 4,
    <<(16#B5000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor RtNum):32/little>>.

%% 32-bit variant of cbnz (tests Wt instead of Xt)
-spec cbnz_w(aarch64_gpr_register(), integer()) -> binary().
cbnz_w(Rt, Offset) when is_integer(Offset) ->
    RtNum = reg_to_num(Rt),
    Offset19 = Offset div 4,
    <<(16#35000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor RtNum):32/little>>.

%% Emit a test bit and branch if zero
%% Bit is the bit number to test (0..63); Offset is a byte offset relative to
%% this instruction (imm14, scaled by 4).
-spec tbz(aarch64_gpr_register(), 0..63, integer()) -> binary().
tbz(Rt, Bit, Offset) when Offset >= -32768 andalso Offset < 32768 ->
    RtNum = reg_to_num(Rt),
    Offset14 = Offset div 4,
    %% Note: `band` binds tighter than `bsl` in Erlang, so `Bit band 32 bsl 26`
    %% is (Bit band 32) bsl 26 - that places the b5 bit (bit 5 of the bit
    %% number) at instruction bit 31, and b40 (low 5 bits) at bits 19..23.
    <<
        ((Bit band 32 bsl 26) bor 16#36000000 bor (Bit band 31 bsl 19) bor
            ((Offset14 band 16#3FFF) bsl 5) bor RtNum):32/little
    >>.

%% Emit a test bit and branch if not zero
-spec tbnz(aarch64_gpr_register(), 0..63, integer()) -> binary().
tbnz(Rt, Bit, Offset) when Offset >= -32768 andalso Offset < 32768 ->
    RtNum = reg_to_num(Rt),
    Offset14 = Offset div 4,
    %% Same field layout as tbz, opcode bit 24 set (0x37...) selects "not zero"
    <<
        ((Bit band 32 bsl 26) bor 16#37000000 bor (Bit band 31 bsl 19) bor
            ((Offset14 band 16#3FFF) bsl 5) bor RtNum):32/little
    >>.

%% Emit a compare instruction
-spec cmp(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
cmp(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 CMP (shifted register) encoding: CMP Rn, Rm
    %% This is SUBS XZR, Rn, Rm: 0xeb00001f | Rm << 16 | Rn << 5
    <<(16#EB00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RnNum = reg_to_num(Rn),
    %% AArch64 CMP (immediate) encoding: CMP Rn, #imm
    %% This is SUBS XZR, Rn, #imm: 0xf100001f | imm12 << 10 | Rn << 5
    <<(16#F100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>;
cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) ->
    %% For large immediates, load into a temporary register and compare.
    %% Use r16 (IP0) as temporary register (caller-saved).
    TempReg = r16,
    LoadInstr = mov_immediate(TempReg, Imm),
    CmpInstr = cmp(Rn, TempReg),
    %% Restored garbled bit syntax: concatenate the mov sequence and the cmp
    <<LoadInstr/binary, CmpInstr/binary>>.

%% Emit a 32-bit compare instruction
-spec cmp_w(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
cmp_w(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    %% Register clause added so the implementation matches the declared spec.
    %% CMP Wn, Wm is SUBS WZR, Wn, Wm: 0x6b00001f | Rm << 16 | Rn << 5
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    <<(16#6B00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
cmp_w(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RnNum = reg_to_num(Rn),
    %% AArch64 CMP (32-bit immediate) encoding: CMP Wn, #imm
    %% This is SUBS WZR, Wn, #imm: 0x7100001f | imm12 << 10 | Rn << 5
    <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>;
cmp_w(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm < 0, Imm >= -4095 ->
    RnNum = reg_to_num(Rn),
    %% For negative immediates, use the ADD form: CMP Wn, #imm becomes
    %% ADDS WZR, Wn, #(-imm): 0x3100001f | imm12 << 10 | Rn << 5
    PosImm = -Imm,
    <<(16#3100001F bor ((PosImm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>.

%% Emit an AND instruction (bitwise AND)
-spec and_(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() | integer()) ->
    binary().
and_(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 AND (shifted register) encoding: AND Rd, Rn, Rm (64-bit)
    %% 0x8a000000 | Rm << 16 | Rn << 5 | Rd
    <<
        (16#8A000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little
    >>;
and_(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% Restored garbled bit syntax: encode the 64-bit two's-complement
    %% immediate as a bitmask immediate.
    case encode_bitmask_immediate(<<Imm:64>>) of
        {ok, N, Immr, Imms} ->
            % AND immediate encoding: sf=1(64b) 00(op) 100100 N immr imms Rn Rd
            Opcode = 16#92000000,
            Instr =
                Opcode bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5) bor
                    RdNum,
            <<Instr:32/little>>;
        error ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a logical shift left instruction
-spec lsl(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
lsl(Rd, Rn, Shift) when is_atom(Rd), is_atom(Rn), is_integer(Shift), Shift >= 0, Shift =< 63 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% AArch64 LSL (immediate) encoding: LSL Rd, Rn, #shift
    %% This is the alias UBFM Rd, Rn, #((-shift) MOD 64), #(63 - shift)
    NegShift = (-Shift) band 63,
    Width = 63 - Shift,
    <<
        (16#D3400000 bor ((NegShift band 16#3F) bsl 16) bor ((Width band 16#3F) bsl 10) bor
            (RnNum bsl 5) bor RdNum):32/little
    >>.

%% Emit a logical shift right instruction
-spec lsr(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
lsr(Rd, Rn, Shift) when is_atom(Rd), is_atom(Rn), is_integer(Shift), Shift >= 0, Shift =< 63 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% AArch64 LSR (immediate) encoding: LSR Rd, Rn, #shift
    %% This is the alias UBFM Rd, Rn, #shift, #63
    <<
        (16#D340FC00 bor ((Shift band 16#3F) bsl 16) bor (RnNum bsl 5) bor RdNum):32/little
    >>.

%% Emit a return instruction
-spec ret() -> binary().
ret() ->
    %% AArch64 RET encoding: RET (defaults to X30/LR)
    %% 11010110010111110000001111000000
    <<16#D65F03C0:32/little>>.

%% Emit a NOP instruction
-spec nop() -> binary().
nop() ->
    %% 11010101000000110010000000011111
    <<16#D503201F:32/little>>.

%% Emit a test instruction (bitwise AND, discarding result)
-spec tst(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 TST (shifted register) encoding: TST Rn, Rm
    %% This is ANDS XZR, Rn, Rm: 0xea00001f | Rm << 16 | Rn << 5
    <<(16#EA00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
tst(Rn, Imm) when is_atom(Rn), is_integer(Imm) ->
    RnNum = reg_to_num(Rn),
    %% Restored garbled bit syntax: TST (immediate) is ANDS XZR, Rn, #imm,
    %% with the immediate encoded as a 64-bit bitmask immediate.
    case encode_bitmask_immediate(<<Imm:64>>) of
        {ok, N, Immr, Imms} ->
            <<
                (16#F200001F bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little
            >>;
        _ ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a 32-bit test instruction (bitwise AND, discarding result)
-spec tst_w(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
tst_w(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 TST (32-bit shifted register) encoding: TST Wn, Wm
    %% This is ANDS WZR, Wn, Wm: 0x6a00001f | Rm << 16 | Rn << 5
    <<(16#6A00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
tst_w(Rn, Imm) when is_atom(Rn), is_integer(Imm) ->
    RnNum = reg_to_num(Rn),
    %% Restored garbled bit syntax: 32-bit bitmask immediate (N is always 0)
    case encode_bitmask_immediate_w(<<Imm:32>>) of
        {ok, Immr, Imms} ->
            <<(16#7200001F bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little>>;
        _ ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a subtract and set flags (SUBS) instruction (AArch64 encoding)
%% SUBS Rd, Rn, Rm/imm - subtracts and sets condition flags
-spec subs(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) ->
    binary().
subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% AArch64 SUBS (immediate): 0xf1000000 | imm12 << 10 | Rn << 5 | Rd
    <<(16#F1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 SUBS (register): 0xeb000000 | Rm << 16 | Rn << 5 | Rd
    <<(16#EB000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little>>.

%% Emit a subtract (SUB) instruction, without setting flags
-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) ->
    binary().
sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% SUB (immediate): 0xd1000000 | imm12 << 10 | Rn << 5 | Rd
    <<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
    %% SUB (register) without shift is the shifted form with LSL #0
    sub(Rd, Rn, Rm, {lsl, 0}).

%% SUB (shifted register): SUB Rd, Rn, Rm, LSL #Amount
-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) ->
    binary().
sub(Rd, Rn, Rm, {lsl, Amount}) when
    is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63
->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% SUB (shifted register): 0xcb000000 | Rm << 16 | imm6 << 10 | Rn << 5 | Rd
    <<
        (16#CB000000 bor (RmNum bsl 16) bor ((Amount band 16#3F) bsl 10) bor (RnNum bsl 5) bor
            RdNum):32/little
    >>.

%% Emit an ADR (PC-relative address) instruction (AArch64 encoding)
%% Dst is destination register atom, Offset is signed immediate (in bytes, -1MB..+1MB)
-spec adr(aarch64_gpr_register(), integer()) -> binary().
adr(Dst, Imm) when is_atom(Dst), is_integer(Imm), Imm >= -1048576, Imm =< 1048572 ->
    DstNum = reg_to_num(Dst),
    %% immlo = low 2 bits of the offset (instruction bits 29..30),
    %% immhi = remaining 19 bits (instruction bits 5..23)
    ImmLo = Imm band 3,
    ImmHi = Imm bsr 2,
    Word = 16#10000000 bor (ImmLo bsl 29) bor ((ImmHi band 16#7FFFF) bsl 5) bor DstNum,
    %% Restored garbled bit syntax: emit the 32-bit little-endian word
    <<Word:32/little>>.
+ +-spec mul(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register()) -> binary(). +mul(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + madd(Rd, Rn, Rm, xzr). + +-spec madd( + aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() +) -> binary(). +madd(Rd, Rn, Rm, Ra) when is_atom(Rd), is_atom(Rn), is_atom(Rm), is_atom(Ra) -> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + RaNum = reg_to_num(Ra), + << + (16#9B000000 bor (RmNum bsl 16) bor (RaNum bsl 10) bor (RnNum bsl 5) bor + RdNum):32/little + >>. diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index f358138e60..5885c113e8 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -65,8 +65,16 @@ compile(Target, Dir, Path) -> Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + + Arch = + case Target of + "x86_64" -> ?JIT_ARCH_X86_64; + "aarch64" -> ?JIT_ARCH_AARCH64; + _ -> error({unsupported_target, Target}) + end, + Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) ), Backend = list_to_atom("jit_" ++ Target), Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 6ce8523320..04aff1f840 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -207,3 +207,4 @@ X(EMU_FLAVOR_ATOM, "\xA", "emu_flavor") X(CODE_SERVER_ATOM, "\xB", "code_server") X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") +X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 8ce68b333c..6cd22607bf 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -72,6 +72,19 @@ 
_Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_x86_64.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_x86_64.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_x86_64.erl"); +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 +_Static_assert(offsetof(Context, e) == 0x28, "ctx->e is 0x28 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, x) == 0x30, "ctx->x is 0x30 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, cp) == 0xB8, "ctx->cp is 0xB8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, fr) == 0xC0, "ctx->fr is 0xC0 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, bs) == 0xC8, "ctx->bs is 0xC8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in jit/src/jit_aarch64.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl"); +#else +#error Unknown jit target #endif #define PROCESS_MAYBE_TRAP_RETURN_VALUE(return_value, offset) \ diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 972a1a1180..e158cf87cd 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -172,14 +172,27 @@ enum TrapAndLoadResult #define JIT_FORMAT_VERSION 1 #define JIT_ARCH_X86_64 1 +#define JIT_ARCH_AARCH64 2 #define JIT_VARIANT_PIC 1 +#ifndef AVM_NO_JIT + #ifdef __x86_64__ #define JIT_ARCH_TARGET JIT_ARCH_X86_64 #define JIT_JUMPTABLE_ENTRY_SIZE 5 #endif +#if defined(__arm64__) || defined(__aarch64__) +#define 
JIT_ARCH_TARGET JIT_ARCH_AARCH64 +#define JIT_JUMPTABLE_ENTRY_SIZE 4 +#endif + +#ifndef JIT_ARCH_TARGET +#error Unknown JIT target +#endif +#endif + /** * @brief Return the entry point from a given jit stream * diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index a5f5c907ed..bbfd839938 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -338,7 +338,8 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary for (int arch_index = 0; arch_index < ENDIAN_SWAP_16(native_code->architectures_count); arch_index++) { if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == JIT_VARIANT_PIC) { size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size); - module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), (ModuleNativeEntryPoint) ((const uint8_t *) &native_code->info_size + offset)); + ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset); + module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point); break; } } diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 52e621ca48..37a38d06b5 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5678,6 +5678,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 return JIT_X86_64_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 + return JIT_AARCH64_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/sys.h b/src/libAtomVM/sys.h index fdbfa66b85..0735d86ed3 100644 --- a/src/libAtomVM/sys.h +++ b/src/libAtomVM/sys.h @@ -284,6 +284,18 @@ void sys_init_platform(GlobalContext *global); */ void sys_free_platform(GlobalContext *global); +/** + * @brief Map 
precompiled native code to a module entry point. + * + * @details If mmap module is executable, returns native_code + offset. + * Otherwise (Apple Silicon) copy it to an executable buffer. Only implemented + * on platforms with JIT. + * @param native_code pointer to native code chunk + * @param size size of native code chunk + * @param offset offset to the module entry point + */ +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset); + #ifdef __cplusplus } #endif diff --git a/src/platforms/generic_unix/CMakeLists.txt b/src/platforms/generic_unix/CMakeLists.txt index 668cb3db1a..933971dd39 100644 --- a/src/platforms/generic_unix/CMakeLists.txt +++ b/src/platforms/generic_unix/CMakeLists.txt @@ -39,11 +39,6 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") endif() endif() -if(NOT AVM_DISABLE_JIT) -include(DefineIfExists) -define_if_function_exists(AtomVM pthread_jit_write_protect_np "pthread.h" PUBLIC HAVE_PTHREAD_JIT_WRITE_PROTECT_NP) -define_if_symbol_exists(AtomVM MAP_JIT "sys/mman.h" PUBLIC HAVE_MAP_JIT) -endif() add_subdirectory(lib) target_include_directories(AtomVM PUBLIC lib/) @@ -57,17 +52,21 @@ set( ) target_link_libraries(AtomVM PRIVATE libAtomVM${PLATFORM_LIB_SUFFIX}) +if(AVM_DISABLE_JIT) +set(precompiled_suffix) +else() +set(precompiled_suffix "-${AVM_JIT_TARGET_ARCH}") +include(DefineIfExists) +define_if_function_exists(libAtomVM${PLATFORM_LIB_SUFFIX} pthread_jit_write_protect_np "pthread.h" PUBLIC HAVE_PTHREAD_JIT_WRITE_PROTECT_NP) +define_if_symbol_exists(libAtomVM${PLATFORM_LIB_SUFFIX} MAP_JIT "sys/mman.h" PUBLIC HAVE_MAP_JIT) +endif() + if (COVERAGE) include(CodeCoverage) append_coverage_compiler_flags_to_target(AtomVM) append_coverage_linker_flags_to_target(AtomVM) endif() -if(AVM_DISABLE_JIT) -set(precompiled_suffix) -else() -set(precompiled_suffix "-${AVM_JIT_TARGET_ARCH}") -endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/atomvm ${CMAKE_CURRENT_BINARY_DIR}/atomvm @ONLY) install(TARGETS AtomVM 
DESTINATION lib/atomvm) diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index bed7819c2f..376f7384d0 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -29,6 +29,7 @@ #include "platform_defaultatoms.h" #include "term.h" +#include #include #include #include @@ -74,6 +75,7 @@ static term nif_jit_stream_mmap_new(Context *ctx, int argc, term argv[]) uint8_t *addr = (uint8_t *) mmap(0, size, prot, flags, fd, offset); if (addr == MAP_FAILED) { + fprintf(stderr, "Could not allocate mmap for JIT: size=%zu, errno=%d\n", size, errno); RAISE_ERROR(BADARG_ATOM); } @@ -87,15 +89,6 @@ static term nif_jit_stream_mmap_new(Context *ctx, int argc, term argv[]) js->stream_offset = 0; js->stream_size = size; -#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP - pthread_jit_write_protect_np(0); -#endif -#if defined(__APPLE__) - sys_icache_invalidate(addr, size); -#elif defined(__GNUC__) - __builtin___clear_cache(addr, addr + size); -#endif - term obj = enif_make_resource(erl_nif_env_from_context(ctx), js); enif_release_resource(js); // decrement refcount after enif_alloc_resource return obj; @@ -127,9 +120,17 @@ static term nif_jit_stream_mmap_append(Context *ctx, int argc, term argv[]) size_t binary_size = term_binary_size(argv[1]); const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[1]); - assert(js_obj->stream_offset + binary_size < js_obj->stream_size); + if (UNLIKELY(js_obj->stream_offset + binary_size > js_obj->stream_size)) { + RAISE_ERROR(BADARG_ATOM); + } +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(0); +#endif memcpy(js_obj->stream_base + js_obj->stream_offset, binary_data, binary_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(1); +#endif js_obj->stream_offset += binary_size; return argv[0]; @@ -155,7 +156,13 @@ static term nif_jit_stream_mmap_replace(Context *ctx, int argc, term 
argv[]) RAISE_ERROR(BADARG_ATOM); } +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(0); +#endif memcpy(js_obj->stream_base + offset, binary_data, binary_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(1); +#endif return argv[0]; } @@ -232,9 +239,6 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) return NULL; } -#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP - pthread_jit_write_protect_np(1); -#endif #if defined(__APPLE__) sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); #elif defined(__GNUC__) diff --git a/src/platforms/generic_unix/lib/mapped_file.c b/src/platforms/generic_unix/lib/mapped_file.c index f33aa183a1..3a58802180 100644 --- a/src/platforms/generic_unix/lib/mapped_file.c +++ b/src/platforms/generic_unix/lib/mapped_file.c @@ -22,6 +22,7 @@ #include "utils.h" +#include #include #include #include @@ -50,9 +51,18 @@ MappedFile *mapped_file_open_beam(const char *file_name) fstat(mf->fd, &file_stats); mf->size = file_stats.st_size; - mf->mapped = mmap(NULL, mf->size, PROT_READ | PROT_EXEC, MAP_SHARED, mf->fd, 0); - if (IS_NULL_PTR(mf->mapped)) { - fprintf(stderr, "Cannot mmap %s\n", file_name); + int prot; +#ifdef AVM_NO_JIT + prot = PROT_READ; +#elif defined(__APPLE__) && defined(__arm64__) + prot = PROT_READ; +#else + prot = PROT_READ | PROT_EXEC; +#endif + + mf->mapped = mmap(NULL, mf->size, prot, MAP_SHARED, mf->fd, 0); + if (UNLIKELY(mf->mapped == MAP_FAILED)) { + fprintf(stderr, "Cannot mmap %s -- errno=%d\n", file_name, errno); close(mf->fd); free(mf); return NULL; diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 439304870b..6e7272095c 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -47,6 +47,14 @@ #ifndef AVM_NO_JIT #include "jit_stream_mmap.h" + +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif #endif #include @@ -805,3 +813,24 @@ void 
sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) } #endif + +#ifndef AVM_NO_JIT +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ +#if defined(__APPLE__) && defined(__arm64__) + uint8_t *native_code_mmap = (uint8_t *) mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0); + if (native_code_mmap == MAP_FAILED) { + fprintf(stderr, "Could not allocate mmap for native code: size=%zu, errno=%d\n", size, errno); + return NULL; + } + pthread_jit_write_protect_np(0); + memcpy(native_code_mmap, native_code, size); + pthread_jit_write_protect_np(1); + sys_icache_invalidate(native_code_mmap, size); + return (ModuleNativeEntryPoint) (native_code_mmap + offset); +#else + UNUSED(size); + return (ModuleNativeEntryPoint) (native_code + offset); +#endif +} +#endif diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 7bb3df40de..ff8ede14e2 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -70,10 +70,37 @@ endfunction() set(TO_HRL_PATH ${CMAKE_CURRENT_LIST_DIR}) function(generate_hrl out_file def_name in_file) + if(AVM_DISABLE_JIT) + # For non-JIT builds, use the base file + set(selected_file ${in_file}) + else() + # For JIT builds, determine the architecture-specific file + get_filename_component(base_name ${in_file} NAME_WE) + get_filename_component(base_ext ${in_file} EXT) + get_filename_component(base_dir ${in_file} DIRECTORY) + + # Check if it's a .avm file (pack) or .beam file + if(base_ext STREQUAL ".avm") + # For .avm files: name.avm -> name-arch.avm + if(base_dir) + set(selected_file ${base_dir}/${base_name}-${AVM_JIT_TARGET_ARCH}${base_ext}) + else() + set(selected_file ${base_name}-${AVM_JIT_TARGET_ARCH}${base_ext}) + endif() + else() + # For .beam files: name.beam -> arch/name.beam + if(base_dir) + set(selected_file ${base_dir}/${AVM_JIT_TARGET_ARCH}/${base_name}${base_ext}) + else() + 
set(selected_file ${AVM_JIT_TARGET_ARCH}/${base_name}${base_ext}) + endif() + endif() + endif() + add_custom_command( OUTPUT ${out_file} - COMMAND escript ${TO_HRL_PATH}/to_hrl.erl ${in_file} ${def_name} ${out_file} - DEPENDS ${in_file} + COMMAND escript ${TO_HRL_PATH}/to_hrl.erl ${selected_file} ${def_name} ${out_file} + DEPENDS ${selected_file} COMMENT "Generating ${out_file}" ) endfunction() diff --git a/tests/erlang_tests/code_load/CMakeLists.txt b/tests/erlang_tests/code_load/CMakeLists.txt index e8c88ea657..e12b6b5b59 100644 --- a/tests/erlang_tests/code_load/CMakeLists.txt +++ b/tests/erlang_tests/code_load/CMakeLists.txt @@ -37,9 +37,7 @@ set(code_load_deps code_load_pack_data.hrl ) if(NOT AVM_DISABLE_JIT) - generate_hrl(export_test_module_data_${AVM_JIT_TARGET_ARCH}.hrl EXPORT_TEST_MODULE_DATA_${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/export_test_module.beam) - generate_hrl(code_load_pack_data_${AVM_JIT_TARGET_ARCH}.hrl CODE_LOAD_PACK_DATA_${AVM_JIT_TARGET_ARCH} code_load_pack-${AVM_JIT_TARGET_ARCH}.avm) - set(code_load_deps ${code_load_deps} export_test_module_data_${AVM_JIT_TARGET_ARCH}.hrl code_load_pack_data_${AVM_JIT_TARGET_ARCH}.hrl jit) + set(code_load_deps ${code_load_deps} jit) endif() add_custom_target(code_load_files DEPENDS ${code_load_deps}) diff --git a/tests/erlang_tests/test_add_avm_pack_binary.erl b/tests/erlang_tests/test_add_avm_pack_binary.erl index 807b4a9559..ed2509bfb0 100644 --- a/tests/erlang_tests/test_add_avm_pack_binary.erl +++ b/tests/erlang_tests/test_add_avm_pack_binary.erl @@ -22,17 +22,10 @@ -export([start/0]). --ifdef(AVM_DISABLE_JIT). -include("code_load/code_load_pack_data.hrl"). load_pack_data() -> ?CODE_LOAD_PACK_DATA. --else. --include("code_load/code_load_pack_data_x86_64.hrl"). - -load_pack_data() -> - ?CODE_LOAD_PACK_DATA_x86_64. --endif. 
start() -> Bin = load_pack_data(), diff --git a/tests/erlang_tests/test_add_avm_pack_file.erl b/tests/erlang_tests/test_add_avm_pack_file.erl index 53551c3e37..5533c2ff51 100644 --- a/tests/erlang_tests/test_add_avm_pack_file.erl +++ b/tests/erlang_tests/test_add_avm_pack_file.erl @@ -22,11 +22,15 @@ -export([start/0]). +-ifdef(AVM_DISABLE_JIT). +path() -> + "code_load/code_load_pack.avm". +-else. +path() -> + "../code_load/code_load_pack-" ++ atom_to_list(?AVM_JIT_TARGET_ARCH) ++ ".avm". +-endif. + start() -> - AVM = - case erlang:system_info(emu_flavor) of - emu -> "code_load/code_load_pack.avm"; - jit -> "../code_load/code_load_pack-x86_64.avm" - end, + AVM = path(), erlang:display(atomvm:add_avm_pack_file(AVM, [])), export_test_module:exported_func(4). diff --git a/tests/erlang_tests/test_close_avm_pack.erl b/tests/erlang_tests/test_close_avm_pack.erl index f89feccb07..74ef632038 100644 --- a/tests/erlang_tests/test_close_avm_pack.erl +++ b/tests/erlang_tests/test_close_avm_pack.erl @@ -22,17 +22,10 @@ -export([start/0]). --ifdef(AVM_DISABLE_JIT). -include("code_load/code_load_pack_data.hrl"). load_pack_data() -> ?CODE_LOAD_PACK_DATA. --else. --include("code_load/code_load_pack_data_x86_64.hrl"). - -load_pack_data() -> - ?CODE_LOAD_PACK_DATA_x86_64. --endif. start() -> Bin = load_pack_data(), diff --git a/tests/erlang_tests/test_code_load_abs.erl b/tests/erlang_tests/test_code_load_abs.erl index a421ac5980..a3292fe4c3 100644 --- a/tests/erlang_tests/test_code_load_abs.erl +++ b/tests/erlang_tests/test_code_load_abs.erl @@ -22,14 +22,19 @@ -export([start/0]). +-ifdef(AVM_DISABLE_JIT). +path() -> + "code_load/export_test_module". +-else. +path() -> + "../code_load/" ++ atom_to_list(?AVM_JIT_TARGET_ARCH) ++ "/export_test_module". +-endif. 
+ start() -> Path = case erlang:system_info(machine) of "ATOM" -> - case erlang:system_info(emu_flavor) of - emu -> "code_load/export_test_module"; - jit -> "../code_load/x86_64/export_test_module" - end; + path(); "BEAM" -> "code_load/export_test_module" end, diff --git a/tests/erlang_tests/test_code_load_binary.erl b/tests/erlang_tests/test_code_load_binary.erl index 758e0ac69f..56f3356f5c 100644 --- a/tests/erlang_tests/test_code_load_binary.erl +++ b/tests/erlang_tests/test_code_load_binary.erl @@ -22,17 +22,10 @@ -export([start/0]). --ifdef(AVM_DISABLE_JIT). -include("code_load/export_test_module_data.hrl"). export_test_module_data() -> ?EXPORT_TEST_MODULE_DATA. --else. --include("code_load/export_test_module_data_x86_64.hrl"). - -export_test_module_data() -> - ?EXPORT_TEST_MODULE_DATA_x86_64. --endif. start() -> Bin = export_test_module_data(), diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 9dbe754525..70f46ccc09 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -26,6 +26,8 @@ set(ERLANG_MODULES tests jit_tests jit_tests_common + jit_aarch64_tests + jit_aarch64_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl new file mode 100644 index 0000000000..7e43ddb654 --- /dev/null +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -0,0 +1,692 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64_asm_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-export([ + list_to_integer/1, + list_to_integer/2 +]). + +list_to_integer(X) -> erlang:list_to_integer(X). +list_to_integer(X, B) -> erlang:list_to_integer(X, B). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value) +). +-define(_assertAsmEqualLargeInt(Bin, Str, Value), + ?_test(begin + case erlang:system_info(machine) of + "BEAM" -> + ?assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value); + "ATOM" -> + % AtomVM doesn't handle large integers yet. + % Skip the test + ok + end + end) +). + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#9100e0e7:32/little>>, "add x7, x7, #56", jit_aarch64_asm:add(r7, r7, 56) + ), + ?_assertAsmEqual( + <<16#91000000:32/little>>, "add x0, x0, #0", jit_aarch64_asm:add(r0, r0, 0) + ), + ?_assertAsmEqual( + <<16#91000421:32/little>>, "add x1, x1, #1", jit_aarch64_asm:add(r1, r1, 1) + ), + ?_assertAsmEqual( + <<16#8b031041:32/little>>, + "add x1, x2, x3, lsl #4", + jit_aarch64_asm:add(r1, r2, r3, {lsl, 4}) + ), + ?_assertAsmEqual( + <<16#8b030041:32/little>>, "add x1, x2, x3", jit_aarch64_asm:add(r1, r2, r3) + ), + %% Test add with invalid immediate + ?_assertError({unencodable_immediate, 16#FFFF}, jit_aarch64_asm:add(r0, r0, 16#FFFF)), + + %% Test cases for additional registers (r11, r12, r14, r22-r30) + ?_assertAsmEqual( + <<16#8b0b000b:32/little>>, "add x11, x0, x11", jit_aarch64_asm:add(r11, r0, r11) + ), + ?_assertAsmEqual( + <<16#8b0c000c:32/little>>, "add x12, x0, x12", jit_aarch64_asm:add(r12, r0, r12) + ), + ?_assertAsmEqual( + <<16#8b0e000e:32/little>>, "add x14, x0, x14", jit_aarch64_asm:add(r14, r0, r14) + ), + ?_assertAsmEqual( + <<16#8b160016:32/little>>, "add x22, x0, x22", 
jit_aarch64_asm:add(r22, r0, r22) + ), + ?_assertAsmEqual( + <<16#8b170017:32/little>>, "add x23, x0, x23", jit_aarch64_asm:add(r23, r0, r23) + ), + ?_assertAsmEqual( + <<16#8b180018:32/little>>, "add x24, x0, x24", jit_aarch64_asm:add(r24, r0, r24) + ), + ?_assertAsmEqual( + <<16#8b190019:32/little>>, "add x25, x0, x25", jit_aarch64_asm:add(r25, r0, r25) + ), + ?_assertAsmEqual( + <<16#8b1a001a:32/little>>, "add x26, x0, x26", jit_aarch64_asm:add(r26, r0, r26) + ), + ?_assertAsmEqual( + <<16#8b1b001b:32/little>>, "add x27, x0, x27", jit_aarch64_asm:add(r27, r0, r27) + ), + ?_assertAsmEqual( + <<16#8b1c001c:32/little>>, "add x28, x0, x28", jit_aarch64_asm:add(r28, r0, r28) + ), + ?_assertAsmEqual( + <<16#8b1d001d:32/little>>, "add x29, x0, x29", jit_aarch64_asm:add(r29, r0, r29) + ), + ?_assertAsmEqual( + <<16#8b1e001e:32/little>>, "add x30, x0, x30", jit_aarch64_asm:add(r30, r0, r30) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#d100e0e7:32/little>>, "sub x7, x7, #56", jit_aarch64_asm:sub(r7, r7, 56) + ), + ?_assertAsmEqual( + <<16#d1000000:32/little>>, "sub x0, x0, #0", jit_aarch64_asm:sub(r0, r0, 0) + ), + ?_assertAsmEqual( + <<16#d1000421:32/little>>, "sub x1, x1, #1", jit_aarch64_asm:sub(r1, r1, 1) + ), + ?_assertAsmEqual( + <<16#cb031041:32/little>>, + "sub x1, x2, x3, lsl #4", + jit_aarch64_asm:sub(r1, r2, r3, {lsl, 4}) + ), + ?_assertAsmEqual( + <<16#cb030041:32/little>>, "sub x1, x2, x3", jit_aarch64_asm:sub(r1, r2, r3) + ) + ]. + +madd_test_() -> + [ + ?_assertAsmEqual( + <<16#9b037c41:32/little>>, "mul x1, x2, x3", jit_aarch64_asm:mul(r1, r2, r3) + ), + ?_assertAsmEqual( + <<16#9b031041:32/little>>, "madd x1, x2, x3, x4", jit_aarch64_asm:madd(r1, r2, r3, r4) + ) + ]. 
+ +b_test_() -> + [ + ?_assertAsmEqual(<<16#14000000:32/little>>, "b .+0", jit_aarch64_asm:b(0)), + ?_assertAsmEqual(<<16#14000004:32/little>>, "b .+16", jit_aarch64_asm:b(16)), + ?_assertAsmEqual(<<16#17fffff0:32/little>>, "b .-64", jit_aarch64_asm:b(-64)), + ?_assertAsmEqual(<<16#14000001:32/little>>, "b .+4", jit_aarch64_asm:b(4)) + ]. + +brk_test_() -> + [ + ?_assertAsmEqual(<<16#D4200000:32/little>>, "brk #0", jit_aarch64_asm:brk(0)), + ?_assertAsmEqual(<<16#D4201900:32/little>>, "brk #200", jit_aarch64_asm:brk(200)) + ]. + +blr_test_() -> + [ + ?_assertAsmEqual(<<16#D63F0000:32/little>>, "blr x0", jit_aarch64_asm:blr(r0)), + ?_assertAsmEqual(<<16#D63F0020:32/little>>, "blr x1", jit_aarch64_asm:blr(r1)), + ?_assertAsmEqual(<<16#D63F01A0:32/little>>, "blr x13", jit_aarch64_asm:blr(r13)) + ]. + +br_test_() -> + [ + ?_assertAsmEqual(<<16#D61F0000:32/little>>, "br x0", jit_aarch64_asm:br(r0)), + ?_assertAsmEqual(<<16#D61F0020:32/little>>, "br x1", jit_aarch64_asm:br(r1)), + ?_assertAsmEqual(<<16#D61F01A0:32/little>>, "br x13", jit_aarch64_asm:br(r13)) + ]. 
+ +ldr_test_() -> + [ + ?_assertAsmEqual( + <<16#F9400421:32/little>>, "ldr x1, [x1, #8]", jit_aarch64_asm:ldr(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F9403042:32/little>>, "ldr x2, [x2, #96]", jit_aarch64_asm:ldr(r2, {r2, 96}) + ), + % Load-update (writeback) with SP, negative offset + ?_assertAsmEqual( + <<16#F85F0FE7:32/little>>, + "ldr x7, [sp, #-16]!", + jit_aarch64_asm:ldr(r7, {sp, -16}, '!') + ), + % Load-update (writeback) with SP, positive offset + ?_assertAsmEqual( + <<16#F8410FE7:32/little>>, "ldr x7, [sp, #16]!", jit_aarch64_asm:ldr(r7, {sp, 16}, '!') + ), + % Load-update (writeback) with SP, zero offset + ?_assertAsmEqual( + <<16#F84007E7:32/little>>, "ldr x7, [sp], #0", jit_aarch64_asm:ldr(r7, {sp}, 0) + ), + % shift + ?_assertAsmEqual( + <<16#f8637841:32/little>>, + "ldr x1, [x2, x3, lsl #3]", + jit_aarch64_asm:ldr(r1, {r2, r3, lsl, 3}) + ), + ?_assertAsmEqual( + <<16#f8677907:32/little>>, + "ldr x7, [x8, x7, lsl #3]", + jit_aarch64_asm:ldr(r7, {r8, r7, lsl, 3}) + ), + ?_assertAsmEqual( + <<16#f8636841:32/little>>, "ldr x1, [x2, x3]", jit_aarch64_asm:ldr(r1, {r2, r3}) + ) + ]. + +ldr_w_test_() -> + [ + ?_assertAsmEqual( + <<16#b9400821:32/little>>, "ldr w1, [x1, 8]", jit_aarch64_asm:ldr_w(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#b9406042:32/little>>, "ldr w2, [x2, 96]", jit_aarch64_asm:ldr_w(r2, {r2, 96}) + ), + ?_assertAsmEqual( + <<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]", jit_aarch64_asm:ldr_w(r0, {r3, 16380}) + ) + ]. + +str_w_test_() -> + [ + ?_assertAsmEqual( + <<16#b9000821:32/little>>, "str w1, [x1, 8]", jit_aarch64_asm:str_w(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#b9006042:32/little>>, "str w2, [x2, 96]", jit_aarch64_asm:str_w(r2, {r2, 96}) + ), + ?_assertAsmEqual( + <<16#b93ffc60:32/little>>, "str w0, [x3, 16380]", jit_aarch64_asm:str_w(r0, {r3, 16380}) + ) + ]. 
+ +mov_test_() -> + [ + % mov immediate - simple cases + ?_assertAsmEqual(<<16#D2800000:32/little>>, "mov x0, #0", jit_aarch64_asm:mov(r0, 0)), + ?_assertAsmEqual(<<16#D2801901:32/little>>, "mov x1, #200", jit_aarch64_asm:mov(r1, 200)), + ?_assertAsmEqual(<<16#d28000b3:32/little>>, "mov x19, #5", jit_aarch64_asm:mov(r19, 5)), + ?_assertAsmEqual(<<16#92800094:32/little>>, "mov x20, #-5", jit_aarch64_asm:mov(r20, -5)), + ?_assertAsmEqual(<<16#d2800015:32/little>>, "mov x21, #0", jit_aarch64_asm:mov(r21, 0)), + ?_assertAsmEqual( + <<16#d29ffff0:32/little>>, "mov x16, #0xffff", jit_aarch64_asm:mov(r16, 16#FFFF) + ), + ?_assertAsmEqual( + <<16#929fffcf:32/little>>, "mov x15, #-0xffff", jit_aarch64_asm:mov(r15, -16#FFFF) + ), + + % mov immediate - complex cases requiring multiple instructions + ?_assertAsmEqual( + <<16#d2a00052:32/little>>, "mov x18, #0x20000", jit_aarch64_asm:mov(r18, 16#20000) + ), + ?_assertAsmEqual( + <<16#b26fbbf1:32/little>>, "mov x17, #-0x20000", jit_aarch64_asm:mov(r17, -131072) + ), + + % mov immediate - very large value requiring multiple instructions + ?_assertAsmEqualLargeInt( + <<16#D29579A1:32/little, 16#F2B7C041:32/little, 16#F2DFD741:32/little, + 16#F2EFF941:32/little>>, + "mov x1, #0xabcd\n" + "movk x1, #0xbe02, lsl #16\n" + "movk x1, #0xfeba, lsl #32\n" + "movk x1, #0x7fca, lsl #48", + jit_aarch64_asm:mov(r1, ?MODULE:list_to_integer("9208452466117618637")) + ), + + % mov register + ?_assertAsmEqual(<<16#AA0103E0:32/little>>, "mov x0, x1", jit_aarch64_asm:mov(r0, r1)), + ?_assertAsmEqual(<<16#AA0703E1:32/little>>, "mov x1, x7", jit_aarch64_asm:mov(r1, r7)), + + %% Test mov with zero immediate (should use movz with 0) + ?_assertAsmEqual( + <<16#d2800000:32/little>>, "movz x0, #0", jit_aarch64_asm:mov(r0, 0) + ), + + %% Test 4-bit pattern encoding + ?_assertAsmEqual( + <<16#929fffe0:32/little>>, + "mov x0, #-65536", + jit_aarch64_asm:mov(r0, -65536) + ), + %% Test complex immediate that will use fallback sequence + 
?_assertAsmEqualLargeInt( + << + 16#d29bde00:32/little, + 16#f2b35780:32/little, + 16#f2cacf00:32/little, + 16#f2e24680:32/little + >>, + "mov x0, #0xdef0\n" + "movk x0, #0x9abc, lsl #16\n" + "movk x0, #0x5678, lsl #32\n" + "movk x0, #0x1234, lsl #48", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("123456789ABCDEF0", 16)) + ), + + %% Test negative immediate that uses build_negative_immediate fallback + ?_assertAsmEqualLargeInt( + << + 16#d2842200:32/little, + 16#f2aca860:32/little, + 16#f2d530e0:32/little, + 16#f2fdb960:32/little + >>, + "mov x0, #0x2110\n" + "movk x0, #0x6543, lsl #16\n" + "movk x0, #0xa987, lsl #32\n" + "movk x0, #0xedcb, lsl #48", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("-123456789ABCDEF0", 16)) + ), + + %% Test bitmask patterns with different sizes + %% Size 16 pattern: repeats every 16 bits + ?_assertAsmEqualLargeInt( + <<16#b20083e0:32/little>>, + "mov x0, #0x0001000100010001", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("0001000100010001", 16)) + ), + %% Size 4 pattern: repeats every 4 bits + ?_assertAsmEqualLargeInt( + <<16#b200e7e0:32/little>>, + "mov x0, #0x3333333333333333", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("3333333333333333", 16)) + ), + %% Size 2 pattern: repeats every 2 bits + ?_assertAsmEqualLargeInt( + <<16#b200f3e0:32/little>>, + "mov x0, #0x5555555555555555", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("5555555555555555", 16)) + ) + ]. 
+ +orr_test_() -> + [ + % ORR Rd, XZR, Rm (MOV) + ?_assertAsmEqual( + <<16#AA0103E0:32/little>>, "orr x0, xzr, x1", jit_aarch64_asm:orr(r0, xzr, r1) + ), + % ORR Rd, Rn, Rm + ?_assertAsmEqual( + <<16#AA010020:32/little>>, "orr x0, x1, x1", jit_aarch64_asm:orr(r0, r1, r1) + ), + ?_assertAsmEqual( + <<16#AA020041:32/little>>, "orr x1, x2, x2", jit_aarch64_asm:orr(r1, r2, r2) + ), + + %% Test orr with valid bitmask immediate + ?_assertAsmEqual( + <<16#b24007e0:32/little>>, "orr x0, xzr, #0x3", jit_aarch64_asm:orr(r0, xzr, 16#3) + ), + %% Test orr with another bitmask pattern + ?_assertAsmEqual( + <<16#b27f1fe0:32/little>>, "orr x0, xzr, #0x1fe", jit_aarch64_asm:orr(r0, xzr, 16#1fe) + ), + + %% Test orr with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:orr(r0, r0, 16#123456)) + ]. + +str_test_() -> + [ + ?_assertAsmEqual( + <<16#F9000421:32/little>>, "str x1, [x1, #8]", jit_aarch64_asm:str(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F9003042:32/little>>, "str x2, [x2, #96]", jit_aarch64_asm:str(r2, {r2, 96}) + ), + % str with xzr (zero register) - stores zero to memory + ?_assertAsmEqual( + <<16#F900001F:32/little>>, "str xzr, [x0]", jit_aarch64_asm:str(xzr, {r0, 0}) + ), + ?_assertAsmEqual( + <<16#F900043F:32/little>>, "str xzr, [x1, #8]", jit_aarch64_asm:str(xzr, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F900085F:32/little>>, "str xzr, [x2, #16]", jit_aarch64_asm:str(xzr, {r2, 16}) + ), + % Store-update (writeback) with SP + ?_assertAsmEqual( + <<16#F81F0FE7:32/little>>, + "str x7, [sp, #-16]!", + jit_aarch64_asm:str(r7, {sp, -16}, '!') + ), + % Store-update (writeback) with SP, positive offset + ?_assertAsmEqual( + <<16#F8010FE7:32/little>>, "str x7, [sp, #16]!", jit_aarch64_asm:str(r7, {sp, 16}, '!') + ), + % Store-update (writeback) with SP, zero offset + ?_assertAsmEqual( + <<16#F80007E7:32/little>>, "str x7, [sp], #0", jit_aarch64_asm:str(r7, {sp}, 0) + ), + % shift + ?_assertAsmEqual( + <<16#f8237841:32/little>>, + 
"str x1, [x2, x3, lsl #3]", + jit_aarch64_asm:str(r1, {r2, r3, lsl, 3}) + ) + ]. + +cmp_test_() -> + [ + % cmp reg, reg + ?_assertAsmEqual(<<16#EB01001F:32/little>>, "cmp x0, x1", jit_aarch64_asm:cmp(r0, r1)), + % cmp reg, imm + ?_assertAsmEqual(<<16#F100001F:32/little>>, "cmp x0, #0", jit_aarch64_asm:cmp(r0, 0)), + ?_assertAsmEqual(<<16#F103001F:32/little>>, "cmp x0, #192", jit_aarch64_asm:cmp(r0, 192)), + + %% Test large immediate compare (uses temporary register) + ?_assertAsmEqual( + << + 16#d28acf10:32/little, + 16#f2a24690:32/little, + 16#eb10001f:32/little + >>, + "mov x16, #0x5678\n" + "movk x16, #0x1234, lsl #16\n" + "cmp x0, x16", + jit_aarch64_asm:cmp(r0, 16#12345678) + ), + + %% Test negative immediate compare (uses MOVN) + ?_assertAsmEqual( + << + 16#92800010:32/little, + 16#eb1000ff:32/little + >>, + "movn x16, #0\n" + "cmp x7, x16", + jit_aarch64_asm:cmp(r7, -1) + ) + ]. + +cmp_w_test_() -> + [ + % cmp_w reg, imm + ?_assertAsmEqual(<<16#7100001F:32/little>>, "cmp w0, #0", jit_aarch64_asm:cmp_w(r0, 0)), + ?_assertAsmEqual(<<16#7103001F:32/little>>, "cmp w0, #192", jit_aarch64_asm:cmp_w(r0, 192)), + + %% Test 32-bit compare with negative immediate + ?_assertAsmEqual( + <<16#3100041f:32/little>>, "adds wzr, w0, #1", jit_aarch64_asm:cmp_w(r0, -1) + ), + ?_assertAsmEqual( + <<16#31000c1f:32/little>>, "adds wzr, w0, #3", jit_aarch64_asm:cmp_w(r0, -3) + ) + ]. 
+ +and_test_() -> + [ + % AND reg, reg, reg + ?_assertAsmEqual( + <<16#8A010020:32/little>>, "and x0, x1, x1", jit_aarch64_asm:and_(r0, r1, r1) + ), + % AND reg, reg, imm + ?_assertAsmEqual( + <<16#927A0420:32/little>>, "and x0, x1, #0xc0", jit_aarch64_asm:and_(r0, r1, 192) + ), + ?_assertAsmEqual( + <<16#927ff8e7:32/little>>, + "and x7, x7, #0xfffffffffffffffe", + jit_aarch64_asm:and_(r7, r7, -2) + ), + ?_assertAsmEqual( + <<16#9200cc41:32/little>>, + "and x1, x2, #0xf0f0f0f0f0f0f0f", + jit_aarch64_asm:and_(r1, r2, 16#f0f0f0f0f0f0f0f) + ), + ?_assertAsmEqual( + <<16#92603c62:32/little>>, + "and x2, x3, #0xffff00000000", + jit_aarch64_asm:and_(r2, r3, 16#ffff00000000) + ), + ?_assertAsmEqual( + <<16#92785c83:32/little>>, + "and x3, x4, #0xffffff00", + jit_aarch64_asm:and_(r3, r4, 16#ffffff00) + ), + %% Test and_ with unencodable immediate + ?_assertError( + {unencodable_immediate, 16#123456}, jit_aarch64_asm:and_(r0, r0, 16#123456) + ) + ]. + +lsl_test_() -> + [ + ?_assertAsmEqual( + <<16#D3607C00:32/little>>, "lsl x0, x0, #32", jit_aarch64_asm:lsl(r0, r0, 32) + ) + ]. + +lsr_test_() -> + [ + ?_assertAsmEqual( + <<16#D340FC00:32/little>>, "lsr x0, x0, 0", jit_aarch64_asm:lsr(r0, r0, 0) + ), + ?_assertAsmEqual( + <<16#D340FC01:32/little>>, "lsr x1, x0, 0", jit_aarch64_asm:lsr(r1, r0, 0) + ), + ?_assertAsmEqual( + <<16#D360FC00:32/little>>, "lsr x0, x0, #32", jit_aarch64_asm:lsr(r0, r0, 32) + ) + ]. + +ret_test_() -> + [ + ?_assertAsmEqual(<<16#D65F03C0:32/little>>, "ret", jit_aarch64_asm:ret()) + ]. 
+ +tst_test_() -> + [ + ?_assertAsmEqual(<<16#EA01001F:32/little>>, "tst x0, x1", jit_aarch64_asm:tst(r0, r1)), + ?_assertAsmEqual(<<16#f240003f:32/little>>, "tst x1, #1", jit_aarch64_asm:tst(r1, 1)), + ?_assertAsmEqual(<<16#f27c005f:32/little>>, "tst x2, #16", jit_aarch64_asm:tst(r2, 16)), + ?_assertAsmEqual(<<16#f2401c7f:32/little>>, "tst x3, #255", jit_aarch64_asm:tst(r3, 255)), + ?_assertAsmEqual(<<16#f240249f:32/little>>, "tst x4, #1023", jit_aarch64_asm:tst(r4, 1023)), + ?_assertAsmEqual(<<16#f24014bf:32/little>>, "tst x5, #63", jit_aarch64_asm:tst(r5, 63)), + ?_assertAsmEqual(<<16#f27b00df:32/little>>, "tst x6, #32", jit_aarch64_asm:tst(r6, 32)), + ?_assertAsmEqual(<<16#f27a00ff:32/little>>, "tst x7, #64", jit_aarch64_asm:tst(r7, 64)), + ?_assertAsmEqual(<<16#f27e051f:32/little>>, "tst x8, #0xc", jit_aarch64_asm:tst(r8, 16#c)), + + %% Test tst with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:tst(r0, 16#123456)) + ]. + +tst_w_test_() -> + [ + ?_assertAsmEqual(<<16#6a01001f:32/little>>, "tst w0, w1", jit_aarch64_asm:tst_w(r0, r1)), + ?_assertAsmEqual(<<16#7200003f:32/little>>, "tst w1, #1", jit_aarch64_asm:tst_w(r1, 1)), + ?_assertAsmEqual(<<16#721c005f:32/little>>, "tst w2, #16", jit_aarch64_asm:tst_w(r2, 16)), + ?_assertAsmEqual(<<16#72001c7f:32/little>>, "tst w3, #255", jit_aarch64_asm:tst_w(r3, 255)), + ?_assertAsmEqual( + <<16#7200249f:32/little>>, "tst w4, #1023", jit_aarch64_asm:tst_w(r4, 1023) + ), + ?_assertAsmEqual(<<16#720014bf:32/little>>, "tst w5, #63", jit_aarch64_asm:tst_w(r5, 63)), + ?_assertAsmEqual(<<16#721b00df:32/little>>, "tst w6, #32", jit_aarch64_asm:tst_w(r6, 32)), + ?_assertAsmEqual(<<16#721a00ff:32/little>>, "tst w7, #64", jit_aarch64_asm:tst_w(r7, 64)), + ?_assertAsmEqual( + <<16#721e051f:32/little>>, "tst w8, #0xc", jit_aarch64_asm:tst_w(r8, 16#c) + ), + + %% Test tst_w with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:tst_w(r0, 16#123456)) + 
]. + +bcc_test_() -> + [ + ?_assertAsmEqual(<<16#54000000:32/little>>, "b.eq .+0", jit_aarch64_asm:bcc(eq, 0)), + ?_assertAsmEqual(<<16#54000001:32/little>>, "b.ne .+0", jit_aarch64_asm:bcc(ne, 0)), + ?_assertAsmEqual(<<16#54fffe01:32/little>>, "b.ne .-64", jit_aarch64_asm:bcc(ne, -64)), + ?_assertAsmEqual(<<16#54000400:32/little>>, "b.eq 128", jit_aarch64_asm:bcc(eq, 128)), + ?_assertAsmEqual(<<16#54000402:32/little>>, "b.cs 128", jit_aarch64_asm:bcc(cs, 128)), + ?_assertAsmEqual(<<16#54000403:32/little>>, "b.cc 128", jit_aarch64_asm:bcc(cc, 128)), + ?_assertAsmEqual(<<16#54000404:32/little>>, "b.mi 128", jit_aarch64_asm:bcc(mi, 128)), + ?_assertAsmEqual(<<16#54000405:32/little>>, "b.pl 128", jit_aarch64_asm:bcc(pl, 128)), + ?_assertAsmEqual(<<16#54000406:32/little>>, "b.vs 128", jit_aarch64_asm:bcc(vs, 128)), + ?_assertAsmEqual(<<16#54000408:32/little>>, "b.hi 128", jit_aarch64_asm:bcc(hi, 128)), + ?_assertAsmEqual(<<16#54000409:32/little>>, "b.ls 128", jit_aarch64_asm:bcc(ls, 128)), + ?_assertAsmEqual(<<16#5400040a:32/little>>, "b.ge 128", jit_aarch64_asm:bcc(ge, 128)), + ?_assertAsmEqual(<<16#5400040b:32/little>>, "b.lt 128", jit_aarch64_asm:bcc(lt, 128)), + ?_assertAsmEqual(<<16#5400040c:32/little>>, "b.gt 128", jit_aarch64_asm:bcc(gt, 128)), + ?_assertAsmEqual(<<16#5400040d:32/little>>, "b.le 128", jit_aarch64_asm:bcc(le, 128)), + ?_assertAsmEqual(<<16#5400040e:32/little>>, "b.al 128", jit_aarch64_asm:bcc(al, 128)), + ?_assertAsmEqual(<<16#5400040f:32/little>>, "b.nv 128", jit_aarch64_asm:bcc(nv, 128)), + ?_assertAsmEqual(<<16#54000007:32/little>>, "b.vc .+0", jit_aarch64_asm:bcc(vc, 0)) + ]. + +cbnz_test_() -> + [ + ?_assertAsmEqual(<<16#b5000401:32/little>>, "cbnz x1, 128", jit_aarch64_asm:cbnz(r1, 128)), + ?_assertAsmEqual( + <<16#35000402:32/little>>, "cbnz w2, 128", jit_aarch64_asm:cbnz_w(r2, 128) + ), + ?_assertAsmEqual(<<16#b5fffc03:32/little>>, "cbnz x3, -128", jit_aarch64_asm:cbnz(r3, -128)) + ]. 
+ +tbz_test_() -> + [ + ?_assertAsmEqual( + <<16#b6f80400:32/little>>, "tbz x0, #63, 128", jit_aarch64_asm:tbz(r0, 63, 128) + ), + ?_assertAsmEqual( + <<16#36180400:32/little>>, "tbz x0, #3, 128", jit_aarch64_asm:tbz(r0, 3, 128) + ), + ?_assertAsmEqual( + <<16#363ffc03:32/little>>, "tbz x3, #7, -128", jit_aarch64_asm:tbz(r3, 7, -128) + ) + ]. + +tbnz_test_() -> + [ + ?_assertAsmEqual( + <<16#37000400:32/little>>, "tbnz x0, #0, 128", jit_aarch64_asm:tbnz(r0, 0, 128) + ), + ?_assertAsmEqual( + <<16#37180400:32/little>>, "tbnz x0, #3, 128", jit_aarch64_asm:tbnz(r0, 3, 128) + ), + ?_assertAsmEqual( + <<16#373ffc03:32/little>>, "tbnz x3, #7, -128", jit_aarch64_asm:tbnz(r3, 7, -128) + ) + ]. + +stp_test_() -> + [ + ?_assertAsmEqual( + <<16#a8815113:32/little>>, + "stp x19, x20, [x8], #16", + jit_aarch64_asm:stp(r19, r20, {r8}, 16) + ), + ?_assertAsmEqual( + <<16#a88153f3:32/little>>, + "stp x19, x20, [sp], #16", + jit_aarch64_asm:stp(r19, r20, {sp}, 16) + ), + % Store-update (writeback) variants + ?_assertAsmEqual( + <<16#a9bf27e8:32/little>>, + "stp x8, x9, [sp, #-16]!", + jit_aarch64_asm:stp(r8, r9, {sp, -16}, '!') + ), + ?_assertAsmEqual( + <<16#a98127e8:32/little>>, + "stp x8, x9, [sp, #16]!", + jit_aarch64_asm:stp(r8, r9, {sp, 16}, '!') + ), + ?_assertAsmEqual( + <<16#a98027e8:32/little>>, + "stp x8, x9, [sp, #0]!", + jit_aarch64_asm:stp(r8, r9, {sp, 0}, '!') + ) + ]. + +ldp_test_() -> + [ + ?_assertAsmEqual( + <<16#a8c15113:32/little>>, + "ldp x19, x20, [x8], #16", + jit_aarch64_asm:ldp(r19, r20, {r8}, 16) + ), + ?_assertAsmEqual( + <<16#a8c153f3:32/little>>, + "ldp x19, x20, [sp], #16", + jit_aarch64_asm:ldp(r19, r20, {sp}, 16) + ) + ]. 
+ +subs_test_() -> + [ + % SUBS with immediate + ?_assertAsmEqual( + <<16#F1000021:32/little>>, "subs x1, x1, #0", jit_aarch64_asm:subs(r1, r1, 0) + ), + ?_assertAsmEqual( + <<16#F1000421:32/little>>, "subs x1, x1, #1", jit_aarch64_asm:subs(r1, r1, 1) + ), + % SUBS with register + ?_assertAsmEqual( + <<16#eb000021:32/little>>, "subs x1, x1, x0", jit_aarch64_asm:subs(r1, r1, r0) + ), + ?_assertAsmEqual( + <<16#eb0a0021:32/little>>, "subs x1, x1, x10", jit_aarch64_asm:subs(r1, r1, r10) + ) + ]. + +adr_test_() -> + [ + %% ADR x0, #0 + ?_assertAsmEqual(<<16#10000000:32/little>>, "adr x0, .+0", jit_aarch64_asm:adr(r0, 0)), + %% ADR x1, #4 + ?_assertAsmEqual(<<16#10000021:32/little>>, "adr x1, .+4", jit_aarch64_asm:adr(r1, 4)), + %% ADR x2, #-4 + ?_assertAsmEqual(<<16#10ffffe2:32/little>>, "adr x2, .-4", jit_aarch64_asm:adr(r2, -4)), + %% ADR x3, #1048572 (max positive) + ?_assertAsmEqual( + <<16#107fffe3:32/little>>, "adr x3, .+1048572", jit_aarch64_asm:adr(r3, 1048572) + ), + %% ADR x4, #-1048576 (max negative) + ?_assertAsmEqual( + <<16#10800004:32/little>>, "adr x4, .-1048576", jit_aarch64_asm:adr(r4, -1048576) + ), + %% ADR with offset not a multiple of 4 is valid + ?_assertAsmEqual(<<16#70000000:32/little>>, "adr x0, .+3", jit_aarch64_asm:adr(r0, 3)) + ]. + +%% Test nop instruction +nop_test_() -> + [ + ?_assertAsmEqual( + <<16#d503201f:32/little>>, "nop", jit_aarch64_asm:nop() + ) + ]. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl new file mode 100644 index 0000000000..087ab9074d --- /dev/null +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -0,0 +1,1764 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_aarch64). + +% disassembly obtained with: +% aarch64-elf-objdump -b binary -D dump.bin -M aarch64 + +call_primitive_0_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400050 ldr x16, [x2]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_1_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400450 ldr x16, [x2, #8]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_2_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400850 ldr x16, [x2, #16]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d2800541 mov x1, #0x2a // #42\n" + " 10: d2800562 mov x2, #0x2b // #43\n" + " 14: d2800583 mov x3, #0x2c // #44\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 

%% Exercises ?PRIM_EXTENDED_REGISTER_PTR calls: three extended-register pointer
%% lookups followed by a put_list, checking that caller-saved native registers
%% (x7/x8/x9) are spilled/restored around each primitive call and that the
%% generated AArch64 stream matches the expected encoding byte-for-byte.
call_primitive_extended_regs_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
    {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
    {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
    {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [
        ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}
    ]),
    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}),
    State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]),
    ?BACKEND:assert_all_native_free(State6),
    Stream = ?BACKEND:stream(State6),
    %% Expected objdump-style disassembly of the generated code.
    Dump =
        <<
            "\n"
            " 0: f9404850 ldr x16, [x2, #144]\n"
            " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " c: d2800261 mov x1, #0x13 // #19\n"
            " 10: d63f0200 blr x16\n"
            " 14: aa0003e7 mov x7, x0\n"
            " 18: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 1c: a8c103fe ldp x30, x0, [sp], #16\n"
            " 20: f9404850 ldr x16, [x2, #144]\n"
            " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 2c: f81f0fe7 str x7, [sp, #-16]!\n"
            " 30: d2800281 mov x1, #0x14 // #20\n"
            " 34: d63f0200 blr x16\n"
            " 38: aa0003e8 mov x8, x0\n"
            " 3c: f84107e7 ldr x7, [sp], #16\n"
            " 40: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 44: a8c103fe ldp x30, x0, [sp], #16\n"
            " 48: f9404850 ldr x16, [x2, #144]\n"
            " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 54: a9bf1fe8 stp x8, x7, [sp, #-16]!\n"
            " 58: d2800261 mov x1, #0x13 // #19\n"
            " 5c: d63f0200 blr x16\n"
            " 60: aa0003e9 mov x9, x0\n"
            " 64: a8c11fe8 ldp x8, x7, [sp], #16\n"
            " 68: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 6c: a8c103fe ldp x30, x0, [sp], #16\n"
            " 70: f9403450 ldr x16, [x2, #104]\n"
            " 74: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 78: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 7c: f81f0fe9 str x9, [sp, #-16]!\n"
            " 80: f94000e1 ldr x1, [x7]\n"
            " 84: f9400102 ldr x2, [x8]\n"
            " 88: d63f0200 blr x16\n"
            " 8c: aa0003e7 mov x7, x0\n"
            " 90: f84107e9 ldr x9, [sp], #16\n"
            " 94: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 98: a8c103fe ldp x30, x0, [sp], #16\n"
            " 9c: f9000127 str x7, [x9]\n"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% call_ext with arity -1 (OP_CALL_EXT_ONLY style): reduction decrement,
%% conditional reschedule, then a tail jump through the CALL_EXT primitive.
call_ext_only_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, -1]),
    Stream = ?BACKEND:stream(State2),
    Dump = <<
        " 0: b9401027 ldr w7, [x1, #16]\n"
        " 4: f10004e7 subs x7, x7, #0x1\n"
        " 8: b9001027 str w7, [x1, #16]\n"
        " c: 540000a1 b.ne 0x20 // b.any\n"
        " 10: 10000087 adr x7, 0x20\n"
        " 14: f9000427 str x7, [x1, #8]\n"
        " 18: f9400847 ldr x7, [x2, #16]\n"
        " 1c: d61f00e0 br x7\n"
        " 20: f9401047 ldr x7, [x2, #32]\n"
        " 24: d2800042 mov x2, #0x2 // #2\n"
        " 28: d2800043 mov x3, #0x2 // #2\n"
        " 2c: 92800004 mov x4, #0xffffffffffffffff // #-1\n"
        " 30: d61f00e0 br x7"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Same as call_ext_only_test but with a non-negative last argument (10),
%% checking the positive-immediate MOV encoding for x4.
call_ext_last_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]),
    Stream = ?BACKEND:stream(State2),
    Dump = <<
        " 0: b9401027 ldr w7, [x1, #16]\n"
        " 4: f10004e7 subs x7, x7, #0x1\n"
        " 8: b9001027 str w7, [x1, #16]\n"
        " c: 540000a1 b.ne 0x20 // b.any\n"
        " 10: 10000087 adr x7, 0x20\n"
        " 14: f9000427 str x7, [x1, #8]\n"
        " 18: f9400847 ldr x7, [x2, #16]\n"
        " 1c: d61f00e0 br x7\n"
        " 20: f9401047 ldr x7, [x2, #32]\n"
        " 24: d2800042 mov x2, #0x2 // #2\n"
        " 28: d2800043 mov x3, #0x2 // #2\n"
        " 2c: d2800144 mov x4, #0xa // #10\n"
        " 30: d61f00e0 br x7"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Minimal tail-call to primitive 0: load function pointer from the primitives
%% table ([x2]) and branch, with the extra argument materialized in x2.
call_primitive_last_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]),
    Stream = ?BACKEND:stream(State1),
    Dump =
        <<
            " 0: f9400047 ldr x7, [x2]\n"
            " 4: d2800542 mov x2, #0x2a // #42\n"
            " 8: d61f00e0 br x7"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).
+ +return_if_not_equal_to_ctx_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: eb0000ff cmp x7, x0\n" + " 20: 54000060 b.eq 0x2c // b.none\n" + " 24: aa0703e0 mov x0, x7\n" + " 28: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg), + ?assertEqual(r8, OtherReg), + State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: aa0703e8 mov x8, x7\n" + " 20: eb00011f cmp x8, x0\n" + " 24: 54000060 b.eq 0x30 // b.none\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. 

%% move_to_cp/2 with a y_reg source: load stack pointer (ctx+40), dereference
%% y0, store into the CP slot (ctx+184).
move_to_cp_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}),
    Stream = ?BACKEND:stream(State1),
    Dump =
        <<
            " 0: f9401407 ldr x7, [x0, #40]\n"
            " 4: f94000e7 ldr x7, [x7]\n"
            " 8: f9005c07 str x7, [x0, #184]"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% increment_sp/2: bump the stack pointer by 7 words (7 * 8 = 0x38 bytes).
increment_sp_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:increment_sp(State0, 7),
    Stream = ?BACKEND:stream(State1),
    Dump =
        <<
            " 0: f9401407 ldr x7, [x0, #40]\n"
            " 4: 9100e0e7 add x7, x7, #0x38\n"
            " 8: f9001407 str x7, [x0, #40]"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% EUnit fixture covering every condition form accepted by if_block/3.
%% Each subtest checks two things:
%% - the expected AArch64 encoding for the condition (tbz/cbnz/cmp+b.cc/tst…);
%% - register accounting: {free, Reg} variants must release RegA afterwards
%%   (used_regs returns [RegB]) while non-free variants keep [RegB, RegA].
%% The setup loads x_reg 0 and x_reg 1 into native registers x7 (RegA) and
%% x8 (RegB); the block body is always `add RegB, 2`.
if_block_test_() ->
    {setup,
        fun() ->
            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
            {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
            {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
            {State2, RegA, RegB}
        end,
        fun({State0, RegA, RegB}) ->
            [
                %% signed `< 0` compiles to a sign-bit test (tbz #63).
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '<', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: b6f80047 tbz x7, #63, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% reg `<` reg compiles to cmp + b.ge (branch over body).
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '<', RegB},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: eb0800ff cmp x7, x8\n"
                        " c: 5400004a b.ge 0x14 // b.tcont\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% `== 0` compiles to cbnz over the body.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: b5000047 cbnz x7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% Same encoding, but {free, RegA} must release RegA.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: b5000047 cbnz x7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '(int)' prefix narrows the comparison to 32 bits (w7).
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', RegA, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 35000047 cbnz w7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', {free, RegA}, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 35000047 cbnz w7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '!=' immediate (?TERM_NIL = 0x3b): cmp + b.eq over the body.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '!=', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '!=', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', RegA, '!=', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', {free, RegA}, '!=', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '==' immediate: inverted branch (b.ne) over the body.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '==', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '==', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', RegA, '==', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', {free, RegA}, '==', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '(bool)' forms test only bit 0 of the 32-bit register.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', RegA, '==', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 37000047 tbnz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', {free, RegA}, '==', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 37000047 tbnz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', RegA, '!=', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 36000047 tbz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', {free, RegA}, '!=', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 36000047 tbz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% Mask 0x7 is encodable as a logical immediate: single tst.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '&', 16#7, '!=', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f24008ff tst x7, #0x7\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% Mask 0x5 is NOT a valid logical immediate: materialized in x9.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '&', 16#5, '!=', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: d28000a9 mov x9, #0x5 // #5\n"
                        " c: ea0900ff tst x7, x9\n"
                        " 10: 54000040 b.eq 0x18 // b.none\n"
                        " 14: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '&', 16#7, '!=', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f24008ff tst x7, #0x7\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% Masked compare against non-zero: and into scratch x9 when
                %% RegA is still live, then cmp.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 92400ce9 and x9, x7, #0xf\n"
                        " c: f1003d3f cmp x9, #0xf\n"
                        " 10: 54000040 b.eq 0x18 // b.none\n"
                        " 14: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% With {free, RegA} the mask may clobber x7 in place.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 92400ce7 and x7, x7, #0xf\n"
                        " c: f1003cff cmp x7, #0xf\n"
                        " 10: 54000040 b.eq 0x18 // b.none\n"
                        " 14: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end)
            ]
        end}.

%% if_else_block/4: then-branch falls through into an unconditional branch
%% over the else-branch.
if_else_block_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
    State3 = ?BACKEND:if_else_block(
        State2,
        {Reg1, '==', ?TERM_NIL},
        fun(BSt0) ->
            ?BACKEND:add(BSt0, Reg2, 2)
        end,
        fun(BSt0) ->
            ?BACKEND:add(BSt0, Reg2, 4)
        end
    ),
    Stream = ?BACKEND:stream(State3),
    Dump =
        <<
            " 0: f9401807 ldr x7, [x0, #48]\n"
            " 4: f9401c08 ldr x8, [x0, #56]\n"
            " 8: f100ecff cmp x7, #0x3b\n"
            " c: 54000061 b.ne 0x18 // b.any\n"
            " 10: 91000908 add x8, x8, #0x2\n"
            " 14: 14000002 b 0x1c\n"
            " 18: 91001108 add x8, x8, #0x4"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% shift_right/3: logical shift right by an immediate (lsr).
shift_right_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:shift_right(State1, Reg, 3),
    Stream = ?BACKEND:stream(State2),
    Dump =
        <<
            " 0: f9401807 ldr x7, [x0, #48]\n"
            " 4: d343fce7 lsr x7, x7, #3"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% shift_left/3: logical shift left by an immediate (lsl).
shift_left_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:shift_left(State1, Reg, 3),
    Stream = ?BACKEND:stream(State2),
    Dump =
        <<
            " 0: f9401807 ldr x7, [x0, #48]\n"
            " 4: d37df0e7 lsl x7, x7, #3"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).
+ +call_only_or_schedule_next_and_label_relocation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: b9401027 ldr w7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: b9001027 str w7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, [x2, #8]\n" + " 38: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 

%% BIF call with a 64-bit literal (9208452466117618637 = 16#7FCAFEBABE02ABCD)
%% that cannot fit a single MOV: must be materialized with mov + 3 movk
%% (16-bit chunks at lsl #16/#32/#48), then the BIF result is checked for 0
%% (error path calls ?PRIM_HANDLE_ERROR) before being stored to x_reg 0.
call_bif_with_large_literal_integer_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]),
    {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 9208452466117618637]),
    {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [
        ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}
    ]),
    State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) ->
        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
    end),
    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}),
    State6 = ?BACKEND:free_native_registers(State5, [ResultReg]),
    ?BACKEND:assert_all_native_free(State6),
    Stream = ?BACKEND:stream(State6),
    Dump =
        <<
            " 0: f9402050 ldr x16, [x2, #64]\n"
            " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " c: aa0103e0 mov x0, x1\n"
            " 10: d2800041 mov x1, #0x2 // #2\n"
            " 14: d63f0200 blr x16\n"
            " 18: aa0003e7 mov x7, x0\n"
            " 1c: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 20: a8c103fe ldp x30, x0, [sp], #16\n"
            " 24: f9403c50 ldr x16, [x2, #120]\n"
            " 28: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 2c: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 30: f81f0fe7 str x7, [sp, #-16]!\n"
            " 34: d29579a1 mov x1, #0xabcd // #43981\n"
            " 38: f2b7c041 movk x1, #0xbe02, lsl #16\n"
            " 3c: f2dfd741 movk x1, #0xfeba, lsl #32\n"
            " 40: f2eff941 movk x1, #0x7fca, lsl #48\n"
            " 44: d63f0200 blr x16\n"
            " 48: aa0003e8 mov x8, x0\n"
            " 4c: f84107e7 ldr x7, [sp], #16\n"
            " 50: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 54: a8c103fe ldp x30, x0, [sp], #16\n"
            " 58: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 5c: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 60: d2800001 mov x1, #0x0 // #0\n"
            " 64: d2800022 mov x2, #0x1 // #1\n"
            " 68: f9401803 ldr x3, [x0, #48]\n"
            " 6c: aa0803e4 mov x4, x8\n"
            " 70: d63f00e0 blr x7\n"
            " 74: aa0003e7 mov x7, x0\n"
            " 78: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 7c: a8c103fe ldp x30, x0, [sp], #16\n"
            " 80: b5000087 cbnz x7, 0x90\n"
            " 84: f9401847 ldr x7, [x2, #48]\n"
            " 88: d2801102 mov x2, #0x88 // #136\n"
            " 8c: d61f00e0 br x7\n"
            " 90: f9001807 str x7, [x0, #48]"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% OP_GET_LIST pattern: untag the list pointer (clear low 2 bits), then move
%% tail (offset 8) to y1 and head (offset 0) to y0.
get_list_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:and_(State1, Reg, -4),
    State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
    State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
    ?BACKEND:assert_all_native_free(State5),
    Stream = ?BACKEND:stream(State5),
    Dump = <<
        " 0: f9401807 ldr x7, [x0, #48]\n"
        " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n"
        " 8: f9401408 ldr x8, [x0, #40]\n"
        " c: f94004e9 ldr x9, [x7, #8]\n"
        " 10: f9000509 str x9, [x8, #8]\n"
        " 14: f9401408 ldr x8, [x0, #40]\n"
        " 18: f94000e9 ldr x9, [x7]\n"
        " 1c: f9000109 str x9, [x8]"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).
+ +is_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + MSt1 = ?BACKEND:if_block( + MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ), + MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), + ?BACKEND:if_block( + MSt3, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 54000160 b.eq 0x38 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 14000047 b 0x138\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e7 and x7, x7, #0x3f\n" + " 2c: f10020ff cmp x7, #0x8\n" + " 30: 54000040 b.eq 0x38 // b.none\n" + " 34: 14000041 b 0x138" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +cond_jump_to_label(Cond, Label, MMod, MSt0) -> + MMod:if_block(MSt0, Cond, fun(BSt0) -> + MMod:jump_to_label(BSt0, Label) + end). 
+ +is_number_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + BSt1 = cond_jump_to_label( + {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 + ), + BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), + cond_jump_to_label( + {'and', [ + {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT} + ]}, + Label, + ?BACKEND, + BSt3 + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 540001c0 b.eq 0x44 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 1400004a b 0x144\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e8 and x8, x7, #0x3f\n" + " 2c: f100211f cmp x8, #0x8\n" + " 30: 540000a0 b.eq 0x44 // b.none\n" + " 34: 924014e7 and x7, x7, #0x3f\n" + " 38: f10060ff cmp x7, #0x18\n" + " 3c: 54000040 b.eq 0x44 // b.none\n" + " 40: 14000041 b 0x144" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
%% Verify the code emitted for an `is_boolean` guard: the term in {x_reg, 0}
%% must be the `true` atom (0x4b) or the `false` atom (0xb); anything else
%% jumps to failure Label 1.
is_boolean_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
            ?BACKEND:jump_to_label(BSt1, Label)
        end)
    end),
    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
    ?BACKEND:assert_all_native_free(State3),
    Offset = ?BACKEND:offset(State3),
    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
    State5 = ?BACKEND:update_branches(State4),
    Stream = ?BACKEND:stream(State5),
    %% NOTE: a redundant second `Offset = ?BACKEND:offset(State3)` re-match
    %% was removed here; it was a no-op (Offset was already bound above).
    Dump = <<
        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
        "   4:	f1012cff 	cmp	x7, #0x4b\n"
        "   8:	54000080 	b.eq	0x18  // b.none\n"
        "   c:	f1002cff 	cmp	x7, #0xb\n"
        "  10:	54000040 	b.eq	0x18  // b.none\n"
        "  14:	14000041 	b	0x118"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Verify call_ext: decrement reductions (scheduling out through the
%% continuation if they reach zero), then tail-call primitive 4 with a
%% saved CP built from the module index and return offset.
call_ext_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]),
    ?BACKEND:assert_all_native_free(State2),
    Stream = ?BACKEND:stream(State2),
    Dump = <<
        "   0:	b9401027 	ldr	w7, [x1, #16]\n"
        "   4:	f10004e7 	subs	x7, x7, #0x1\n"
        "   8:	b9001027 	str	w7, [x1, #16]\n"
        "   c:	540000a1 	b.ne	0x20  // b.any\n"
        "  10:	10000087 	adr	x7, 0x20\n"
        "  14:	f9000427 	str	x7, [x1, #8]\n"
        "  18:	f9400847 	ldr	x7, [x2, #16]\n"
        "  1c:	d61f00e0 	br	x7\n"
        "  20:	f9400027 	ldr	x7, [x1]\n"
        "  24:	b94000e7 	ldr	w7, [x7]\n"
        "  28:	d3689ce7 	lsl	x7, x7, #24\n"
        "  2c:	d2802610 	mov	x16, #0x130                 	// #304\n"
        "  30:	aa1000e7 	orr	x7, x7, x16\n"
        "  34:	f9005c07 	str	x7, [x0, #184]\n"
        "  38:	f9401047 	ldr	x7, [x2, #32]\n"
        "  3c:	d2800042 	mov	x2, #0x2                   	// #2\n"
        "  40:	d28000a3 	mov	x3, #0x5                   	// #5\n"
        "  44:	92800004 	mov	x4, #0xffffffffffffffff    	// #-1\n"
        "  48:	d61f00e0 	br	x7"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Verify call_fun: after the reduction check, the callee term is validated
%% (boxed, with a fun boxed tag) — raising badfun otherwise — and then
%% PRIM_CALL_FUN is tail-called with a saved CP.
call_fun_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    FuncReg = {x_reg, 0},
    ArgsCount = 0,
    {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg),
    {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg),
    State4 = ?BACKEND:if_block(
        State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
            ])
        end
    ),
    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
    State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
    State7 = ?BACKEND:if_block(
        State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
            ])
        end
    ),
    State8 = ?BACKEND:free_native_registers(State7, [RegCopy]),
    State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [
        ctx, jit_state, Reg, ArgsCount
    ]),
    ?BACKEND:assert_all_native_free(State9),
    Stream = ?BACKEND:stream(State9),
    Dump = <<
        "   0:	b9401027 	ldr	w7, [x1, #16]\n"
        "   4:	f10004e7 	subs	x7, x7, #0x1\n"
        "   8:	b9001027 	str	w7, [x1, #16]\n"
        "   c:	540000a1 	b.ne	0x20  // b.any\n"
        "  10:	10000087 	adr	x7, 0x20\n"
        "  14:	f9000427 	str	x7, [x1, #8]\n"
        "  18:	f9400847 	ldr	x7, [x2, #16]\n"
        "  1c:	d61f00e0 	br	x7\n"
        "  20:	f9401807 	ldr	x7, [x0, #48]\n"
        "  24:	aa0703e8 	mov	x8, x7\n"
        "  28:	92400509 	and	x9, x8, #0x3\n"
        "  2c:	f100093f 	cmp	x9, #0x2\n"
        "  30:	540000c0 	b.eq	0x48  // b.none\n"
        "  34:	f9404c47 	ldr	x7, [x2, #152]\n"
        "  38:	d2800702 	mov	x2, #0x38                  	// #56\n"
        "  3c:	d2803163 	mov	x3, #0x18b                 	// #395\n"
        "  40:	aa0803e4 	mov	x4, x8\n"
        "  44:	d61f00e0 	br	x7\n"
        "  48:	927ef508 	and	x8, x8, #0xfffffffffffffffc\n"
        "  4c:	f9400108 	ldr	x8, [x8]\n"
        "  50:	92401509 	and	x9, x8, #0x3f\n"
        "  54:	f100513f 	cmp	x9, #0x14\n"
        "  58:	540000c0 	b.eq	0x70  // b.none\n"
        "  5c:	f9404c47 	ldr	x7, [x2, #152]\n"
        "  60:	d2800c02 	mov	x2, #0x60                  	// #96\n"
        "  64:	d2803163 	mov	x3, #0x18b                 	// #395\n"
        "  68:	aa0803e4 	mov	x4, x8\n"
        "  6c:	d61f00e0 	br	x7\n"
        "  70:	f9400028 	ldr	x8, [x1]\n"
        "  74:	b9400108 	ldr	w8, [x8]\n"
        "  78:	d3689d08 	lsl	x8, x8, #24\n"
        "  7c:	d2804c10 	mov	x16, #0x260                 	// #608\n"
        "  80:	aa100108 	orr	x8, x8, x16\n"
        "  84:	f9005c08 	str	x8, [x0, #184]\n"
        "  88:	f9408048 	ldr	x8, [x2, #256]\n"
        "  8c:	aa0703e2 	mov	x2, x7\n"
        "  90:	d2800003 	mov	x3, #0x0                   	// #0\n"
        "  94:	d61f0100 	br	x8"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Helper: run move_to_vm_register/3 and compare the emitted stream with the
%% expected disassembly dump.
move_to_vm_register_test0(State, Source, Dest, Dump) ->
    State1 = ?BACKEND:move_to_vm_register(State, Source, Dest),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Exercise every source/destination combination of move_to_vm_register/3:
%% immediates (incl. zero, large and negative), x/y/fp VM registers, native
%% registers and pointer indirections.
move_to_vm_register_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% Zero is stored through xzr, without a scratch register.
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
                        "   0:	f900181f 	str	xzr, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
                        "   0:	f900581f 	str	xzr, [x0, #176]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {ptr, r10}, <<
                        "   0:	f900015f 	str	xzr, [x10]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f90008ff 	str	xzr, [x7, #16]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f90050ff 	str	xzr, [x7, #160]"
                    >>)
                end),
                %% Test: Immediate to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9005807 	str	x7, [x0, #176]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9000907 	str	x7, [x8, #16]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9005107 	str	x7, [x8, #160]"
                    >>)
                end),
                %% Test: Immediate to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, 99, {ptr, r10}, <<
                        "   0:	d2800c67 	mov	x7, #0x63                  	// #99\n"
                        "   4:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% Test: x_reg to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
                        "   0:	f9401c07 	ldr	x7, [x0, #56]\n"
                        "   4:	f9002007 	str	x7, [x0, #64]"
                    >>)
                end),
                %% Test: x_reg to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r8}, <<
                        "   0:	f9401c07 	ldr	x7, [x0, #56]\n"
                        "   4:	f9000107 	str	x7, [x8]"
                    >>)
                end),
                %% Test: ptr to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {ptr, r9}, {x_reg, 3}, <<
                        "   0:	f9400127 	ldr	x7, [x9]\n"
                        "   4:	f9002407 	str	x7, [x0, #72]"
                    >>)
                end),
                %% Test: x_reg to y_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9000507 	str	x7, [x8, #8]"
                    >>)
                end),
                %% Test: y_reg to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f94000e7 	ldr	x7, [x7]\n"
                        "   8:	f9002407 	str	x7, [x0, #72]"
                    >>)
                end),
                %% Test: y_reg (non-zero index) to x_reg
                %% (comment fixed: the original said "y_reg to y_reg")
                ?_test(begin
                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f94004e7 	ldr	x7, [x7, #8]\n"
                        "   8:	f9002407 	str	x7, [x0, #72]"
                    >>)
                end),
                %% Test: Native register to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, r10, {x_reg, 0}, <<
                        "   0:	f900180a 	str	x10, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, r10, {x_reg, extra}, <<
                        "   0:	f900580a 	str	x10, [x0, #176]"
                    >>)
                end),
                %% Test: Native register to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, r9, {ptr, r10}, <<
                        "   0:	f9000149 	str	x9, [x10]"
                    >>)
                end),
                %% Test: Native register to y_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, r10, {y_reg, 0}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f90000ea 	str	x10, [x7]"
                    >>)
                end),
                %% Test: Large immediate to x_reg (mov + three movk)
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, 0}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, extra}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9005807 	str	x7, [x0, #176]\n"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 2}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9401408 	ldr	x8, [x0, #40]\n"
                        "  14:	f9000907 	str	x7, [x8, #16]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 20}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9401408 	ldr	x8, [x0, #40]\n"
                        "  14:	f9005107 	str	x7, [x8, #160]"
                    >>)
                end),
                %% Test: Large immediate to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {ptr, r10}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% Test: x_reg to y_reg (high index)
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
                        "   0:	f9405407 	ldr	x7, [x0, #168]\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9007d07 	str	x7, [x8, #248]"
                    >>)
                end),
                %% Test: y_reg to x_reg (high index)
                ?_test(begin
                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9407ce7 	ldr	x7, [x7, #248]\n"
                        "   8:	f9005407 	str	x7, [x0, #168]"
                    >>)
                end),
                %% Test: Negative immediate to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
                        "   0:	92800007 	mov	x7, #0xffffffffffffffff    	// #-1\n"
                        "   4:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                %% Test: ptr with offset to fp_reg (term_to_float)
                ?_test(begin
                    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
                    State2 = ?BACKEND:move_to_vm_register(
                        State1, {free, {ptr, RegA, 1}}, {fp_reg, 3}
                    ),
                    Stream = ?BACKEND:stream(State2),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f94004e7 	ldr	x7, [x7, #8]\n"
                        "   8:	f9406008 	ldr	x8, [x0, #192]\n"
                        "   c:	f9000d07 	str	x7, [x8, #24]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.

%% Helper: run move_array_element/4 and compare the emitted stream with the
%% expected disassembly dump.
move_array_element_test0(State, Reg, Index, Dest, Dump) ->
    State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).
%% Exercise move_array_element with constant and register indices, to x/y VM
%% registers, pointers and native registers. Register indices are emitted as
%% scaled loads (ldr xN, [xB, xI, lsl #3] — 8-byte terms).
move_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% move_array_element: reg[x] to x_reg
                ?_test(begin
                    move_array_element_test0(State0, r8, 2, {x_reg, 0}, <<
                        "   0:	f9400907 	ldr	x7, [x8, #16]\n"
                        "   4:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                %% move_array_element: reg[x] to ptr
                ?_test(begin
                    move_array_element_test0(State0, r8, 3, {ptr, r10}, <<
                        "   0:	f9400d07 	ldr	x7, [x8, #24]\n"
                        "   4:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% move_array_element: reg[x] to y_reg
                ?_test(begin
                    move_array_element_test0(State0, r8, 1, {y_reg, 2}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9400508 	ldr	x8, [x8, #8]\n"
                        "   8:	f90008e8 	str	x8, [x7, #16]"
                    >>)
                end),
                %% move_array_element: reg[x] to native reg (r10)
                ?_test(begin
                    move_array_element_test0(State0, r8, 1, r10, <<
                        "   0:	f940050a 	ldr	x10, [x8, #8]"
                    >>)
                end),
                %% move_array_element: reg[x] to y_reg (high indices)
                ?_test(begin
                    move_array_element_test0(State0, r8, 7, {y_reg, 31}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9401d08 	ldr	x8, [x8, #56]\n"
                        "   8:	f9007ce8 	str	x8, [x7, #248]"
                    >>)
                end),
                %% move_array_element: reg[x] to x_reg (high indices)
                ?_test(begin
                    move_array_element_test0(State0, r8, 7, {x_reg, 15}, <<
                        "   0:	f9401d07 	ldr	x7, [x8, #56]\n"
                        "   4:	f9005407 	str	x7, [x0, #168]"
                    >>)
                end),
                %% move_array_element: reg_x[reg_y] to x_reg
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    move_array_element_test0(State1, r8, {free, Reg}, {x_reg, 2}, <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]\n"
                        "   4:	f8677907 	ldr	x7, [x8, x7, lsl #3]\n"
                        "   8:	f9002007 	str	x7, [x0, #64]"
                    >>)
                end),
                %% move_array_element: reg_x[reg_y] to pointer (large x reg)
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    move_array_element_test0(State1, r8, {free, Reg}, {ptr, r10}, <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]\n"
                        "   4:	f8677907 	ldr	x7, [x8, x7, lsl #3]\n"
                        "   8:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% move_array_element: reg_x[reg_y] to y_reg
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f8677907 	ldr	x7, [x8, x7, lsl #3]\n"
                        "   c:	f9007d07 	str	x7, [x8, #248]"
                    >>)
                end)
            ]
        end}.

%% get_array_element/3 allocates a scratch register (r7 here) and loads
%% reg[index] into it.
get_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% get_array_element: reg[x] to new native reg
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual(r7, Reg)
                end)
            ]
        end}.

%% Exercise move_to_array_element/4 and /5 (with extra constant offset),
%% including register-indexed stores and forced register allocation via
%% direct manipulation of the state tuple.
move_to_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% move_to_array_element/4: x_reg to reg[x]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f9000907 	str	x7, [x8, #16]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/4: x_reg to reg[reg]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f8297907 	str	x7, [x8, x9, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/4: ptr to reg[reg]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r8, r9),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f94000e7 	ldr	x7, [x7]\n"
                        "   4:	f8297907 	str	x7, [x8, x9, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/4: y_reg to reg[reg]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r8, r9),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f94008e7 	ldr	x7, [x7, #16]\n"
                        "   8:	f8297907 	str	x7, [x8, x9, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/5: x_reg to reg[x+offset]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f9000d07 	str	x7, [x8, #24]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/5: x_reg to reg[reg+offset]
                %% NOTE(review): setelement(6,...)/setelement(7,...) poke the
                %% available/used register lists directly into the state tuple
                %% — assumes those tuple positions; confirm against the
                %% jit_aarch64 state record if it changes.
                ?_test(begin
                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
                    State2 = setelement(7, State1, [r8, r9]),
                    [r8, r9] = ?BACKEND:used_regs(State2),
                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1),
                    Stream = ?BACKEND:stream(State3),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	9100052a 	add	x10, x9, #0x1\n"
                        "   8:	f82a7907 	str	x7, [x8, x10, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/5: imm to reg[reg+offset]
                ?_test(begin
                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
                    State2 = setelement(7, State1, [r8, r9]),
                    [r8, r9] = ?BACKEND:used_regs(State2),
                    State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1),
                    Stream = ?BACKEND:stream(State3),
                    Dump = <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	9100052a 	add	x10, x9, #0x1\n"
                        "   8:	f82a7907 	str	x7, [x8, x10, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.
%% Exercise move_to_native_register/2 (allocates a register) and /3 (moves
%% into a caller-chosen register) for immediates, pointers, x and y regs.
move_to_native_register_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% move_to_native_register/2: imm
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r7, Reg),
                    Dump = <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/2: {ptr, reg} — dereferences in place
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r6, Reg),
                    Dump = <<
                        "   0:	f94000c6 	ldr	x6, [x6]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/2: {x_reg, N}
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 3}),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r7, Reg),
                    Dump = <<
                        "   0:	f9402407 	ldr	x7, [x0, #72]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/2: {y_reg, N}
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r7, Reg),
                    Dump = <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9400ce7 	ldr	x7, [x7, #24]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: imm to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, 42, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	d2800548 	mov	x8, #0x2a                  	// #42"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: reg to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, r7, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	aa0703e8 	mov	x8, x7"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: {ptr, reg} to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, {ptr, r7}, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f94000e8 	ldr	x8, [x7]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: {x_reg, x} to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9402008 	ldr	x8, [x0, #64]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: {y_reg, y} to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   4:	f9400908 	ldr	x8, [x8, #16]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.

%% Helper: run mul/3 and compare the emitted stream with the expected dump.
mul_test0(State0, Reg, Imm, Dump) ->
    State1 = ?BACKEND:mul(State0, Reg, Imm),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).

%% mul/3 strength reduction: powers of two become a single lsl, 2^n±1 become
%% lsl + add/sub, even composites recurse (e.g. 6 = 3 * 2), and anything else
%% falls back to mov + mul.
mul_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                ?_test(begin
                    mul_test0(State0, r2, 2, <<
                        "0:	d37ff842 	lsl	x2, x2, #1"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 3, <<
                        "   0:	d37ff847 	lsl	x7, x2, #1\n"
                        "   4:	8b0200e2 	add	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 4, <<
                        "0:	d37ef442 	lsl	x2, x2, #2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 5, <<
                        "   0:	d37ef447 	lsl	x7, x2, #2\n"
                        "   4:	8b0200e2 	add	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 6, <<
                        "   0:	d37ff847 	lsl	x7, x2, #1\n"
                        "   4:	8b0200e2 	add	x2, x7, x2\n"
                        "   8:	d37ff842 	lsl	x2, x2, #1"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 7, <<
                        "   0:	d37df047 	lsl	x7, x2, #3\n"
                        "   4:	cb0200e2 	sub	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 8, <<
                        "0:	d37df042 	lsl	x2, x2, #3"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 9, <<
                        "   0:	d37df047 	lsl	x7, x2, #3\n"
                        "   4:	8b0200e2 	add	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 10, <<
                        "   0:	d37ef447 	lsl	x7, x2, #2\n"
                        "   4:	8b0200e2 	add	x2, x7, x2\n"
                        "   8:	d37ff842 	lsl	x2, x2, #1"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 11, <<
                        "   0:	d2800167 	mov	x7, #0xb                   	// #11\n"
                        "   4:	9b077c42 	mul	x2, x2, x7"
                    >>)
                end)
            ]
        end}.

%% Convert an objdump-style disassembly dump into the raw little-endian
%% instruction stream. Each dump line is "<addr>: <8 hex digits> <mnemonic>";
%% only the 8-digit instruction word is kept, everything else is skipped.
dump_to_bin(Dump) ->
    dump_to_bin0(Dump, addr, []).

-define(IS_HEX_DIGIT(C),
    ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F))
).

%% State machine over the dump text:
%%   addr  -> consuming the leading address field, until "<hexdigit>:"
%%   hex   -> consuming the instruction word (8 hex digits + separator)
%%   instr -> skipping the mnemonic/comment until end of line
%% NOTE: the binary patterns below were reconstructed — the diff text had them
%% stripped by markup mangling ("<<N, ...>>" collapsed to "<>").
dump_to_bin0(<<N, $:, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
    %% Last address digit followed by ':' — switch to reading the hex word.
    dump_to_bin0(Tail, hex, Acc);
dump_to_bin0(<<N, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) ->
    dump_to_bin0(Tail, hex, Acc);
dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) ->
    dump_to_bin0(Tail, hex, Acc);
dump_to_bin0(<<H1, H2, H3, H4, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
    (Sp =:= $\t orelse Sp =:= $\s) andalso
        ?IS_HEX_DIGIT(H1) andalso
        ?IS_HEX_DIGIT(H2) andalso
        ?IS_HEX_DIGIT(H3) andalso
        ?IS_HEX_DIGIT(H4) andalso
        ?IS_HEX_DIGIT(H5) andalso
        ?IS_HEX_DIGIT(H6) andalso
        ?IS_HEX_DIGIT(H7) andalso
        ?IS_HEX_DIGIT(H8)
->
    %% Parse 8 hex digits (AArch64 32-bit instruction), emit little-endian.
    Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16),
    dump_to_bin0(Rest, instr, [<<Instr:32/little>> | Acc]);
dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) ->
    dump_to_bin0(Tail, instr, Acc);
dump_to_bin0(<<>>, _, Acc) ->
    list_to_binary(lists:reverse(Acc)).
diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 7ccb678892..c309cae9e2 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -1269,7 +1269,7 @@ move_to_vm_register_test_() -> " 0: 49 89 02 mov %rax,(%r10)" >>) end), - %% Test: Atom register to y_reg + %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, rax, {y_reg, 0}, << " 0:\t48 8b 47 28 mov 0x28(%rdi),%rax\n" diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index 6f3f387e33..a435ab17e0 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -27,6 +27,8 @@ start() -> etest:test([ jit_tests, + jit_aarch64_tests, + jit_aarch64_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). diff --git a/tests/test.c b/tests/test.c index 14dea8fb1b..b73545d987 100644 --- a/tests/test.c +++ b/tests/test.c @@ -32,6 +32,7 @@ #include "bif.h" #include "context.h" #include "iff.h" +#include "jit.h" #include "mapped_file.h" #include "module.h" #include "term.h" @@ -699,7 +700,12 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) if (!beam) { #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 if (chdir("x86_64") != 0) { - perror("Error: "); + perror("Error: cannot find x86_64 directory"); + return EXIT_FAILURE; + } +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 + if (chdir("aarch64") != 0) { + perror("Error: cannot find aarch64 directory"); return EXIT_FAILURE; } #else