diff --git a/.github/workflows/build-and-test-macos.yaml b/.github/workflows/build-and-test-macos.yaml index 57e9ac424c..ccac90a4dd 100644 --- a/.github/workflows/build-and-test-macos.yaml +++ b/.github/workflows/build-and-test-macos.yaml @@ -47,6 +47,14 @@ jobs: otp: "28" cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + - os: "macos-14" + otp: "28" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + + - os: "macos-15" + otp: "28" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" + steps: # Setup - name: "Checkout repo" diff --git a/CMakeLists.txt b/CMakeLists.txt index 17eb2a02be..0c21bc78b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,10 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64|aarch64$") + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "aarch64") + endif() else() if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 8fe0d6bde1..427fa40aec 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -21,6 +21,7 @@ -define(JIT_FORMAT_VERSION, 1). -define(JIT_ARCH_X86_64, 1). +-define(JIT_ARCH_AARCH64, 2). -define(JIT_VARIANT_PIC, 1). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index bc97ea6903..a5810feff9 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -27,6 +27,8 @@ set(ERLANG_MODULES jit_precompile jit_stream_binary jit_stream_mmap + jit_aarch64 + jit_aarch64_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl new file mode 100644 index 0000000000..d0ffdd892d --- /dev/null +++ b/libs/jit/src/jit_aarch64.erl @@ -0,0 +1,2227 @@ +% +% This file is part of AtomVM. 
+% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +%-define(ASSERT(Expr), true = Expr). +-define(ASSERT(_Expr), ok). + +%% AArch64 ABI: r0-r7 are used for argument passing and return value. 
+%% r8 is the indirect result location register (platform-specific), +%% r9-r15 are caller-saved scratch registers (used by JIT), +%% r16-r17 are intra-procedure-call scratch registers, +%% r18 is platform register (reserved), +%% r19-r28 are callee-saved, +%% r29 is frame pointer, r30 is link register, r31 is stack pointer/zero. +%% d0-d7 are used for FP argument passing and return value. +%% d8-d15 are callee-saved FP registers. +%% +%% https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst +%% +%% Registers used by the JIT backend: +%% - Scratch GPRs: r9-r15 +%% - Argument/return: r0-r7, d0-d7 +%% - Stack pointer: r31 (sp) +%% - Frame pointer: r29 +%% - Link register: r30 +%% - Indirect result: r8 +%% +%% Note: r18 is reserved for platform use and must not be used. + +-type aarch64_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15. + +-define(IS_GPR(Reg), + (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse + Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7 orelse Reg =:= r8 orelse Reg =:= r9 orelse + Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse + Reg =:= r15) +). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [aarch64_register()], + used_regs :: [aarch64_register()], + labels :: [{integer() | reference(), integer()}] +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). +-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, aarch64_register()}. +-type value() :: immediate() | vm_register() | aarch64_register() | {ptr, aarch64_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()}. 
+ +-type maybe_free_aarch64_register() :: + {free, aarch64_register()} | aarch64_register(). + +-type condition() :: + {aarch64_register(), '<', integer()} + | {maybe_free_aarch64_register(), '<', aarch64_register()} + | {maybe_free_aarch64_register(), '==', integer()} + | {maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} + | {'(int)', maybe_free_aarch64_register(), '==', integer()} + | {'(int)', maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} + | {'(bool)', maybe_free_aarch64_register(), '==', false} + | {'(bool)', maybe_free_aarch64_register(), '!=', false} + | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}. + +% ctx->e is 0x28 +% ctx->x is 0x30 +-define(WORD_SIZE, 8). +-define(CTX_REG, r0). +-define(JITSTATE_REG, r1). +-define(NATIVE_INTERFACE_REG, r2). +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#30 + (N * ?WORD_SIZE)}). +-define(CP, {?CTX_REG, 16#B8}). +-define(FP_REGS, {?CTX_REG, 16#C0}). +-define(BS, {?CTX_REG, 16#C8}). +-define(BS_OFFSET, {?CTX_REG, 16#D0}). +-define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). +-define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#8}). +-define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#10}). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +% aarch64 ABI specific +-define(LR_REG, r30). +-define(IP0_REG, r16). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). + +-define(AVAILABLE_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). +-define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). +-define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). 
+ +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> ?WORD_SIZE. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream. +%% @end +%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC) +%% @param StreamModule module to stream instructions +%% @param Stream stream state +%% @return New backend state +%%----------------------------------------------------------------------------- +-spec new(any(), module(), stream()) -> state(). +new(_Variant, StreamModule, Stream) -> + #state{ + stream_module = StreamModule, + stream = Stream, + branches = [], + offset = StreamModule:offset(Stream), + available_regs = ?AVAILABLE_REGS, + used_regs = [], + labels = [] + }. + +%%----------------------------------------------------------------------------- +%% @doc Access the stream object. +%% @end +%% @param State current backend state +%% @return The stream object +%%----------------------------------------------------------------------------- +-spec stream(state()) -> stream(). +stream(#state{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @doc Get the current offset in the stream +%% @end +%% @param State current backend state +%% @return The current offset +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). 
+offset(#state{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @doc Emit a debugger of breakpoint instruction. This is used for debugging +%% and not in production. +%% @end +%% @param State current backend state +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec debugger(state()) -> state(). +debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:brk(0)), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [aarch64_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [aarch64_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. 
+%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). +free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/2. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. 
+%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount). + +-spec jump_table0(state(), non_neg_integer(), pos_integer()) -> state(). +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + N, + LabelsCount +) -> + Offset = StreamModule:offset(Stream0), + BranchInstr = jit_aarch64_asm:b(0), + Reloc = {N, Offset, b}, + Stream1 = StreamModule:append(Stream0, BranchInstr), + jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); + {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); + b -> jit_aarch64_asm:b(Rel) + end, + Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + update_branches(State#state{stream = Stream1, branches = BranchesT}). + +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. 
It also saves scratch registers we need to preserve. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), aarch64_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Primitive, + Args +) -> + PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{stream = Stream1}, + call_func_ptr(StateCall, {free, ?IP0_REG}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive_last(state(), non_neg_integer(), [arg()]) -> state(). 
+call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_aarch64_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + State1 = set_args( + State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + Args + ), + #state{stream = Stream2} = State1, + Call = jit_aarch64_asm:br(Temp), + Stream3 = StreamModule:append(Stream2, Call), + State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a return of a value if it's not equal to ctx. +%% This logic is used to break out to the scheduler, typically after signal +%% messages have been processed. +%% @end +%% @param State current backend state +%% @param Reg register to compare to (should be {free, Reg} as it's always freed) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec return_if_not_equal_to_ctx(state(), {free, aarch64_register()}) -> state(). 
+return_if_not_equal_to_ctx( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + {free, Reg} +) -> + I1 = jit_aarch64_asm:cmp(Reg, ?CTX_REG), + I3 = + case Reg of + % Return value is already in r0 + r0 -> <<>>; + % Move to r0 (return register) + _ -> jit_aarch64_asm:orr(r0, xzr, Reg) + end, + I4 = jit_aarch64_asm:ret(), + I2 = jit_aarch64_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), + Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, Reg), + State#state{ + stream = Stream1, + available_regs = AvailableRegs1, + used_regs = UsedRegs1 + }. + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump to a label. The offset of the relocation is saved and will +%% be updated with `update_branches/2`. +%% @end +%% @param State current backend state +%% @param Label to jump to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_to_label(state(), integer() | reference()) -> state(). +jump_to_label( + #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches, labels = Labels} = + State, + Label +) -> + Offset = StreamModule:offset(Stream0), + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch without relocation + Rel = LabelOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_aarch64_asm:b(0), + Reloc = {Label, Offset, b}, + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1, branches = [Reloc | AccBranches]} + end. 
+ +%% @private +-spec rewrite_branch_instruction( + jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() +) -> binary(). +rewrite_branch_instruction({cbnz, Reg}, Offset) -> + jit_aarch64_asm:cbnz(Reg, Offset); +rewrite_branch_instruction({cbnz_w, Reg}, Offset) -> + jit_aarch64_asm:cbnz_w(Reg, Offset); +rewrite_branch_instruction({tbz, Reg, Bit}, Offset) -> + jit_aarch64_asm:tbz(Reg, Bit, Offset); +rewrite_branch_instruction({tbnz, Reg, Bit}, Offset) -> + jit_aarch64_asm:tbnz(Reg, Bit, Offset); +rewrite_branch_instruction(CC, Offset) when is_atom(CC) -> + jit_aarch64_asm:bcc(CC, Offset). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if block, i.e. emit a test of a condition and conditionnally +%% execute a block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). 
+if_block( + #state{stream_module = StreamModule} = State0, + {'and', CondList}, + BlockFn +) -> + {Replacements, State1} = lists:foldl( + fun(Cond, {AccReplacements, AccState}) -> + Offset = StreamModule:offset(AccState#state.stream), + {NewAccState, CC, ReplaceDelta} = if_block_cond(AccState, Cond), + {[{Offset + ReplaceDelta, CC} | AccReplacements], NewAccState} + end, + {[], State0}, + CondList + ), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + Stream3 = lists:foldl( + fun({ReplacementOffset, CC}, AccStream) -> + BranchOffset = OffsetAfter - ReplacementOffset, + NewBranchInstr = jit_aarch64_asm:bcc(CC, BranchOffset), + StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr) + end, + Stream2, + Replacements + ), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs); +if_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch the conditional branch instruction to jump to the end of the block + BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = rewrite_branch_instruction(CC, BranchOffset), + Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if else block, i.e. emit a test of a condition and +%% conditionnally execute a block or another block. 
+%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockTrueFn function to emit the block that is executed if condition is true +%% @param BlockFalseFn function to emit the block that is executed if condition is false +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> + state(). +if_else_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockTrueFn, + BlockFalseFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockTrueFn(State1), + Stream2 = State2#state.stream, + %% Emit unconditional branch to skip the else block (will be replaced) + ElseJumpOffset = StreamModule:offset(Stream2), + ElseJumpInstr = jit_aarch64_asm:b(0), + Stream3 = StreamModule:append(Stream2, ElseJumpInstr), + %% Else block starts here. + OffsetAfter = StreamModule:offset(Stream3), + %% Patch the conditional branch to jump to the else block + ElseBranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = rewrite_branch_instruction(CC, ElseBranchOffset), + Stream4 = StreamModule:replace(Stream3, Offset + BranchInstrOffset, NewBranchInstr), + %% Build the else block + StateElse = State2#state{ + stream = Stream4, + used_regs = State1#state.used_regs, + available_regs = State1#state.available_regs + }, + State3 = BlockFalseFn(StateElse), + Stream5 = State3#state.stream, + OffsetFinal = StreamModule:offset(Stream5), + %% Patch the unconditional branch to jump to the end + FinalJumpOffset = OffsetFinal - ElseJumpOffset, + NewElseJumpInstr = jit_aarch64_asm:b(FinalJumpOffset), + Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), + merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). 
+ +%% @private +-spec if_block_cond(state(), condition()) -> + { + state(), + jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, + non_neg_integer() + }. +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> + I = jit_aarch64_asm:tbz(Reg, 63, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = State0#state{stream = Stream1}, + {State1, {tbz, Reg, 63}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val) -> + I1 = jit_aarch64_asm:cmp(Reg, Val), + % ge = greater than or equal + I2 = jit_aarch64_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '<', RegB} +) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, RegB), + % ge = greater than or equal + I2 = jit_aarch64_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I = jit_aarch64_asm:cbnz(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I = 
jit_aarch64_asm:cbnz_w(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz_w, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp_w(Reg, Val), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp(Reg, Val), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '!=', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_aarch64_asm:cmp_w(Reg, Val), + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + 
I1 = jit_aarch64_asm:cmp(Reg, Val), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(bool)', RegOrTuple, '==', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test lowest bit + I = jit_aarch64_asm:tbnz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbnz, Reg, 0}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(bool)', RegOrTuple, '!=', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test lowest bit + I = jit_aarch64_asm:tbz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbz, Reg, 0}, 0}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {RegOrTuple, '&', Val, '!=', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test bits + TestCode = + try + jit_aarch64_asm:tst(Reg, Val) + catch + error:{unencodable_immediate, Val} -> + TestCode0 = jit_aarch64_asm:mov(Temp, Val), + TestCode1 = jit_aarch64_asm:tst(Reg, Temp), + <<TestCode0/binary, TestCode1/binary>> + end, + I2 = jit_aarch64_asm:bcc(eq, 0), + Code = << + TestCode/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(TestCode)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp 
| _] + } = State0, + {Reg, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + OffsetBefore = StreamModule:offset(Stream0), + State1 = op_imm(State0, and_, Temp, Reg, Mask), + Stream1 = State1#state.stream, + % Compare with value + I2 = jit_aarch64_asm:cmp(Temp, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), + I3 = jit_aarch64_asm:bcc(eq, 0), + Stream3 = StreamModule:append(Stream2, I3), + State2 = State1#state{stream = Stream3}, + {State2, eq, OffsetAfter - OffsetBefore}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {{free, Reg} = RegTuple, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + OffsetBefore = StreamModule:offset(Stream0), + State1 = and_(State0, Reg, Mask), + Stream1 = State1#state.stream, + % Compare with value + I2 = jit_aarch64_asm:cmp(Reg, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), + I3 = jit_aarch64_asm:bcc(eq, 0), + Stream3 = StreamModule:append(Stream2, I3), + State3 = State1#state{stream = Stream3}, + State4 = if_block_free_reg(RegTuple, State3), + {State4, eq, OffsetAfter - OffsetBefore}. + +%% @private +-spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). +if_block_free_reg({free, Reg}, State0) -> + #state{available_regs = AvR0, used_regs = UR0} = State0, + {AvR1, UR1} = free_reg(AvR0, UR0, Reg), + State0#state{ + available_regs = AvR1, + used_regs = UR1 + }; +if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> + State0. + +%% @private +-spec merge_used_regs(state(), [aarch64_register()]) -> state(). 
+merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ + Reg | T +]) -> + case lists:member(Reg, UR0) of + true -> + merge_used_regs(State, T); + false -> + AvR1 = lists:delete(Reg, AvR0), + UR1 = [Reg | UR0], + merge_used_regs( + State#state{used_regs = UR1, available_regs = AvR1}, T + ) + end; +merge_used_regs(State, []) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register right by a fixed number of bits, effectively +%% dividing it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +-spec shift_right(state(), aarch64_register(), non_neg_integer()) -> state(). +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_aarch64_asm:lsr(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register left by a fixed number of bits, effectively +%% multiplying it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +-spec shift_left(state(), aarch64_register(), non_neg_integer()) -> state(). +shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + is_atom(Reg) +-> + I = jit_aarch64_asm:lsl(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call to a function pointer with arguments. 
This function converts +%% arguments and passes them following the backend ABI convention. +%% @end +%% @param State current backend state +%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex} +%% @param Args arguments to pass to the function +%% @return Updated backend state and return register +%%----------------------------------------------------------------------------- +-spec call_func_ptr(state(), {free, aarch64_register()} | {primitive, non_neg_integer()}, [arg()]) -> + {state(), aarch64_register()}. +call_func_ptr( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State0, + FuncPtrTuple, + Args +) -> + FreeRegs = lists:flatmap( + fun + ({free, ?IP0_REG}) -> []; + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + [FuncPtrTuple | Args] + ), + UsedRegs1 = UsedRegs0 -- FreeRegs, + SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + {SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), + + % Set up arguments following AArch64 calling convention + State1 = set_args(State0#state{stream = Stream1}, Args), + #state{stream = Stream2} = State1, + + {FuncPtrReg, Stream3} = + case FuncPtrTuple of + {free, Reg} -> + {Reg, Stream2}; + {primitive, Primitive} -> + % We use r16 for the address. 
+ PrepCall = + case Primitive of + 0 -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_aarch64_asm:ldr(?IP0_REG, {?NATIVE_INTERFACE_REG, N * ?WORD_SIZE}) + end, + {?IP0_REG, StreamModule:append(Stream2, PrepCall)} + end, + + % Call the function pointer (using BLR for call with return) + Call = jit_aarch64_asm:blr(FuncPtrReg), + Stream4 = StreamModule:append(Stream3, Call), + + % If r0 is in used regs, save it to another temporary register + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + {Stream5, ResultReg} = + case lists:member(r0, SavedRegs) of + true -> + [Temp | _] = AvailableRegs1, + {StreamModule:append(Stream4, jit_aarch64_asm:mov(Temp, r0)), Temp}; + false -> + {Stream4, r0} + end, + + Stream6 = pop_registers(SavedRegsOdd, lists:reverse(SavedRegs), StreamModule, Stream5), + + AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), + AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), + UsedRegs2 = [ResultReg | UsedRegs1], + { + State1#state{ + stream = Stream6, + available_regs = AvailableRegs3, + used_regs = UsedRegs2 + }, + ResultReg + }. + +%% @private +-spec push_registers([aarch64_register()], module(), stream()) -> {boolean(), stream()}. +push_registers([RegA, RegB | Tail], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:stp(RegA, RegB, {sp, -16}, '!')), + push_registers(Tail, StreamModule, Stream1); +push_registers([], _StreamModule, Stream0) -> + {false, Stream0}; +push_registers([RegA], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:str(RegA, {sp, -16}, '!')), + {true, Stream1}. + +%% @private +-spec pop_registers(boolean(), [aarch64_register()], module(), stream()) -> stream(). 
+pop_registers(true, [Reg | Tail], StreamModule, Stream0) -> + % Odd number of registers, pop the last one first + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldr(Reg, {sp}, 16)), + pop_registers(false, Tail, StreamModule, Stream1); +pop_registers(false, [], _StreamModule, Stream0) -> + Stream0; +pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:ldp(RegA, RegB, {sp}, 16)), + pop_registers(false, Tail, StreamModule, Stream1). + +%% @private +-spec set_args(state(), [arg()]) -> state(). +set_args( + #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args +) -> + ParamRegs = parameter_regs(Args), + ArgsRegs = args_regs(Args), + AvailableScratchGP = + ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, + Offset = StreamModule:offset(Stream0), + Args1 = [ + case Arg of + offset -> Offset; + _ -> Arg + end + || Arg <- Args + ], + SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), + Stream1 = StreamModule:append(Stream0, SetArgsCode), + NewUsedRegs = lists:foldl( + fun + ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + (_, AccUsed) -> AccUsed + end, + UsedRegs, + Args + ), + State0#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, + used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + }. + +%% @private +-spec parameter_regs([arg()]) -> [aarch64_register()]. +parameter_regs(Args) -> + parameter_regs0(Args, ?PARAMETER_REGS, []). + +%% @private +-spec parameter_regs0([arg()], [aarch64_register()], [aarch64_register()]) -> [aarch64_register()]. 
+parameter_regs0([], _, Acc) -> + lists:reverse(Acc); +parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when + Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset +-> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{free, Free} | T], GPRegs, Acc) -> + parameter_regs0([Free | T], GPRegs, Acc); +parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Reg | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]). + +%% @private +-spec replace_reg([arg()], aarch64_register(), aarch64_register()) -> [arg()]. +replace_reg(Args, Reg1, Reg2) -> + replace_reg0(Args, Reg1, Reg2, []). + +%% @private +-spec replace_reg0([arg()], aarch64_register(), aarch64_register(), [arg()]) -> [arg()]. +replace_reg0([Reg | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([Other | T], Reg, Replacement, Acc) -> + replace_reg0(T, Reg, Replacement, [Other | Acc]). + +%% @private +-spec set_args0([arg()], [aarch64_register() | imm], [aarch64_register()], [aarch64_register()], [ + binary() +]) -> binary(). 
+set_args0([], [], [], _AvailGP, Acc) -> + list_to_binary(lists:reverse(Acc)); +set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> + set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0( + [jit_state | ArgsT], + [?JITSTATE_REG | ArgsRegs], + [?JITSTATE_REG | ParamRegs], + AvailGP, + Acc +) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0( + [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, Acc +) -> + false = lists:member(ParamReg, ArgsRegs), + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [ + jit_aarch64_asm:mov(ParamReg, ?JITSTATE_REG) | Acc + ]); +% ctx is special as we need it to access x_reg/y_reg/fp_reg +set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> + false = lists:member(?CTX_REG, ArgsRegs), + J = set_args1(Arg, ?CTX_REG), + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); +set_args0( + [Arg | ArgsT], + [_ArgReg | ArgsRegs], + [ParamReg | ParamRegs], + [Avail | AvailGPT] = AvailGP, + Acc +) -> + J = set_args1(Arg, ParamReg), + case lists:member(ParamReg, ArgsRegs) of + false -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); + true -> + I = jit_aarch64_asm:mov(Avail, ParamReg), + NewArgsT = replace_reg(ArgsT, ParamReg, Avail), + set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, [J, I | Acc]) + end. + +%% @private +-spec set_args1(arg(), aarch64_register()) -> binary() | [binary()]. 
+set_args1(Reg, Reg) -> + []; +set_args1({x_reg, extra}, Reg) -> + jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)); +set_args1({x_reg, X}, Reg) -> + jit_aarch64_asm:ldr(Reg, ?X_REG(X)); +set_args1({ptr, Source}, Reg) -> + jit_aarch64_asm:ldr(Reg, {Source, 0}); +set_args1({y_reg, X}, Reg) -> + [ + jit_aarch64_asm:ldr(Reg, ?Y_REGS), + jit_aarch64_asm:ldr(Reg, {Reg, X * ?WORD_SIZE}) + ]; +set_args1(ArgReg, Reg) when ?IS_GPR(ArgReg) -> + jit_aarch64_asm:mov(Reg, ArgReg); +set_args1(Arg, Reg) when is_integer(Arg) -> + jit_aarch64_asm:mov(Reg, Arg). + +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. +%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register + (state(), Src :: value() | vm_register(), Dest :: vm_register()) -> state(); + (state(), Src :: {free, {ptr, aarch64_register(), 1}}, Dest :: {fp_reg, non_neg_integer()}) -> + state(). 
+% Native register to VM register
+move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
+    I1 = jit_aarch64_asm:str(Src, ?X_REG(?MAX_REG)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
+    I1 = jit_aarch64_asm:str(Src, ?X_REG(X)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
+    I1 = jit_aarch64_asm:str(Src, {Reg, 0}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(#state{available_regs = [Temp | _]} = State0, Src, {y_reg, Y}) when
+    is_atom(Src)
+->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:str(Src, {Temp, Y * ?WORD_SIZE}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, I2/binary>>),
+    State0#state{stream = Stream1};
+% Source is an integer
+move_to_vm_register(State, 0, Dest) ->
+    move_to_vm_register(State, xzr, Dest);
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N)
+->
+    I1 = jit_aarch64_asm:mov(Temp, N),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% Source is a VM register
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(?MAX_REG)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, {Reg, 0}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * ?WORD_SIZE}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, I2/binary>>),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% term_to_float
+move_to_vm_register(
+    #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State0,
+    {free, {ptr, Reg, 1}},
+    {fp_reg, F}
+) ->
+    I1 = jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}),
+    I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS),
+    I3 = jit_aarch64_asm:str(Reg, {Temp, F * ?WORD_SIZE}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State1 = free_native_register(State0, Reg),
+    State1#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @param Dest vm or native register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_array_element(
+    state(),
+    aarch64_register(),
+    non_neg_integer() | aarch64_register(),
+    vm_register() | aarch64_register()
+) -> state().
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}),
+    I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {ptr, Dest}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}),
+    I2 = jit_aarch64_asm:str(Temp, {Dest, 0}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} =
+        State,
+    Reg,
+    Index,
+    {y_reg, Y}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp1, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(Temp2, {Reg, Index * ?WORD_SIZE}),
+    I3 = jit_aarch64_asm:str(Temp2, {Temp1, Y * ?WORD_SIZE}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} =
+        State,
+    {free, Reg},
+    Index,
+    {y_reg, Y}
+) when is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(Reg, {Reg, Index * ?WORD_SIZE}),
+    I3 = jit_aarch64_asm:str(Reg, {Temp, Y * ?WORD_SIZE}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
+) when is_atom(Dest) andalso is_integer(Index) ->
+    I1 = jit_aarch64_asm:ldr(Dest, {Reg, Index * ?WORD_SIZE}),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(IndexReg) ->
+    I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}),
+    I2 = jit_aarch64_asm:str(IndexReg, ?X_REG(X)),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {ptr, PtrReg}
+) when is_atom(IndexReg) ->
+    I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}),
+    I2 = jit_aarch64_asm:str(IndexReg, {PtrReg, 0}),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _] = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {y_reg, Y}
+) when ?IS_GPR(IndexReg) ->
+    I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS),
+    I2 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}),
+    I3 = jit_aarch64_asm:str(IndexReg, {Temp, Y * ?WORD_SIZE}),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(
+        Stream0, <<I1/binary, I2/binary, I3/binary>>
+    ),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of an array element (reg[x]) to a new native register.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec get_array_element(
+    state(), aarch64_register() | {free, aarch64_register()}, non_neg_integer()
+) ->
+    {state(), aarch64_register()}.
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    {free, Reg},
+    Index
+) ->
+    I1 = jit_aarch64_asm:ldr(Reg, {Reg, Index * ?WORD_SIZE}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
+    {State#state{stream = Stream1}, Reg};
+get_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [ElemReg | AvailableT],
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    Index
+) ->
+    I1 = jit_aarch64_asm:ldr(ElemReg, {Reg, Index * ?WORD_SIZE}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
+    {
+        State#state{
+            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
+        },
+        ElemReg
+    }. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a move of a value (integer, vm register or native register) to an +%% array element (reg[x]) +%% @end +%% @param State current backend state +%% @param Value value to move +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_array_element( + state(), integer() | vm_register() | aarch64_register(), aarch64_register(), non_neg_integer() +) -> state(). +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + I1 = jit_aarch64_asm:str(ValueReg, {Reg, Index * ?WORD_SIZE}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + IndexReg +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + I1 = jit_aarch64_asm:str(ValueReg, {Reg, IndexReg, lsl, 3}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + Reg, + Index +) -> + {State1, Temp} = copy_to_native_register(State0, Value), + State2 = move_to_array_element(State1, Temp, Reg, Index), + free_native_register(State2, Temp). 
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of a value (integer, vm register or native register) to an
+%% array element (reg[x+offset])
+%% @end
+%% @param State current backend state
+%% @param Value value to move
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @param Offset additional offset
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_to_array_element(
+    state(), value(), aarch64_register(), aarch64_register() | non_neg_integer(), integer()
+) -> state().
+move_to_array_element(
+    State,
+    Value,
+    BaseReg,
+    IndexVal,
+    Offset
+) when is_integer(IndexVal) andalso is_integer(Offset) ->
+    move_to_array_element(State, Value, BaseReg, IndexVal + Offset);
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    ValueReg,
+    BaseReg,
+    IndexReg,
+    Offset
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
+    I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset),
+    I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_to_array_element(
+    State0,
+    Value,
+    BaseReg,
+    IndexReg,
+    Offset
+) ->
+    {State1, ValueReg} = copy_to_native_register(State0, Value),
+    [Temp | _] = State1#state.available_regs,
+    I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset),
+    I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}),
+    Stream1 = (State1#state.stream_module):append(State1#state.stream, <<I1/binary, I2/binary>>),
+    State2 = State1#state{stream = Stream1},
+    free_native_register(State2, ValueReg).
+
+%%-----------------------------------------------------------------------------
+%% @doc Move a value (integer, vm register, pointer or native register) to a
+%% native register. 
This allocates a new native register from the available +%% pool if needed. +%% @end +%% @param State current backend state +%% @param Value value to move (can be an immediate, vm register, pointer, or native register) +%% @return Tuple of {Updated backend state, Native register containing the value} +%%----------------------------------------------------------------------------- +-spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}. +move_to_native_register(State, Reg) when is_atom(Reg) -> + {State, Reg}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_aarch64_asm:ldr(Reg, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + Imm +) when + is_integer(Imm) +-> + I1 = jit_aarch64_asm:mov(Reg, Imm), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, extra} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, X} +) when + X < ?MAX_REG +-> + I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + 
available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {y_reg, Y} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}. + +%%----------------------------------------------------------------------------- +%% @doc Move a value (integer, vm register, pointer or native register) to a +%% specific native register. +%% @end +%% @param State current backend state +%% @param Value value to move (can be an immediate, vm register, pointer, or native register) +%% @param TargetReg the specific native register to move the value to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_native_register(state(), value(), aarch64_register()) -> state(). +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst +) when is_atom(RegSrc) orelse is_integer(RegSrc) -> + I = jit_aarch64_asm:mov(RegDst, RegSrc), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst +) when ?IS_GPR(Reg) -> + I1 = jit_aarch64_asm:ldr(RegDst, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst +) -> + I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst +) when + X < ?MAX_REG +-> + I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; 
+move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst +) -> + I1 = jit_aarch64_asm:ldr(RegDst, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(RegDst, {RegDst, Y * ?WORD_SIZE}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Copy a value to a native register, allocating a new register from the +%% available pool. Unlike move_to_native_register, this always allocates a new +%% register and copies the value (preserving the source if it's a register). +%% @end +%% @param State current backend state +%% @param Value value to copy (can be an immediate, vm register, pointer, or native register) +%% @return Tuple of {Updated backend state, Native register containing the copied value} +%%----------------------------------------------------------------------------- +-spec copy_to_native_register(state(), value()) -> {state(), aarch64_register()}. +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + Reg +) when is_atom(Reg) -> + I1 = jit_aarch64_asm:mov(SaveReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_aarch64_asm:ldr(SaveReg, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register(State, Reg) -> + move_to_native_register(State, Reg). 
+ +%%----------------------------------------------------------------------------- +%% @doc Move a VM register value to the continuation pointer (CP). +%% @end +%% @param State current backend state +%% @param VMReg VM register to move to CP +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_cp(state(), vm_register()) -> state(). +move_to_cp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + {y_reg, Y} +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), + I3 = jit_aarch64_asm:str(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Increment the stack pointer (SP) by a given offset. +%% @end +%% @param State current backend state +%% @param Offset offset to add to SP (in words, will be multiplied by 8) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec increment_sp(state(), integer()) -> state(). +increment_sp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + Offset +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), + I2 = jit_aarch64_asm:add(Reg, Reg, Offset * ?WORD_SIZE), + I3 = jit_aarch64_asm:str(Reg, ?Y_REGS), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Set the continuation address to point to a specific label. The actual +%% address will be resolved during branch update. 
+%% @end +%% @param State current backend state +%% @param Label label to set as continuation target +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec set_continuation_to_label(state(), integer() | reference()) -> state(). +set_continuation_to_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State, + Label +) -> + Offset = StreamModule:offset(Stream0), + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]}. + +%%----------------------------------------------------------------------------- +%% @doc Set the continuation address to the current offset, creating a +%% reference for later resolution. Returns a reference that can be used +%% to add the label at the target location. +%% @end +%% @param State current backend state +%% @return Tuple of {Updated backend state, Reference for the continuation offset} +%%----------------------------------------------------------------------------- +-spec set_continuation_to_offset(state()) -> {state(), reference()}. +set_continuation_to_offset( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State +) -> + OffsetRef = make_ref(), + Offset = StreamModule:offset(Stream0), + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {OffsetRef, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. + +%%----------------------------------------------------------------------------- +%% @doc Implement a continuation entry point. 
On AArch64 this is a nop +%% as we don't need to save any register. +%% @end +%% @param State current backend state +%% @return Updated backend state (unchanged on AArch64) +%%----------------------------------------------------------------------------- +-spec continuation_entry_point(#state{}) -> #state{}. +continuation_entry_point(State) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Get the module index from the JIT state and load it into a native +%% register. +%% @end +%% @param State current backend state +%% @return Tuple of {Updated backend state, Native register containing module index} +%%----------------------------------------------------------------------------- +-spec get_module_index(state()) -> {state(), aarch64_register()}. +get_module_index( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailableT], + used_regs = UsedRegs0 + } = State +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?JITSTATE_MODULE), + I2 = jit_aarch64_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + { + State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + Reg + }. + +%% @private +-spec op_imm(state(), atom(), aarch64_register(), aarch64_register(), integer()) -> state(). 
+% Emit Op (an arithmetic/logic instruction from jit_aarch64_asm) with an
+% immediate operand, falling back to materializing the immediate in a
+% temporary register when the instruction cannot encode it directly.
+op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, Reg, Val) ->
+    Stream1 =
+        try
+            I = jit_aarch64_asm:Op(Reg, Reg, Val),
+            StreamModule:append(Stream0, I)
+        catch
+            error:{unencodable_immediate, Val} ->
+                % Immediate does not fit the encoding: load it into a temp
+                % register and use the register form of the instruction.
+                [Temp | _] = State#state.available_regs,
+                I1 = jit_aarch64_asm:mov(Temp, Val),
+                I2 = jit_aarch64_asm:Op(Reg, Reg, Temp),
+                StreamModule:append(Stream0, <<I1/binary, I2/binary>>)
+        end,
+    State#state{stream = Stream1};
+op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, RegB, Val) ->
+    Stream1 =
+        try
+            I = jit_aarch64_asm:Op(RegA, RegB, Val),
+            StreamModule:append(Stream0, I)
+        catch
+            error:{unencodable_immediate, Val} ->
+                % RegA is distinct from RegB here, so it can hold the immediate.
+                MoveI = jit_aarch64_asm:mov(RegA, Val),
+                AndI = jit_aarch64_asm:Op(RegA, RegB, RegA),
+                StreamModule:append(Stream0, <<MoveI/binary, AndI/binary>>)
+        end,
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Perform bitwise AND of a register with an immediate value.
+%% @end
+%% @param State current backend state
+%% @param Reg register to AND with value
+%% @param Val immediate value to AND
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec and_(state(), aarch64_register(), integer()) -> state().
+and_(State, Reg, Val) ->
+    op_imm(State, and_, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Perform bitwise OR of a register with an immediate value.
+%% @end
+%% @param State current backend state
+%% @param Reg register to OR with value
+%% @param Val immediate value to OR
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec or_(state(), aarch64_register(), integer()) -> state().
+or_(State, Reg, Val) ->
+    op_imm(State, orr, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add an immediate value to a register.
+%% @end
+%% @param State current backend state
+%% @param Reg register to add to
+%% @param Val immediate value to add
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add(state(), aarch64_register(), integer()) -> state().
+add(State, Reg, Val) ->
+    op_imm(State, add, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Subtract an immediate value from a register.
+%% @end
+%% @param State current backend state
+%% @param Reg register to subtract from
+%% @param Val immediate value to subtract
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec sub(state(), aarch64_register(), integer()) -> state().
+sub(State, Reg, Val) ->
+    % Route through op_imm, consistently with add/3, and_/3 and or_/3, so that
+    % immediates which do not fit the SUB (immediate) encoding fall back to a
+    % temporary register instead of crashing with unencodable_immediate.
+    op_imm(State, sub, Reg, Reg, Val).
+
+%%-----------------------------------------------------------------------------
+%% @doc Multiply a register by a constant value. Uses optimized instruction
+%% sequences for common multipliers (powers of 2, small values).
+%% @end
+%% @param State current backend state
+%% @param Reg register to multiply
+%% @param Val constant multiplier (non-negative integer)
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec mul(state(), aarch64_register(), non_neg_integer()) -> state().
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
+    % Reg * 3 = (Reg << 1) + Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 1),
+    I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
+    % Reg * 5 = (Reg << 2) + Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 2),
+    I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 6) ->
+    State1 = mul(State0, Reg, 3),
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
+    % Reg * 7 = (Reg << 3) - Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 3),
+    I2 = jit_aarch64_asm:sub(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
+    % Reg * 9 = (Reg << 3) + Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 3),
+    I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 10) ->
+    State1 = mul(State0, Reg, 5),
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
+    % Reg * 15 = (Reg << 4) - Reg
+    I1 = jit_aarch64_asm:lsl(Temp, Reg, 4),
+    I2 = jit_aarch64_asm:sub(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Val
+) ->
+    % general case: load the constant into a temporary register and emit MUL
+    I1 = jit_aarch64_asm:mov(Temp, Val),
+    I2 = jit_aarch64_asm:mul(Reg, Reg, Temp),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Decrement the reduction count and schedule the next process if it
+%% reaches zero. If reductions remain, execution continues; otherwise, the
+%% continuation is set and the scheduler is invoked.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+decrement_reductions_and_maybe_schedule_next(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
+) ->
+    % Load reduction count
+    I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    % Decrement reduction count
+    I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
+    % Store back the decremented value
+    I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch if reduction count is not zero (placeholder offset, rewritten below)
+    I4 = jit_aarch64_asm:bcc(ne, 0),
+    % Set continuation to the next instruction
+    ADROffset = BNEOffset + byte_size(I4),
+    I5 = jit_aarch64_asm:adr(Temp, 0),
+    I6 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION),
+    % Append the instructions to the stream
+    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
+    State1 = State0#state{stream = Stream2},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    % Rewrite the branch and adr instructions now that the target offset is known
+    #state{stream = Stream3} = State2,
+    NewOffset = StreamModule:offset(Stream3),
+    NewI4 = jit_aarch64_asm:bcc(ne, NewOffset - BNEOffset),
+    NewI5 = jit_aarch64_asm:adr(Temp, NewOffset - ADROffset),
+    Stream4 = StreamModule:replace(
+        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
+    ),
+    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a label with automatic scheduling. Decrements reductions
+%% and calls the label if reductions remain, otherwise schedules the next
+%% process. Sets the continuation pointer before the call.
+%% @end
+%% @param State current backend state
+%% @param Label label to call
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State0, Label) ->
+    % Reserve a CP slot first, emit the (tail-)call sequence, then patch the
+    % recorded CP offset once the return point is known.
+    {StateWithCP, CPRewriteOffset, CPRewriteSize} = set_cp(State0),
+    StateAfterCall = call_only_or_schedule_next(StateWithCP, Label),
+    rewrite_cp_offset(StateAfterCall, CPRewriteOffset, CPRewriteSize).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a tail call to a label with automatic scheduling. Decrements
+%% reductions and jumps to the label if reductions remain, otherwise schedules
+%% the next process. Does not set a new continuation pointer (tail call).
+%% @end
+%% @param State current backend state
+%% @param Label label to jump to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_only_or_schedule_next(state(), non_neg_integer()) -> state().
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        branches = Branches,
+        available_regs = [Temp | _]
+    } = State0,
+    Label
+) ->
+    % Load reduction count
+    I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    % Decrement reduction count
+    I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
+    % Store back the decremented value
+    I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch to label if reduction count is not zero (patched at branch update)
+    I4 = jit_aarch64_asm:bcc(ne, 0),
+    Reloc1 = {Label, BNEOffset, {bcc, ne}},
+    Stream2 = StreamModule:append(Stream1, I4),
+    State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]},
+    % Out of reductions: resume at Label after the scheduler runs
+    State2 = set_continuation_to_label(State1, Label),
+    call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a primitive with continuation pointer setup. This is
+%% used for primitives that may not return directly (e.g., those that can
+%% trap or reschedule). Sets CP before calling the primitive.
+%% @end
+%% @param State current backend state
+%% @param Primitive index of the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec call_primitive_with_cp(state(), non_neg_integer(), [arg()]) -> state().
+call_primitive_with_cp(State0, Primitive, Args) ->
+    {State1, RewriteOffset, RewriteSize} = set_cp(State0),
+    State2 = call_primitive_last(State1, Primitive, Args),
+    rewrite_cp_offset(State2, RewriteOffset, RewriteSize).
+
+%% @private
+-spec set_cp(state()) -> {state(), non_neg_integer(), 4 | 8}.
+set_cp(State0) ->
+    % get module index (dynamically)
+    {#state{stream_module = StreamModule, stream = Stream0} = State1, Reg} = get_module_index(
+        State0
+    ),
+    Offset = StreamModule:offset(Stream0),
+    % build cp with module_index << 24
+    I1 = jit_aarch64_asm:lsl(Reg, Reg, 24),
+    % Reserve space for the mov that rewrite_cp_offset/3 patches in later:
+    % one nop suffices for small code offsets, two for larger ones.
+    if
+        Offset >= 16250 ->
+            I2 = jit_aarch64_asm:nop(),
+            I3 = jit_aarch64_asm:nop(),
+            RewriteSize = 8;
+        true ->
+            I2 = jit_aarch64_asm:nop(),
+            I3 = <<>>,
+            RewriteSize = 4
+    end,
+    MOVOffset = Offset + byte_size(I1),
+    I4 = jit_aarch64_asm:orr(Reg, Reg, ?IP0_REG),
+    I5 = jit_aarch64_asm:str(Reg, ?CP),
+    Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State2 = State1#state{stream = Stream1},
+    State3 = free_native_register(State2, Reg),
+    {State3, MOVOffset, RewriteSize}.
+
+%% @private
+-spec rewrite_cp_offset(state(), non_neg_integer(), 4 | 8) -> state().
+rewrite_cp_offset(
+    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    _RewriteSize
+) ->
+    NewOffset = StreamModule:offset(Stream0) - CodeOffset,
+    % Replace the reserved nop(s) with the actual return offset (shifted by 2).
+    NewMoveInstr = jit_aarch64_asm:mov(?IP0_REG, NewOffset bsl 2),
+    ?ASSERT(byte_size(NewMoveInstr) =< _RewriteSize),
+    Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Set the binary state (BS) register to point to a term and reset the
+%% BS offset to zero. Used for binary matching operations.
+%% @end
+%% @param State current backend state
+%% @param TermReg register containing the term to set as binary state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec set_bs(state(), aarch64_register()) -> state().
+set_bs(#state{stream_module = StreamModule, stream = Stream0} = State0, TermReg) ->
+    % Store the term into ctx->bs and zero ctx->bs_offset (xzr is the zero register).
+    I1 = jit_aarch64_asm:str(TermReg, ?BS),
+    I2 = jit_aarch64_asm:str(xzr, ?BS_OFFSET),
+    % NOTE(review): the binary below appears garbled by extraction; it is
+    % presumably <<I1/binary, I2/binary>> -- confirm against upstream.
+    Stream1 = StreamModule:append(Stream0, <>),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build labels and line tables and encode a function that returns it.
+%% In this case, the function returns the effective address of what immediately
+%% follows.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+-spec return_labels_and_lines(state(), [{non_neg_integer(), non_neg_integer()}]) -> state().
+return_labels_and_lines(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        labels = Labels
+    } = State,
+    SortedLines
+) ->
+    % Only integer labels go into the table (references are internal), sorted by offset.
+    SortedLabels = lists:keysort(2, [
+        {Label, LabelOffset}
+     || {Label, LabelOffset} <- Labels, is_integer(Label)
+    ]),
+
+    % adr r0, 8 returns the address of the data following the ret instruction.
+    I1 = jit_aarch64_asm:adr(r0, 8),
+    I2 = jit_aarch64_asm:ret(),
+    % NOTE(review): the table entry constructions below are garbled by
+    % extraction; the field widths of Label/Offset/Line entries cannot be
+    % recovered from this view -- confirm against upstream sources.
+    LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<> || {Line, Offset} <- SortedLines>>,
+    Stream1 = StreamModule:append(
+        Stream0,
+        <>
+    ),
+    State#state{stream = Stream1}.
+
+%% @private
+%% Release Reg: reinsert it into the available list and drop it from used.
+-spec free_reg([aarch64_register()], [aarch64_register()], aarch64_register()) ->
+    {[aarch64_register()], [aarch64_register()]}.
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    % Reg must have been in use; crash loudly otherwise.
+    true = lists:member(Reg, UsedRegs0),
+    UsedRegs1 = lists:delete(Reg, UsedRegs0),
+    {AvailableRegs1, UsedRegs1}.
+
+%% @private
+%% Reinsert Reg into the available list, preserving the canonical register
+%% order given by the first argument (?AVAILABLE_REGS).
+-spec free_reg0([aarch64_register()], [aarch64_register()], aarch64_register(), [aarch64_register()]) ->
+    [aarch64_register()].
+free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) ->
+    % Found Reg's canonical position: splice it back in.
+    lists:reverse(Acc, [Reg | PrevRegs0]);
+free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) ->
+    % This canonical register is currently available: keep it before Reg.
+    free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]);
+free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) ->
+    % This canonical register is in use elsewhere: skip it.
+    free_reg0(SortedT, PrevRegs, Reg, Acc).
+
+%% @private
+%% Map call arguments to the native registers they occupy (imm for values
+%% that are materialized as immediates). VM register arguments are read
+%% through the context register.
+-spec args_regs([arg()]) -> [aarch64_register() | imm].
+args_regs(Args) ->
+    lists:map(
+        fun
+            ({free, {ptr, Reg}}) -> Reg;
+            ({free, Reg}) when is_atom(Reg) -> Reg;
+            ({free, Imm}) when is_integer(Imm) -> imm;
+            (offset) -> imm;
+            (ctx) -> ?CTX_REG;
+            (jit_state) -> ?JITSTATE_REG;
+            (Reg) when is_atom(Reg) -> Reg;
+            (Imm) when is_integer(Imm) -> imm;
+            ({ptr, Reg}) -> Reg;
+            ({x_reg, _}) -> ?CTX_REG;
+            ({y_reg, _}) -> ?CTX_REG;
+            ({fp_reg, _}) -> ?CTX_REG;
+            ({free, {x_reg, _}}) -> ?CTX_REG;
+            ({free, {y_reg, _}}) -> ?CTX_REG;
+            ({free, {fp_reg, _}}) -> ?CTX_REG
+        end,
+        Args
+    ).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) ->
+    Offset = StreamModule:offset(Stream),
+    add_label(State, Label, Offset).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Labels} = State, Label, Offset) ->
+    State#state{labels = [{Label, Offset} | Labels]}.
diff --git a/libs/jit/src/jit_aarch64_asm.erl b/libs/jit/src/jit_aarch64_asm.erl
new file mode 100644
index 0000000000..66e46b3255
--- /dev/null
+++ b/libs/jit/src/jit_aarch64_asm.erl
@@ -0,0 +1,982 @@
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+
+% AArch64 (A64) instruction encoder: each function returns the little-endian
+% 32-bit instruction word(s) as a binary.
+-module(jit_aarch64_asm).
+
+-export([
+    add/3,
+    add/4,
+    sub/3,
+    sub/4,
+    mul/3,
+    madd/4,
+    b/1,
+    bcc/2,
+    blr/1,
+    br/1,
+    brk/1,
+    cbnz/2,
+    cbnz_w/2,
+    tbz/3,
+    tbnz/3,
+    cmp/2,
+    cmp_w/2,
+    and_/3,
+    ldr/2,
+    ldr_w/2,
+    ldr/3,
+    lsl/3,
+    lsr/3,
+    mov/2,
+    movk/3,
+    movz/3,
+    orr/3,
+    ret/0,
+    nop/0,
+    str/2,
+    str_w/2,
+    str/3,
+    tst/2,
+    tst_w/2,
+    stp/4,
+    ldp/4,
+    subs/3,
+    adr/2
+]).
+
+-export_type([
+    cc/0
+]).
+
+% General-purpose registers; xzr is the zero register, sp the stack pointer.
+-type aarch64_gpr_register() ::
+    r0
+    | r1
+    | r2
+    | r3
+    | r4
+    | r5
+    | r6
+    | r7
+    | r8
+    | r9
+    | r10
+    | r11
+    | r12
+    | r13
+    | r14
+    | r15
+    | r16
+    | r17
+    | r18
+    | r19
+    | r20
+    | r21
+    | r22
+    | r23
+    | r24
+    | r25
+    | r26
+    | r27
+    | r28
+    | r29
+    | r30
+    | sp
+    | xzr.
+
+% A64 condition codes as used by conditional branches (bcc/2).
+-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al | nv.
+
+%% Emit an ADD instruction (AArch64 encoding).
+%% ADD Rd, Rn, #imm adds a 12-bit unsigned immediate to Rn;
+%% ADD Rd, Rn, Rm adds two registers (delegates to add/4 with no shift).
+-spec add(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
+add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
+    %% ADD (immediate), 64-bit: base opcode 0x91000000, imm12 at bit 10,
+    %% Rn at bit 5, Rd at bit 0. The guard guarantees Imm fits in imm12.
+    Word = 16#91000000 bor (Imm bsl 10) bor (reg_to_num(Rn) bsl 5) bor reg_to_num(Rd),
+    <<Word:32/little>>;
+add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
+    %% Immediate does not fit imm12: report it so callers can fall back to a
+    %% temporary register.
+    error({unencodable_immediate, Imm});
+add(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
+    add(Rd, Rn, Rm, {lsl, 0}).
+
+%% Emit an ADD (shifted register) instruction: ADD Rd, Rn, Rm, LSL #Amount.
+-spec add(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) ->
+    binary().
+add(Rd, Rn, Rm, {lsl, Amount}) when
+    is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63
+->
+    %% ADD (shifted register), 64-bit: base opcode 0x8B000000, Rm at bit 16,
+    %% imm6 shift amount at bit 10, Rn at bit 5, Rd at bit 0.
+    Word =
+        16#8B000000 bor (reg_to_num(Rm) bsl 16) bor (Amount bsl 10) bor
+            (reg_to_num(Rn) bsl 5) bor reg_to_num(Rd),
+    <<Word:32/little>>.
+
+%% Emit an unconditional branch (B) to a byte offset relative to this
+%% instruction; the offset is encoded as a signed 26-bit word offset.
+-spec b(integer()) -> binary().
+b(Offset) when is_integer(Offset) ->
+    %% B: opcode 0b000101 in the top bits, imm26 = Offset/4.
+    Imm26 = (Offset div 4) band 16#3FFFFFF,
+    <<(16#14000000 bor Imm26):32/little>>.
+
+%% Emit a breakpoint (BRK) instruction carrying a 16-bit immediate
+%% (usually 0 for debuggers).
+-spec brk(integer()) -> binary().
+brk(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FFFF ->
+    %% BRK: base opcode 0xd4200000, imm16 at bit 5.
+    <<(16#D4200000 bor (Imm bsl 5)):32/little>>.
+
+%% Emit a branch with link to register (BLR) instruction.
+-spec blr(aarch64_gpr_register()) -> binary().
+blr(Reg) when is_atom(Reg) ->
+    %% BLR: base opcode 0xd63f0000, Rn at bit 5.
+    <<(16#D63F0000 bor (reg_to_num(Reg) bsl 5)):32/little>>.
+
+%% Emit a branch to register (BR) instruction.
+-spec br(aarch64_gpr_register()) -> binary().
+br(Reg) when is_atom(Reg) ->
+    %% BR: base opcode 0xd61f0000, Rn at bit 5.
+    <<(16#D61F0000 bor (reg_to_num(Reg) bsl 5)):32/little>>.
+
+%% Emit a load register (LDR) instruction for 64-bit load from memory (AArch64 encoding)
+%% Dst is destination register atom, Src is {BaseReg, Offset} tuple
+-spec ldr(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
+ldr(Dst, {BaseReg, Offset}) when
+    is_atom(Dst),
+    is_atom(BaseReg),
+    is_integer(Offset),
+    Offset >= 0,
+    Offset =< 32760,
+    (Offset rem 8) =:= 0
+->
+    % LDR (unsigned immediate): the 12-bit field holds Offset/8 (scaled).
+    DstNum = reg_to_num(Dst),
+    BaseRegNum = reg_to_num(BaseReg),
+    %% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt
+    %% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst
+    <<
+        (16#F9400000 bor ((Offset div 8) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little
+    >>;
+ldr(Xt, {Xn, Xm}) when
+    is_atom(Xt),
+    is_atom(Xn),
+    is_atom(Xm)
+->
+    % Register-offset form without shift delegates to the shifted form.
+    ldr(Xt, {Xn, Xm, lsl, 0});
+ldr(Xt, {Xn, Xm, lsl, Amount}) when
+    is_atom(Xt),
+    is_atom(Xn),
+    is_atom(Xm),
+    Amount =:= 0 orelse Amount =:= 3
+->
+    % LDR (register offset), 64-bit: Xm optionally shifted left by 3 (S bit).
+    XtNum = reg_to_num(Xt),
+    XnNum = reg_to_num(Xn),
+    XmNum = reg_to_num(Xm),
+    % S = 1 selects LSL #3 (scale by 8); S = 0 no shift.
+    S = Amount div 3,
+    <<
+        (16#F8606800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little
+    >>.
+
+%% Emit a load register (LDR) instruction for 32-bit load from memory (AArch64 encoding)
+%% Dst is destination register atom, Src is {BaseReg, Offset} tuple
+-spec ldr_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
+ldr_w(Dst, {BaseReg, Offset}) when
+    is_atom(Dst),
+    is_atom(BaseReg),
+    is_integer(Offset),
+    Offset >= 0,
+    Offset =< 16380,
+    (Offset rem 4) =:= 0
+->
+    % LDR (unsigned immediate), 32-bit variant: offset scaled by 4.
+    DstNum = reg_to_num(Dst),
+    BaseRegNum = reg_to_num(BaseReg),
+    <<
+        (16#B9400000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little
+    >>.
+
+%% Emit a move immediate (MOV) instruction for various immediate sizes (AArch64 encoding)
+%% Dst is destination register atom, Imm is immediate value
+%% Returns a binary that may contain multiple instructions for complex immediates
+-spec mov(aarch64_gpr_register(), integer() | aarch64_gpr_register()) -> binary().
+mov(Dst, Imm) when is_atom(Dst), is_integer(Imm) ->
+    mov_immediate(Dst, Imm);
+mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) ->
+    % MOV Rd, Rm is the ORR Rd, XZR, Rm alias.
+    orr(Rd, xzr, Rm).
+
+%% Helper function to encode immediate values using optimal instruction sequence
+-spec mov_immediate(aarch64_gpr_register(), integer()) -> binary().
+mov_immediate(Dst, Imm) when Imm >= 0, Imm =< 16#FFFF ->
+    %% Simple 16-bit positive immediate
+    movz(Dst, Imm, 0);
+mov_immediate(Dst, Imm) when Imm < 0, (-Imm - 1) =< 16#FFFF ->
+    %% Simple 16-bit negative immediate using MOVN
+    %% MOVN encodes ~immediate, so we can use it when ~Imm fits in 16 bits
+    DstNum = reg_to_num(Dst),
+    <<(16#92800000 bor (((-Imm - 1) band 16#FFFF) bsl 5) bor DstNum):32/little>>;
+mov_immediate(Dst, Imm) when Imm >= 0 ->
+    %% Complex positive immediate - build with MOVZ + MOVK sequence
+    build_positive_immediate(Dst, <<Imm:64>>);
+mov_immediate(Dst, Imm) when Imm < 0 ->
+    %% Complex negative immediate - try MOVN approach first
+    %% (<<Imm:64>> captures the two's complement bit pattern)
+    build_negative_immediate(Dst, <<Imm:64>>).
+
+%% Build positive immediate using MOVZ + MOVK sequence
+-spec build_positive_immediate(aarch64_gpr_register(), binary()) -> binary().
+build_positive_immediate(Dst, <<Imm4:16, Imm3:16, Imm2:16, Imm1:16>> = ImmB) ->
+    %% First try simple MOVZ/MOVK sequence for values with few non-zero chunks
+    %% Chunks are ordered least-significant first (Imm1 = bits 15..0), matching
+    %% the Index * 16 shifts used below.
+    Chunks = [
+        Imm1,
+        Imm2,
+        Imm3,
+        Imm4
+    ],
+    NonZeroChunks = length([C || C <- Chunks, C =/= 0]),
+
+    if
+        NonZeroChunks =< 2 ->
+            %% Use simple MOVZ/MOVK sequence for 1-2 chunks
+            build_immediate_sequence(Dst, Chunks);
+        true ->
+            %% For complex values, try bitmask immediate first
+            case encode_bitmask_immediate(ImmB) of
+                {ok, N, Immr, Imms} ->
+                    %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm)
+                    orr_immediate(Dst, N, Immr, Imms);
+                error ->
+                    %% Fallback to multi-instruction sequence
+                    build_immediate_sequence(Dst, Chunks)
+            end
+    end.
+
+%% Build negative immediate using MOVN or fallback to positive approach
+-spec build_negative_immediate(aarch64_gpr_register(), binary()) -> binary().
+build_negative_immediate(Dst, ImmB) ->
+    %% First try to encode as bitmask immediate with ORR
+    case encode_bitmask_immediate(ImmB) of
+        {ok, N, Immr, Imms} ->
+            %% Use ORR immediate (MOV Rd, #imm is ORR Rd, XZR, #imm)
+            orr_immediate(Dst, N, Immr, Imms);
+        error ->
+            %% Fallback to multi-instruction sequence
+            build_positive_immediate(Dst, ImmB)
+    end.
+
+%% Build instruction sequence from chunks
+-spec build_immediate_sequence(aarch64_gpr_register(), [integer()]) -> binary().
+build_immediate_sequence(Dst, [C0, C1, C2, C3]) ->
+    %% Find the first non-zero chunk to start with MOVZ
+    {Index, Value} = find_first_nonzero_chunk([C0, C1, C2, C3]),
+    First = movz(Dst, Value, Index * 16),
+    Rest = build_movk_sequence(Dst, [C0, C1, C2, C3], Index),
+    <<First/binary, Rest/binary>>.
+
+%% Find the first non-zero chunk
+%% Note: only called with at least one non-zero chunk (zero is handled by the
+%% movz fast path in mov_immediate/2), so the none case never occurs here.
+-spec find_first_nonzero_chunk([integer()]) -> {integer(), integer()} | none.
+find_first_nonzero_chunk(Chunks) ->
+    find_first_nonzero_chunk(Chunks, 0).
+
+find_first_nonzero_chunk([0 | Rest], Index) -> find_first_nonzero_chunk(Rest, Index + 1);
+find_first_nonzero_chunk([Chunk | _], Index) -> {Index, Chunk}.
+
+%% Build MOVK sequence for remaining non-zero chunks
+-spec build_movk_sequence(aarch64_gpr_register(), [integer()], integer()) -> binary().
+build_movk_sequence(Dst, Chunks, SkipIndex) ->
+    build_movk_sequence(Dst, Chunks, SkipIndex, 0, <<>>).
+
+build_movk_sequence(_, [], _, _, Acc) ->
+    Acc;
+build_movk_sequence(Dst, [Chunk | Rest], SkipIndex, CurrentIndex, Acc) ->
+    NewAcc =
+        if
+            CurrentIndex =:= SkipIndex orelse Chunk =:= 0 ->
+                %% Chunk already set by MOVZ, or zero (MOVZ cleared it)
+                Acc;
+            true ->
+                MovkInstr = movk(Dst, Chunk, CurrentIndex * 16),
+                <<Acc/binary, MovkInstr/binary>>
+        end,
+    build_movk_sequence(Dst, Rest, SkipIndex, CurrentIndex + 1, NewAcc).
+
+%% Emit a MOVZ instruction (move with zero)
+-spec movz(aarch64_gpr_register(), integer(), integer()) -> binary().
+movz(Dst, Imm, Shift) when
+    is_atom(Dst),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 16#FFFF,
+    Shift rem 16 =:= 0,
+    Shift >= 0,
+    Shift =< 48
+->
+    DstNum = reg_to_num(Dst),
+    % hw field selects which 16-bit half-word (0..3) the immediate targets.
+    Hw = Shift div 16,
+    %% AArch64 MOVZ encoding: 1101001000hwiiiiiiiiiiiiiiiiibbbbb
+    <<(16#D2800000 bor (Hw bsl 21) bor ((Imm band 16#FFFF) bsl 5) bor DstNum):32/little>>.
+
+%% Emit a MOVK instruction (move with keep)
+-spec movk(aarch64_gpr_register(), integer(), integer()) -> binary().
+movk(Dst, Imm, Shift) when
+    is_atom(Dst),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 16#FFFF,
+    Shift rem 16 =:= 0,
+    Shift >= 0,
+    Shift =< 48
+->
+    DstNum = reg_to_num(Dst),
+    Hw = Shift div 16,
+    %% AArch64 MOVK encoding: 1111001000hwiiiiiiiiiiiiiiiiibbbbb
+    <<(16#F2800000 bor (Hw bsl 21) bor ((Imm band 16#FFFF) bsl 5) bor DstNum):32/little>>.
+
+%% Emit an ORR immediate instruction (used for MOV with bitmask immediates)
+-spec orr_immediate(aarch64_gpr_register(), integer(), integer(), integer()) -> binary().
+orr_immediate(Dst, N, Immr, Imms) when
+    is_atom(Dst),
+    N >= 0,
+    N =< 1,
+    Immr >= 0,
+    Immr =< 63,
+    Imms >= 0,
+    Imms =< 63
+->
+    DstNum = reg_to_num(Dst),
+    %% AArch64 ORR (immediate) encoding: sf 01 100100 N immr imms Rn Rd
+    %% For MOV Rd, #imm: ORR Rd, XZR, #imm (Rn = 31)
+
+    % 64-bit operation (note: bit 31 is already set in 16#B2000000, so the
+    % explicit Sf bor is redundant but harmless)
+    Sf = 1,
+    <<
+        ((Sf bsl 31) bor (16#B2000000) bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor
+            (31 bsl 5) bor DstNum):32/little
+    >>.
+
+%% Encode a value as AArch64 bitmask immediate
+%% Returns {ok, N, Immr, Imms} if encodable, error otherwise
+-spec encode_bitmask_immediate(binary()) -> {ok, 0..1, integer(), integer()} | error.
+encode_bitmask_immediate(Value) when byte_size(Value) =:= 8 ->
+    %% Try different pattern sizes (64, 32, 16, 8, 4, 2)
+    PatternSizes = [64, 32, 16, 8, 4, 2],
+    try_pattern_sizes(Value, PatternSizes).
+
+%% Encode a value as AArch64 bitmask immediate for 32 bits values
+%% Returns {ok, Immr, Imms} if encodable, error otherwise
+-spec encode_bitmask_immediate_w(binary()) -> {ok, integer(), integer()} | error.
+encode_bitmask_immediate_w(Value) when byte_size(Value) =:= 4 ->
+    %% Try different pattern sizes (32, 16, 8, 4, 2)
+    PatternSizes = [32, 16, 8, 4, 2],
+    % 32-bit bitmask immediates always have N = 0; strip it from the result.
+    case try_pattern_sizes(Value, PatternSizes) of
+        {ok, 0, Immr, Imms} -> {ok, Immr, Imms};
+        error -> error
+    end.
+
+%% Try encoding with different pattern sizes
+-spec try_pattern_sizes(binary(), [integer()]) -> {ok, integer(), integer(), integer()} | error.
+try_pattern_sizes(_, []) ->
+    error;
+try_pattern_sizes(Value, [Size | Rest]) ->
+    case try_encode_pattern_size(Value, Size) of
+        {ok, N, Immr, Imms} -> {ok, N, Immr, Imms};
+        error -> try_pattern_sizes(Value, Rest)
+    end.
+
+%% Try to encode value with a specific pattern size
+-spec try_encode_pattern_size(binary(), integer()) ->
+    {ok, integer(), integer(), integer()} | error.
+try_encode_pattern_size(Value, Size) ->
+    % NOTE(review): both bit-syntax expressions below are garbled by
+    % extraction. The first presumably extracts the top Size bits as Pattern;
+    % the second checks that Value is Pattern replicated to the full width.
+    % Confirm both against upstream sources.
+    <> = Value,
+    if
+        Value =:= <> ->
+            try_encode_single_pattern(Pattern, Size);
+        true ->
+            error
+    end.
+
+%% Try to encode a single pattern as bitmask immediate
+-spec try_encode_single_pattern(integer(), integer()) ->
+    {ok, integer(), integer(), integer()} | error.
+try_encode_single_pattern(Pattern, Size) ->
+    %% Find runs of consecutive 1s and 0s
+    case find_single_run_of_ones(Pattern, Size) of
+        {ok, OnesCount, _StartPos} when OnesCount =:= Size ->
+            %% All-ones patterns are reserved (not encodable as bitmask
+            %% immediates); reject rather than emit an invalid encoding.
+            error;
+        {ok, OnesCount, StartPos} ->
+            %% Calculate N, Immr, Imms
+            N =
+                case Size of
+                    64 -> 1;
+                    32 -> 0;
+                    16 -> 0;
+                    8 -> 0;
+                    4 -> 0;
+                    2 -> 0
+                end,
+
+            %% For N=0 patterns, we need to encode the size in imms
+            Imms =
+                case Size of
+                    64 -> OnesCount - 1;
+                    32 -> OnesCount - 1;
+                    16 -> 2#100000 bor (OnesCount - 1);
+                    8 -> 2#110000 bor (OnesCount - 1);
+                    4 -> 2#111000 bor (OnesCount - 1);
+                    2 -> 2#111100 bor (OnesCount - 1)
+                end,
+            %% immr is the rotation amount (negate of start position)
+            Immr = (-StartPos) band (Size - 1),
+
+            {ok, N, Immr, Imms};
+        error ->
+            error
+    end.
+
+%% Find a single run of consecutive 1s in the pattern
+-spec find_single_run_of_ones(integer(), integer()) -> {ok, integer(), integer()} | error.
+find_single_run_of_ones(Pattern, Size) ->
+    %% Convert to a list of bits, least-significant first, for easier analysis
+    Bits = [(Pattern bsr I) band 1 || I <- lists:seq(0, Size - 1)],
+    find_ones_run(Bits, 0, 0, 0, none).
+
+find_ones_run([], _, OnesCount, StartPos, in_ones) when OnesCount > 0 ->
+    %% Reached end while in ones run
+    {ok, OnesCount, StartPos};
+find_ones_run([], _Pos, _OnesCount, _StartPos, none) ->
+    %% No ones at all (pattern is 0): not encodable as a bitmask immediate.
+    %% Previously this case had no clause and crashed with function_clause.
+    error;
+find_ones_run([1 | Rest], Pos, 0, _, none) ->
+    %% Start of ones run
+    find_ones_run(Rest, Pos + 1, 1, Pos, in_ones);
+find_ones_run([1 | Rest], Pos, OnesCount, StartPos, in_ones) ->
+    %% Continue ones run
+    find_ones_run(Rest, Pos + 1, OnesCount + 1, StartPos, in_ones);
+find_ones_run([0 | Rest], _Pos, OnesCount, StartPos, in_ones) ->
+    %% End of ones run - make sure rest are zeros (single run only)
+    case lists:all(fun(X) -> X =:= 0 end, Rest) of
+        true -> {ok, OnesCount, StartPos};
+        %% Multiple runs not supported in simple encoding
+        false -> error
+    end;
+find_ones_run([0 | Rest], Pos, OnesCount, StartPos, none) ->
+    %% Still looking for start of ones run
+    find_ones_run(Rest, Pos + 1, OnesCount, StartPos, none).
%% Emit an ORR instruction (AArch64 encoding)
%% ORR Rd, Rn, Rm - performs bitwise OR of Rn and Rm, storing result in Rd
%% Special cases: ORR Rd, XZR, Rm is equivalent to MOV Rd, Rm
%% The third operand may also be an integer immediate, in which case it must
%% be encodable as a bitmask immediate.
-spec orr(
    aarch64_gpr_register(),
    aarch64_gpr_register() | xzr,
    aarch64_gpr_register() | integer()
) -> binary().
orr(DstReg, xzr, SrcReg) when is_atom(DstReg), is_atom(SrcReg) ->
    %% ORR Rd, XZR, Rm - equivalent to MOV Rd, Rm
    SrcNum = reg_to_num(SrcReg),
    DstNum = reg_to_num(DstReg),
    %% AArch64 ORR (shifted register) encoding with Rn = XZR:
    %% 0xaa000000 | Rm << 16 | 31 << 5 | Rd  (16#3E0 below is 31 << 5)
    <<(16#AA0003E0 bor (SrcNum bsl 16) bor DstNum):32/little>>;
orr(DstReg, Rn, Rm) when is_atom(DstReg), is_atom(Rn), is_atom(Rm) ->
    %% General ORR Rd, Rn, Rm
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    DstNum = reg_to_num(DstReg),
    %% AArch64 ORR (shifted register) encoding (64-bit):
    %% 0xaa000000 | Rm << 16 | Rn << 5 | Rd
    <<
        (16#AA000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor DstNum):32/little
    >>;
orr(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% Encode the (two's-complement truncated) 64-bit immediate as a bitmask
    case encode_bitmask_immediate(<<Imm:64>>) of
        {ok, N, Immr, Imms} ->
            % OR immediate encoding: sf=1(64b) 01(op) 100100 N immr imms Rn Rd
            Opcode = 16#B2000000,
            Instr =
                Opcode bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5) bor
                    RdNum,
            <<Instr:32/little>>;
        error ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a store register (STR) instruction for 64-bit store to memory.
%% Accepts either a base + scaled unsigned immediate offset, or a base +
%% register offset with optional LSL #3 (to match the clauses below).
-spec str(
    aarch64_gpr_register(),
    {aarch64_gpr_register(), integer()}
    | {aarch64_gpr_register(), aarch64_gpr_register(), lsl, 0 | 3}
) -> binary().
%% STR (immediate, unsigned offset), 64-bit: the offset is scaled by 8 in the
%% encoding, so it must be a multiple of 8 in 0..32760.
str(SrcReg, {BaseReg, Offset}) when
    is_atom(SrcReg),
    is_atom(BaseReg),
    is_integer(Offset),
    Offset >= 0,
    Offset =< 32760,
    (Offset rem 8) =:= 0
->
    SrcNum = reg_to_num(SrcReg),
    BaseNum = reg_to_num(BaseReg),
    %% AArch64 STR (immediate) encoding for 64-bit: 11111001000iiiiiiiiiiibbbbbttttt
    %% 0xf9000000 | (Offset div 8) << 10 | BaseReg << 5 | SrcReg
    <<
        (16#F9000000 bor ((Offset div 8) bsl 10) bor (BaseNum bsl 5) bor SrcNum):32/little
    >>;
%% STR (register offset), 64-bit: STR Xt, [Xn, Xm {, LSL #Amount}]
%% Only LSL #0 or LSL #3 are valid for a 64-bit store.
str(Xt, {Xn, Xm, lsl, Amount}) when
    is_atom(Xt),
    is_atom(Xn),
    is_atom(Xm),
    Amount =:= 0 orelse Amount =:= 3
->
    XtNum = reg_to_num(Xt),
    XnNum = reg_to_num(Xn),
    XmNum = reg_to_num(Xm),
    %% S bit: 1 when the index is shifted by the access size (LSL #3), else 0
    S = Amount div 3,
    <<
        (16#F8206800 bor (XmNum bsl 16) bor (S bsl 12) bor (XnNum bsl 5) bor XtNum):32/little
    >>.

%% Emit a store register (STR) instruction for 64-bit store to memory, with store-update (writeback)
-spec str
    (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary();
    (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary().
%% STR (pre-index): STR Xt, [Xn, #Imm]!  -- base updated before the store.
%% Imm is a signed 9-bit offset (-256..255), masked into the imm9 field.
str(Reg, {Base, Imm}, '!') when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8000C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>;
%% STR (post-index): STR Xt, [Xn], #Imm  -- base updated after the store.
str(Reg, {Base}, Imm) when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>.

%% Emit a store register (STR) instruction for 32-bit store to memory
-spec str_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
%% STR (immediate, unsigned offset), 32-bit: the offset is scaled by 4 in the
%% encoding, so it must be a multiple of 4 in 0..16380.
str_w(Src, {BaseReg, Offset}) when
    is_atom(Src),
    is_atom(BaseReg),
    is_integer(Offset),
    Offset >= 0,
    Offset =< 16380,
    (Offset rem 4) =:= 0
->
    SrcNum = reg_to_num(Src),
    BaseRegNum = reg_to_num(BaseReg),
    <<
        (16#B9000000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor SrcNum):32/little
    >>.

%% Emit a load register (LDR) instruction for 64-bit load from memory, with writeback
-spec ldr
    (aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary();
    (aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) -> binary().
%% LDR (pre-index): LDR Xt, [Xn, #Imm]!  -- base updated before the load.
%% Imm is a signed 9-bit offset (-256..255), masked into the imm9 field.
ldr(Reg, {Base, Imm}, '!') when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8400C00 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>;
%% LDR (post-index): LDR Xt, [Xn], #Imm  -- base updated after the load.
ldr(Reg, {Base}, Imm) when
    is_atom(Reg), is_atom(Base), is_integer(Imm), Imm >= -256, Imm < 256, (Imm rem 8) =:= 0
->
    RegNum = reg_to_num(Reg),
    BaseNum = reg_to_num(Base),
    <<(16#F8400400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>.

%% Emit a store pair (STP) instruction for 64-bit registers
%% stp(Rn, Rm, {Base}, Imm) -> binary()
%% stp(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update)
-spec stp(
    aarch64_gpr_register(),
    aarch64_gpr_register(),
    {aarch64_gpr_register()} | {aarch64_gpr_register(), integer()},
    integer() | '!'
) -> binary().
%% STP (post-index): STP Rn, Rm, [Base], #Imm
%% Imm is a signed offset, a multiple of 8 in -512..504, encoded scaled by 8
%% into the 7-bit imm7 field.
stp(Rn, Rm, {Base}, Imm) when
    is_atom(Rn),
    is_atom(Rm),
    is_atom(Base),
    is_integer(Imm),
    Imm >= -512,
    Imm =< 504,
    (Imm rem 8) =:= 0
->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    BaseNum = reg_to_num(Base),
    %% STP (post-index) encoding: 1010100010|imm7|Rt2|Base|Rt
    %% 0xa8800000 | ((Imm div 8) band 0x7f) << 15 | Rm << 10 | Base << 5 | Rn
    %% The band 16#7F mask is required: without it a negative offset
    %% (e.g. -16) sign-extends over and corrupts the opcode bits.
    <<
        (16#A8800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor
            (RmNum bsl 10) bor RnNum):32/little
    >>;
%% STP (pre-index): STP Rn, Rm, [Base, #Imm]!
stp(Rn, Rm, {Base, Imm}, '!') when
    is_atom(Rn),
    is_atom(Rm),
    is_atom(Base),
    is_integer(Imm),
    Imm >= -512,
    Imm =< 504,
    (Imm rem 8) =:= 0
->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    BaseNum = reg_to_num(Base),
    <<
        (16#A9800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor
            RnNum):32/little
    >>.

%% Emit a load pair (LDP) instruction for 64-bit registers
%% ldp(Rn, Rm, {Base}, Imm) -> binary()
-spec ldp(aarch64_gpr_register(), aarch64_gpr_register(), {aarch64_gpr_register()}, integer()) ->
    binary().
%% LDP (post-index): LDP Rn, Rm, [Base], #Imm
ldp(Rn, Rm, {Base}, Imm) when
    is_atom(Rn),
    is_atom(Rm),
    is_atom(Base),
    is_integer(Imm),
    Imm >= -512,
    Imm =< 504,
    (Imm rem 8) =:= 0
->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    BaseNum = reg_to_num(Base),
    %% LDP (post-index) encoding: 1010100011|imm7|Rt2|Base|Rt
    <<
        (16#A8C00000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor
            RnNum):32/little
    >>.
%%-----------------------------------------------------------------------------
%% Helper functions
%%-----------------------------------------------------------------------------

%% Convert register atoms to register numbers for assembly generation
%% for r0 to r30
reg_to_num(r0) -> 0;
reg_to_num(r1) -> 1;
reg_to_num(r2) -> 2;
reg_to_num(r3) -> 3;
reg_to_num(r4) -> 4;
reg_to_num(r5) -> 5;
reg_to_num(r6) -> 6;
reg_to_num(r7) -> 7;
reg_to_num(r8) -> 8;
reg_to_num(r9) -> 9;
reg_to_num(r10) -> 10;
reg_to_num(r11) -> 11;
reg_to_num(r12) -> 12;
reg_to_num(r13) -> 13;
reg_to_num(r14) -> 14;
reg_to_num(r15) -> 15;
reg_to_num(r16) -> 16;
reg_to_num(r17) -> 17;
reg_to_num(r18) -> 18;
reg_to_num(r19) -> 19;
reg_to_num(r20) -> 20;
reg_to_num(r21) -> 21;
reg_to_num(r22) -> 22;
reg_to_num(r23) -> 23;
reg_to_num(r24) -> 24;
reg_to_num(r25) -> 25;
reg_to_num(r26) -> 26;
reg_to_num(r27) -> 27;
reg_to_num(r28) -> 28;
reg_to_num(r29) -> 29;
reg_to_num(r30) -> 30;
%% Stack pointer (SP) is r31
reg_to_num(sp) -> 31;
%% Zero register (XZR) is also r31
%% (whether encoding 31 means SP or XZR depends on the instruction;
%% callers pick the atom that matches the intended semantics)
reg_to_num(xzr) -> 31.

%% Emit a conditional branch instruction (B.cond).
%% Offset is a byte offset relative to this instruction; it is encoded as a
%% word (offset/4) in the signed 19-bit imm19 field.
-spec bcc(cc(), integer()) -> binary().
bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) ->
    %% Condition code field (bits 0..3) per the AArch64 condition encoding
    CondNum =
        case Cond of
            % Equal (Z set)
            eq -> 0;
            % Not equal (Z clear)
            ne -> 1;
            % Carry set
            cs -> 2;
            % Carry clear
            cc -> 3;
            % Minus (N set)
            mi -> 4;
            % Plus (N clear)
            pl -> 5;
            % Overflow set
            vs -> 6;
            % Overflow clear
            vc -> 7;
            % Higher (unsigned)
            hi -> 8;
            % Lower or same (unsigned)
            ls -> 9;
            % Greater than or equal (signed)
            ge -> 10;
            % Less than (signed)
            lt -> 11;
            % Greater than (signed)
            gt -> 12;
            % Less than or equal (signed)
            le -> 13;
            % Always
            al -> 14;
            % Never
            nv -> 15
        end,
    Offset19 = Offset div 4,
    <<(16#54000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor CondNum):32/little>>.
%% Emit a compare and branch on non-zero
%% Offset is a byte offset relative to this instruction (imm19, scaled by 4).
-spec cbnz(aarch64_gpr_register(), integer()) -> binary().
cbnz(Rt, Offset) when is_integer(Offset) ->
    RtNum = reg_to_num(Rt),
    Offset19 = Offset div 4,
    <<(16#B5000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor RtNum):32/little>>.

%% 32-bit variant of cbnz (tests Wt instead of Xt)
-spec cbnz_w(aarch64_gpr_register(), integer()) -> binary().
cbnz_w(Rt, Offset) when is_integer(Offset) ->
    RtNum = reg_to_num(Rt),
    Offset19 = Offset div 4,
    <<(16#35000000 bor ((Offset19 band 16#7FFFF) bsl 5) bor RtNum):32/little>>.

%% Emit a test bit and branch if zero
%% Bit is the bit number to test (0..63); Offset is a byte offset relative to
%% this instruction (imm14, scaled by 4).
-spec tbz(aarch64_gpr_register(), 0..63, integer()) -> binary().
tbz(Rt, Bit, Offset) when Offset >= -32768 andalso Offset < 32768 ->
    RtNum = reg_to_num(Rt),
    Offset14 = Offset div 4,
    %% Note: `band` binds tighter than `bsl` in Erlang, so `Bit band 32 bsl 26`
    %% is (Bit band 32) bsl 26 - that places the b5 bit (bit 5 of the bit
    %% number) at instruction bit 31, and b40 (low 5 bits) at bits 19..23.
    <<
        ((Bit band 32 bsl 26) bor 16#36000000 bor (Bit band 31 bsl 19) bor
            ((Offset14 band 16#3FFF) bsl 5) bor RtNum):32/little
    >>.

%% Emit a test bit and branch if not zero
-spec tbnz(aarch64_gpr_register(), 0..63, integer()) -> binary().
tbnz(Rt, Bit, Offset) when Offset >= -32768 andalso Offset < 32768 ->
    RtNum = reg_to_num(Rt),
    Offset14 = Offset div 4,
    %% Same field layout as tbz, opcode bit 24 set (0x37...) selects "not zero"
    <<
        ((Bit band 32 bsl 26) bor 16#37000000 bor (Bit band 31 bsl 19) bor
            ((Offset14 band 16#3FFF) bsl 5) bor RtNum):32/little
    >>.

%% Emit a compare instruction
-spec cmp(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
cmp(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 CMP (shifted register) encoding: CMP Rn, Rm
    %% This is SUBS XZR, Rn, Rm: 0xeb00001f | Rm << 16 | Rn << 5
    <<(16#EB00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RnNum = reg_to_num(Rn),
    %% AArch64 CMP (immediate) encoding: CMP Rn, #imm
    %% This is SUBS XZR, Rn, #imm: 0xf100001f | imm12 << 10 | Rn << 5
    <<(16#F100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>;
cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) ->
    %% For large immediates, load into a temporary register and compare.
    %% Use r16 (IP0) as temporary register (caller-saved).
    TempReg = r16,
    LoadInstr = mov_immediate(TempReg, Imm),
    CmpInstr = cmp(Rn, TempReg),
    %% Restored garbled bit syntax: concatenate the mov sequence and the cmp
    <<LoadInstr/binary, CmpInstr/binary>>.

%% Emit a 32-bit compare instruction
-spec cmp_w(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
cmp_w(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    %% Register clause added so the implementation matches the declared spec.
    %% CMP Wn, Wm is SUBS WZR, Wn, Wm: 0x6b00001f | Rm << 16 | Rn << 5
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    <<(16#6B00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
cmp_w(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RnNum = reg_to_num(Rn),
    %% AArch64 CMP (32-bit immediate) encoding: CMP Wn, #imm
    %% This is SUBS WZR, Wn, #imm: 0x7100001f | imm12 << 10 | Rn << 5
    <<(16#7100001F bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>;
cmp_w(Rn, Imm) when is_atom(Rn), is_integer(Imm), Imm < 0, Imm >= -4095 ->
    RnNum = reg_to_num(Rn),
    %% For negative immediates, use the ADD form: CMP Wn, #imm becomes
    %% ADDS WZR, Wn, #(-imm): 0x3100001f | imm12 << 10 | Rn << 5
    PosImm = -Imm,
    <<(16#3100001F bor ((PosImm band 16#FFF) bsl 10) bor (RnNum bsl 5)):32/little>>.

%% Emit an AND instruction (bitwise AND)
-spec and_(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() | integer()) ->
    binary().
and_(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 AND (shifted register) encoding: AND Rd, Rn, Rm (64-bit)
    %% 0x8a000000 | Rm << 16 | Rn << 5 | Rd
    <<
        (16#8A000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little
    >>;
and_(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% Restored garbled bit syntax: encode the 64-bit two's-complement
    %% immediate as a bitmask immediate.
    case encode_bitmask_immediate(<<Imm:64>>) of
        {ok, N, Immr, Imms} ->
            % AND immediate encoding: sf=1(64b) 00(op) 100100 N immr imms Rn Rd
            Opcode = 16#92000000,
            Instr =
                Opcode bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5) bor
                    RdNum,
            <<Instr:32/little>>;
        error ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a logical shift left instruction
-spec lsl(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
lsl(Rd, Rn, Shift) when is_atom(Rd), is_atom(Rn), is_integer(Shift), Shift >= 0, Shift =< 63 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% AArch64 LSL (immediate) encoding: LSL Rd, Rn, #shift
    %% This is the alias UBFM Rd, Rn, #((-shift) MOD 64), #(63 - shift)
    NegShift = (-Shift) band 63,
    Width = 63 - Shift,
    <<
        (16#D3400000 bor ((NegShift band 16#3F) bsl 16) bor ((Width band 16#3F) bsl 10) bor
            (RnNum bsl 5) bor RdNum):32/little
    >>.

%% Emit a logical shift right instruction
-spec lsr(aarch64_gpr_register(), aarch64_gpr_register(), integer()) -> binary().
lsr(Rd, Rn, Shift) when is_atom(Rd), is_atom(Rn), is_integer(Shift), Shift >= 0, Shift =< 63 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% AArch64 LSR (immediate) encoding: LSR Rd, Rn, #shift
    %% This is the alias UBFM Rd, Rn, #shift, #63
    <<
        (16#D340FC00 bor ((Shift band 16#3F) bsl 16) bor (RnNum bsl 5) bor RdNum):32/little
    >>.

%% Emit a return instruction
-spec ret() -> binary().
ret() ->
    %% AArch64 RET encoding: RET (defaults to X30/LR)
    %% 11010110010111110000001111000000
    <<16#D65F03C0:32/little>>.

%% Emit a NOP instruction
-spec nop() -> binary().
nop() ->
    %% 11010101000000110010000000011111
    <<16#D503201F:32/little>>.

%% Emit a test instruction (bitwise AND, discarding result)
-spec tst(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 TST (shifted register) encoding: TST Rn, Rm
    %% This is ANDS XZR, Rn, Rm: 0xea00001f | Rm << 16 | Rn << 5
    <<(16#EA00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
tst(Rn, Imm) when is_atom(Rn), is_integer(Imm) ->
    RnNum = reg_to_num(Rn),
    %% Restored garbled bit syntax: TST (immediate) is ANDS XZR, Rn, #imm,
    %% with the immediate encoded as a 64-bit bitmask immediate.
    case encode_bitmask_immediate(<<Imm:64>>) of
        {ok, N, Immr, Imms} ->
            <<
                (16#F200001F bor (N bsl 22) bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little
            >>;
        _ ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a 32-bit test instruction (bitwise AND, discarding result)
-spec tst_w(aarch64_gpr_register(), aarch64_gpr_register() | integer()) -> binary().
tst_w(Rn, Rm) when is_atom(Rn), is_atom(Rm) ->
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 TST (32-bit shifted register) encoding: TST Wn, Wm
    %% This is ANDS WZR, Wn, Wm: 0x6a00001f | Rm << 16 | Rn << 5
    <<(16#6A00001F bor (RmNum bsl 16) bor (RnNum bsl 5)):32/little>>;
tst_w(Rn, Imm) when is_atom(Rn), is_integer(Imm) ->
    RnNum = reg_to_num(Rn),
    %% Restored garbled bit syntax: 32-bit bitmask immediate (N is always 0)
    case encode_bitmask_immediate_w(<<Imm:32>>) of
        {ok, Immr, Imms} ->
            <<(16#7200001F bor (Immr bsl 16) bor (Imms bsl 10) bor (RnNum bsl 5)):32/little>>;
        _ ->
            error({unencodable_immediate, Imm})
    end.

%% Emit a subtract and set flags (SUBS) instruction (AArch64 encoding)
%% SUBS Rd, Rn, Rm/imm - subtracts and sets condition flags
-spec subs(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) ->
    binary().
subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% AArch64 SUBS (immediate): 0xf1000000 | imm12 << 10 | Rn << 5 | Rd
    <<(16#F1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% AArch64 SUBS (register): 0xeb000000 | Rm << 16 | Rn << 5 | Rd
    <<(16#EB000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little>>.

%% Emit a subtract (SUB) instruction, without setting flags
-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) ->
    binary().
sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    %% SUB (immediate): 0xd1000000 | imm12 << 10 | Rn << 5 | Rd
    <<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
    %% SUB (register) without shift is the shifted form with LSL #0
    sub(Rd, Rn, Rm, {lsl, 0}).

%% SUB (shifted register): SUB Rd, Rn, Rm, LSL #Amount
-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) ->
    binary().
sub(Rd, Rn, Rm, {lsl, Amount}) when
    is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63
->
    RdNum = reg_to_num(Rd),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% SUB (shifted register): 0xcb000000 | Rm << 16 | imm6 << 10 | Rn << 5 | Rd
    <<
        (16#CB000000 bor (RmNum bsl 16) bor ((Amount band 16#3F) bsl 10) bor (RnNum bsl 5) bor
            RdNum):32/little
    >>.

%% Emit an ADR (PC-relative address) instruction (AArch64 encoding)
%% Dst is destination register atom, Offset is signed immediate (in bytes, -1MB..+1MB)
-spec adr(aarch64_gpr_register(), integer()) -> binary().
adr(Dst, Imm) when is_atom(Dst), is_integer(Imm), Imm >= -1048576, Imm =< 1048572 ->
    DstNum = reg_to_num(Dst),
    %% immlo = low 2 bits of the offset (instruction bits 29..30),
    %% immhi = remaining 19 bits (instruction bits 5..23)
    ImmLo = Imm band 3,
    ImmHi = Imm bsr 2,
    Word = 16#10000000 bor (ImmLo bsl 29) bor ((ImmHi band 16#7FFFF) bsl 5) bor DstNum,
    %% Restored garbled bit syntax: emit the 32-bit little-endian word
    <<Word:32/little>>.
+ +-spec mul(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register()) -> binary(). +mul(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + madd(Rd, Rn, Rm, xzr). + +-spec madd( + aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register() +) -> binary(). +madd(Rd, Rn, Rm, Ra) when is_atom(Rd), is_atom(Rn), is_atom(Rm), is_atom(Ra) -> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + RaNum = reg_to_num(Ra), + << + (16#9B000000 bor (RmNum bsl 16) bor (RaNum bsl 10) bor (RnNum bsl 5) bor + RdNum):32/little + >>. diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index f358138e60..5885c113e8 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -65,8 +65,16 @@ compile(Target, Dir, Path) -> Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + + Arch = + case Target of + "x86_64" -> ?JIT_ARCH_X86_64; + "aarch64" -> ?JIT_ARCH_AARCH64; + _ -> error({unsupported_target, Target}) + end, + Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) ), Backend = list_to_atom("jit_" ++ Target), Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 6ce8523320..04aff1f840 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -207,3 +207,4 @@ X(EMU_FLAVOR_ATOM, "\xA", "emu_flavor") X(CODE_SERVER_ATOM, "\xB", "code_server") X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") +X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 8ce68b333c..6cd22607bf 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -72,6 +72,19 @@ 
_Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_x86_64.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_x86_64.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_x86_64.erl"); +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 +_Static_assert(offsetof(Context, e) == 0x28, "ctx->e is 0x28 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, x) == 0x30, "ctx->x is 0x30 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, cp) == 0xB8, "ctx->cp is 0xB8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, fr) == 0xC0, "ctx->fr is 0xC0 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, bs) == 0xC8, "ctx->bs is 0xC8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in jit/src/jit_aarch64.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_aarch64.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl"); +#else +#error Unknown jit target #endif #define PROCESS_MAYBE_TRAP_RETURN_VALUE(return_value, offset) \ diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 972a1a1180..e158cf87cd 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -172,14 +172,27 @@ enum TrapAndLoadResult #define JIT_FORMAT_VERSION 1 #define JIT_ARCH_X86_64 1 +#define JIT_ARCH_AARCH64 2 #define JIT_VARIANT_PIC 1 +#ifndef AVM_NO_JIT + #ifdef __x86_64__ #define JIT_ARCH_TARGET JIT_ARCH_X86_64 #define JIT_JUMPTABLE_ENTRY_SIZE 5 #endif +#if defined(__arm64__) || defined(__aarch64__) +#define 
JIT_ARCH_TARGET JIT_ARCH_AARCH64 +#define JIT_JUMPTABLE_ENTRY_SIZE 4 +#endif + +#ifndef JIT_ARCH_TARGET +#error Unknown JIT target +#endif +#endif + /** * @brief Return the entry point from a given jit stream * diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index a5f5c907ed..bbfd839938 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -338,7 +338,8 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary for (int arch_index = 0; arch_index < ENDIAN_SWAP_16(native_code->architectures_count); arch_index++) { if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == JIT_VARIANT_PIC) { size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size); - module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), (ModuleNativeEntryPoint) ((const uint8_t *) &native_code->info_size + offset)); + ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset); + module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point); break; } } diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 52e621ca48..37a38d06b5 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5678,6 +5678,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 return JIT_X86_64_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 + return JIT_AARCH64_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/sys.h b/src/libAtomVM/sys.h index fdbfa66b85..0735d86ed3 100644 --- a/src/libAtomVM/sys.h +++ b/src/libAtomVM/sys.h @@ -284,6 +284,18 @@ void sys_init_platform(GlobalContext *global); */ void sys_free_platform(GlobalContext *global); +/** + * @brief Map 
precompiled native code to a module entry point. + * + * @details If mmap module is executable, returns native_code + offset. + * Otherwise (Apple Silicon) copy it to an executable buffer. Only implemented + * on platforms with JIT. + * @param native_code pointer to native code chunk + * @param size size of native code chunk + * @param offset offset to the module entry point + */ +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset); + #ifdef __cplusplus } #endif diff --git a/src/platforms/generic_unix/CMakeLists.txt b/src/platforms/generic_unix/CMakeLists.txt index 668cb3db1a..933971dd39 100644 --- a/src/platforms/generic_unix/CMakeLists.txt +++ b/src/platforms/generic_unix/CMakeLists.txt @@ -39,11 +39,6 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") endif() endif() -if(NOT AVM_DISABLE_JIT) -include(DefineIfExists) -define_if_function_exists(AtomVM pthread_jit_write_protect_np "pthread.h" PUBLIC HAVE_PTHREAD_JIT_WRITE_PROTECT_NP) -define_if_symbol_exists(AtomVM MAP_JIT "sys/mman.h" PUBLIC HAVE_MAP_JIT) -endif() add_subdirectory(lib) target_include_directories(AtomVM PUBLIC lib/) @@ -57,17 +52,21 @@ set( ) target_link_libraries(AtomVM PRIVATE libAtomVM${PLATFORM_LIB_SUFFIX}) +if(AVM_DISABLE_JIT) +set(precompiled_suffix) +else() +set(precompiled_suffix "-${AVM_JIT_TARGET_ARCH}") +include(DefineIfExists) +define_if_function_exists(libAtomVM${PLATFORM_LIB_SUFFIX} pthread_jit_write_protect_np "pthread.h" PUBLIC HAVE_PTHREAD_JIT_WRITE_PROTECT_NP) +define_if_symbol_exists(libAtomVM${PLATFORM_LIB_SUFFIX} MAP_JIT "sys/mman.h" PUBLIC HAVE_MAP_JIT) +endif() + if (COVERAGE) include(CodeCoverage) append_coverage_compiler_flags_to_target(AtomVM) append_coverage_linker_flags_to_target(AtomVM) endif() -if(AVM_DISABLE_JIT) -set(precompiled_suffix) -else() -set(precompiled_suffix "-${AVM_JIT_TARGET_ARCH}") -endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/atomvm ${CMAKE_CURRENT_BINARY_DIR}/atomvm @ONLY) install(TARGETS AtomVM 
DESTINATION lib/atomvm) diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index bed7819c2f..376f7384d0 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -29,6 +29,7 @@ #include "platform_defaultatoms.h" #include "term.h" +#include #include #include #include @@ -74,6 +75,7 @@ static term nif_jit_stream_mmap_new(Context *ctx, int argc, term argv[]) uint8_t *addr = (uint8_t *) mmap(0, size, prot, flags, fd, offset); if (addr == MAP_FAILED) { + fprintf(stderr, "Could not allocate mmap for JIT: size=%zu, errno=%d\n", size, errno); RAISE_ERROR(BADARG_ATOM); } @@ -87,15 +89,6 @@ static term nif_jit_stream_mmap_new(Context *ctx, int argc, term argv[]) js->stream_offset = 0; js->stream_size = size; -#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP - pthread_jit_write_protect_np(0); -#endif -#if defined(__APPLE__) - sys_icache_invalidate(addr, size); -#elif defined(__GNUC__) - __builtin___clear_cache(addr, addr + size); -#endif - term obj = enif_make_resource(erl_nif_env_from_context(ctx), js); enif_release_resource(js); // decrement refcount after enif_alloc_resource return obj; @@ -127,9 +120,17 @@ static term nif_jit_stream_mmap_append(Context *ctx, int argc, term argv[]) size_t binary_size = term_binary_size(argv[1]); const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[1]); - assert(js_obj->stream_offset + binary_size < js_obj->stream_size); + if (UNLIKELY(js_obj->stream_offset + binary_size > js_obj->stream_size)) { + RAISE_ERROR(BADARG_ATOM); + } +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(0); +#endif memcpy(js_obj->stream_base + js_obj->stream_offset, binary_data, binary_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(1); +#endif js_obj->stream_offset += binary_size; return argv[0]; @@ -155,7 +156,13 @@ static term nif_jit_stream_mmap_replace(Context *ctx, int argc, term 
argv[]) RAISE_ERROR(BADARG_ATOM); } +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(0); +#endif memcpy(js_obj->stream_base + offset, binary_data, binary_size); +#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP + pthread_jit_write_protect_np(1); +#endif return argv[0]; } @@ -232,9 +239,6 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) return NULL; } -#if HAVE_PTHREAD_JIT_WRITE_PROTECT_NP - pthread_jit_write_protect_np(1); -#endif #if defined(__APPLE__) sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); #elif defined(__GNUC__) diff --git a/src/platforms/generic_unix/lib/mapped_file.c b/src/platforms/generic_unix/lib/mapped_file.c index f33aa183a1..3a58802180 100644 --- a/src/platforms/generic_unix/lib/mapped_file.c +++ b/src/platforms/generic_unix/lib/mapped_file.c @@ -22,6 +22,7 @@ #include "utils.h" +#include #include #include #include @@ -50,9 +51,18 @@ MappedFile *mapped_file_open_beam(const char *file_name) fstat(mf->fd, &file_stats); mf->size = file_stats.st_size; - mf->mapped = mmap(NULL, mf->size, PROT_READ | PROT_EXEC, MAP_SHARED, mf->fd, 0); - if (IS_NULL_PTR(mf->mapped)) { - fprintf(stderr, "Cannot mmap %s\n", file_name); + int prot; +#ifdef AVM_NO_JIT + prot = PROT_READ; +#elif defined(__APPLE__) && defined(__arm64__) + prot = PROT_READ; +#else + prot = PROT_READ | PROT_EXEC; +#endif + + mf->mapped = mmap(NULL, mf->size, prot, MAP_SHARED, mf->fd, 0); + if (UNLIKELY(mf->mapped == MAP_FAILED)) { + fprintf(stderr, "Cannot mmap %s -- errno=%d\n", file_name, errno); close(mf->fd); free(mf); return NULL; diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 439304870b..6e7272095c 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -47,6 +47,14 @@ #ifndef AVM_NO_JIT #include "jit_stream_mmap.h" + +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif #endif #include @@ -805,3 +813,24 @@ void 
sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) } #endif + +#ifndef AVM_NO_JIT +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ +#if defined(__APPLE__) && defined(__arm64__) + uint8_t *native_code_mmap = (uint8_t *) mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0); + if (native_code_mmap == MAP_FAILED) { + fprintf(stderr, "Could not allocate mmap for native code: size=%zu, errno=%d\n", size, errno); + return NULL; + } + pthread_jit_write_protect_np(0); + memcpy(native_code_mmap, native_code, size); + pthread_jit_write_protect_np(1); + sys_icache_invalidate(native_code_mmap, size); + return (ModuleNativeEntryPoint) (native_code_mmap + offset); +#else + UNUSED(size); + return (ModuleNativeEntryPoint) (native_code + offset); +#endif +} +#endif diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 7bb3df40de..ff8ede14e2 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -70,10 +70,37 @@ endfunction() set(TO_HRL_PATH ${CMAKE_CURRENT_LIST_DIR}) function(generate_hrl out_file def_name in_file) + if(AVM_DISABLE_JIT) + # For non-JIT builds, use the base file + set(selected_file ${in_file}) + else() + # For JIT builds, determine the architecture-specific file + get_filename_component(base_name ${in_file} NAME_WE) + get_filename_component(base_ext ${in_file} EXT) + get_filename_component(base_dir ${in_file} DIRECTORY) + + # Check if it's a .avm file (pack) or .beam file + if(base_ext STREQUAL ".avm") + # For .avm files: name.avm -> name-arch.avm + if(base_dir) + set(selected_file ${base_dir}/${base_name}-${AVM_JIT_TARGET_ARCH}${base_ext}) + else() + set(selected_file ${base_name}-${AVM_JIT_TARGET_ARCH}${base_ext}) + endif() + else() + # For .beam files: name.beam -> arch/name.beam + if(base_dir) + set(selected_file ${base_dir}/${AVM_JIT_TARGET_ARCH}/${base_name}${base_ext}) + else() + 
set(selected_file ${AVM_JIT_TARGET_ARCH}/${base_name}${base_ext}) + endif() + endif() + endif() + add_custom_command( OUTPUT ${out_file} - COMMAND escript ${TO_HRL_PATH}/to_hrl.erl ${in_file} ${def_name} ${out_file} - DEPENDS ${in_file} + COMMAND escript ${TO_HRL_PATH}/to_hrl.erl ${selected_file} ${def_name} ${out_file} + DEPENDS ${selected_file} COMMENT "Generating ${out_file}" ) endfunction() diff --git a/tests/erlang_tests/code_load/CMakeLists.txt b/tests/erlang_tests/code_load/CMakeLists.txt index e8c88ea657..e12b6b5b59 100644 --- a/tests/erlang_tests/code_load/CMakeLists.txt +++ b/tests/erlang_tests/code_load/CMakeLists.txt @@ -37,9 +37,7 @@ set(code_load_deps code_load_pack_data.hrl ) if(NOT AVM_DISABLE_JIT) - generate_hrl(export_test_module_data_${AVM_JIT_TARGET_ARCH}.hrl EXPORT_TEST_MODULE_DATA_${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/export_test_module.beam) - generate_hrl(code_load_pack_data_${AVM_JIT_TARGET_ARCH}.hrl CODE_LOAD_PACK_DATA_${AVM_JIT_TARGET_ARCH} code_load_pack-${AVM_JIT_TARGET_ARCH}.avm) - set(code_load_deps ${code_load_deps} export_test_module_data_${AVM_JIT_TARGET_ARCH}.hrl code_load_pack_data_${AVM_JIT_TARGET_ARCH}.hrl jit) + set(code_load_deps ${code_load_deps} jit) endif() add_custom_target(code_load_files DEPENDS ${code_load_deps}) diff --git a/tests/erlang_tests/test_add_avm_pack_binary.erl b/tests/erlang_tests/test_add_avm_pack_binary.erl index 807b4a9559..ed2509bfb0 100644 --- a/tests/erlang_tests/test_add_avm_pack_binary.erl +++ b/tests/erlang_tests/test_add_avm_pack_binary.erl @@ -22,17 +22,10 @@ -export([start/0]). --ifdef(AVM_DISABLE_JIT). -include("code_load/code_load_pack_data.hrl"). load_pack_data() -> ?CODE_LOAD_PACK_DATA. --else. --include("code_load/code_load_pack_data_x86_64.hrl"). - -load_pack_data() -> - ?CODE_LOAD_PACK_DATA_x86_64. --endif. 
start() -> Bin = load_pack_data(), diff --git a/tests/erlang_tests/test_add_avm_pack_file.erl b/tests/erlang_tests/test_add_avm_pack_file.erl index 53551c3e37..5533c2ff51 100644 --- a/tests/erlang_tests/test_add_avm_pack_file.erl +++ b/tests/erlang_tests/test_add_avm_pack_file.erl @@ -22,11 +22,15 @@ -export([start/0]). +-ifdef(AVM_DISABLE_JIT). +path() -> + "code_load/code_load_pack.avm". +-else. +path() -> + "../code_load/code_load_pack-" ++ atom_to_list(?AVM_JIT_TARGET_ARCH) ++ ".avm". +-endif. + start() -> - AVM = - case erlang:system_info(emu_flavor) of - emu -> "code_load/code_load_pack.avm"; - jit -> "../code_load/code_load_pack-x86_64.avm" - end, + AVM = path(), erlang:display(atomvm:add_avm_pack_file(AVM, [])), export_test_module:exported_func(4). diff --git a/tests/erlang_tests/test_close_avm_pack.erl b/tests/erlang_tests/test_close_avm_pack.erl index f89feccb07..74ef632038 100644 --- a/tests/erlang_tests/test_close_avm_pack.erl +++ b/tests/erlang_tests/test_close_avm_pack.erl @@ -22,17 +22,10 @@ -export([start/0]). --ifdef(AVM_DISABLE_JIT). -include("code_load/code_load_pack_data.hrl"). load_pack_data() -> ?CODE_LOAD_PACK_DATA. --else. --include("code_load/code_load_pack_data_x86_64.hrl"). - -load_pack_data() -> - ?CODE_LOAD_PACK_DATA_x86_64. --endif. start() -> Bin = load_pack_data(), diff --git a/tests/erlang_tests/test_code_load_abs.erl b/tests/erlang_tests/test_code_load_abs.erl index a421ac5980..a3292fe4c3 100644 --- a/tests/erlang_tests/test_code_load_abs.erl +++ b/tests/erlang_tests/test_code_load_abs.erl @@ -22,14 +22,19 @@ -export([start/0]). +-ifdef(AVM_DISABLE_JIT). +path() -> + "code_load/export_test_module". +-else. +path() -> + "../code_load/" ++ atom_to_list(?AVM_JIT_TARGET_ARCH) ++ "/export_test_module". +-endif. 
+ start() -> Path = case erlang:system_info(machine) of "ATOM" -> - case erlang:system_info(emu_flavor) of - emu -> "code_load/export_test_module"; - jit -> "../code_load/x86_64/export_test_module" - end; + path(); "BEAM" -> "code_load/export_test_module" end, diff --git a/tests/erlang_tests/test_code_load_binary.erl b/tests/erlang_tests/test_code_load_binary.erl index 758e0ac69f..56f3356f5c 100644 --- a/tests/erlang_tests/test_code_load_binary.erl +++ b/tests/erlang_tests/test_code_load_binary.erl @@ -22,17 +22,10 @@ -export([start/0]). --ifdef(AVM_DISABLE_JIT). -include("code_load/export_test_module_data.hrl"). export_test_module_data() -> ?EXPORT_TEST_MODULE_DATA. --else. --include("code_load/export_test_module_data_x86_64.hrl"). - -export_test_module_data() -> - ?EXPORT_TEST_MODULE_DATA_x86_64. --endif. start() -> Bin = export_test_module_data(), diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 9dbe754525..70f46ccc09 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -26,6 +26,8 @@ set(ERLANG_MODULES tests jit_tests jit_tests_common + jit_aarch64_tests + jit_aarch64_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl new file mode 100644 index 0000000000..7e43ddb654 --- /dev/null +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -0,0 +1,692 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64_asm_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-export([ + list_to_integer/1, + list_to_integer/2 +]). + +list_to_integer(X) -> erlang:list_to_integer(X). +list_to_integer(X, B) -> erlang:list_to_integer(X, B). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value) +). +-define(_assertAsmEqualLargeInt(Bin, Str, Value), + ?_test(begin + case erlang:system_info(machine) of + "BEAM" -> + ?assertEqual(jit_tests_common:asm(aarch64, Bin, Str), Value); + "ATOM" -> + % AtomVM doesn't handle large integers yet. + % Skip the test + ok + end + end) +). + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#9100e0e7:32/little>>, "add x7, x7, #56", jit_aarch64_asm:add(r7, r7, 56) + ), + ?_assertAsmEqual( + <<16#91000000:32/little>>, "add x0, x0, #0", jit_aarch64_asm:add(r0, r0, 0) + ), + ?_assertAsmEqual( + <<16#91000421:32/little>>, "add x1, x1, #1", jit_aarch64_asm:add(r1, r1, 1) + ), + ?_assertAsmEqual( + <<16#8b031041:32/little>>, + "add x1, x2, x3, lsl #4", + jit_aarch64_asm:add(r1, r2, r3, {lsl, 4}) + ), + ?_assertAsmEqual( + <<16#8b030041:32/little>>, "add x1, x2, x3", jit_aarch64_asm:add(r1, r2, r3) + ), + %% Test add with invalid immediate + ?_assertError({unencodable_immediate, 16#FFFF}, jit_aarch64_asm:add(r0, r0, 16#FFFF)), + + %% Test cases for additional registers (r11, r12, r14, r22-r30) + ?_assertAsmEqual( + <<16#8b0b000b:32/little>>, "add x11, x0, x11", jit_aarch64_asm:add(r11, r0, r11) + ), + ?_assertAsmEqual( + <<16#8b0c000c:32/little>>, "add x12, x0, x12", jit_aarch64_asm:add(r12, r0, r12) + ), + ?_assertAsmEqual( + <<16#8b0e000e:32/little>>, "add x14, x0, x14", jit_aarch64_asm:add(r14, r0, r14) + ), + ?_assertAsmEqual( + <<16#8b160016:32/little>>, "add x22, x0, x22", 
jit_aarch64_asm:add(r22, r0, r22) + ), + ?_assertAsmEqual( + <<16#8b170017:32/little>>, "add x23, x0, x23", jit_aarch64_asm:add(r23, r0, r23) + ), + ?_assertAsmEqual( + <<16#8b180018:32/little>>, "add x24, x0, x24", jit_aarch64_asm:add(r24, r0, r24) + ), + ?_assertAsmEqual( + <<16#8b190019:32/little>>, "add x25, x0, x25", jit_aarch64_asm:add(r25, r0, r25) + ), + ?_assertAsmEqual( + <<16#8b1a001a:32/little>>, "add x26, x0, x26", jit_aarch64_asm:add(r26, r0, r26) + ), + ?_assertAsmEqual( + <<16#8b1b001b:32/little>>, "add x27, x0, x27", jit_aarch64_asm:add(r27, r0, r27) + ), + ?_assertAsmEqual( + <<16#8b1c001c:32/little>>, "add x28, x0, x28", jit_aarch64_asm:add(r28, r0, r28) + ), + ?_assertAsmEqual( + <<16#8b1d001d:32/little>>, "add x29, x0, x29", jit_aarch64_asm:add(r29, r0, r29) + ), + ?_assertAsmEqual( + <<16#8b1e001e:32/little>>, "add x30, x0, x30", jit_aarch64_asm:add(r30, r0, r30) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#d100e0e7:32/little>>, "sub x7, x7, #56", jit_aarch64_asm:sub(r7, r7, 56) + ), + ?_assertAsmEqual( + <<16#d1000000:32/little>>, "sub x0, x0, #0", jit_aarch64_asm:sub(r0, r0, 0) + ), + ?_assertAsmEqual( + <<16#d1000421:32/little>>, "sub x1, x1, #1", jit_aarch64_asm:sub(r1, r1, 1) + ), + ?_assertAsmEqual( + <<16#cb031041:32/little>>, + "sub x1, x2, x3, lsl #4", + jit_aarch64_asm:sub(r1, r2, r3, {lsl, 4}) + ), + ?_assertAsmEqual( + <<16#cb030041:32/little>>, "sub x1, x2, x3", jit_aarch64_asm:sub(r1, r2, r3) + ) + ]. + +madd_test_() -> + [ + ?_assertAsmEqual( + <<16#9b037c41:32/little>>, "mul x1, x2, x3", jit_aarch64_asm:mul(r1, r2, r3) + ), + ?_assertAsmEqual( + <<16#9b031041:32/little>>, "madd x1, x2, x3, x4", jit_aarch64_asm:madd(r1, r2, r3, r4) + ) + ]. 
+ +b_test_() -> + [ + ?_assertAsmEqual(<<16#14000000:32/little>>, "b .+0", jit_aarch64_asm:b(0)), + ?_assertAsmEqual(<<16#14000004:32/little>>, "b .+16", jit_aarch64_asm:b(16)), + ?_assertAsmEqual(<<16#17fffff0:32/little>>, "b .-64", jit_aarch64_asm:b(-64)), + ?_assertAsmEqual(<<16#14000001:32/little>>, "b .+4", jit_aarch64_asm:b(4)) + ]. + +brk_test_() -> + [ + ?_assertAsmEqual(<<16#D4200000:32/little>>, "brk #0", jit_aarch64_asm:brk(0)), + ?_assertAsmEqual(<<16#D4201900:32/little>>, "brk #200", jit_aarch64_asm:brk(200)) + ]. + +blr_test_() -> + [ + ?_assertAsmEqual(<<16#D63F0000:32/little>>, "blr x0", jit_aarch64_asm:blr(r0)), + ?_assertAsmEqual(<<16#D63F0020:32/little>>, "blr x1", jit_aarch64_asm:blr(r1)), + ?_assertAsmEqual(<<16#D63F01A0:32/little>>, "blr x13", jit_aarch64_asm:blr(r13)) + ]. + +br_test_() -> + [ + ?_assertAsmEqual(<<16#D61F0000:32/little>>, "br x0", jit_aarch64_asm:br(r0)), + ?_assertAsmEqual(<<16#D61F0020:32/little>>, "br x1", jit_aarch64_asm:br(r1)), + ?_assertAsmEqual(<<16#D61F01A0:32/little>>, "br x13", jit_aarch64_asm:br(r13)) + ]. 
+ +ldr_test_() -> + [ + ?_assertAsmEqual( + <<16#F9400421:32/little>>, "ldr x1, [x1, #8]", jit_aarch64_asm:ldr(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F9403042:32/little>>, "ldr x2, [x2, #96]", jit_aarch64_asm:ldr(r2, {r2, 96}) + ), + % Load-update (writeback) with SP, negative offset + ?_assertAsmEqual( + <<16#F85F0FE7:32/little>>, + "ldr x7, [sp, #-16]!", + jit_aarch64_asm:ldr(r7, {sp, -16}, '!') + ), + % Load-update (writeback) with SP, positive offset + ?_assertAsmEqual( + <<16#F8410FE7:32/little>>, "ldr x7, [sp, #16]!", jit_aarch64_asm:ldr(r7, {sp, 16}, '!') + ), + % Load-update (writeback) with SP, zero offset + ?_assertAsmEqual( + <<16#F84007E7:32/little>>, "ldr x7, [sp], #0", jit_aarch64_asm:ldr(r7, {sp}, 0) + ), + % shift + ?_assertAsmEqual( + <<16#f8637841:32/little>>, + "ldr x1, [x2, x3, lsl #3]", + jit_aarch64_asm:ldr(r1, {r2, r3, lsl, 3}) + ), + ?_assertAsmEqual( + <<16#f8677907:32/little>>, + "ldr x7, [x8, x7, lsl #3]", + jit_aarch64_asm:ldr(r7, {r8, r7, lsl, 3}) + ), + ?_assertAsmEqual( + <<16#f8636841:32/little>>, "ldr x1, [x2, x3]", jit_aarch64_asm:ldr(r1, {r2, r3}) + ) + ]. + +ldr_w_test_() -> + [ + ?_assertAsmEqual( + <<16#b9400821:32/little>>, "ldr w1, [x1, 8]", jit_aarch64_asm:ldr_w(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#b9406042:32/little>>, "ldr w2, [x2, 96]", jit_aarch64_asm:ldr_w(r2, {r2, 96}) + ), + ?_assertAsmEqual( + <<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]", jit_aarch64_asm:ldr_w(r0, {r3, 16380}) + ) + ]. + +str_w_test_() -> + [ + ?_assertAsmEqual( + <<16#b9000821:32/little>>, "str w1, [x1, 8]", jit_aarch64_asm:str_w(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#b9006042:32/little>>, "str w2, [x2, 96]", jit_aarch64_asm:str_w(r2, {r2, 96}) + ), + ?_assertAsmEqual( + <<16#b93ffc60:32/little>>, "str w0, [x3, 16380]", jit_aarch64_asm:str_w(r0, {r3, 16380}) + ) + ]. 
+ +mov_test_() -> + [ + % mov immediate - simple cases + ?_assertAsmEqual(<<16#D2800000:32/little>>, "mov x0, #0", jit_aarch64_asm:mov(r0, 0)), + ?_assertAsmEqual(<<16#D2801901:32/little>>, "mov x1, #200", jit_aarch64_asm:mov(r1, 200)), + ?_assertAsmEqual(<<16#d28000b3:32/little>>, "mov x19, #5", jit_aarch64_asm:mov(r19, 5)), + ?_assertAsmEqual(<<16#92800094:32/little>>, "mov x20, #-5", jit_aarch64_asm:mov(r20, -5)), + ?_assertAsmEqual(<<16#d2800015:32/little>>, "mov x21, #0", jit_aarch64_asm:mov(r21, 0)), + ?_assertAsmEqual( + <<16#d29ffff0:32/little>>, "mov x16, #0xffff", jit_aarch64_asm:mov(r16, 16#FFFF) + ), + ?_assertAsmEqual( + <<16#929fffcf:32/little>>, "mov x15, #-0xffff", jit_aarch64_asm:mov(r15, -16#FFFF) + ), + + % mov immediate - complex cases requiring multiple instructions + ?_assertAsmEqual( + <<16#d2a00052:32/little>>, "mov x18, #0x20000", jit_aarch64_asm:mov(r18, 16#20000) + ), + ?_assertAsmEqual( + <<16#b26fbbf1:32/little>>, "mov x17, #-0x20000", jit_aarch64_asm:mov(r17, -131072) + ), + + % mov immediate - very large value requiring multiple instructions + ?_assertAsmEqualLargeInt( + <<16#D29579A1:32/little, 16#F2B7C041:32/little, 16#F2DFD741:32/little, + 16#F2EFF941:32/little>>, + "mov x1, #0xabcd\n" + "movk x1, #0xbe02, lsl #16\n" + "movk x1, #0xfeba, lsl #32\n" + "movk x1, #0x7fca, lsl #48", + jit_aarch64_asm:mov(r1, ?MODULE:list_to_integer("9208452466117618637")) + ), + + % mov register + ?_assertAsmEqual(<<16#AA0103E0:32/little>>, "mov x0, x1", jit_aarch64_asm:mov(r0, r1)), + ?_assertAsmEqual(<<16#AA0703E1:32/little>>, "mov x1, x7", jit_aarch64_asm:mov(r1, r7)), + + %% Test mov with zero immediate (should use movz with 0) + ?_assertAsmEqual( + <<16#d2800000:32/little>>, "movz x0, #0", jit_aarch64_asm:mov(r0, 0) + ), + + %% Test 4-bit pattern encoding + ?_assertAsmEqual( + <<16#929fffe0:32/little>>, + "mov x0, #-65536", + jit_aarch64_asm:mov(r0, -65536) + ), + %% Test complex immediate that will use fallback sequence + 
?_assertAsmEqualLargeInt( + << + 16#d29bde00:32/little, + 16#f2b35780:32/little, + 16#f2cacf00:32/little, + 16#f2e24680:32/little + >>, + "mov x0, #0xdef0\n" + "movk x0, #0x9abc, lsl #16\n" + "movk x0, #0x5678, lsl #32\n" + "movk x0, #0x1234, lsl #48", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("123456789ABCDEF0", 16)) + ), + + %% Test negative immediate that uses build_negative_immediate fallback + ?_assertAsmEqualLargeInt( + << + 16#d2842200:32/little, + 16#f2aca860:32/little, + 16#f2d530e0:32/little, + 16#f2fdb960:32/little + >>, + "mov x0, #0x2110\n" + "movk x0, #0x6543, lsl #16\n" + "movk x0, #0xa987, lsl #32\n" + "movk x0, #0xedcb, lsl #48", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("-123456789ABCDEF0", 16)) + ), + + %% Test bitmask patterns with different sizes + %% Size 16 pattern: repeats every 16 bits + ?_assertAsmEqualLargeInt( + <<16#b20083e0:32/little>>, + "mov x0, #0x0001000100010001", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("0001000100010001", 16)) + ), + %% Size 4 pattern: repeats every 4 bits + ?_assertAsmEqualLargeInt( + <<16#b200e7e0:32/little>>, + "mov x0, #0x3333333333333333", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("3333333333333333", 16)) + ), + %% Size 2 pattern: repeats every 2 bits + ?_assertAsmEqualLargeInt( + <<16#b200f3e0:32/little>>, + "mov x0, #0x5555555555555555", + jit_aarch64_asm:mov(r0, ?MODULE:list_to_integer("5555555555555555", 16)) + ) + ]. 
+ +orr_test_() -> + [ + % ORR Rd, XZR, Rm (MOV) + ?_assertAsmEqual( + <<16#AA0103E0:32/little>>, "orr x0, xzr, x1", jit_aarch64_asm:orr(r0, xzr, r1) + ), + % ORR Rd, Rn, Rm + ?_assertAsmEqual( + <<16#AA010020:32/little>>, "orr x0, x1, x1", jit_aarch64_asm:orr(r0, r1, r1) + ), + ?_assertAsmEqual( + <<16#AA020041:32/little>>, "orr x1, x2, x2", jit_aarch64_asm:orr(r1, r2, r2) + ), + + %% Test orr with valid bitmask immediate + ?_assertAsmEqual( + <<16#b24007e0:32/little>>, "orr x0, xzr, #0x3", jit_aarch64_asm:orr(r0, xzr, 16#3) + ), + %% Test orr with another bitmask pattern + ?_assertAsmEqual( + <<16#b27f1fe0:32/little>>, "orr x0, xzr, #0x1fe", jit_aarch64_asm:orr(r0, xzr, 16#1fe) + ), + + %% Test orr with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:orr(r0, r0, 16#123456)) + ]. + +str_test_() -> + [ + ?_assertAsmEqual( + <<16#F9000421:32/little>>, "str x1, [x1, #8]", jit_aarch64_asm:str(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F9003042:32/little>>, "str x2, [x2, #96]", jit_aarch64_asm:str(r2, {r2, 96}) + ), + % str with xzr (zero register) - stores zero to memory + ?_assertAsmEqual( + <<16#F900001F:32/little>>, "str xzr, [x0]", jit_aarch64_asm:str(xzr, {r0, 0}) + ), + ?_assertAsmEqual( + <<16#F900043F:32/little>>, "str xzr, [x1, #8]", jit_aarch64_asm:str(xzr, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#F900085F:32/little>>, "str xzr, [x2, #16]", jit_aarch64_asm:str(xzr, {r2, 16}) + ), + % Store-update (writeback) with SP + ?_assertAsmEqual( + <<16#F81F0FE7:32/little>>, + "str x7, [sp, #-16]!", + jit_aarch64_asm:str(r7, {sp, -16}, '!') + ), + % Store-update (writeback) with SP, positive offset + ?_assertAsmEqual( + <<16#F8010FE7:32/little>>, "str x7, [sp, #16]!", jit_aarch64_asm:str(r7, {sp, 16}, '!') + ), + % Store-update (writeback) with SP, zero offset + ?_assertAsmEqual( + <<16#F80007E7:32/little>>, "str x7, [sp], #0", jit_aarch64_asm:str(r7, {sp}, 0) + ), + % shift + ?_assertAsmEqual( + <<16#f8237841:32/little>>, + 
"str x1, [x2, x3, lsl #3]", + jit_aarch64_asm:str(r1, {r2, r3, lsl, 3}) + ) + ]. + +cmp_test_() -> + [ + % cmp reg, reg + ?_assertAsmEqual(<<16#EB01001F:32/little>>, "cmp x0, x1", jit_aarch64_asm:cmp(r0, r1)), + % cmp reg, imm + ?_assertAsmEqual(<<16#F100001F:32/little>>, "cmp x0, #0", jit_aarch64_asm:cmp(r0, 0)), + ?_assertAsmEqual(<<16#F103001F:32/little>>, "cmp x0, #192", jit_aarch64_asm:cmp(r0, 192)), + + %% Test large immediate compare (uses temporary register) + ?_assertAsmEqual( + << + 16#d28acf10:32/little, + 16#f2a24690:32/little, + 16#eb10001f:32/little + >>, + "mov x16, #0x5678\n" + "movk x16, #0x1234, lsl #16\n" + "cmp x0, x16", + jit_aarch64_asm:cmp(r0, 16#12345678) + ), + + %% Test negative immediate compare (uses MOVN) + ?_assertAsmEqual( + << + 16#92800010:32/little, + 16#eb1000ff:32/little + >>, + "movn x16, #0\n" + "cmp x7, x16", + jit_aarch64_asm:cmp(r7, -1) + ) + ]. + +cmp_w_test_() -> + [ + % cmp_w reg, imm + ?_assertAsmEqual(<<16#7100001F:32/little>>, "cmp w0, #0", jit_aarch64_asm:cmp_w(r0, 0)), + ?_assertAsmEqual(<<16#7103001F:32/little>>, "cmp w0, #192", jit_aarch64_asm:cmp_w(r0, 192)), + + %% Test 32-bit compare with negative immediate + ?_assertAsmEqual( + <<16#3100041f:32/little>>, "adds wzr, w0, #1", jit_aarch64_asm:cmp_w(r0, -1) + ), + ?_assertAsmEqual( + <<16#31000c1f:32/little>>, "adds wzr, w0, #3", jit_aarch64_asm:cmp_w(r0, -3) + ) + ]. 
+ +and_test_() -> + [ + % AND reg, reg, reg + ?_assertAsmEqual( + <<16#8A010020:32/little>>, "and x0, x1, x1", jit_aarch64_asm:and_(r0, r1, r1) + ), + % AND reg, reg, imm + ?_assertAsmEqual( + <<16#927A0420:32/little>>, "and x0, x1, #0xc0", jit_aarch64_asm:and_(r0, r1, 192) + ), + ?_assertAsmEqual( + <<16#927ff8e7:32/little>>, + "and x7, x7, #0xfffffffffffffffe", + jit_aarch64_asm:and_(r7, r7, -2) + ), + ?_assertAsmEqual( + <<16#9200cc41:32/little>>, + "and x1, x2, #0xf0f0f0f0f0f0f0f", + jit_aarch64_asm:and_(r1, r2, 16#f0f0f0f0f0f0f0f) + ), + ?_assertAsmEqual( + <<16#92603c62:32/little>>, + "and x2, x3, #0xffff00000000", + jit_aarch64_asm:and_(r2, r3, 16#ffff00000000) + ), + ?_assertAsmEqual( + <<16#92785c83:32/little>>, + "and x3, x4, #0xffffff00", + jit_aarch64_asm:and_(r3, r4, 16#ffffff00) + ), + %% Test and_ with unencodable immediate + ?_assertError( + {unencodable_immediate, 16#123456}, jit_aarch64_asm:and_(r0, r0, 16#123456) + ) + ]. + +lsl_test_() -> + [ + ?_assertAsmEqual( + <<16#D3607C00:32/little>>, "lsl x0, x0, #32", jit_aarch64_asm:lsl(r0, r0, 32) + ) + ]. + +lsr_test_() -> + [ + ?_assertAsmEqual( + <<16#D340FC00:32/little>>, "lsr x0, x0, 0", jit_aarch64_asm:lsr(r0, r0, 0) + ), + ?_assertAsmEqual( + <<16#D340FC01:32/little>>, "lsr x1, x0, 0", jit_aarch64_asm:lsr(r1, r0, 0) + ), + ?_assertAsmEqual( + <<16#D360FC00:32/little>>, "lsr x0, x0, #32", jit_aarch64_asm:lsr(r0, r0, 32) + ) + ]. + +ret_test_() -> + [ + ?_assertAsmEqual(<<16#D65F03C0:32/little>>, "ret", jit_aarch64_asm:ret()) + ]. 
+ +tst_test_() -> + [ + ?_assertAsmEqual(<<16#EA01001F:32/little>>, "tst x0, x1", jit_aarch64_asm:tst(r0, r1)), + ?_assertAsmEqual(<<16#f240003f:32/little>>, "tst x1, #1", jit_aarch64_asm:tst(r1, 1)), + ?_assertAsmEqual(<<16#f27c005f:32/little>>, "tst x2, #16", jit_aarch64_asm:tst(r2, 16)), + ?_assertAsmEqual(<<16#f2401c7f:32/little>>, "tst x3, #255", jit_aarch64_asm:tst(r3, 255)), + ?_assertAsmEqual(<<16#f240249f:32/little>>, "tst x4, #1023", jit_aarch64_asm:tst(r4, 1023)), + ?_assertAsmEqual(<<16#f24014bf:32/little>>, "tst x5, #63", jit_aarch64_asm:tst(r5, 63)), + ?_assertAsmEqual(<<16#f27b00df:32/little>>, "tst x6, #32", jit_aarch64_asm:tst(r6, 32)), + ?_assertAsmEqual(<<16#f27a00ff:32/little>>, "tst x7, #64", jit_aarch64_asm:tst(r7, 64)), + ?_assertAsmEqual(<<16#f27e051f:32/little>>, "tst x8, #0xc", jit_aarch64_asm:tst(r8, 16#c)), + + %% Test tst with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:tst(r0, 16#123456)) + ]. + +tst_w_test_() -> + [ + ?_assertAsmEqual(<<16#6a01001f:32/little>>, "tst w0, w1", jit_aarch64_asm:tst_w(r0, r1)), + ?_assertAsmEqual(<<16#7200003f:32/little>>, "tst w1, #1", jit_aarch64_asm:tst_w(r1, 1)), + ?_assertAsmEqual(<<16#721c005f:32/little>>, "tst w2, #16", jit_aarch64_asm:tst_w(r2, 16)), + ?_assertAsmEqual(<<16#72001c7f:32/little>>, "tst w3, #255", jit_aarch64_asm:tst_w(r3, 255)), + ?_assertAsmEqual( + <<16#7200249f:32/little>>, "tst w4, #1023", jit_aarch64_asm:tst_w(r4, 1023) + ), + ?_assertAsmEqual(<<16#720014bf:32/little>>, "tst w5, #63", jit_aarch64_asm:tst_w(r5, 63)), + ?_assertAsmEqual(<<16#721b00df:32/little>>, "tst w6, #32", jit_aarch64_asm:tst_w(r6, 32)), + ?_assertAsmEqual(<<16#721a00ff:32/little>>, "tst w7, #64", jit_aarch64_asm:tst_w(r7, 64)), + ?_assertAsmEqual( + <<16#721e051f:32/little>>, "tst w8, #0xc", jit_aarch64_asm:tst_w(r8, 16#c) + ), + + %% Test tst_w with unencodable immediate + ?_assertError({unencodable_immediate, 16#123456}, jit_aarch64_asm:tst_w(r0, 16#123456)) + 
]. + +bcc_test_() -> + [ + ?_assertAsmEqual(<<16#54000000:32/little>>, "b.eq .+0", jit_aarch64_asm:bcc(eq, 0)), + ?_assertAsmEqual(<<16#54000001:32/little>>, "b.ne .+0", jit_aarch64_asm:bcc(ne, 0)), + ?_assertAsmEqual(<<16#54fffe01:32/little>>, "b.ne .-64", jit_aarch64_asm:bcc(ne, -64)), + ?_assertAsmEqual(<<16#54000400:32/little>>, "b.eq 128", jit_aarch64_asm:bcc(eq, 128)), + ?_assertAsmEqual(<<16#54000402:32/little>>, "b.cs 128", jit_aarch64_asm:bcc(cs, 128)), + ?_assertAsmEqual(<<16#54000403:32/little>>, "b.cc 128", jit_aarch64_asm:bcc(cc, 128)), + ?_assertAsmEqual(<<16#54000404:32/little>>, "b.mi 128", jit_aarch64_asm:bcc(mi, 128)), + ?_assertAsmEqual(<<16#54000405:32/little>>, "b.pl 128", jit_aarch64_asm:bcc(pl, 128)), + ?_assertAsmEqual(<<16#54000406:32/little>>, "b.vs 128", jit_aarch64_asm:bcc(vs, 128)), + ?_assertAsmEqual(<<16#54000408:32/little>>, "b.hi 128", jit_aarch64_asm:bcc(hi, 128)), + ?_assertAsmEqual(<<16#54000409:32/little>>, "b.ls 128", jit_aarch64_asm:bcc(ls, 128)), + ?_assertAsmEqual(<<16#5400040a:32/little>>, "b.ge 128", jit_aarch64_asm:bcc(ge, 128)), + ?_assertAsmEqual(<<16#5400040b:32/little>>, "b.lt 128", jit_aarch64_asm:bcc(lt, 128)), + ?_assertAsmEqual(<<16#5400040c:32/little>>, "b.gt 128", jit_aarch64_asm:bcc(gt, 128)), + ?_assertAsmEqual(<<16#5400040d:32/little>>, "b.le 128", jit_aarch64_asm:bcc(le, 128)), + ?_assertAsmEqual(<<16#5400040e:32/little>>, "b.al 128", jit_aarch64_asm:bcc(al, 128)), + ?_assertAsmEqual(<<16#5400040f:32/little>>, "b.nv 128", jit_aarch64_asm:bcc(nv, 128)), + ?_assertAsmEqual(<<16#54000007:32/little>>, "b.vc .+0", jit_aarch64_asm:bcc(vc, 0)) + ]. + +cbnz_test_() -> + [ + ?_assertAsmEqual(<<16#b5000401:32/little>>, "cbnz x1, 128", jit_aarch64_asm:cbnz(r1, 128)), + ?_assertAsmEqual( + <<16#35000402:32/little>>, "cbnz w2, 128", jit_aarch64_asm:cbnz_w(r2, 128) + ), + ?_assertAsmEqual(<<16#b5fffc03:32/little>>, "cbnz x3, -128", jit_aarch64_asm:cbnz(r3, -128)) + ]. 
+ +tbz_test_() -> + [ + ?_assertAsmEqual( + <<16#b6f80400:32/little>>, "tbz x0, #63, 128", jit_aarch64_asm:tbz(r0, 63, 128) + ), + ?_assertAsmEqual( + <<16#36180400:32/little>>, "tbz x0, #3, 128", jit_aarch64_asm:tbz(r0, 3, 128) + ), + ?_assertAsmEqual( + <<16#363ffc03:32/little>>, "tbz x3, #7, -128", jit_aarch64_asm:tbz(r3, 7, -128) + ) + ]. + +tbnz_test_() -> + [ + ?_assertAsmEqual( + <<16#37000400:32/little>>, "tbnz x0, #0, 128", jit_aarch64_asm:tbnz(r0, 0, 128) + ), + ?_assertAsmEqual( + <<16#37180400:32/little>>, "tbnz x0, #3, 128", jit_aarch64_asm:tbnz(r0, 3, 128) + ), + ?_assertAsmEqual( + <<16#373ffc03:32/little>>, "tbnz x3, #7, -128", jit_aarch64_asm:tbnz(r3, 7, -128) + ) + ]. + +stp_test_() -> + [ + ?_assertAsmEqual( + <<16#a8815113:32/little>>, + "stp x19, x20, [x8], #16", + jit_aarch64_asm:stp(r19, r20, {r8}, 16) + ), + ?_assertAsmEqual( + <<16#a88153f3:32/little>>, + "stp x19, x20, [sp], #16", + jit_aarch64_asm:stp(r19, r20, {sp}, 16) + ), + % Store-update (writeback) variants + ?_assertAsmEqual( + <<16#a9bf27e8:32/little>>, + "stp x8, x9, [sp, #-16]!", + jit_aarch64_asm:stp(r8, r9, {sp, -16}, '!') + ), + ?_assertAsmEqual( + <<16#a98127e8:32/little>>, + "stp x8, x9, [sp, #16]!", + jit_aarch64_asm:stp(r8, r9, {sp, 16}, '!') + ), + ?_assertAsmEqual( + <<16#a98027e8:32/little>>, + "stp x8, x9, [sp, #0]!", + jit_aarch64_asm:stp(r8, r9, {sp, 0}, '!') + ) + ]. + +ldp_test_() -> + [ + ?_assertAsmEqual( + <<16#a8c15113:32/little>>, + "ldp x19, x20, [x8], #16", + jit_aarch64_asm:ldp(r19, r20, {r8}, 16) + ), + ?_assertAsmEqual( + <<16#a8c153f3:32/little>>, + "ldp x19, x20, [sp], #16", + jit_aarch64_asm:ldp(r19, r20, {sp}, 16) + ) + ]. 
+ +subs_test_() -> + [ + % SUBS with immediate + ?_assertAsmEqual( + <<16#F1000021:32/little>>, "subs x1, x1, #0", jit_aarch64_asm:subs(r1, r1, 0) + ), + ?_assertAsmEqual( + <<16#F1000421:32/little>>, "subs x1, x1, #1", jit_aarch64_asm:subs(r1, r1, 1) + ), + % SUBS with register + ?_assertAsmEqual( + <<16#eb000021:32/little>>, "subs x1, x1, x0", jit_aarch64_asm:subs(r1, r1, r0) + ), + ?_assertAsmEqual( + <<16#eb0a0021:32/little>>, "subs x1, x1, x10", jit_aarch64_asm:subs(r1, r1, r10) + ) + ]. + +adr_test_() -> + [ + %% ADR x0, #0 + ?_assertAsmEqual(<<16#10000000:32/little>>, "adr x0, .+0", jit_aarch64_asm:adr(r0, 0)), + %% ADR x1, #4 + ?_assertAsmEqual(<<16#10000021:32/little>>, "adr x1, .+4", jit_aarch64_asm:adr(r1, 4)), + %% ADR x2, #-4 + ?_assertAsmEqual(<<16#10ffffe2:32/little>>, "adr x2, .-4", jit_aarch64_asm:adr(r2, -4)), + %% ADR x3, #1048572 (max positive) + ?_assertAsmEqual( + <<16#107fffe3:32/little>>, "adr x3, .+1048572", jit_aarch64_asm:adr(r3, 1048572) + ), + %% ADR x4, #-1048576 (max negative) + ?_assertAsmEqual( + <<16#10800004:32/little>>, "adr x4, .-1048576", jit_aarch64_asm:adr(r4, -1048576) + ), + %% ADR with offset not a multiple of 4 is valid + ?_assertAsmEqual(<<16#70000000:32/little>>, "adr x0, .+3", jit_aarch64_asm:adr(r0, 3)) + ]. + +%% Test nop instruction +nop_test_() -> + [ + ?_assertAsmEqual( + <<16#d503201f:32/little>>, "nop", jit_aarch64_asm:nop() + ) + ]. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl new file mode 100644 index 0000000000..087ab9074d --- /dev/null +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -0,0 +1,1764 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_aarch64_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_aarch64). + +% disassembly obtained with: +% aarch64-elf-objdump -b binary -D dump.bin -M aarch64 + +call_primitive_0_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400050 ldr x16, [x2]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_1_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400450 ldr x16, [x2, #8]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_2_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400850 ldr x16, [x2, #16]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d2800541 mov x1, #0x2a // #42\n" + " 10: d2800562 mov x2, #0x2b // #43\n" + " 14: d2800583 mov x3, #0x2c // #44\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 

%% Exercises ?PRIM_EXTENDED_REGISTER_PTR calls: three extended-register pointer
%% lookups followed by a put_list, checking that caller-saved native registers
%% (x7/x8/x9) are spilled/restored around each primitive call and that the
%% generated AArch64 stream matches the expected encoding byte-for-byte.
call_primitive_extended_regs_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
    {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
    {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
    {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [
        ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}
    ]),
    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}),
    State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]),
    ?BACKEND:assert_all_native_free(State6),
    Stream = ?BACKEND:stream(State6),
    %% Expected objdump-style disassembly of the generated code.
    Dump =
        <<
            "\n"
            " 0: f9404850 ldr x16, [x2, #144]\n"
            " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " c: d2800261 mov x1, #0x13 // #19\n"
            " 10: d63f0200 blr x16\n"
            " 14: aa0003e7 mov x7, x0\n"
            " 18: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 1c: a8c103fe ldp x30, x0, [sp], #16\n"
            " 20: f9404850 ldr x16, [x2, #144]\n"
            " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 2c: f81f0fe7 str x7, [sp, #-16]!\n"
            " 30: d2800281 mov x1, #0x14 // #20\n"
            " 34: d63f0200 blr x16\n"
            " 38: aa0003e8 mov x8, x0\n"
            " 3c: f84107e7 ldr x7, [sp], #16\n"
            " 40: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 44: a8c103fe ldp x30, x0, [sp], #16\n"
            " 48: f9404850 ldr x16, [x2, #144]\n"
            " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 54: a9bf1fe8 stp x8, x7, [sp, #-16]!\n"
            " 58: d2800261 mov x1, #0x13 // #19\n"
            " 5c: d63f0200 blr x16\n"
            " 60: aa0003e9 mov x9, x0\n"
            " 64: a8c11fe8 ldp x8, x7, [sp], #16\n"
            " 68: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 6c: a8c103fe ldp x30, x0, [sp], #16\n"
            " 70: f9403450 ldr x16, [x2, #104]\n"
            " 74: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 78: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 7c: f81f0fe9 str x9, [sp, #-16]!\n"
            " 80: f94000e1 ldr x1, [x7]\n"
            " 84: f9400102 ldr x2, [x8]\n"
            " 88: d63f0200 blr x16\n"
            " 8c: aa0003e7 mov x7, x0\n"
            " 90: f84107e9 ldr x9, [sp], #16\n"
            " 94: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 98: a8c103fe ldp x30, x0, [sp], #16\n"
            " 9c: f9000127 str x7, [x9]\n"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% call_ext with arity -1 (OP_CALL_EXT_ONLY style): reduction decrement,
%% conditional reschedule, then a tail jump through the CALL_EXT primitive.
call_ext_only_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, -1]),
    Stream = ?BACKEND:stream(State2),
    Dump = <<
        " 0: b9401027 ldr w7, [x1, #16]\n"
        " 4: f10004e7 subs x7, x7, #0x1\n"
        " 8: b9001027 str w7, [x1, #16]\n"
        " c: 540000a1 b.ne 0x20 // b.any\n"
        " 10: 10000087 adr x7, 0x20\n"
        " 14: f9000427 str x7, [x1, #8]\n"
        " 18: f9400847 ldr x7, [x2, #16]\n"
        " 1c: d61f00e0 br x7\n"
        " 20: f9401047 ldr x7, [x2, #32]\n"
        " 24: d2800042 mov x2, #0x2 // #2\n"
        " 28: d2800043 mov x3, #0x2 // #2\n"
        " 2c: 92800004 mov x4, #0xffffffffffffffff // #-1\n"
        " 30: d61f00e0 br x7"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Same as call_ext_only_test but with a non-negative last argument (10),
%% checking the positive-immediate MOV encoding for x4.
call_ext_last_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]),
    Stream = ?BACKEND:stream(State2),
    Dump = <<
        " 0: b9401027 ldr w7, [x1, #16]\n"
        " 4: f10004e7 subs x7, x7, #0x1\n"
        " 8: b9001027 str w7, [x1, #16]\n"
        " c: 540000a1 b.ne 0x20 // b.any\n"
        " 10: 10000087 adr x7, 0x20\n"
        " 14: f9000427 str x7, [x1, #8]\n"
        " 18: f9400847 ldr x7, [x2, #16]\n"
        " 1c: d61f00e0 br x7\n"
        " 20: f9401047 ldr x7, [x2, #32]\n"
        " 24: d2800042 mov x2, #0x2 // #2\n"
        " 28: d2800043 mov x3, #0x2 // #2\n"
        " 2c: d2800144 mov x4, #0xa // #10\n"
        " 30: d61f00e0 br x7"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Minimal tail-call to primitive 0: load function pointer from the primitives
%% table ([x2]) and branch, with the extra argument materialized in x2.
call_primitive_last_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]),
    Stream = ?BACKEND:stream(State1),
    Dump =
        <<
            " 0: f9400047 ldr x7, [x2]\n"
            " 4: d2800542 mov x2, #0x2a // #42\n"
            " 8: d61f00e0 br x7"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).
+ +return_if_not_equal_to_ctx_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: eb0000ff cmp x7, x0\n" + " 20: 54000060 b.eq 0x2c // b.none\n" + " 24: aa0703e0 mov x0, x7\n" + " 28: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg), + ?assertEqual(r8, OtherReg), + State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: aa0703e8 mov x8, x7\n" + " 20: eb00011f cmp x8, x0\n" + " 24: 54000060 b.eq 0x30 // b.none\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. 

%% move_to_cp/2 with a y_reg source: load stack pointer (ctx+40), dereference
%% y0, store into the CP slot (ctx+184).
move_to_cp_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}),
    Stream = ?BACKEND:stream(State1),
    Dump =
        <<
            " 0: f9401407 ldr x7, [x0, #40]\n"
            " 4: f94000e7 ldr x7, [x7]\n"
            " 8: f9005c07 str x7, [x0, #184]"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% increment_sp/2: bump the stack pointer by 7 words (7 * 8 = 0x38 bytes).
increment_sp_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:increment_sp(State0, 7),
    Stream = ?BACKEND:stream(State1),
    Dump =
        <<
            " 0: f9401407 ldr x7, [x0, #40]\n"
            " 4: 9100e0e7 add x7, x7, #0x38\n"
            " 8: f9001407 str x7, [x0, #40]"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% EUnit fixture covering every condition form accepted by if_block/3.
%% Each subtest checks two things:
%% - the expected AArch64 encoding for the condition (tbz/cbnz/cmp+b.cc/tst…);
%% - register accounting: {free, Reg} variants must release RegA afterwards
%%   (used_regs returns [RegB]) while non-free variants keep [RegB, RegA].
%% The setup loads x_reg 0 and x_reg 1 into native registers x7 (RegA) and
%% x8 (RegB); the block body is always `add RegB, 2`.
if_block_test_() ->
    {setup,
        fun() ->
            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
            {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
            {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
            {State2, RegA, RegB}
        end,
        fun({State0, RegA, RegB}) ->
            [
                %% signed `< 0` compiles to a sign-bit test (tbz #63).
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '<', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: b6f80047 tbz x7, #63, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% reg `<` reg compiles to cmp + b.ge (branch over body).
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '<', RegB},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: eb0800ff cmp x7, x8\n"
                        " c: 5400004a b.ge 0x14 // b.tcont\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% `== 0` compiles to cbnz over the body.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: b5000047 cbnz x7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% Same encoding, but {free, RegA} must release RegA.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: b5000047 cbnz x7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '(int)' prefix narrows the comparison to 32 bits (w7).
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', RegA, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 35000047 cbnz w7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', {free, RegA}, '==', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 35000047 cbnz w7, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '!=' immediate (?TERM_NIL = 0x3b): cmp + b.eq over the body.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '!=', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '!=', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', RegA, '!=', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', {free, RegA}, '!=', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '==' immediate: inverted branch (b.ne) over the body.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '==', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '==', ?TERM_NIL},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f100ecff cmp x7, #0x3b\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', RegA, '==', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(int)', {free, RegA}, '==', 42},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 7100a8ff cmp w7, #0x2a\n"
                        " c: 54000041 b.ne 0x14 // b.any\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% '(bool)' forms test only bit 0 of the 32-bit register.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', RegA, '==', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 37000047 tbnz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', {free, RegA}, '==', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 37000047 tbnz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', RegA, '!=', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 36000047 tbz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {'(bool)', {free, RegA}, '!=', false},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 36000047 tbz w7, #0, 0x10\n"
                        " c: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% Mask 0x7 is encodable as a logical immediate: single tst.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '&', 16#7, '!=', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f24008ff tst x7, #0x7\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% Mask 0x5 is NOT a valid logical immediate: materialized in x9.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '&', 16#5, '!=', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: d28000a9 mov x9, #0x5 // #5\n"
                        " c: ea0900ff tst x7, x9\n"
                        " 10: 54000040 b.eq 0x18 // b.none\n"
                        " 14: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '&', 16#7, '!=', 0},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: f24008ff tst x7, #0x7\n"
                        " c: 54000040 b.eq 0x14 // b.none\n"
                        " 10: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end),
                %% Masked compare against non-zero: and into scratch x9 when
                %% RegA is still live, then cmp.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 92400ce9 and x9, x7, #0xf\n"
                        " c: f1003d3f cmp x9, #0xf\n"
                        " 10: 54000040 b.eq 0x18 // b.none\n"
                        " 14: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
                end),
                %% With {free, RegA} the mask may clobber x7 in place.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
                        fun(BSt0) ->
                            ?BACKEND:add(BSt0, RegB, 2)
                        end
                    ),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        " 0: f9401807 ldr x7, [x0, #48]\n"
                        " 4: f9401c08 ldr x8, [x0, #56]\n"
                        " 8: 92400ce7 and x7, x7, #0xf\n"
                        " c: f1003cff cmp x7, #0xf\n"
                        " 10: 54000040 b.eq 0x18 // b.none\n"
                        " 14: 91000908 add x8, x8, #0x2"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
                end)
            ]
        end}.

%% if_else_block/4: then-branch falls through into an unconditional branch
%% over the else-branch.
if_else_block_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
    State3 = ?BACKEND:if_else_block(
        State2,
        {Reg1, '==', ?TERM_NIL},
        fun(BSt0) ->
            ?BACKEND:add(BSt0, Reg2, 2)
        end,
        fun(BSt0) ->
            ?BACKEND:add(BSt0, Reg2, 4)
        end
    ),
    Stream = ?BACKEND:stream(State3),
    Dump =
        <<
            " 0: f9401807 ldr x7, [x0, #48]\n"
            " 4: f9401c08 ldr x8, [x0, #56]\n"
            " 8: f100ecff cmp x7, #0x3b\n"
            " c: 54000061 b.ne 0x18 // b.any\n"
            " 10: 91000908 add x8, x8, #0x2\n"
            " 14: 14000002 b 0x1c\n"
            " 18: 91001108 add x8, x8, #0x4"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% shift_right/3: logical shift right by an immediate (lsr).
shift_right_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:shift_right(State1, Reg, 3),
    Stream = ?BACKEND:stream(State2),
    Dump =
        <<
            " 0: f9401807 ldr x7, [x0, #48]\n"
            " 4: d343fce7 lsr x7, x7, #3"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% shift_left/3: logical shift left by an immediate (lsl).
shift_left_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:shift_left(State1, Reg, 3),
    Stream = ?BACKEND:stream(State2),
    Dump =
        <<
            " 0: f9401807 ldr x7, [x0, #48]\n"
            " 4: d37df0e7 lsl x7, x7, #3"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).
+ +call_only_or_schedule_next_and_label_relocation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: b9401027 ldr w7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: b9001027 str w7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, [x2, #8]\n" + " 38: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 

%% BIF call with a 64-bit literal (9208452466117618637 = 16#7FCAFEBABE02ABCD)
%% that cannot fit a single MOV: must be materialized with mov + 3 movk
%% (16-bit chunks at lsl #16/#32/#48), then the BIF result is checked for 0
%% (error path calls ?PRIM_HANDLE_ERROR) before being stored to x_reg 0.
call_bif_with_large_literal_integer_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]),
    {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 9208452466117618637]),
    {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [
        ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}
    ]),
    State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) ->
        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
    end),
    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}),
    State6 = ?BACKEND:free_native_registers(State5, [ResultReg]),
    ?BACKEND:assert_all_native_free(State6),
    Stream = ?BACKEND:stream(State6),
    Dump =
        <<
            " 0: f9402050 ldr x16, [x2, #64]\n"
            " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " c: aa0103e0 mov x0, x1\n"
            " 10: d2800041 mov x1, #0x2 // #2\n"
            " 14: d63f0200 blr x16\n"
            " 18: aa0003e7 mov x7, x0\n"
            " 1c: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 20: a8c103fe ldp x30, x0, [sp], #16\n"
            " 24: f9403c50 ldr x16, [x2, #120]\n"
            " 28: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 2c: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 30: f81f0fe7 str x7, [sp, #-16]!\n"
            " 34: d29579a1 mov x1, #0xabcd // #43981\n"
            " 38: f2b7c041 movk x1, #0xbe02, lsl #16\n"
            " 3c: f2dfd741 movk x1, #0xfeba, lsl #32\n"
            " 40: f2eff941 movk x1, #0x7fca, lsl #48\n"
            " 44: d63f0200 blr x16\n"
            " 48: aa0003e8 mov x8, x0\n"
            " 4c: f84107e7 ldr x7, [sp], #16\n"
            " 50: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 54: a8c103fe ldp x30, x0, [sp], #16\n"
            " 58: a9bf03fe stp x30, x0, [sp, #-16]!\n"
            " 5c: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
            " 60: d2800001 mov x1, #0x0 // #0\n"
            " 64: d2800022 mov x2, #0x1 // #1\n"
            " 68: f9401803 ldr x3, [x0, #48]\n"
            " 6c: aa0803e4 mov x4, x8\n"
            " 70: d63f00e0 blr x7\n"
            " 74: aa0003e7 mov x7, x0\n"
            " 78: a8c10be1 ldp x1, x2, [sp], #16\n"
            " 7c: a8c103fe ldp x30, x0, [sp], #16\n"
            " 80: b5000087 cbnz x7, 0x90\n"
            " 84: f9401847 ldr x7, [x2, #48]\n"
            " 88: d2801102 mov x2, #0x88 // #136\n"
            " 8c: d61f00e0 br x7\n"
            " 90: f9001807 str x7, [x0, #48]"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% OP_GET_LIST pattern: untag the list pointer (clear low 2 bits), then move
%% tail (offset 8) to y1 and head (offset 0) to y0.
get_list_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:and_(State1, Reg, -4),
    State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
    State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
    ?BACKEND:assert_all_native_free(State5),
    Stream = ?BACKEND:stream(State5),
    Dump = <<
        " 0: f9401807 ldr x7, [x0, #48]\n"
        " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n"
        " 8: f9401408 ldr x8, [x0, #40]\n"
        " c: f94004e9 ldr x9, [x7, #8]\n"
        " 10: f9000509 str x9, [x8, #8]\n"
        " 14: f9401408 ldr x8, [x0, #40]\n"
        " 18: f94000e9 ldr x9, [x7]\n"
        " 1c: f9000109 str x9, [x8]"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).
+ +is_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + MSt1 = ?BACKEND:if_block( + MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ), + MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), + ?BACKEND:if_block( + MSt3, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 54000160 b.eq 0x38 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 14000047 b 0x138\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e7 and x7, x7, #0x3f\n" + " 2c: f10020ff cmp x7, #0x8\n" + " 30: 54000040 b.eq 0x38 // b.none\n" + " 34: 14000041 b 0x138" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +cond_jump_to_label(Cond, Label, MMod, MSt0) -> + MMod:if_block(MSt0, Cond, fun(BSt0) -> + MMod:jump_to_label(BSt0, Label) + end). 
+ +is_number_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + BSt1 = cond_jump_to_label( + {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 + ), + BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), + cond_jump_to_label( + {'and', [ + {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT} + ]}, + Label, + ?BACKEND, + BSt3 + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 540001c0 b.eq 0x44 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 1400004a b 0x144\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e8 and x8, x7, #0x3f\n" + " 2c: f100211f cmp x8, #0x8\n" + " 30: 540000a0 b.eq 0x44 // b.none\n" + " 34: 924014e7 and x7, x7, #0x3f\n" + " 38: f10060ff cmp x7, #0x18\n" + " 3c: 54000040 b.eq 0x44 // b.none\n" + " 40: 14000041 b 0x144" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
%% Verify the code emitted for an `is_boolean` guard: the term in {x_reg, 0}
%% must be the `true` atom (0x4b) or the `false` atom (0xb); anything else
%% jumps to failure Label 1.
is_boolean_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
            ?BACKEND:jump_to_label(BSt1, Label)
        end)
    end),
    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
    ?BACKEND:assert_all_native_free(State3),
    Offset = ?BACKEND:offset(State3),
    State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100),
    State5 = ?BACKEND:update_branches(State4),
    Stream = ?BACKEND:stream(State5),
    %% NOTE: a redundant second `Offset = ?BACKEND:offset(State3)` re-match
    %% was removed here; it was a no-op (Offset was already bound above).
    Dump = <<
        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
        "   4:	f1012cff 	cmp	x7, #0x4b\n"
        "   8:	54000080 	b.eq	0x18  // b.none\n"
        "   c:	f1002cff 	cmp	x7, #0xb\n"
        "  10:	54000040 	b.eq	0x18  // b.none\n"
        "  14:	14000041 	b	0x118"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Verify call_ext: decrement reductions (scheduling out through the
%% continuation if they reach zero), then tail-call primitive 4 with a
%% saved CP built from the module index and return offset.
call_ext_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]),
    ?BACKEND:assert_all_native_free(State2),
    Stream = ?BACKEND:stream(State2),
    Dump = <<
        "   0:	b9401027 	ldr	w7, [x1, #16]\n"
        "   4:	f10004e7 	subs	x7, x7, #0x1\n"
        "   8:	b9001027 	str	w7, [x1, #16]\n"
        "   c:	540000a1 	b.ne	0x20  // b.any\n"
        "  10:	10000087 	adr	x7, 0x20\n"
        "  14:	f9000427 	str	x7, [x1, #8]\n"
        "  18:	f9400847 	ldr	x7, [x2, #16]\n"
        "  1c:	d61f00e0 	br	x7\n"
        "  20:	f9400027 	ldr	x7, [x1]\n"
        "  24:	b94000e7 	ldr	w7, [x7]\n"
        "  28:	d3689ce7 	lsl	x7, x7, #24\n"
        "  2c:	d2802610 	mov	x16, #0x130                 	// #304\n"
        "  30:	aa1000e7 	orr	x7, x7, x16\n"
        "  34:	f9005c07 	str	x7, [x0, #184]\n"
        "  38:	f9401047 	ldr	x7, [x2, #32]\n"
        "  3c:	d2800042 	mov	x2, #0x2                   	// #2\n"
        "  40:	d28000a3 	mov	x3, #0x5                   	// #5\n"
        "  44:	92800004 	mov	x4, #0xffffffffffffffff    	// #-1\n"
        "  48:	d61f00e0 	br	x7"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Verify call_fun: after the reduction check, the callee term is validated
%% (boxed, with a fun boxed tag) — raising badfun otherwise — and then
%% PRIM_CALL_FUN is tail-called with a saved CP.
call_fun_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
    FuncReg = {x_reg, 0},
    ArgsCount = 0,
    {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg),
    {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg),
    State4 = ?BACKEND:if_block(
        State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
            ])
        end
    ),
    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
    State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
    State7 = ?BACKEND:if_block(
        State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
            ])
        end
    ),
    State8 = ?BACKEND:free_native_registers(State7, [RegCopy]),
    State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [
        ctx, jit_state, Reg, ArgsCount
    ]),
    ?BACKEND:assert_all_native_free(State9),
    Stream = ?BACKEND:stream(State9),
    Dump = <<
        "   0:	b9401027 	ldr	w7, [x1, #16]\n"
        "   4:	f10004e7 	subs	x7, x7, #0x1\n"
        "   8:	b9001027 	str	w7, [x1, #16]\n"
        "   c:	540000a1 	b.ne	0x20  // b.any\n"
        "  10:	10000087 	adr	x7, 0x20\n"
        "  14:	f9000427 	str	x7, [x1, #8]\n"
        "  18:	f9400847 	ldr	x7, [x2, #16]\n"
        "  1c:	d61f00e0 	br	x7\n"
        "  20:	f9401807 	ldr	x7, [x0, #48]\n"
        "  24:	aa0703e8 	mov	x8, x7\n"
        "  28:	92400509 	and	x9, x8, #0x3\n"
        "  2c:	f100093f 	cmp	x9, #0x2\n"
        "  30:	540000c0 	b.eq	0x48  // b.none\n"
        "  34:	f9404c47 	ldr	x7, [x2, #152]\n"
        "  38:	d2800702 	mov	x2, #0x38                  	// #56\n"
        "  3c:	d2803163 	mov	x3, #0x18b                 	// #395\n"
        "  40:	aa0803e4 	mov	x4, x8\n"
        "  44:	d61f00e0 	br	x7\n"
        "  48:	927ef508 	and	x8, x8, #0xfffffffffffffffc\n"
        "  4c:	f9400108 	ldr	x8, [x8]\n"
        "  50:	92401509 	and	x9, x8, #0x3f\n"
        "  54:	f100513f 	cmp	x9, #0x14\n"
        "  58:	540000c0 	b.eq	0x70  // b.none\n"
        "  5c:	f9404c47 	ldr	x7, [x2, #152]\n"
        "  60:	d2800c02 	mov	x2, #0x60                  	// #96\n"
        "  64:	d2803163 	mov	x3, #0x18b                 	// #395\n"
        "  68:	aa0803e4 	mov	x4, x8\n"
        "  6c:	d61f00e0 	br	x7\n"
        "  70:	f9400028 	ldr	x8, [x1]\n"
        "  74:	b9400108 	ldr	w8, [x8]\n"
        "  78:	d3689d08 	lsl	x8, x8, #24\n"
        "  7c:	d2804c10 	mov	x16, #0x260                 	// #608\n"
        "  80:	aa100108 	orr	x8, x8, x16\n"
        "  84:	f9005c08 	str	x8, [x0, #184]\n"
        "  88:	f9408048 	ldr	x8, [x2, #256]\n"
        "  8c:	aa0703e2 	mov	x2, x7\n"
        "  90:	d2800003 	mov	x3, #0x0                   	// #0\n"
        "  94:	d61f0100 	br	x8"
    >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Helper: run move_to_vm_register/3 and compare the emitted stream with the
%% expected disassembly dump.
move_to_vm_register_test0(State, Source, Dest, Dump) ->
    State1 = ?BACKEND:move_to_vm_register(State, Source, Dest),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Exercise every source/destination combination of move_to_vm_register/3:
%% immediates (incl. zero, large and negative), x/y/fp VM registers, native
%% registers and pointer indirections.
move_to_vm_register_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% Zero is stored through xzr, without a scratch register.
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
                        "   0:	f900181f 	str	xzr, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
                        "   0:	f900581f 	str	xzr, [x0, #176]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {ptr, r10}, <<
                        "   0:	f900015f 	str	xzr, [x10]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f90008ff 	str	xzr, [x7, #16]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f90050ff 	str	xzr, [x7, #160]"
                    >>)
                end),
                %% Test: Immediate to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9005807 	str	x7, [x0, #176]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9000907 	str	x7, [x8, #16]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9005107 	str	x7, [x8, #160]"
                    >>)
                end),
                %% Test: Immediate to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, 99, {ptr, r10}, <<
                        "   0:	d2800c67 	mov	x7, #0x63                  	// #99\n"
                        "   4:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% Test: x_reg to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
                        "   0:	f9401c07 	ldr	x7, [x0, #56]\n"
                        "   4:	f9002007 	str	x7, [x0, #64]"
                    >>)
                end),
                %% Test: x_reg to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r8}, <<
                        "   0:	f9401c07 	ldr	x7, [x0, #56]\n"
                        "   4:	f9000107 	str	x7, [x8]"
                    >>)
                end),
                %% Test: ptr to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {ptr, r9}, {x_reg, 3}, <<
                        "   0:	f9400127 	ldr	x7, [x9]\n"
                        "   4:	f9002407 	str	x7, [x0, #72]"
                    >>)
                end),
                %% Test: x_reg to y_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9000507 	str	x7, [x8, #8]"
                    >>)
                end),
                %% Test: y_reg to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f94000e7 	ldr	x7, [x7]\n"
                        "   8:	f9002407 	str	x7, [x0, #72]"
                    >>)
                end),
                %% Test: y_reg (non-zero index) to x_reg
                %% (comment fixed: the original said "y_reg to y_reg")
                ?_test(begin
                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f94004e7 	ldr	x7, [x7, #8]\n"
                        "   8:	f9002407 	str	x7, [x0, #72]"
                    >>)
                end),
                %% Test: Native register to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, r10, {x_reg, 0}, <<
                        "   0:	f900180a 	str	x10, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, r10, {x_reg, extra}, <<
                        "   0:	f900580a 	str	x10, [x0, #176]"
                    >>)
                end),
                %% Test: Native register to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, r9, {ptr, r10}, <<
                        "   0:	f9000149 	str	x9, [x10]"
                    >>)
                end),
                %% Test: Native register to y_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, r10, {y_reg, 0}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f90000ea 	str	x10, [x7]"
                    >>)
                end),
                %% Test: Large immediate to x_reg (mov + three movk)
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, 0}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {x_reg, extra}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9005807 	str	x7, [x0, #176]\n"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 2}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9401408 	ldr	x8, [x0, #40]\n"
                        "  14:	f9000907 	str	x7, [x8, #16]"
                    >>)
                end),
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {y_reg, 20}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9401408 	ldr	x8, [x0, #40]\n"
                        "  14:	f9005107 	str	x7, [x8, #160]"
                    >>)
                end),
                %% Test: Large immediate to ptr
                ?_test(begin
                    move_to_vm_register_test0(State0, 16#123456789abcdef0, {ptr, r10}, <<
                        "   0:	d29bde07 	mov	x7, #0xdef0                	// #57072\n"
                        "   4:	f2b35787 	movk	x7, #0x9abc, lsl #16\n"
                        "   8:	f2cacf07 	movk	x7, #0x5678, lsl #32\n"
                        "   c:	f2e24687 	movk	x7, #0x1234, lsl #48\n"
                        "  10:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% Test: x_reg to y_reg (high index)
                ?_test(begin
                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
                        "   0:	f9405407 	ldr	x7, [x0, #168]\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f9007d07 	str	x7, [x8, #248]"
                    >>)
                end),
                %% Test: y_reg to x_reg (high index)
                ?_test(begin
                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9407ce7 	ldr	x7, [x7, #248]\n"
                        "   8:	f9005407 	str	x7, [x0, #168]"
                    >>)
                end),
                %% Test: Negative immediate to x_reg
                ?_test(begin
                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
                        "   0:	92800007 	mov	x7, #0xffffffffffffffff    	// #-1\n"
                        "   4:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                %% Test: ptr with offset to fp_reg (term_to_float)
                ?_test(begin
                    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
                    State2 = ?BACKEND:move_to_vm_register(
                        State1, {free, {ptr, RegA, 1}}, {fp_reg, 3}
                    ),
                    Stream = ?BACKEND:stream(State2),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f94004e7 	ldr	x7, [x7, #8]\n"
                        "   8:	f9406008 	ldr	x8, [x0, #192]\n"
                        "   c:	f9000d07 	str	x7, [x8, #24]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.

%% Helper: run move_array_element/4 and compare the emitted stream with the
%% expected disassembly dump.
move_array_element_test0(State, Reg, Index, Dest, Dump) ->
    State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).
%% Exercise move_array_element with constant and register indices, to x/y VM
%% registers, pointers and native registers. Register indices are emitted as
%% scaled loads (ldr xN, [xB, xI, lsl #3] — 8-byte terms).
move_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% move_array_element: reg[x] to x_reg
                ?_test(begin
                    move_array_element_test0(State0, r8, 2, {x_reg, 0}, <<
                        "   0:	f9400907 	ldr	x7, [x8, #16]\n"
                        "   4:	f9001807 	str	x7, [x0, #48]"
                    >>)
                end),
                %% move_array_element: reg[x] to ptr
                ?_test(begin
                    move_array_element_test0(State0, r8, 3, {ptr, r10}, <<
                        "   0:	f9400d07 	ldr	x7, [x8, #24]\n"
                        "   4:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% move_array_element: reg[x] to y_reg
                ?_test(begin
                    move_array_element_test0(State0, r8, 1, {y_reg, 2}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9400508 	ldr	x8, [x8, #8]\n"
                        "   8:	f90008e8 	str	x8, [x7, #16]"
                    >>)
                end),
                %% move_array_element: reg[x] to native reg (r10)
                ?_test(begin
                    move_array_element_test0(State0, r8, 1, r10, <<
                        "   0:	f940050a 	ldr	x10, [x8, #8]"
                    >>)
                end),
                %% move_array_element: reg[x] to y_reg (high indices)
                ?_test(begin
                    move_array_element_test0(State0, r8, 7, {y_reg, 31}, <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9401d08 	ldr	x8, [x8, #56]\n"
                        "   8:	f9007ce8 	str	x8, [x7, #248]"
                    >>)
                end),
                %% move_array_element: reg[x] to x_reg (high indices)
                ?_test(begin
                    move_array_element_test0(State0, r8, 7, {x_reg, 15}, <<
                        "   0:	f9401d07 	ldr	x7, [x8, #56]\n"
                        "   4:	f9005407 	str	x7, [x0, #168]"
                    >>)
                end),
                %% move_array_element: reg_x[reg_y] to x_reg
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    move_array_element_test0(State1, r8, {free, Reg}, {x_reg, 2}, <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]\n"
                        "   4:	f8677907 	ldr	x7, [x8, x7, lsl #3]\n"
                        "   8:	f9002007 	str	x7, [x0, #64]"
                    >>)
                end),
                %% move_array_element: reg_x[reg_y] to pointer (large x reg)
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    move_array_element_test0(State1, r8, {free, Reg}, {ptr, r10}, <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]\n"
                        "   4:	f8677907 	ldr	x7, [x8, x7, lsl #3]\n"
                        "   8:	f9000147 	str	x7, [x10]"
                    >>)
                end),
                %% move_array_element: reg_x[reg_y] to y_reg
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]\n"
                        "   4:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   8:	f8677907 	ldr	x7, [x8, x7, lsl #3]\n"
                        "   c:	f9007d07 	str	x7, [x8, #248]"
                    >>)
                end)
            ]
        end}.

%% get_array_element/3 allocates a scratch register (r7 here) and loads
%% reg[index] into it.
get_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% get_array_element: reg[x] to new native reg
                ?_test(begin
                    {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401107 	ldr	x7, [x8, #32]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual(r7, Reg)
                end)
            ]
        end}.

%% Exercise move_to_array_element/4 and /5 (with extra constant offset),
%% including register-indexed stores and forced register allocation via
%% direct manipulation of the state tuple.
move_to_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% move_to_array_element/4: x_reg to reg[x]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f9000907 	str	x7, [x8, #16]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/4: x_reg to reg[reg]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f8297907 	str	x7, [x8, x9, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/4: ptr to reg[reg]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r8, r9),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f94000e7 	ldr	x7, [x7]\n"
                        "   4:	f8297907 	str	x7, [x8, x9, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/4: y_reg to reg[reg]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r8, r9),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f94008e7 	ldr	x7, [x7, #16]\n"
                        "   8:	f8297907 	str	x7, [x8, x9, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/5: x_reg to reg[x+offset]
                ?_test(begin
                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	f9000d07 	str	x7, [x8, #24]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/5: x_reg to reg[reg+offset]
                %% NOTE(review): setelement(6,...)/setelement(7,...) poke the
                %% available/used register lists directly into the state tuple
                %% — assumes those tuple positions; confirm against the
                %% jit_aarch64 state record if it changes.
                ?_test(begin
                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
                    State2 = setelement(7, State1, [r8, r9]),
                    [r8, r9] = ?BACKEND:used_regs(State2),
                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1),
                    Stream = ?BACKEND:stream(State3),
                    Dump = <<
                        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
                        "   4:	9100052a 	add	x10, x9, #0x1\n"
                        "   8:	f82a7907 	str	x7, [x8, x10, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_array_element/5: imm to reg[reg+offset]
                ?_test(begin
                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]),
                    State2 = setelement(7, State1, [r8, r9]),
                    [r8, r9] = ?BACKEND:used_regs(State2),
                    State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1),
                    Stream = ?BACKEND:stream(State3),
                    Dump = <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42\n"
                        "   4:	9100052a 	add	x10, x9, #0x1\n"
                        "   8:	f82a7907 	str	x7, [x8, x10, lsl #3]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.
%% Exercise move_to_native_register/2 (allocates a register) and /3 (moves
%% into a caller-chosen register) for immediates, pointers, x and y regs.
move_to_native_register_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                %% move_to_native_register/2: imm
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r7, Reg),
                    Dump = <<
                        "   0:	d2800547 	mov	x7, #0x2a                  	// #42"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/2: {ptr, reg} — dereferences in place
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r6, Reg),
                    Dump = <<
                        "   0:	f94000c6 	ldr	x6, [x6]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/2: {x_reg, N}
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 3}),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r7, Reg),
                    Dump = <<
                        "   0:	f9402407 	ldr	x7, [x0, #72]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/2: {y_reg, N}
                ?_test(begin
                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(r7, Reg),
                    Dump = <<
                        "   0:	f9401407 	ldr	x7, [x0, #40]\n"
                        "   4:	f9400ce7 	ldr	x7, [x7, #24]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: imm to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, 42, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	d2800548 	mov	x8, #0x2a                  	// #42"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: reg to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, r7, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	aa0703e8 	mov	x8, x7"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: {ptr, reg} to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, {ptr, r7}, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f94000e8 	ldr	x8, [x7]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: {x_reg, x} to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9402008 	ldr	x8, [x0, #64]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                %% move_to_native_register/3: {y_reg, y} to reg
                ?_test(begin
                    State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r8),
                    Stream = ?BACKEND:stream(State1),
                    Dump = <<
                        "   0:	f9401408 	ldr	x8, [x0, #40]\n"
                        "   4:	f9400908 	ldr	x8, [x8, #16]"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.

%% Helper: run mul/3 and compare the emitted stream with the expected dump.
mul_test0(State0, Reg, Imm, Dump) ->
    State1 = ?BACKEND:mul(State0, Reg, Imm),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).

%% mul/3 strength reduction: powers of two become a single lsl, 2^n±1 become
%% lsl + add/sub, even composites recurse (e.g. 6 = 3 * 2), and anything else
%% falls back to mov + mul.
mul_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                ?_test(begin
                    mul_test0(State0, r2, 2, <<
                        "0:	d37ff842 	lsl	x2, x2, #1"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 3, <<
                        "   0:	d37ff847 	lsl	x7, x2, #1\n"
                        "   4:	8b0200e2 	add	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 4, <<
                        "0:	d37ef442 	lsl	x2, x2, #2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 5, <<
                        "   0:	d37ef447 	lsl	x7, x2, #2\n"
                        "   4:	8b0200e2 	add	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 6, <<
                        "   0:	d37ff847 	lsl	x7, x2, #1\n"
                        "   4:	8b0200e2 	add	x2, x7, x2\n"
                        "   8:	d37ff842 	lsl	x2, x2, #1"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 7, <<
                        "   0:	d37df047 	lsl	x7, x2, #3\n"
                        "   4:	cb0200e2 	sub	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 8, <<
                        "0:	d37df042 	lsl	x2, x2, #3"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 9, <<
                        "   0:	d37df047 	lsl	x7, x2, #3\n"
                        "   4:	8b0200e2 	add	x2, x7, x2"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 10, <<
                        "   0:	d37ef447 	lsl	x7, x2, #2\n"
                        "   4:	8b0200e2 	add	x2, x7, x2\n"
                        "   8:	d37ff842 	lsl	x2, x2, #1"
                    >>)
                end),
                ?_test(begin
                    mul_test0(State0, r2, 11, <<
                        "   0:	d2800167 	mov	x7, #0xb                   	// #11\n"
                        "   4:	9b077c42 	mul	x2, x2, x7"
                    >>)
                end)
            ]
        end}.

%% Convert an objdump-style disassembly dump into the raw little-endian
%% instruction stream. Each dump line is "<addr>: <8 hex digits> <mnemonic>";
%% only the 8-digit instruction word is kept, everything else is skipped.
dump_to_bin(Dump) ->
    dump_to_bin0(Dump, addr, []).

-define(IS_HEX_DIGIT(C),
    ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F))
).

%% State machine over the dump text:
%%   addr  -> consuming the leading address field, until "<hexdigit>:"
%%   hex   -> consuming the instruction word (8 hex digits + separator)
%%   instr -> skipping the mnemonic/comment until end of line
%% NOTE: the binary patterns below were reconstructed — the diff text had them
%% stripped by markup mangling ("<<N, ...>>" collapsed to "<>").
dump_to_bin0(<<N, $:, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
    %% Last address digit followed by ':' — switch to reading the hex word.
    dump_to_bin0(Tail, hex, Acc);
dump_to_bin0(<<N, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) ->
    dump_to_bin0(Tail, hex, Acc);
dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) ->
    dump_to_bin0(Tail, hex, Acc);
dump_to_bin0(<<H1, H2, H3, H4, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
    (Sp =:= $\t orelse Sp =:= $\s) andalso
        ?IS_HEX_DIGIT(H1) andalso
        ?IS_HEX_DIGIT(H2) andalso
        ?IS_HEX_DIGIT(H3) andalso
        ?IS_HEX_DIGIT(H4) andalso
        ?IS_HEX_DIGIT(H5) andalso
        ?IS_HEX_DIGIT(H6) andalso
        ?IS_HEX_DIGIT(H7) andalso
        ?IS_HEX_DIGIT(H8)
->
    %% Parse 8 hex digits (AArch64 32-bit instruction), emit little-endian.
    Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16),
    dump_to_bin0(Rest, instr, [<<Instr:32/little>> | Acc]);
dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) ->
    dump_to_bin0(Tail, addr, Acc);
dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) ->
    dump_to_bin0(Tail, instr, Acc);
dump_to_bin0(<<>>, _, Acc) ->
    list_to_binary(lists:reverse(Acc)).
diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 7ccb678892..c309cae9e2 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -1269,7 +1269,7 @@ move_to_vm_register_test_() -> " 0: 49 89 02 mov %rax,(%r10)" >>) end), - %% Test: Atom register to y_reg + %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, rax, {y_reg, 0}, << " 0:\t48 8b 47 28 mov 0x28(%rdi),%rax\n" diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index 6f3f387e33..a435ab17e0 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -27,6 +27,8 @@ start() -> etest:test([ jit_tests, + jit_aarch64_tests, + jit_aarch64_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). diff --git a/tests/test.c b/tests/test.c index 14dea8fb1b..b73545d987 100644 --- a/tests/test.c +++ b/tests/test.c @@ -32,6 +32,7 @@ #include "bif.h" #include "context.h" #include "iff.h" +#include "jit.h" #include "mapped_file.h" #include "module.h" #include "term.h" @@ -699,7 +700,12 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) if (!beam) { #if JIT_ARCH_TARGET == JIT_ARCH_X86_64 if (chdir("x86_64") != 0) { - perror("Error: "); + perror("Error: cannot find x86_64 directory"); + return EXIT_FAILURE; + } +#elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 + if (chdir("aarch64") != 0) { + perror("Error: cannot find aarch64 directory"); return EXIT_FAILURE; } #else