Skip to content

InlineFloatCtx accessors using bad layout? #28

@aquacluck

Description

@aquacluck

see below, this comment irrelevant now

unlike X and W registers, float registers cluster smaller sizes into larger sizes instead of inheriting the largest register's index+stride: https://developer.arm.com/documentation/dht0002/a/Introducing-NEON/NEON-architecture-overview/NEON-registers

The current impl seems to treat them the same as X and W, ie it assumes that S1 is located at Q1, S2 is at Q2, etc.

from source/lib/hook/nx64/inline_impl.hpp, see ISSUE lines:

    union FloatRegister {
        Vec128 V128;
        f128 Q;
        Vec64 V64;
        f64 D;
        f32 S; // ISSUE: FloatRegister should hold 4 of S?
        f16 H;
        /* TODO: 8-bit floats? That's a thing? */
        // f8 B;
    };

    struct FloatRegisters {
        FloatRegister m_Fr[32];
    };

    namespace impl {
        /* This type is only unioned with GpRegisters, so this is valid. */
        struct GpRegisterAccessorImpl {
            GpRegisters& Get() {
                return *reinterpret_cast<GpRegisters*>(this);
            }
        };
        struct FloatRegisterAccessorImpl {
            FloatRegisters& Get() {
                return *reinterpret_cast<FloatRegisters*>(this);
            }
        };

        #define ACCESSOR(inherit, name, type, length, arr, member)                                          \
            struct name##RegisterAccessor : public inherit##Impl {                                          \
                type& operator[](int index) {                                                               \
                    EXL_ASSERT(0 <= index && index < length, "Register index must not be out of bounds!");  \
                    return Get().arr[index].member;                                                         \
                    // ^ ISSUE: S should use arr[index >> 1], then something like member[(index & 1) ^ 1]?  \
                }                                                                                           \
            }
        
        ACCESSOR(GpRegisterAccessor,    Gp64,       u64,    31, m_Gp, X);
        ACCESSOR(GpRegisterAccessor,    Gp32,       u32,    31, m_Gp, W);
        ACCESSOR(FloatRegisterAccessor, Vector128,  Vec128, 32, m_Fr, V128);
        ACCESSOR(FloatRegisterAccessor, Vector64,   Vec64,  32, m_Fr, V64);
        ACCESSOR(FloatRegisterAccessor, Float128,   f128,   32, m_Fr, Q);
        ACCESSOR(FloatRegisterAccessor, Float64,    f64,    32, m_Fr, D);
        ACCESSOR(FloatRegisterAccessor, Float32,    f32,    32, m_Fr, S);
        ACCESSOR(FloatRegisterAccessor, Float16,    f16,    32, m_Fr, H);
        /* TODO: 8-bit floats? That's a thing? */
        //ACCESSOR(FloatRegisterAccessor, Float8,    f8,    32, m_Fr, B);

        #undef ACCESSOR
    }

thanks dt for explaining this and providing the docs, i didn't really do anything, also for a prior solution (somewhat untested):
https://github.com/dt-12345/MSFix/blob/main/source/lib/hook/nx64/inline_impl.hpp#L62-L72

        /*
        index has to be adjusted because of how the registesr are layed out
        |----------Q0----------|
        |----D1----||----D0----|
        |-S1-||-S0-||-S3-||-S2-|
        */
        struct FpRegisterAccessor32 : public FpRegisterAccessorImpl {
            float& operator[](int index) {
                return Get().m_Fp[index >> 1].S[(index & 1) ^ 1];
            }
        };

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions