From 923efad6dd5da1dcef0e53c8e6cac318abe032ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Piku=C5=82a?= <m.pikula@partner.samsung.com>
Date: Thu, 20 Feb 2025 16:59:09 +0100
Subject: [PATCH] Streamline implementation overrides
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

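Decrease code repetition for arch-specific function overrides by sharing
the function declarations and dispatch macros in silk/main_overrides.h.
The silk_NSQ() and silk_VQ_WMat_EC() macros now take arch as their first
argument instead of the last.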

Signed-off-by: Marek Pikuła <m.pikula@partner.samsung.com>
---
 silk/NSQ.c                    | 19 +------
 silk/VQ_WMat_EC.c             | 15 +-----
 silk/fixed/encode_frame_FIX.c |  9 ++--
 silk/float/wrappers_FLP.c     |  4 +-
 silk/main.h                   | 48 +++---------------
 silk/main_overrides.h         | 88 ++++++++++++++++++++++++++++++++
 silk/quant_LTP_gains.c        |  4 +-
 silk/x86/NSQ_sse4_1.c         | 18 +------
 silk/x86/VQ_WMat_EC_sse4_1.c  | 15 +-----
 silk/x86/main_sse.h           | 96 ++++-------------------------------
 silk/x86/x86_silk_map.c       | 33 +-----------
 silk_headers.mk               |  1 +
 12 files changed, 121 insertions(+), 229 deletions(-)
 create mode 100644 silk/main_overrides.h

diff --git a/silk/NSQ.c b/silk/NSQ.c
index 1caa829bb..b6529f4cf 100644
--- a/silk/NSQ.c
+++ b/silk/NSQ.c
@@ -73,24 +73,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
 );
 #endif
 
-void silk_NSQ_c
-(
-    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
-    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
-    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
-    const opus_int16            x16[],                                        /* I    Input                           */
-    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
-    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
-    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
-    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
-    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
-    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
-    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
-    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
-    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
-    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
-    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
-)
+SILK_NSQ_DECL(c)
 {
     opus_int            k, lag, start_idx, LSF_interpolation_flag;
     const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
diff --git a/silk/VQ_WMat_EC.c b/silk/VQ_WMat_EC.c
index 245a7e4b0..acdd1d75a 100644
--- a/silk/VQ_WMat_EC.c
+++ b/silk/VQ_WMat_EC.c
@@ -32,20 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "main.h"
 
 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
-void silk_VQ_WMat_EC_c(
-    opus_int8                   *ind,                           /* O    index of best codebook vector               */
-    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
-    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
-    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
-    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
-    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
-    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
-    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
-    const opus_int              L                               /* I    number of vectors in codebook               */
-)
+SILK_VQ_WMAT_EC_DECL(c)
 {
     opus_int   k, gain_tmp_Q7;
     const opus_int8 *cb_row_Q7;
diff --git a/silk/fixed/encode_frame_FIX.c b/silk/fixed/encode_frame_FIX.c
index 7c83360ba..4600422ef 100644
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -206,10 +206,9 @@ opus_int silk_encode_frame_FIX(
                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
                            psEnc->sCmn.arch );
                 } else {
-                    silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses,
+                    silk_NSQ( psEnc->sCmn.arch, &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses,
                             sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14,
-                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
-                            psEnc->sCmn.arch);
+                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
                 }
 
                 if ( iter == maxIter && !found_lower ) {
@@ -432,10 +431,10 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
                 psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
                 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
         } else {
-            silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16,
+            silk_NSQ( psEnc->sCmn.arch, &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16,
                 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
                 psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
-                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
         }
 
         /* Restore original gains */
diff --git a/silk/float/wrappers_FLP.c b/silk/float/wrappers_FLP.c
index c0c183e35..4c8de4dc3 100644
--- a/silk/float/wrappers_FLP.c
+++ b/silk/float/wrappers_FLP.c
@@ -164,8 +164,8 @@ void silk_NSQ_wrapper_FLP(
         silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
             AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch );
     } else {
-        silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
-            AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch );
+        silk_NSQ( psEnc->sCmn.arch, &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+            AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
     }
 }
 
diff --git a/silk/main.h b/silk/main.h
index cd576d8cc..abfab5b07 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "debug.h"
 #include "entenc.h"
 #include "entdec.h"
+#include "main_overrides.h"
 
 #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
 #include "x86/main_sse.h"
@@ -222,53 +223,18 @@ void silk_quant_LTP_gains(
 );
 
 /* Entropy constrained matrix-weighted VQ, for a single input data vector */
-void silk_VQ_WMat_EC_c(
-    opus_int8                   *ind,                           /* O    index of best codebook vector               */
-    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
-    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
-    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
-    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
-    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
-    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
-    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
-    const opus_int              L                               /* I    number of vectors in codebook               */
-);
-
-#if !defined(OVERRIDE_silk_VQ_WMat_EC)
-#define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L, arch) \
-    ((void)(arch),silk_VQ_WMat_EC_c(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L))
+SILK_VQ_WMAT_EC_DECL(c);
+#ifndef silk_VQ_WMat_EC /* Default to the C version unless a header included earlier (x86/main_sse.h) already defined an override. */
+#define silk_VQ_WMat_EC(...) OVERRIDE_IMPL_SINGLE(silk_VQ_WMat_EC, c, __VA_ARGS__)
 #endif
 
 /************************************/
 /* Noise shaping quantization (NSQ) */
 /************************************/
 
-void silk_NSQ_c(
-    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
-    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
-    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
-    const opus_int16            x16[],                                        /* I    Input                           */
-    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
-    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
-    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
-    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
-    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
-    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
-    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
-    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
-    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
-    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
-    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
-);
-
-#if !defined(OVERRIDE_silk_NSQ)
-#define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
-                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
-    ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
-                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+SILK_NSQ_DECL(c);
+#ifndef silk_NSQ /* Default to the C version unless a header included earlier (x86/main_sse.h) already defined an override. */
+#define silk_NSQ(...) OVERRIDE_IMPL_SINGLE(silk_NSQ, c, __VA_ARGS__)
 #endif
 
 /* Noise shaping using delayed decision */
diff --git a/silk/main_overrides.h b/silk/main_overrides.h
new file mode 100644
index 000000000..7b1143664
--- /dev/null
+++ b/silk/main_overrides.h
@@ -0,0 +1,88 @@
+/* Copyright (c) 2025, Samsung R&D Institute Poland
+   Written by Marek Pikula
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SILK_MAIN_OVERRIDES_H
+#define SILK_MAIN_OVERRIDES_H
+
+/** Route a call to the single implementation <func>_<impl>; arch is evaluated, then discarded. */
+#define OVERRIDE_IMPL_SINGLE(func, impl, arch, ...) \
+    ((void)(arch), func ## _ ## impl(__VA_ARGS__))
+
+/** Override implementation with multiple arch-specific functions.
+ *
+ * Calls entry (arch & OPUS_ARCHMASK) of the OVERRIDE_MAP_<func> array, declared with OVERRIDE_IMPL_ARRAY_DECL().
+ */
+#define OVERRIDE_IMPL_ARRAY(func, arch, ...) \
+    ((*OVERRIDE_MAP_ ## func[(arch) & OPUS_ARCHMASK])(__VA_ARGS__))
+
+/** Declare the OVERRIDE_MAP_<func> array of (OPUS_ARCHMASK + 1) <func>_t entries used by OVERRIDE_IMPL_ARRAY(). */
+#define OVERRIDE_IMPL_ARRAY_DECL(func) \
+    const func ## _t OVERRIDE_MAP_ ## func[ OPUS_ARCHMASK + 1 ]
+
+
+/* Function declaration macros and the matching function-pointer typedefs. */
+
+/* Entropy constrained matrix-weighted VQ, for a single input data vector. Expands to the declarator of silk_VQ_WMat_EC_<impl>; optional extra arguments are placed before the name (e.g. *const for the typedef below). */
+#define SILK_VQ_WMAT_EC_DECL(impl, ...) \
+    void (__VA_ARGS__ silk_VQ_WMat_EC_ ## impl)( \
+        opus_int8           *ind,           /* O    index of best codebook vector               */ \
+        opus_int32          *res_nrg_Q15,   /* O    best residual energy                        */ \
+        opus_int32          *rate_dist_Q8,  /* O    best total bitrate                          */ \
+        opus_int            *gain_Q7,       /* O    sum of absolute LTP coefficients            */ \
+        const opus_int32    *XX_Q17,        /* I    correlation matrix                          */ \
+        const opus_int32    *xX_Q17,        /* I    correlation vector                          */ \
+        const opus_int8     *cb_Q7,         /* I    codebook                                    */ \
+        const opus_uint8    *cb_gain_Q7,    /* I    codebook effective gain                     */ \
+        const opus_uint8    *cl_Q5,         /* I    code length for each codebook vector        */ \
+        const opus_int      subfr_len,      /* I    number of samples per subframe              */ \
+        const opus_int32    max_gain_Q7,    /* I    maximum sum of absolute LTP coefficients    */ \
+        const opus_int      L               /* I    number of vectors in codebook               */ \
+    )
+typedef SILK_VQ_WMAT_EC_DECL(t, *const); /* silk_VQ_WMat_EC_t: const pointer to a function with the signature above */
+
+/* Noise shaping quantization (NSQ). Expands to the declarator of silk_NSQ_<impl>; optional extra arguments are placed before the name (e.g. *const for the typedef below). */
+#define SILK_NSQ_DECL(impl, ...) \
+    void (__VA_ARGS__ silk_NSQ_ ## impl)( \
+        const silk_encoder_state    *psEncC,                                        /* I    Encoder State               */ \
+        silk_nsq_state              *NSQ,                                           /* I/O  NSQ state                   */ \
+        SideInfoIndices             *psIndices,                                     /* I/O  Quantization Indices        */ \
+        const opus_int16            x16[],                                          /* I    Input                       */ \
+        opus_int8                   pulses[],                                       /* O    Quantized pulse signal      */ \
+        const opus_int16            *PredCoef_Q12,                                  /* I    Short term prediction coefs */ \
+        const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],        /* I    Long term prediction coefs  */ \
+        const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ],   /* I    Noise shaping coefs         */ \
+        const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],              /* I    Long term shaping coefs     */ \
+        const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                       /* I    Spectral tilt               */ \
+        const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                     /* I    Low frequency shaping coefs */ \
+        const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                      /* I    Quantization step sizes     */ \
+        const opus_int              pitchL[ MAX_NB_SUBFR ],                         /* I    Pitch lags                  */ \
+        const opus_int              Lambda_Q10,                                     /* I    Rate/distortion tradeoff    */ \
+        const opus_int              LTP_scale_Q14                                   /* I    LTP state scaling           */ \
+    )
+typedef SILK_NSQ_DECL(t, *const); /* silk_NSQ_t: const pointer to a function with the signature above */
+
+#endif /* SILK_MAIN_OVERRIDES_H */
diff --git a/silk/quant_LTP_gains.c b/silk/quant_LTP_gains.c
index d6b8eff8d..8a7e812ea 100644
--- a/silk/quant_LTP_gains.c
+++ b/silk/quant_LTP_gains.c
@@ -82,6 +82,7 @@ void silk_quant_LTP_gains(
             max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 )
                                         + SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
             silk_VQ_WMat_EC(
+                arch,                   /* I    Run-time architecture                                   */
                 &temp_idx[ j ],         /* O    index of best codebook vector                           */
                 &res_nrg_Q15_subfr,     /* O    residual energy                                         */
                 &rate_dist_Q7_subfr,    /* O    best weighted quantization error + mu * rate            */
@@ -93,8 +94,7 @@ void silk_quant_LTP_gains(
                 cl_ptr_Q5,              /* I    code length for each codebook vector                    */
                 subfr_len,              /* I    number of samples per subframe                          */
                 max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
-                cbk_size,               /* I    number of vectors in codebook                           */
-                arch                    /* I    Run-time architecture                                   */
+                cbk_size                /* I    number of vectors in codebook                           */
             );
 
             res_nrg_Q15  = silk_ADD_POS_SAT32( res_nrg_Q15, res_nrg_Q15_subfr );
diff --git a/silk/x86/NSQ_sse4_1.c b/silk/x86/NSQ_sse4_1.c
index 3c9aca7ba..7a0170bff 100644
--- a/silk/x86/NSQ_sse4_1.c
+++ b/silk/x86/NSQ_sse4_1.c
@@ -71,23 +71,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
     opus_int32          table[][4]              /* I                                    */
 );
 
-void silk_NSQ_sse4_1(
-    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
-    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
-    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
-    const opus_int16            x16[],                                        /* I    Input                           */
-    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
-    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
-    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
-    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
-    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
-    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
-    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
-    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
-    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
-    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
-    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
-)
+SILK_NSQ_DECL(sse4_1)
 {
     opus_int            k, lag, start_idx, LSF_interpolation_flag;
     const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
diff --git a/silk/x86/VQ_WMat_EC_sse4_1.c b/silk/x86/VQ_WMat_EC_sse4_1.c
index df4626b60..012f0a38a 100644
--- a/silk/x86/VQ_WMat_EC_sse4_1.c
+++ b/silk/x86/VQ_WMat_EC_sse4_1.c
@@ -36,20 +36,7 @@
 #include "celt/x86/x86cpu.h"
 
 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
-void silk_VQ_WMat_EC_sse4_1(
-    opus_int8                   *ind,                           /* O    index of best codebook vector               */
-    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
-    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
-    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
-    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
-    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
-    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
-    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
-    const opus_int              L                               /* I    number of vectors in codebook               */
-)
+SILK_VQ_WMAT_EC_DECL(sse4_1)
 {
     opus_int   k, gain_tmp_Q7;
     const opus_int8 *cb_row_Q7;
diff --git a/silk/x86/main_sse.h b/silk/x86/main_sse.h
index b254d53e7..057eb1eb7 100644
--- a/silk/x86/main_sse.h
+++ b/silk/x86/main_sse.h
@@ -32,107 +32,33 @@
 #  include "config.h"
 # endif
 
+#include "main_overrides.h"
+
 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
 
-void silk_VQ_WMat_EC_sse4_1(
-    opus_int8                   *ind,                           /* O    index of best codebook vector               */
-    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
-    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
-    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
-    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
-    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
-    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
-    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
-    const opus_int              L                               /* I    number of vectors in codebook               */
-);
+SILK_VQ_WMAT_EC_DECL(sse4_1);
 
-#  if defined OPUS_X86_PRESUME_SSE4_1
+#  if defined(OPUS_X86_PRESUME_SSE4_1)
 
-#   define OVERRIDE_silk_VQ_WMat_EC
-#   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
-                           subfr_len, max_gain_Q7, L, arch) \
-    ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
-                          subfr_len, max_gain_Q7, L))
+#   define silk_VQ_WMat_EC(...) OVERRIDE_IMPL_SINGLE(silk_VQ_WMat_EC, sse4_1, __VA_ARGS__)
 
 #  elif defined(OPUS_HAVE_RTCD)
 
-extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
-    opus_int8                   *ind,                           /* O    index of best codebook vector               */
-    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
-    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
-    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
-    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
-    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
-    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
-    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
-    const opus_int              L                               /* I    number of vectors in codebook               */
-);
-
-#   define OVERRIDE_silk_VQ_WMat_EC
-#   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
-                           subfr_len, max_gain_Q7, L, arch) \
-    ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
-                          subfr_len, max_gain_Q7, L))
+extern OVERRIDE_IMPL_ARRAY_DECL(silk_VQ_WMat_EC);
+#   define silk_VQ_WMat_EC(...) OVERRIDE_IMPL_ARRAY(silk_VQ_WMat_EC, __VA_ARGS__)
 
 #  endif
 
-void silk_NSQ_sse4_1(
-    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
-    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
-    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
-    const opus_int16            x16[],                                        /* I    Input                           */
-    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
-    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
-    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
-    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
-    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
-    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
-    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
-    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
-    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
-    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
-    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
-);
+SILK_NSQ_DECL(sse4_1);
 
 #  if defined OPUS_X86_PRESUME_SSE4_1
 
-#   define OVERRIDE_silk_NSQ
-#   define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
-                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
-    ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
-                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+#   define silk_NSQ(...) OVERRIDE_IMPL_SINGLE(silk_NSQ, sse4_1, __VA_ARGS__)
 
 #  elif defined(OPUS_HAVE_RTCD)
 
-extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
-    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
-    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
-    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
-    const opus_int16            x16[],                                        /* I    Input                           */
-    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
-    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
-    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
-    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
-    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
-    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
-    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
-    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
-    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
-    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
-    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
-);
-
-#   define OVERRIDE_silk_NSQ
-#   define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
-                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
-    ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
-                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+extern OVERRIDE_IMPL_ARRAY_DECL(silk_NSQ);
+#   define silk_NSQ(...) OVERRIDE_IMPL_ARRAY(silk_NSQ, __VA_ARGS__)
 
 #  endif
 
diff --git a/silk/x86/x86_silk_map.c b/silk/x86/x86_silk_map.c
index 39ad75276..eb4d728d6 100644
--- a/silk/x86/x86_silk_map.c
+++ b/silk/x86/x86_silk_map.c
@@ -69,23 +69,7 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )(
   MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 )  /* avx */
 };
 
-void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
-    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
-    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
-    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
-    const opus_int16            x16[],                                        /* I    Input                           */
-    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
-    const opus_int16            *PredCoef_Q12,                                /* I    Short term prediction coefs     */
-    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
-    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
-    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
-    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
-    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
-    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
-    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
-    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
-    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
-) = {
+OVERRIDE_IMPL_ARRAY_DECL(silk_NSQ) = { /* silk_NSQ implementation table; NOTE(review): presumably still indexed by (arch & OPUS_ARCHMASK) like the removed SILK_NSQ_IMPL - confirm against OVERRIDE_IMPL_ARRAY */
  silk_NSQ_c,                  /* non-sse */
  silk_NSQ_c,
  silk_NSQ_c,
@@ -93,20 +77,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
  MAY_HAVE_SSE4_1( silk_NSQ )  /* avx */
 };
 
-void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
-    opus_int8                   *ind,                           /* O    index of best codebook vector               */
-    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
-    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
-    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
-    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
-    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
-    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
-    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
-    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
-    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
-    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
-    const opus_int              L                               /* I    number of vectors in codebook               */
-) = {
+OVERRIDE_IMPL_ARRAY_DECL(silk_VQ_WMat_EC) = { /* silk_VQ_WMat_EC implementation table; NOTE(review): presumably still sized OPUS_ARCHMASK + 1 like the removed SILK_VQ_WMAT_EC_IMPL - confirm against the macro definition */
  silk_VQ_WMat_EC_c,                  /* non-sse */
  silk_VQ_WMat_EC_c,
  silk_VQ_WMat_EC_c,
diff --git a/silk_headers.mk b/silk_headers.mk
index 2588067c7..7888c1af4 100644
--- a/silk_headers.mk
+++ b/silk_headers.mk
@@ -6,6 +6,7 @@ silk/API.h \
 silk/typedef.h \
 silk/define.h \
 silk/main.h \
+silk/main_overrides.h \
 silk/x86/main_sse.h \
 silk/PLC.h \
 silk/structs.h \