From 923efad6dd5da1dcef0e53c8e6cac318abe032ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Piku=C5=82a?= <m.pikula@partner.samsung.com> Date: Thu, 20 Feb 2025 16:59:09 +0100 Subject: [PATCH] Streamline implementation overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decrease code repetition for arch-specific function overrides. Signed-off-by: Marek Pikuła <m.pikula@partner.samsung.com> --- silk/NSQ.c | 19 +------ silk/VQ_WMat_EC.c | 15 +----- silk/fixed/encode_frame_FIX.c | 9 ++-- silk/float/wrappers_FLP.c | 4 +- silk/main.h | 48 +++--------------- silk/main_overrides.h | 88 ++++++++++++++++++++++++++++++++ silk/quant_LTP_gains.c | 4 +- silk/x86/NSQ_sse4_1.c | 18 +------ silk/x86/VQ_WMat_EC_sse4_1.c | 15 +----- silk/x86/main_sse.h | 96 ++++------------------------------- silk/x86/x86_silk_map.c | 33 +----------- silk_headers.mk | 1 + 12 files changed, 121 insertions(+), 229 deletions(-) create mode 100644 silk/main_overrides.h diff --git a/silk/NSQ.c b/silk/NSQ.c index 1caa829bb..b6529f4cf 100644 --- a/silk/NSQ.c +++ b/silk/NSQ.c @@ -73,24 +73,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer( ); #endif -void silk_NSQ_c -( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I 
Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) +SILK_NSQ_DECL(c) { opus_int k, lag, start_idx, LSF_interpolation_flag; const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; diff --git a/silk/VQ_WMat_EC.c b/silk/VQ_WMat_EC.c index 245a7e4b0..acdd1d75a 100644 --- a/silk/VQ_WMat_EC.c +++ b/silk/VQ_WMat_EC.c @@ -32,20 +32,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC_c( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ -) +SILK_VQ_WMAT_EC_DECL(c) { opus_int k, gain_tmp_Q7; const opus_int8 *cb_row_Q7; diff --git a/silk/fixed/encode_frame_FIX.c b/silk/fixed/encode_frame_FIX.c index 7c83360ba..4600422ef 100644 --- a/silk/fixed/encode_frame_FIX.c +++ b/silk/fixed/encode_frame_FIX.c @@ -206,10 +206,9 @@ opus_int silk_encode_frame_FIX( sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses, + silk_NSQ( psEnc->sCmn.arch, &psEnc->sCmn, 
&psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses, sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, - psEnc->sCmn.arch); + sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 ); } if ( iter == maxIter && !found_lower ) { @@ -432,10 +431,10 @@ static OPUS_INLINE void silk_LBRR_encode_FIX( psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16, + silk_NSQ( psEnc->sCmn.arch, &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16, psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); + psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 ); } /* Restore original gains */ diff --git a/silk/float/wrappers_FLP.c b/silk/float/wrappers_FLP.c index c0c183e35..4c8de4dc3 100644 --- a/silk/float/wrappers_FLP.c +++ b/silk/float/wrappers_FLP.c @@ -164,8 +164,8 @@ void silk_NSQ_wrapper_FLP( silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } else { - silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); + 
silk_NSQ( psEnc->sCmn.arch, &psEnc->sCmn, psNSQ, psIndices, x16, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, + AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); } } diff --git a/silk/main.h b/silk/main.h index cd576d8cc..abfab5b07 100644 --- a/silk/main.h +++ b/silk/main.h @@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "debug.h" #include "entenc.h" #include "entdec.h" +#include "main_overrides.h" #if defined(OPUS_X86_MAY_HAVE_SSE4_1) #include "x86/main_sse.h" @@ -222,53 +223,18 @@ void silk_quant_LTP_gains( ); /* Entropy constrained matrix-weighted VQ, for a single input data vector */ -void silk_VQ_WMat_EC_c( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ -); - -#if !defined(OVERRIDE_silk_VQ_WMat_EC) -#define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_c(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L)) +SILK_VQ_WMAT_EC_DECL(c); +#ifndef silk_VQ_WMat_EC +#define silk_VQ_WMat_EC(...) 
OVERRIDE_IMPL_SINGLE(silk_VQ_WMat_EC, c, __VA_ARGS__) #endif /************************************/ /* Noise shaping quantization (NSQ) */ /************************************/ -void silk_NSQ_c( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if !defined(OVERRIDE_silk_NSQ) -#define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) +SILK_NSQ_DECL(c); +#ifndef silk_NSQ +#define silk_NSQ(...) 
OVERRIDE_IMPL_SINGLE(silk_NSQ, c, __VA_ARGS__) #endif /* Noise shaping using delayed decision */ diff --git a/silk/main_overrides.h b/silk/main_overrides.h new file mode 100644 index 000000000..7b1143664 --- /dev/null +++ b/silk/main_overrides.h @@ -0,0 +1,88 @@ +/* Copyright (c) 2025, Samsung R&D Institute Poland + Written by Marek Pikula + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SILK_MAIN_OVERRIDES_H +#define SILK_MAIN_OVERRIDES_H + +/** Override implementation with a single function. */ +#define OVERRIDE_IMPL_SINGLE(func, impl, arch, ...) \ + ((void)(arch), func ## _ ## impl(__VA_ARGS__)) + +/** Override implementation with multiple arch-specific functions. + * + * It uses an array declared with OVERRIDE_IMPL_ARRAY_DECL(). 
+ */ +#define OVERRIDE_IMPL_ARRAY(func, arch, ...) \ + ((*OVERRIDE_MAP_ ## func[(arch) & OPUS_ARCHMASK])(__VA_ARGS__)) + +/** Declare a mapping array for use with OVERRIDE_IMPL_ARRAY(). */ +#define OVERRIDE_IMPL_ARRAY_DECL(func) \ + const func ## _t OVERRIDE_MAP_ ## func[ OPUS_ARCHMASK + 1 ] + + +// Function declarations. + +/* Entropy constrained matrix-weighted VQ, for a single input data vector */ +#define SILK_VQ_WMAT_EC_DECL(impl, ...) \ + void (__VA_ARGS__ silk_VQ_WMat_EC_ ## impl)( \ + opus_int8 *ind, /* O index of best codebook vector */ \ + opus_int32 *res_nrg_Q15, /* O best residual energy */ \ + opus_int32 *rate_dist_Q8, /* O best total bitrate */ \ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ \ + const opus_int32 *XX_Q17, /* I correlation matrix */ \ + const opus_int32 *xX_Q17, /* I correlation vector */ \ + const opus_int8 *cb_Q7, /* I codebook */ \ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ \ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ \ + const opus_int subfr_len, /* I number of samples per subframe */ \ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ \ + const opus_int L /* I number of vectors in codebook */ \ + ) +typedef SILK_VQ_WMAT_EC_DECL(t, *const); + +/* Noise shaping quantization (NSQ) */ +#define SILK_NSQ_DECL(impl, ...) 
\ + void (__VA_ARGS__ silk_NSQ_ ## impl)( \ + const silk_encoder_state *psEncC, /* I Encoder State */ \ + silk_nsq_state *NSQ, /* I/O NSQ state */ \ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ \ + const opus_int16 x16[], /* I Input */ \ + opus_int8 pulses[], /* O Quantized pulse signal */ \ + const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ \ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ \ + const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ \ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ \ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ \ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ \ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ \ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ \ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ \ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ \ + ) +typedef SILK_NSQ_DECL(t, *const); + +#endif // SILK_MAIN_OVERRIDES_H diff --git a/silk/quant_LTP_gains.c b/silk/quant_LTP_gains.c index d6b8eff8d..8a7e812ea 100644 --- a/silk/quant_LTP_gains.c +++ b/silk/quant_LTP_gains.c @@ -82,6 +82,7 @@ void silk_quant_LTP_gains( max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; silk_VQ_WMat_EC( + arch, /* I Run-time architecture */ &temp_idx[ j ], /* O index of best codebook vector */ &res_nrg_Q15_subfr, /* O residual energy */ &rate_dist_Q7_subfr, /* O best weighted quantization error + mu * rate */ @@ -93,8 +94,7 @@ void silk_quant_LTP_gains( cl_ptr_Q5, /* I code length for each codebook vector */ subfr_len, /* I number of samples per subframe */ max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - cbk_size, /* I number of vectors in codebook */ - arch /* I 
Run-time architecture */ + cbk_size /* I number of vectors in codebook */ ); res_nrg_Q15 = silk_ADD_POS_SAT32( res_nrg_Q15, res_nrg_Q15_subfr ); diff --git a/silk/x86/NSQ_sse4_1.c b/silk/x86/NSQ_sse4_1.c index 3c9aca7ba..7a0170bff 100644 --- a/silk/x86/NSQ_sse4_1.c +++ b/silk/x86/NSQ_sse4_1.c @@ -71,23 +71,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( opus_int32 table[][4] /* I */ ); -void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) +SILK_NSQ_DECL(sse4_1) { opus_int k, lag, start_idx, LSF_interpolation_flag; const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; diff --git a/silk/x86/VQ_WMat_EC_sse4_1.c b/silk/x86/VQ_WMat_EC_sse4_1.c index df4626b60..012f0a38a 100644 --- a/silk/x86/VQ_WMat_EC_sse4_1.c +++ b/silk/x86/VQ_WMat_EC_sse4_1.c @@ -36,20 +36,7 @@ #include "celt/x86/x86cpu.h" /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC_sse4_1( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 
*res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ -) +SILK_VQ_WMAT_EC_DECL(sse4_1) { opus_int k, gain_tmp_Q7; const opus_int8 *cb_row_Q7; diff --git a/silk/x86/main_sse.h b/silk/x86/main_sse.h index b254d53e7..057eb1eb7 100644 --- a/silk/x86/main_sse.h +++ b/silk/x86/main_sse.h @@ -32,107 +32,33 @@ # include "config.h" # endif +#include "main_overrides.h" + # if defined(OPUS_X86_MAY_HAVE_SSE4_1) -void silk_VQ_WMat_EC_sse4_1( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ -); +SILK_VQ_WMAT_EC_DECL(sse4_1); -# if defined OPUS_X86_PRESUME_SSE4_1 +# if defined(OPUS_X86_PRESUME_SSE4_1) -# define OVERRIDE_silk_VQ_WMat_EC -# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, 
cb_gain_Q7, cl_Q5, \ - subfr_len, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ - subfr_len, max_gain_Q7, L)) +# define silk_VQ_WMat_EC(...) OVERRIDE_IMPL_SINGLE(silk_VQ_WMat_EC, sse4_1, __VA_ARGS__) # elif defined(OPUS_HAVE_RTCD) -extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ -); - -# define OVERRIDE_silk_VQ_WMat_EC -# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ - subfr_len, max_gain_Q7, L, arch) \ - ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \ - subfr_len, max_gain_Q7, L)) +extern OVERRIDE_IMPL_ARRAY_DECL(silk_VQ_WMat_EC); +# define silk_VQ_WMat_EC(...) 
OVERRIDE_IMPL_ARRAY(silk_VQ_WMat_EC, __VA_ARGS__) # endif -void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); +SILK_NSQ_DECL(sse4_1); # if defined OPUS_X86_PRESUME_SSE4_1 -# define OVERRIDE_silk_NSQ -# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) +# define silk_NSQ(...) 
OVERRIDE_IMPL_SINGLE(silk_NSQ, sse4_1, __VA_ARGS__) # elif defined(OPUS_HAVE_RTCD) -extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -# define OVERRIDE_silk_NSQ -# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) +extern OVERRIDE_IMPL_ARRAY_DECL(silk_NSQ); +# define silk_NSQ(...) 
OVERRIDE_IMPL_ARRAY(silk_NSQ, __VA_ARGS__) # endif diff --git a/silk/x86/x86_silk_map.c b/silk/x86/x86_silk_map.c index 39ad75276..eb4d728d6 100644 --- a/silk/x86/x86_silk_map.c +++ b/silk/x86/x86_silk_map.c @@ -69,23 +69,7 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ }; -void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int16 x16[], /* I Input */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) = { +OVERRIDE_IMPL_ARRAY_DECL(silk_NSQ) = { silk_NSQ_c, /* non-sse */ silk_NSQ_c, silk_NSQ_c, @@ -93,20 +77,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; -void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *res_nrg_Q15, /* O best residual energy */ - opus_int32 *rate_dist_Q8, /* O best total bitrate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int32 *XX_Q17, /* I correlation matrix */ - const opus_int32 *xX_Q17, /* I correlation vector 
*/ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int subfr_len, /* I number of samples per subframe */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - const opus_int L /* I number of vectors in codebook */ -) = { +OVERRIDE_IMPL_ARRAY_DECL(silk_VQ_WMat_EC) = { silk_VQ_WMat_EC_c, /* non-sse */ silk_VQ_WMat_EC_c, silk_VQ_WMat_EC_c, diff --git a/silk_headers.mk b/silk_headers.mk index 2588067c7..7888c1af4 100644 --- a/silk_headers.mk +++ b/silk_headers.mk @@ -6,6 +6,7 @@ silk/API.h \ silk/typedef.h \ silk/define.h \ silk/main.h \ +silk/main_overrides.h \ silk/x86/main_sse.h \ silk/PLC.h \ silk/structs.h \