Skip to content

Commit 9f502e4

Browse files
committed
Prepare flint_mpn_(mul|sqr)high_n for tuning
Move related parameters to flint-mparam.h
1 parent c6e6991 commit 9f502e4

File tree

10 files changed

+1074
-153
lines changed

10 files changed

+1074
-153
lines changed

src/mpn_extras.h

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#endif
2323

2424
#include <gmp.h>
25+
#include "flint-mparam.h"
2526
#include "longlong.h"
2627

2728
#ifdef __cplusplus
@@ -594,13 +595,7 @@ FLINT_DLL extern const flint_mpn_sqrhigh_normalised_func_t flint_mpn_sqrhigh_nor
594595

595596
#endif
596597

597-
/* FIXME: this tuning is for x86_64_adx with fft_small */
598-
/* NOTE: we assume that the same cutoff is optimal for both mulhigh and mullow */
599-
#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50
600-
#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000
601-
#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048
602-
603-
FLINT_DLL extern const signed short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE];
598+
FLINT_DLL extern const short flint_mpn_mulhigh_k_tab[];
604599

605600
mp_limb_t flint_mpn_mullow_basecase(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n);
606601
void _flint_mpn_mullow_n_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n);
@@ -667,10 +662,6 @@ void flint_mpn_mul_or_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t
667662
flint_mpn_mul_n(rp, xp, yp, n);
668663
}
669664

670-
#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90
671-
#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000
672-
#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048
673-
674665
#if FLINT_HAVE_ASSEMBLY_x86_64_adx
675666
mp_limb_t _flint_mpn_sqrhigh_basecase_even(mp_ptr, mp_srcptr, mp_size_t);
676667
mp_limb_t _flint_mpn_sqrhigh_basecase_odd(mp_ptr, mp_srcptr, mp_size_t);

src/mpn_extras/arm64/applem1/flint-mparam.h

Lines changed: 152 additions & 0 deletions
Large diffs are not rendered by default.

src/mpn_extras/arm64/flint-mparam.h

Lines changed: 152 additions & 0 deletions
Large diffs are not rendered by default.

src/mpn_extras/generic/flint-mparam.h

Lines changed: 152 additions & 0 deletions
Large diffs are not rendered by default.

src/mpn_extras/mulhigh.c

Lines changed: 7 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18,77 +18,11 @@
1818
#include <string.h>
1919
#include "mpn_extras.h"
2020

21-
/* Generated by tune-mulhigh.c */
22-
const signed short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE] =
23-
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18,
24-
18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36,
25-
38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56,
26-
60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76,
27-
88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88,
28-
80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112,
29-
120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140,
30-
144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156,
31-
160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172,
32-
172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188,
33-
188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188,
34-
184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196,
35-
188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264,
36-
284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272,
37-
284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284,
38-
276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324,
39-
300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324,
40-
300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456,
41-
472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456,
42-
480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480,
43-
472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472,
44-
480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560,
45-
576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544,
46-
576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576,
47-
552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560,
48-
568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568,
49-
576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776,
50-
776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776,
51-
792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824,
52-
870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856,
53-
840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872,
54-
856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872,
55-
872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904,
56-
896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928,
57-
928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912,
58-
928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024,
59-
928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056,
60-
1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104,
61-
1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104,
62-
1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136,
63-
1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136,
64-
1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152,
65-
1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152,
66-
1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152,
67-
1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152,
68-
1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152,
69-
1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152,
70-
1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152,
71-
1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152,
72-
1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344,
73-
1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344,
74-
1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552,
75-
1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568,
76-
1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584,
77-
1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632,
78-
1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632,
79-
1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648,
80-
1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664,
81-
1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712,
82-
1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728,
83-
1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728,
84-
1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728,
85-
1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712,
86-
1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728,
87-
1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728,
88-
1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856,
89-
1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920,
90-
1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016,
91-
1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696, };
21+
#if TUNE_PROGRAM
22+
short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE];
23+
#else
24+
const short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE] = {FLINT_MPN_MULHIGH_K_TAB};
25+
#endif
9226

9327
void
9428
_flint_mpn_mulhigh_n_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
@@ -155,6 +89,7 @@ _flint_mpn_mulhigh_n_mulders(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n)
15589
return bot;
15690
}
15791

92+
#if !TUNE_PROGRAM
15893
mp_limb_t
15994
_flint_mpn_mulhigh_n_mul(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n)
16095
{
@@ -203,3 +138,4 @@ mp_limb_pair_t _flint_mpn_mulhigh_normalised(mp_ptr rp, mp_srcptr xp, mp_srcptr
203138

204139
return ret;
205140
}
141+
#endif

0 commit comments

Comments
 (0)