Skip to content

Commit

Permalink
Merge pull request flintlib#1686 from albinahlback/strongly_ordered_f…
Browse files Browse the repository at this point in the history
…unctions

Strongly ordered functions
  • Loading branch information
albinahlback authored Dec 28, 2023
2 parents be934dc + 1fd6de1 commit d1f9151
Show file tree
Hide file tree
Showing 23 changed files with 186 additions and 110 deletions.
2 changes: 2 additions & 0 deletions CMake/cmake_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#cmakedefine FLINT_HAVE_FFT_SMALL

/* Defined to 1 if the system is known to be strongly ordered, and to 0
   otherwise.  This macro is always defined by the CMake build, so test it
   with #if rather than #ifdef. */
#cmakedefine01 FLINT_KNOW_STRONG_ORDER

#ifdef _MSC_VER
# if defined(FLINT_BUILD_DLL)
# define FLINT_DLL __declspec(dllexport)
Expand Down
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ endif()
option(BUILD_SHARED_LIBS "Build shared libs" on)
option(WITH_NTL "Build tests for NTL interface or not" off)

# Check if strongly ordered memory
#
# FLINT_KNOW_STRONG_ORDER is set to ON only when the target processor is one
# of the x86-family names listed below; every other processor is reported as
# "unsure" and gets OFF (the conservative answer, not a claim that the
# processor is weakly ordered).
#
# The spelling of CMAKE_SYSTEM_PROCESSOR depends on the platform (for example
# "AMD64", "amd64", "x86_64" or "i686"), so the comparison is done on a
# lower-cased copy and the list covers the whole i386..i686 range.
set(STRONGLY_ORDERED_CPUS x86_64 amd64 x86 i386 i486 i586 i686)
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" FLINT_SYSTEM_PROCESSOR_LOWER)
if(FLINT_SYSTEM_PROCESSOR_LOWER IN_LIST STRONGLY_ORDERED_CPUS)
  message(STATUS "Checking if system is strongly ordered - yes")
  set(FLINT_KNOW_STRONG_ORDER ON)
else()
  message(STATUS "Checking if system is strongly ordered - unsure")
  set(FLINT_KNOW_STRONG_ORDER OFF)
endif()

# Find dependencies
find_package(PkgConfig REQUIRED)
pkg_check_modules(GMP REQUIRED IMPORTED_TARGET gmp>=6.2.1)
Expand Down
65 changes: 18 additions & 47 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,24 @@ then
fi
AC_SUBST(LDCONFIG)

dnl Decide whether the host CPU is known to have strongly-ordered memory.
dnl Only the x86 family is recognised. Any other CPU is reported as unsure
dnl and FLINT_KNOW_STRONG_ORDER is then left undefined.
dnl The i?86 glob covers i386 through i686, so that 32-bit x86 hosts whose
dnl canonical CPU name is i486 or i686 are recognised as well.
dnl FIXME: Improve this check to include more processors.
AC_MSG_CHECKING([if memory is strongly-ordered])
case "$host_cpu" in
    x86_64|amd64|x86|i?86)
        flint_know_strong_order="yes"
        AC_MSG_RESULT([yes])
        ;;
    *)
        flint_know_strong_order="no"
        AC_MSG_RESULT([unsure])
        ;;
esac

if test "$flint_know_strong_order" = "yes";
then
    AC_DEFINE(FLINT_KNOW_STRONG_ORDER,1,[Define if system is strongly ordered])
fi

################################################################################
# check headers
################################################################################
Expand Down Expand Up @@ -871,53 +889,6 @@ AC_MSG_ERROR([Couldn't find alloca, which is required for FLINT. Please submit a
report to <https://github.com/flintlib/flint/issues/> and specify your
operating system.])])

dnl Probe for the popcount builtins. A test program using __builtin_popcountl
dnl is compiled first and, if that succeeds, one using __builtin_popcountll.
dnl Only when both compile is the result yes, has_popcount set to yes and
dnl FLINT_HAS_POPCNT defined to 1. Otherwise the result is no and neither the
dnl shell variable nor the macro is set.
AC_MSG_CHECKING([if $CC has popcount intrinsics])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([],[long int x = __builtin_popcountl(3);])],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([],[long long int x = __builtin_popcountll(3);])],
[AC_MSG_RESULT([yes])
has_popcount="yes"
AC_DEFINE(FLINT_HAS_POPCNT,1,[Define if compiler has popcount intrinsics])],
[AC_MSG_RESULT([no])]
)],
[AC_MSG_RESULT([no])]
)

dnl Probe for the count-leading-zeros builtins. A test program using
dnl __builtin_clzl is compiled first and, if that succeeds, one using
dnl __builtin_clzll. Only when both compile is the result yes, has_clz set
dnl to yes and FLINT_HAS_CLZ defined to 1. Otherwise the result is no and
dnl neither the shell variable nor the macro is set.
AC_MSG_CHECKING([if $CC has CLZ intrinsics])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([],[long int x = __builtin_clzl(3);])],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([],[long long int x = __builtin_clzll(3);])],
[AC_MSG_RESULT([yes])
has_clz="yes"
AC_DEFINE(FLINT_HAS_CLZ,1,[Define if compiler has CLZ intrinsics])],
[AC_MSG_RESULT([no])]
)],
[AC_MSG_RESULT([no])]
)

dnl Probe for the count-trailing-zeros builtins. A test program using
dnl __builtin_ctzl is compiled first and, if that succeeds, one using
dnl __builtin_ctzll. Only when both compile is the result yes, has_ctz set
dnl to yes and FLINT_HAS_CTZ defined to 1. Otherwise the result is no and
dnl neither the shell variable nor the macro is set.
AC_MSG_CHECKING([if $CC has CTZ intrinsics])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([],[long int x = __builtin_ctzl(3);])],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([],[long long int x = __builtin_ctzll(3);])],
[AC_MSG_RESULT([yes])
has_ctz="yes"
AC_DEFINE(FLINT_HAS_CTZ,1,[Define if compiler has CTZ intrinsics])],
[AC_MSG_RESULT([no])]
)],
[AC_MSG_RESULT([no])]
)

dnl Probe for __builtin_constant_p by compiling a one-line test program that
dnl calls it. On success the result is yes and FLINT_HAVE_CONSTANT_P is
dnl defined to 1. On failure the result is no and the macro stays undefined.
AC_MSG_CHECKING([if $CC has __builtin_constant_p])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([], [int a = __builtin_constant_p(0 == 1);])],
[AC_MSG_RESULT([yes])
AC_DEFINE(FLINT_HAVE_CONSTANT_P,1,[Define if compiler has __builtin_constant_p])],
[AC_MSG_RESULT([no])]
)

################################################################################
# CFLAGS
################################################################################
Expand Down
25 changes: 19 additions & 6 deletions doc/source/fmpz_mpoly.rst
Original file line number Diff line number Diff line change
Expand Up @@ -777,15 +777,28 @@ Internal Functions

.. function:: int fmpz_mpoly_divides_monagan_pearce(fmpz_mpoly_t poly1, const fmpz_mpoly_t poly2, const fmpz_mpoly_t poly3, const fmpz_mpoly_ctx_t ctx)

Set ``poly1`` to ``poly2`` divided by ``poly3`` and return 1 if the quotient
is exact. Otherwise return 0. The function uses the algorithm of Michael
Monagan and Roman Pearce. Note that the function
``fmpz_mpoly_div_monagan_pearce`` below may be much faster if the quotient
is known to be exact.

.. function:: int fmpz_mpoly_divides_heap_threaded(fmpz_mpoly_t Q, const fmpz_mpoly_t A, const fmpz_mpoly_t B, const fmpz_mpoly_ctx_t ctx)

Set ``poly1`` to ``poly2`` divided by ``poly3`` and return 1 if
the quotient is exact. Otherwise return 0. The function uses the algorithm
of Michael Monagan and Roman Pearce. Note that the function
``fmpz_mpoly_div_monagan_pearce`` below may be much faster if the
quotient is known to be exact.
Uses the same method as :func:`fmpz_mpoly_divides_monagan_pearce`,
but is also multi-threaded.

.. note::

This function is only defined if, during configuration, the machine is known
to be strongly ordered. To check at compile time whether this function is
defined, use the C preprocessor test
``#ifdef fmpz_mpoly_divides_heap_threaded``.

The threaded version takes an upper limit on the number of threads to use, while the first version always uses one thread.
Note that, if the system is known to be strongly ordered, the underlying
algorithm for this function is utilized in :func:`fmpz_mpoly_divides`.
Hence, you may find it easier to call :func:`fmpz_mpoly_divides` instead if
the C preprocessor is not available.

.. function:: slong _fmpz_mpoly_div_monagan_pearce(fmpz ** polyq, ulong ** expq, slong * allocq, const fmpz * poly2, const ulong * exp2, slong len2, const fmpz * poly3, const ulong * exp3, slong len3, slong bits, slong N, const mp_limb_t * cmpmask)

Expand Down
17 changes: 15 additions & 2 deletions doc/source/nmod_mpoly.rst
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,21 @@ The division functions assume that the modulus is prime.

.. function:: int nmod_mpoly_divides_heap_threaded(nmod_mpoly_t Q, const nmod_mpoly_t A, const nmod_mpoly_t B, const nmod_mpoly_ctx_t ctx)

Do the operation of ``nmod_mpoly_divides`` using a heap and multiple threads.
This function should only be called once ``global_thread_pool`` has been initialized.
Do the operation of ``nmod_mpoly_divides`` using the heap and multiple
threads. This function should only be called once ``global_thread_pool`` has
been initialized.

.. note::

This function is only defined if, during configuration, the machine is known
to be strongly ordered. To check at compile time whether this function is
defined, use the C preprocessor test
``#ifdef nmod_mpoly_divides_heap_threaded``.

Note that, if the system is known to be strongly ordered, the underlying
algorithm for this function is utilized in :func:`nmod_mpoly_divides`.
Hence, you may find it easier to call :func:`nmod_mpoly_divides` instead if
the C preprocessor is not available.


Greatest Common Divisor
Expand Down
13 changes: 2 additions & 11 deletions src/flint-config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,15 @@
/* Define if system is big endian. */
#undef FLINT_BIG_ENDIAN

/* Define if compiler has CLZ intrinsics */
#undef FLINT_HAS_CLZ

/* Define if compiler has CTZ intrinsics */
#undef FLINT_HAS_CTZ

/* Define if compiler has popcount intrinsics */
#undef FLINT_HAS_POPCNT
/* Define if system is strongly ordered */
#undef FLINT_KNOW_STRONG_ORDER

/* Define if system has AVX2 */
#undef FLINT_HAVE_AVX2

/* Define if system has AVX512 */
#undef FLINT_HAVE_AVX512

/* Define if compiler has __builtin_constant_p */
#undef FLINT_HAVE_CONSTANT_P

/* Define to use the fft_small module */
#undef FLINT_HAVE_FFT_SMALL

Expand Down
8 changes: 2 additions & 6 deletions src/flint.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,6 @@
# define FLINT_HAVE_FILE
#endif

/* FLINT_CONSTANT_P: maps to __builtin_constant_p when FLINT_HAVE_CONSTANT_P
   is defined; otherwise FLINT_CONSTANT_P(x) expands to the constant 0. */
#ifdef FLINT_HAVE_CONSTANT_P
# define FLINT_CONSTANT_P __builtin_constant_p
#else
# define FLINT_CONSTANT_P(x) 0
#endif

#ifdef FLINT_INLINES_C
# define FLINT_INLINE
#else
Expand Down Expand Up @@ -123,6 +117,7 @@ typedef struct __FLINT_FILE FLINT_FILE;
# define FLINT_OPTIMIZE_NESTED_2(part) FLINT_OPTIMIZE_NESTED_3(GCC optimize part)
# define FLINT_OPTIMIZE_NESTED_1(part) FLINT_OPTIMIZE_NESTED_2(#part)
# define FLINT_OPTIMIZE(x) FLINT_OPTIMIZE_NESTED_1(x)
# define FLINT_CONSTANT_P __builtin_constant_p
# define FLINT_UNREACHABLE __builtin_unreachable()
#else
# define __attribute__(x)
Expand All @@ -143,6 +138,7 @@ typedef struct __FLINT_FILE FLINT_FILE;
# define FLINT_PUSH_OPTIONS
# define FLINT_POP_OPTIONS
# define FLINT_OPTIMIZE(x)
# define FLINT_CONSTANT_P(x) 0
#endif

#if defined(__cplusplus)
Expand Down
1 change: 1 addition & 0 deletions src/fmpq_poly/test/t-resultant.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "fmpq_poly.h"

/* When __GNUC__ is defined: save the current diagnostic state, then disable
   the -Woverlength-strings warning from this point on.
   NOTE(review): a matching "#pragma GCC diagnostic pop" is presumably issued
   later in this file -- confirm. */
#ifdef __GNUC__
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Woverlength-strings"
#endif

Expand Down
1 change: 1 addition & 0 deletions src/fmpq_poly/test/t-resultant_div.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "fmpq_poly.h"

/* When __GNUC__ is defined: save the current diagnostic state, then disable
   the -Woverlength-strings warning from this point on.
   NOTE(review): a matching "#pragma GCC diagnostic pop" is presumably issued
   later in this file -- confirm. */
#ifdef __GNUC__
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Woverlength-strings"
#endif

Expand Down
4 changes: 4 additions & 0 deletions src/fmpz_mpoly.h
Original file line number Diff line number Diff line change
Expand Up @@ -770,12 +770,16 @@ int fmpz_mpoly_divides(fmpz_mpoly_t Q,
int fmpz_mpoly_divides_monagan_pearce(fmpz_mpoly_t Q,
const fmpz_mpoly_t A, const fmpz_mpoly_t B, const fmpz_mpoly_ctx_t ctx);

/*
   The threaded divisibility functions are only declared when
   FLINT_KNOW_STRONG_ORDER evaluates to non-zero.  Each function name is also
   defined as a macro expanding to itself, so that other code can detect
   whether the function is available with "#ifdef <function name>".
*/
#if FLINT_KNOW_STRONG_ORDER
#define fmpz_mpoly_divides_heap_threaded fmpz_mpoly_divides_heap_threaded
int fmpz_mpoly_divides_heap_threaded(fmpz_mpoly_t Q,
const fmpz_mpoly_t A, const fmpz_mpoly_t B, const fmpz_mpoly_ctx_t ctx);

/* Variant that additionally takes an array of thread pool handles and its
   length. */
#define _fmpz_mpoly_divides_heap_threaded_pool _fmpz_mpoly_divides_heap_threaded_pool
int _fmpz_mpoly_divides_heap_threaded_pool(fmpz_mpoly_t Q,
const fmpz_mpoly_t A, const fmpz_mpoly_t B, const fmpz_mpoly_ctx_t ctx,
const thread_pool_handle * handles, slong num_handles);
#endif

slong _fmpz_mpoly_divides_array(fmpz ** poly1, ulong ** exp1,
slong * alloc, const fmpz * poly2, const ulong * exp2, slong len2,
Expand Down
25 changes: 24 additions & 1 deletion src/fmpz_mpoly/divides.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/

#include "thread_support.h"
#include "fmpz_mpoly.h"

#ifdef _fmpz_mpoly_divides_heap_threaded_pool

#include "thread_support.h"

int fmpz_mpoly_divides(
fmpz_mpoly_t Q,
const fmpz_mpoly_t A,
Expand Down Expand Up @@ -52,4 +55,24 @@ int fmpz_mpoly_divides(

return divides;
}
#else
/*
    Variant of fmpz_mpoly_divides compiled when
    _fmpz_mpoly_divides_heap_threaded_pool is not defined.

    Throws FLINT_DIVZERO if B has length zero.  If A has length zero, Q is set
    to zero and 1 is returned.  In every other case the work is handed to
    fmpz_mpoly_divides_monagan_pearce and its return value is passed through.
*/
int fmpz_mpoly_divides(
    fmpz_mpoly_t Q,
    const fmpz_mpoly_t A,
    const fmpz_mpoly_t B,
    const fmpz_mpoly_ctx_t ctx)
{
    /* The divisor is checked before anything else. */
    if (B->length == 0)
        flint_throw(FLINT_DIVZERO, "Divide by zero in fmpz_mpoly_divides");

    /* Non-zero dividend: defer to the Monagan-Pearce routine. */
    if (A->length != 0)
        return fmpz_mpoly_divides_monagan_pearce(Q, A, B, ctx);

    /* Zero dividend: the quotient is zero and the division is reported exact. */
    fmpz_mpoly_zero(Q, ctx);
    return 1;
}
#endif
Loading

0 comments on commit d1f9151

Please sign in to comment.