Skip to content

Commit

Permalink
avx512 and sse4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
lyciumlee committed Jan 12, 2022
0 parents commit 77b97b9
Show file tree
Hide file tree
Showing 3,167 changed files with 957,877 additions and 0 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
11 changes: 11 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# quick compile yafu 2.0
Launch compile.sh!

Warning: this yafu build uses AVX-512 and SSE4.1 by default.


# references
https://www.mersenneforum.org/showthread.php?t=23087
https://www.mersenneforum.org/showthread.php?t=26681
https://github.com/bbuhrow/yafu

25 changes: 25 additions & 0 deletions avx-ecm/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
BSD 2-Clause License

Copyright (c) 2019, bbuhrow
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
193 changes: 193 additions & 0 deletions avx-ecm/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#
# Copyright (c) 2019, Ben Buhrow
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the FreeBSD Project.
#
#

#--------------------------- flags -------------------------
# Toolchain selector. It is compared against mingw, gcc and gcc730 in the
# "make options" section; any other value falls through to the icc settings.
COMPILER = gcc
CC = gcc

# Name of the linked executable. The KNL and PROFILE options append a suffix.
BINNAME = avx-ecm

# Baseline flag sets, extended by the option handling that follows.
# Stricter warnings that may be added to WARN_FLAGS by hand: -W -Wconversion
OPT_FLAGS = -O3
WARN_FLAGS = -Wall
INC = -I.
LIBS =


#--------------------------- make options -------------------------

# Per-toolchain settings: the compiler to invoke plus the include and library
# search paths used to locate GMP. Apart from the gcc branch, which points at
# a sibling ../gmp-6.2.1 build tree, the paths are site-specific absolute paths.
ifeq ($(COMPILER),mingw)
  # NOTE: Using -fcall-used instead of -ffixed is much better and still works.
  # -fcall-used simply prevents the named registers from being saved/restored
  # while -ffixed prevents them from being used at all. The code benefits a
  # lot from being able to use all 32 zmm registers.
  CC = gcc
  # Expands to -fcall-used-xmm16 ... -fcall-used-xmm31, in ascending order.
  CFLAGS += $(addprefix -fcall-used-xmm,16 17 18 19 20 21 22 23)
  CFLAGS += $(addprefix -fcall-used-xmm,24 25 26 27 28 29 30 31)
  INC = -I. -I/y/projects/factoring/gmp/include/mingw
  LIBS = -L/y/projects/factoring/gmp/lib/mingw/x86_64
else ifeq ($(COMPILER),gcc)
  CC = gcc
  INC = -I. -I../gmp-6.2.1
  LIBS = -L../gmp-6.2.1/.libs
else ifeq ($(COMPILER),gcc730)
  CC = gcc
  INC = -I. -I/sppdg/scratch/buhrow/projects/gmp_install/include
  LIBS = -L/sppdg/scratch/buhrow/projects/gmp_install/install/lib
else
  # Any other COMPILER value selects the Intel compiler.
  CC = icc
  INC = -I. -I/sppdg/scratch/buhrow/projects/gmp_install/include
  LIBS = -L/sppdg/scratch/buhrow/projects/gmp_install/lib
endif

# Optional build-time parameters. Each one is forwarded to the compiler as a
# -D define, but only when it has been given a non-empty value
# (e.g. "make DIGITBITS=32").
ifdef MAXBITS
  CFLAGS += -DMAXBITS=$(MAXBITS)
endif

ifdef DIGITBITS
  CFLAGS += -DDIGITBITS=$(DIGITBITS)
endif

# -mavx is always enabled; the target-specific switches below add to it.
OPT_FLAGS += -mavx

# Object files normally end in .o. KNL builds use .ko instead, so their
# objects do not share file names with those of a regular build.
OBJ_EXT = .o

# KNL=1: build for Knights Landing. The architecture switch depends on the
# selected toolchain; the TARGET_KNL define and the _knl binary suffix do not.
ifeq ($(KNL),1)
  ifeq ($(COMPILER),icc)
    CFLAGS += -xMIC-AVX512
  else
    CFLAGS += -march=knl
  endif
  CFLAGS += -DTARGET_KNL
  BINNAME := $(addsuffix _knl,$(BINNAME))
  OBJ_EXT = .ko
endif

# SKYLAKEX=1, ICELAKE=1, TIGERLAKE=1: select the matching -march value and
# feature defines. Each of them also sets the object extension back to .o.
ifeq ($(SKYLAKEX),1)
  OPT_FLAGS += -march=skylake-avx512 -DSKYLAKEX
  OBJ_EXT = .o
endif

ifeq ($(ICELAKE),1)
  OPT_FLAGS += -march=icelake-client -DIFMA -DICELAKE
  OBJ_EXT = .o
endif

ifeq ($(TIGERLAKE),1)
  OPT_FLAGS += -march=tigerlake -DIFMA
  OBJ_EXT = .o
endif


# NO_THREADS=1 passes -DNO_THREADS to the compiler. Every other value
# (including unset) is normalised to 0 so that the test further down can
# compare against a definite value.
ifneq ($(NO_THREADS),1)
  NO_THREADS = 0
else
  CFLAGS += -DNO_THREADS
endif

# Extra flags used only when compiling with icc. KNL builds always get -mkl;
# other builds get two additional library search paths, and -mkl only when
# threading has not been disabled.
ifeq ($(CC),icc)
  ifneq ($(KNL),1)
    CFLAGS += -L/usr/lib/gcc/x86_64-redhat-linux/4.4.4 -L/lib
    ifeq ($(NO_THREADS),0)
      CFLAGS += -mkl
    endif
  else
    CFLAGS += -mkl
  endif
endif

# PROFILE=1: compile and link with -pg and mark the binary with a _prof suffix.
ifeq ($(PROFILE),1)
  BINNAME := $(addsuffix _prof,$(BINNAME))
  CFLAGS += -pg
endif


# Assemble the final compiler flags: debug info and SSE4.1 are always on,
# followed by the optimisation, warning and include settings chosen above.
CFLAGS += -g -msse4.1 $(OPT_FLAGS) $(WARN_FLAGS) $(INC)

# Libraries. The default build links libm, libgmp and libpthread by name.
# STATIC=1 instead adds -static-intel and links one specific libgmp.a archive
# through a site-specific absolute path.
ifneq ($(STATIC),1)
  LIBS += -lm -lgmp -lpthread
else
  CFLAGS += -static-intel
  LIBS += -L/usr/lib/x86_64-redhat-linux6E/lib64/
  LIBS += /sppdg/scratch/buhrow/projects/gmp_install/lib/libgmp.a
endif

#--------------------------- file lists -------------------------
# C sources of the program: the files living in the eratosthenes/
# subdirectory first, then the ones next to this Makefile.
SRCS = \
  $(addprefix eratosthenes/,presieve.c count.c offsets.c primes.c roots.c) \
  $(addprefix eratosthenes/,linesieve.c soe.c tiny.c worker.c soe_util.c) \
  $(addprefix eratosthenes/,wrapper.c) \
  threadpool.c main.c ecm.c util.c \
  vecarith.c vecarith52.c vec_common.c \
  calc.c queue.c

# One object per source; the extension follows OBJ_EXT (.o, or .ko for KNL).
OBJS = $(patsubst %.c,%$(OBJ_EXT),$(SRCS))



#---------------------------Header file lists -------------------------
# Headers listed as prerequisites of every object file by the pattern rule
# in the "Build Rules" section: touching any of them recompiles all sources.
HEAD = avx_ecm.h eratosthenes/soe.h threadpool.h util.h calc.h queue.h

#---------------------------Make Targets -------------------------

# 'all' and 'clean' name actions, not files. Declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

# Default target: compile every object, rebuild the static library
# libavxecm.a from scratch, then link the executable $(BINNAME).
all: $(OBJS)
	rm -f libavxecm.a
	ar r libavxecm.a $(OBJS)
	ranlib libavxecm.a
	$(CC) $(CFLAGS) $(OBJS) -o $(BINNAME) libavxecm.a $(LIBS)


# Remove the object files and the static library created by 'all'.
# Only objects with the current OBJ_EXT are removed, so a KNL build has to be
# cleaned with KNL=1 as well.
clean:
	rm -f $(OBJS) libavxecm.a

#---------------------------Build Rules -------------------------


# Compile one C source into the object of the same stem. Every object also
# depends on all headers in HEAD, so a change to any listed header triggers a
# full recompile. The recipe line must begin with a TAB character.
%$(OBJ_EXT): %.c $(HEAD)
	$(CC) $(CFLAGS) -c -o $@ $<

37 changes: 37 additions & 0 deletions avx-ecm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# avx-ecm
Computes parallel curves of the ECM (Elliptic Curve Method) factoring algorithm using AVX-512 vector arithmetic. On CPUs where AVX512 is supported, AVX-ECM has been observed to have about 1.5 to 2.5 times the throughput (curves/sec) of GMP-ECM, for numbers up to a thousand bits or so. Large numbers will be more efficient with GMP-ECM because AVX-ECM so far does not use sub-quadratic multiplier algorithms.

Both stage 1 and stage 2 are computed in parallel, but stage 2 is just the standard continuation with pairing (by default, B2=100xB1).

The program is also multi-threaded using the pthreads library.

It will output a savefile after stage 1 that will work with GMP-ECM stage 2, if desired.
e.g.:
ecm -resume save_b1.txt 1000000 < input.txt

Compile on linux using either icc or gcc-7.3.0 or above (I've only tested icc and gcc-7.3.0).

e.g.:
make COMPILER=gcc730 SKYLAKEX=1

COMPILER options currently are gcc730 (which invokes gcc-7.3.0), gcc (which invokes gcc), mingw (using MSYS2 and mingw64 on windows) and icc (icc). Edit the makefile if you have some other compiler name (ymmv).

Use SKYLAKEX=1 to build for skylakex CPUs. Alternatively use KNL=1 to build for Knight's Landing Xeon Phi systems.

The program will run constant-time curves in steps of 208 bits (meaning, e.g., that 417-bit curves take the same amount of time as 623-bit curves) and 8 curves are performed in parallel per thread.

You can optionally specify DIGITBITS=32 during make. Doing so results in constant-time curves in steps of 128 bits and 16 curves performed in parallel per thread.

The 52-bit version is generally faster (and the default) but for some sizes a 32-bit version can have higher throughput (because of the 208-bit jumps between sizes).

Command line:
avx-ecm input curves B1 threads B2

The input number can be specified using these operators if desired: +,-,*,/,^,%,# (primorial), ! (factorial), fib(), and luc()

Example:
./avx-ecm "fib(791)/13/677/216416017" 8 1000000 1

See https://www.mersenneforum.org/showthread.php?t=25056 for more info.

Happy factoring!
Binary file added avx-ecm/avx-ecm
Binary file not shown.
Binary file added avx-ecm/avx-ecm-32.exe
Binary file not shown.
Binary file added avx-ecm/avx-ecm-52-icc-static
Binary file not shown.
Binary file added avx-ecm/avx-ecm-52.exe
Binary file not shown.
31 changes: 31 additions & 0 deletions avx-ecm/avx-ecm-vc/avx-ecm-vc.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28307.271
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "avx-ecm-vc", "avx-ecm-vc.vcxproj", "{47524AD9-2D20-4AD2-B7FE-521EBE485A42}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Debug|x64.ActiveCfg = Debug|x64
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Debug|x64.Build.0 = Debug|x64
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Debug|x86.ActiveCfg = Debug|Win32
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Debug|x86.Build.0 = Debug|Win32
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Release|x64.ActiveCfg = Release|x64
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Release|x64.Build.0 = Release|x64
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Release|x86.ActiveCfg = Release|Win32
{47524AD9-2D20-4AD2-B7FE-521EBE485A42}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {D937E06E-DE8E-4AFA-B0A3-6D2B3D4DF0D3}
EndGlobalSection
EndGlobal
Loading

0 comments on commit 77b97b9

Please sign in to comment.