Skip to content

Commit 6e25fe4

Browse files
slaren, angt
authored and committed
ggml : fix arm build (ggml-org#10890)
* ggml: GGML_NATIVE uses -mcpu=native on ARM

Signed-off-by: Adrien Gallouët <[email protected]>

* ggml: Show detected features with GGML_NATIVE

Signed-off-by: Adrien Gallouët <[email protected]>

* remove msvc support, add GGML_CPU_ARM_ARCH option

* disable llamafile in android example

* march -> mcpu, skip adding feature macros

ggml-ci

---------

Signed-off-by: Adrien Gallouët <[email protected]>
Co-authored-by: Adrien Gallouët <[email protected]>
1 parent 6212bd2 commit 6e25fe4

File tree

5 files changed

+65
-91
lines changed

5 files changed

+65
-91
lines changed

examples/llama.android/llama/build.gradle.kts

+1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ android {
1919
externalNativeBuild {
2020
cmake {
2121
arguments += "-DLLAMA_BUILD_COMMON=ON"
22+
arguments += "-DGGML_LLAMAFILE=OFF"
2223
arguments += "-DCMAKE_BUILD_TYPE=Release"
2324
cppFlags += listOf()
2425
arguments += listOf()

ggml/CMakeLists.txt

+6-5
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
7474
endif()
7575

7676
# general
77-
option(GGML_STATIC "ggml: static link libraries" OFF)
78-
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
79-
option(GGML_LTO "ggml: enable link time optimization" OFF)
80-
option(GGML_CCACHE "ggml: use ccache if available" ON)
77+
option(GGML_STATIC "ggml: static link libraries" OFF)
78+
option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
79+
option(GGML_LTO "ggml: enable link time optimization" OFF)
80+
option(GGML_CCACHE "ggml: use ccache if available" ON)
8181

8282
# debug
8383
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
@@ -120,8 +120,9 @@ endif()
120120
option(GGML_LASX "ggml: enable lasx" ON)
121121
option(GGML_LSX "ggml: enable lsx" ON)
122122
option(GGML_RVV "ggml: enable rvv" ON)
123-
option(GGML_SVE "ggml: enable SVE" OFF)
123+
124124
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
125+
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
125126

126127

127128
if (WIN32)

ggml/src/ggml-cpu/CMakeLists.txt

+51-86
Original file line number | Diff line number | Diff line change
@@ -74,112 +74,77 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
7474

7575
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
7676
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
77-
(NOT CMAKE_OSX_ARCHITECTURES AND
78-
NOT CMAKE_GENERATOR_PLATFORM_LWR AND
77+
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
7978
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
8079

8180
message(STATUS "ARM detected")
8281

83-
if (MSVC)
84-
list(APPEND ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead
85-
list(APPEND ARCH_DEFINITIONS __ARM_NEON)
86-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FMA)
87-
88-
set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
89-
string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
90-
91-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
92-
if (GGML_COMPILER_SUPPORT_DOTPROD)
93-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
94-
95-
message(STATUS "ARM feature DOTPROD enabled")
96-
endif ()
97-
98-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
99-
100-
if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
101-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
102-
103-
message(STATUS "ARM feature MATMUL_INT8 enabled")
104-
endif ()
105-
106-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
107-
if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
108-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
109-
110-
message(STATUS "ARM feature FP16_VECTOR_ARITHMETIC enabled")
111-
endif ()
82+
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
83+
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
84+
else()
85+
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
86+
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
87+
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
88+
endif()
11289

113-
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
114-
elseif (APPLE)
11590
if (GGML_NATIVE)
116-
set(USER_PROVIDED_MARCH FALSE)
117-
foreach(flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
118-
if ("${${flag_var}}" MATCHES "-march=[a-zA-Z0-9+._-]+")
119-
set(USER_PROVIDED_MARCH TRUE)
120-
break()
121-
endif()
122-
endforeach()
123-
124-
if (NOT USER_PROVIDED_MARCH)
125-
set(MARCH_FLAGS "-march=armv8.2a")
126-
127-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
128-
if (GGML_COMPILER_SUPPORT_DOTPROD)
129-
set(MARCH_FLAGS "${MARCH_FLAGS}+dotprod")
130-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
91+
list(APPEND ARCH_FLAGS -mcpu=native)
13192

132-
message(STATUS "ARM feature DOTPROD enabled")
133-
endif ()
93+
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
13494

135-
set(TEST_I8MM_FLAGS "-march=armv8.2a+i8mm")
95+
# -mcpu=native does not always enable all the features in some compilers,
96+
# so we check for them manually and enable them if available
13697

137-
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
138-
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS}")
98+
include(CheckCXXSourceRuns)
13999

140-
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
141-
if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142-
set(MARCH_FLAGS "${MARCH_FLAGS}+i8mm")
143-
list(APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
100+
set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+dotprod")
101+
check_cxx_source_runs(
102+
"#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
103+
GGML_COMPILER_SUPPORT_DOTPROD)
104+
if (GGML_COMPILER_SUPPORT_DOTPROD)
105+
set(ARCH_FLAGS "${ARCH_FLAGS}+dotprod")
106+
endif()
144107

145-
message(STATUS "ARM feature MATMUL_INT8 enabled")
146-
endif ()
108+
set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+i8mm")
109+
check_cxx_source_runs(
110+
"#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
111+
GGML_COMPILER_SUPPORT_I8MM)
112+
if (GGML_COMPILER_SUPPORT_I8MM)
113+
set(ARCH_FLAGS "${ARCH_FLAGS}+i8mm")
114+
endif()
147115

148-
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
116+
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
149117

150-
list(APPEND ARCH_FLAGS "${MARCH_FLAGS}")
151-
endif ()
152-
endif ()
153-
else()
154-
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155-
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
156-
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
157-
endif()
158-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
159-
# Raspberry Pi 1, Zero
160-
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161-
endif()
162-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
163-
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
164-
# Android armeabi-v7a
165-
list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166-
else()
167-
# Raspberry Pi 2
168-
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
118+
else()
119+
if (GGML_CPU_ARM_ARCH)
120+
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
169121
endif()
170122
endif()
171-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
172-
# Android arm64-v8a
173-
# Raspberry Pi 3, 4, Zero 2 (32-bit)
174-
list(APPEND ARCH_FLAGS -mno-unaligned-access)
175-
endif()
176-
if (GGML_SVE)
177-
list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
123+
124+
# show enabled features
125+
execute_process(
126+
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
127+
INPUT_FILE "/dev/null"
128+
OUTPUT_VARIABLE ARM_FEATURE
129+
RESULT_VARIABLE ARM_FEATURE_RESULT
130+
)
131+
if (ARM_FEATURE_RESULT)
132+
message(FATAL_ERROR "Failed to get ARM features")
133+
else()
134+
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
135+
string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
136+
if (NOT ${feature_pos} EQUAL -1)
137+
message(STATUS "ARM feature ${feature} enabled")
138+
endif()
139+
endforeach()
178140
endif()
179141
endif()
180142
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
181143
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
182144
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
145+
146+
message(STATUS "x86 detected")
147+
183148
if (MSVC)
184149
# instruction set detection for MSVC only
185150
if (GGML_NATIVE)

ggml/src/ggml-cpu/ggml-cpu.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -522,6 +522,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
522522
if (ggml_cpu_has_sve()) {
523523
features.push_back({ "SVE", "1" });
524524
}
525+
if (ggml_cpu_has_dotprod()) {
526+
features.push_back({ "DOTPROD", "1" });
527+
}
528+
if (ggml_cpu_has_matmul_int8()) {
529+
features.push_back({ "MATMUL_INT8", "1" });
530+
}
525531
if (ggml_cpu_get_sve_cnt() > 0) {
526532
static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
527533
features.push_back({ "SVE_CNT", sve_cnt.c_str() });

ggml/src/ggml-cpu/llamafile/sgemm.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,7 @@ template <> inline float32x4_t load(const float *p) {
204204
return vld1q_f32(p);
205205
}
206206
#if !defined(_MSC_VER)
207+
// FIXME: this should check for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
207208
template <> inline float16x8_t load(const ggml_fp16_t *p) {
208209
return vld1q_f16((const float16_t *)p);
209210
}

0 commit comments

Comments
 (0)