@@ -74,112 +74,77 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
74
74
75
75
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
76
76
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
77
- (NOT CMAKE_OSX_ARCHITECTURES AND
78
- NOT CMAKE_GENERATOR_PLATFORM_LWR AND
77
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
79
78
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$" ))
80
79
81
80
message (STATUS "ARM detected" )
82
81
83
- if (MSVC )
84
- list (APPEND ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead
85
- list (APPEND ARCH_DEFINITIONS __ARM_NEON)
86
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_FMA)
87
-
88
- set (CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS} )
89
- string (JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2" )
90
-
91
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
92
- if (GGML_COMPILER_SUPPORT_DOTPROD)
93
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
94
-
95
- message (STATUS "ARM feature DOTPROD enabled" )
96
- endif ()
97
-
98
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
99
-
100
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
101
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
102
-
103
- message (STATUS "ARM feature MATMUL_INT8 enabled" )
104
- endif ()
105
-
106
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
107
- if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
108
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
109
-
110
- message (STATUS "ARM feature FP16_VECTOR_ARITHMETIC enabled" )
111
- endif ()
82
+ if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang" )
83
+ message (FATAL_ERROR "MSVC is not supported for ARM, use clang" )
84
+ else ()
85
+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
86
+ if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "" )
87
+ list (APPEND ARCH_FLAGS -mfp16-format=ieee)
88
+ endif ()
112
89
113
- set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV} )
114
- elseif (APPLE )
115
90
if (GGML_NATIVE)
116
- set (USER_PROVIDED_MARCH FALSE )
117
- foreach (flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
118
- if ("${${flag_var}}" MATCHES "-march=[a-zA-Z0-9+._-]+" )
119
- set (USER_PROVIDED_MARCH TRUE )
120
- break ()
121
- endif ()
122
- endforeach ()
123
-
124
- if (NOT USER_PROVIDED_MARCH)
125
- set (MARCH_FLAGS "-march=armv8.2a" )
126
-
127
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
128
- if (GGML_COMPILER_SUPPORT_DOTPROD)
129
- set (MARCH_FLAGS "${MARCH_FLAGS}+dotprod" )
130
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
91
+ list (APPEND ARCH_FLAGS -mcpu=native)
131
92
132
- message (STATUS "ARM feature DOTPROD enabled" )
133
- endif ()
93
+ set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
134
94
135
- set (TEST_I8MM_FLAGS "-march=armv8.2a+i8mm" )
95
+ # -mcpu=native does not always enable all the features in some compilers,
96
+ # so we check for them manually and enable them if available
136
97
137
- set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
138
- set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS}" )
98
+ include (CheckCXXSourceRuns)
139
99
140
- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
141
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142
- set (MARCH_FLAGS "${MARCH_FLAGS}+i8mm" )
143
- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
100
+ set (CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+dotprod" )
101
+ check_cxx_source_runs(
102
+ "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
103
+ GGML_COMPILER_SUPPORT_DOTPROD)
104
+ if (GGML_COMPILER_SUPPORT_DOTPROD)
105
+ set (ARCH_FLAGS "${ARCH_FLAGS}+dotprod" )
106
+ endif ()
144
107
145
- message (STATUS "ARM feature MATMUL_INT8 enabled" )
146
- endif ()
108
+ set (CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}+i8mm" )
109
+ check_cxx_source_runs(
110
+ "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
111
+ GGML_COMPILER_SUPPORT_I8MM)
112
+ if (GGML_COMPILER_SUPPORT_I8MM)
113
+ set (ARCH_FLAGS "${ARCH_FLAGS}+i8mm" )
114
+ endif ()
147
115
148
- set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
116
+ set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
149
117
150
- list (APPEND ARCH_FLAGS "${MARCH_FLAGS}" )
151
- endif ()
152
- endif ()
153
- else ()
154
- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155
- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "" )
156
- list (APPEND ARCH_FLAGS -mfp16-format=ieee)
157
- endif ()
158
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
159
- # Raspberry Pi 1, Zero
160
- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161
- endif ()
162
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
163
- if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android" )
164
- # Android armeabi-v7a
165
- list (APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166
- else ()
167
- # Raspberry Pi 2
168
- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
118
+ else ()
119
+ if (GGML_CPU_ARM_ARCH)
120
+ list (APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH} )
169
121
endif ()
170
122
endif ()
171
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
172
- # Android arm64-v8a
173
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
174
- list (APPEND ARCH_FLAGS -mno-unaligned-access)
175
- endif ()
176
- if (GGML_SVE)
177
- list (APPEND ARCH_FLAGS -march=armv8.6-a+sve)
123
+
124
+ # show enabled features
125
+ execute_process (
126
+ COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
127
+ INPUT_FILE "/dev/null"
128
+ OUTPUT_VARIABLE ARM_FEATURE
129
+ RESULT_VARIABLE ARM_FEATURE_RESULT
130
+ )
131
+ if (ARM_FEATURE_RESULT)
132
+ message (FATAL_ERROR "Failed to get ARM features" )
133
+ else ()
134
+ foreach (feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
135
+ string (FIND "${ARM_FEATURE} " "__ARM_FEATURE_${feature} 1" feature_pos)
136
+ if (NOT ${feature_pos} EQUAL -1)
137
+ message (STATUS "ARM feature ${feature} enabled" )
138
+ endif ()
139
+ endforeach ()
178
140
endif ()
179
141
endif ()
180
142
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
181
143
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
182
144
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$" ))
145
+
146
+ message (STATUS "x86 detected" )
147
+
183
148
if (MSVC )
184
149
# instruction set detection for MSVC only
185
150
if (GGML_NATIVE)
0 commit comments