Skip to content

Commit

Permalink
Add support for Windows ARM64
Browse files Browse the repository at this point in the history
Signed-off-by: Anthony Roberts <[email protected]>
  • Loading branch information
anthony-linaro committed Nov 7, 2024
1 parent 9bc5b8e commit 53fb3d3
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 17 deletions.
8 changes: 5 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ option(OCIO_USE_AVX2 "Specify whether to enable AVX2 CPU performance optimizatio
option(OCIO_USE_AVX512 "Specify whether to enable AVX512 CPU performance optimizations" ${OCIO_BUILD_ENABLE_OPTIMIZATIONS_AVX})
option(OCIO_USE_F16C "Specify whether to enable F16C CPU performance optimizations" ${OCIO_BUILD_ENABLE_OPTIMIZATIONS_F16C})

if (APPLE)
if (APPLE OR WIN32)
# TODO: Revisit whether that option is necessary.
option(OCIO_USE_SSE2NEON "Specify whether to enable SSE CPU performance optimizations using SSE2NEON for Apple ARM architecture" ON)
mark_as_advanced(OCIO_USE_SSE2NEON)
Expand Down Expand Up @@ -332,8 +332,10 @@ if(OCIO_USE_SIMD AND OCIO_USE_SSE2NEON AND COMPILER_SUPPORTS_ARM_NEON)
add_library(sse2neon INTERFACE)
# Add the include directories to the target.
target_include_directories(sse2neon INTERFACE "${sse2neon_INCLUDE_DIR}")
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
if(NOT MSVC)
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
endif()
endif()
endif()

Expand Down
8 changes: 5 additions & 3 deletions share/cmake/modules/install/Installsse2neon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ include(FetchContent)
set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/ext/build/sse2neon")
FetchContent_Declare(sse2neon
GIT_REPOSITORY https://github.com/DLTcollab/sse2neon.git
GIT_TAG v1.6.0
GIT_TAG 227cc413fb2d50b2a10073087be96b59d5364aea
)

# FetchContent_MakeAvailable is not available until CMake 3.14+.
Expand All @@ -38,6 +38,8 @@ if(NOT sse2neon_POPULATED)
add_library(sse2neon INTERFACE)
# Add the include directories to the target.
target_include_directories(sse2neon INTERFACE "${sse2neon_INCLUDE_DIR}")
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
if(NOT MSVC)
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
endif()
endif()
10 changes: 8 additions & 2 deletions share/cmake/utils/CheckSupportSSEUsingSSE2NEON.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ include(CheckCXXSourceCompiles)
set(_cmake_required_flags_orig "${CMAKE_REQUIRED_FLAGS}")
set(_cmake_required_includes_orig "${CMAKE_REQUIRED_INCLUDES}")
set(_cmake_osx_architectures_orig "${CMAKE_OSX_ARCHITECTURES}")
set(_cmake_cxx_flags_orig "${CMAKE_CXX_FLAGS}")

if(APPLE AND COMPILER_SUPPORTS_ARM_NEON)
if(MSVC)
    # sse2neon needs MSVC's conforming ("new") preprocessor, so the compile check
    # below must be run with it enabled. Append instead of overwriting so the
    # check still sees the flags already present in CMAKE_CXX_FLAGS; the original
    # value is saved above and restored at the end of this module.
    string(APPEND CMAKE_CXX_FLAGS " /Zc:preprocessor")
endif()

if((APPLE OR WIN32) AND COMPILER_SUPPORTS_ARM_NEON)

if("${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64;x86_64" OR
"${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86_64;arm64")
Expand Down Expand Up @@ -63,8 +68,9 @@ endif()
set(CMAKE_REQUIRED_FLAGS "${_cmake_required_flags_orig}")
set(CMAKE_REQUIRED_INCLUDES "${_cmake_required_includes_orig}")
set(CMAKE_OSX_ARCHITECTURES "${_cmake_osx_architectures_orig}")
set(CMAKE_CXX_FLAGS "${_cmake_cxx_flags_orig}")

unset(_cmake_required_flags_orig)
unset(_cmake_required_includes_orig)
unset(_cmake_osx_architectures_orig)

unset(_cmake_cxx_flags_orig)
3 changes: 3 additions & 0 deletions share/cmake/utils/CompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ if(OCIO_USE_SIMD)
# Only keep sse2neon enabled when the SSE-through-sse2neon compile check passes.
# CheckSupportSSEUsingSSE2NEON is expected to set COMPILER_SUPPORTS_SSE_WITH_SSE2NEON.
if(OCIO_USE_SSE2NEON AND COMPILER_SUPPORTS_ARM_NEON)
    include(CheckSupportSSEUsingSSE2NEON)
    if(COMPILER_SUPPORTS_SSE_WITH_SSE2NEON)
        # /Zc:preprocessor is an MSVC-style option; do not hand it to other compilers.
        if(MSVC)
            # Enable the "new" preprocessor, to more closely match Clang/GCC, required for sse2neon
            set(PLATFORM_COMPILE_OPTIONS "${PLATFORM_COMPILE_OPTIONS};/Zc:preprocessor")
        endif()
    else()
        # The check failed: disable sse2neon rather than build with it.
        set(OCIO_USE_SSE2NEON OFF)
    endif()
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/CPUInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ CPUInfo::CPUInfo()
}
}

#elif defined(__aarch64__) // ARM Processor or Apple ARM.
#elif defined(__aarch64__) || defined(_M_ARM64) // ARM Processor or Apple ARM.

CPUInfo::CPUInfo()
{
Expand Down
4 changes: 2 additions & 2 deletions src/OpenColorIO/CPUInfoConfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#cmakedefine01 OCIO_ARCH_X86_32

// Relevant only for arm64 architecture.
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#cmakedefine01 OCIO_USE_SSE2NEON
#else
#define OCIO_USE_SSE2NEON 0
Expand All @@ -23,7 +23,7 @@

// Building for x86_64 processor on a non-ARM host architecture
// OR Building on/for an ARM architecture and using SSE2NEON.
#if (OCIO_ARCH_X86 && !defined(__aarch64__)) || (defined(__aarch64__) && OCIO_USE_SSE2NEON)
#if (OCIO_ARCH_X86 && !defined(__aarch64__)) || ((defined(__aarch64__) || defined(_M_ARM64)) && OCIO_USE_SSE2NEON)
#cmakedefine01 OCIO_USE_SSE2
#cmakedefine01 OCIO_USE_SSE3
#cmakedefine01 OCIO_USE_SSSE3
Expand Down
6 changes: 3 additions & 3 deletions src/OpenColorIO/SSE.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#if OCIO_USE_SSE2

// Include the appropriate SIMD intrinsics header based on the architecture (Intel vs. ARM).
#if !defined(__aarch64__)
#if !defined(__aarch64__) && !defined(_M_ARM64)
#if OCIO_USE_SSE2
#include <emmintrin.h>
#endif
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
// ARM architecture A64 (ARM64)
#if OCIO_USE_SSE2NEON
#include <sse2neon.h>
Expand All @@ -30,7 +30,7 @@ namespace OCIO_NAMESPACE
// Note that it is important that the code below this ifdef stays in the OCIO_NAMESPACE since
// it is redefining two of the functions from sse2neon.

#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#if OCIO_USE_SSE2NEON
// Using vmaxnmq_f32 and vminnmq_f32 rather than sse2neon's vmaxq_f32 and vminq_f32 due to
// NaN handling. This doesn't seem to be significantly slower than the default sse2neon behavior.
Expand Down
6 changes: 3 additions & 3 deletions src/OpenColorIO/SSE2.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#if OCIO_USE_SSE2

// Include the appropriate SIMD intrinsics header based on the architecture (Intel vs. ARM).
#if !defined(__aarch64__)
#if !defined(__aarch64__) && !defined(_M_ARM64)
#include <emmintrin.h>
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
// ARM architecture A64 (ARM64)
#if OCIO_USE_SSE2NEON
#include <sse2neon.h>
Expand All @@ -30,7 +30,7 @@ namespace OCIO_NAMESPACE
// Note that it is important that the code below this ifdef stays in the OCIO_NAMESPACE since
// it is redefining two of the functions from sse2neon.

#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#if OCIO_USE_SSE2NEON
// Using vmaxnmq_f32 and vminnmq_f32 rather than sse2neon's vmaxq_f32 and vminq_f32 due to
// NaN handling. This doesn't seem to be significantly slower than the default sse2neon behavior.
Expand Down

0 comments on commit 53fb3d3

Please sign in to comment.