Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Windows ARM64 #2089

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ option(OCIO_USE_AVX2 "Specify whether to enable AVX2 CPU performance optimizatio
option(OCIO_USE_AVX512 "Specify whether to enable AVX512 CPU performance optimizations" ${OCIO_BUILD_ENABLE_OPTIMIZATIONS_AVX})
option(OCIO_USE_F16C "Specify whether to enable F16C CPU performance optimizations" ${OCIO_BUILD_ENABLE_OPTIMIZATIONS_F16C})

if (APPLE)
if (APPLE OR WIN32)
# TODO: Revisit whether that option is necessary.
option(OCIO_USE_SSE2NEON "Specify whether to enable SSE CPU performance optimizations using SSE2NEON for Apple ARM architecture" ON)
mark_as_advanced(OCIO_USE_SSE2NEON)
Expand Down Expand Up @@ -332,8 +332,10 @@ if(OCIO_USE_SIMD AND OCIO_USE_SSE2NEON AND COMPILER_SUPPORTS_ARM_NEON)
add_library(sse2neon INTERFACE)
# Add the include directories to the target.
target_include_directories(sse2neon INTERFACE "${sse2neon_INCLUDE_DIR}")
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
# -Wno-unused-parameter is GCC/Clang option syntax, so it is only added when
# the compiler is not MSVC.
if(NOT MSVC)
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
endif()
endif()
endif()

Expand Down
8 changes: 5 additions & 3 deletions share/cmake/modules/install/Installsse2neon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ include(FetchContent)
set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/ext/build/sse2neon")
FetchContent_Declare(sse2neon
GIT_REPOSITORY https://github.com/DLTcollab/sse2neon.git
GIT_TAG v1.6.0
GIT_TAG 227cc413fb2d50b2a10073087be96b59d5364aea
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason you couldn't pick an official version number here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Long story short, we uncovered some issues with 1.7.0 in Blender on various platforms, and needed to update to a non-versioned commit, as they only version sse2neon once a year - the issues may or may not affect OCIO, but this is at least a known working and tested version that Blender uses.

)

# FetchContent_MakeAvailable is not available until CMake 3.14+.
Expand All @@ -38,6 +38,8 @@ if(NOT sse2neon_POPULATED)
add_library(sse2neon INTERFACE)
# Add the include directories to the target.
target_include_directories(sse2neon INTERFACE "${sse2neon_INCLUDE_DIR}")
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
# -Wno-unused-parameter is GCC/Clang option syntax, so it is only added when
# the compiler is not MSVC.
if(NOT MSVC)
# Ignore the warnings coming from sse2neon.h as they are false positives.
target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
endif()
endif()
10 changes: 8 additions & 2 deletions share/cmake/utils/CheckSupportSSEUsingSSE2NEON.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ include(CheckCXXSourceCompiles)
set(_cmake_required_flags_orig "${CMAKE_REQUIRED_FLAGS}")
set(_cmake_required_includes_orig "${CMAKE_REQUIRED_INCLUDES}")
set(_cmake_osx_architectures_orig "${CMAKE_OSX_ARCHITECTURES}")
set(_cmake_cxx_flags_orig "${CMAKE_CXX_FLAGS}")

if(APPLE AND COMPILER_SUPPORTS_ARM_NEON)
if(MSVC)
    # The sse2neon compile check needs MSVC's conforming ("new") preprocessor.
    # Append the switch instead of replacing CMAKE_CXX_FLAGS so the probe is
    # still compiled with the flags supplied by the user/toolchain. The saved
    # _cmake_cxx_flags_orig value is restored once the check has run.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:preprocessor")
endif()

if((APPLE OR WIN32) AND COMPILER_SUPPORTS_ARM_NEON)

if("${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64;x86_64" OR
"${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86_64;arm64")
Expand Down Expand Up @@ -63,8 +68,9 @@ endif()
set(CMAKE_REQUIRED_FLAGS "${_cmake_required_flags_orig}")
set(CMAKE_REQUIRED_INCLUDES "${_cmake_required_includes_orig}")
set(CMAKE_OSX_ARCHITECTURES "${_cmake_osx_architectures_orig}")
set(CMAKE_CXX_FLAGS "${_cmake_cxx_flags_orig}")

unset(_cmake_required_flags_orig)
unset(_cmake_required_includes_orig)
unset(_cmake_osx_architectures_orig)

unset(_cmake_cxx_flags_orig)
3 changes: 3 additions & 0 deletions share/cmake/utils/CompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ if(OCIO_USE_SIMD)
if(OCIO_USE_SSE2NEON AND COMPILER_SUPPORTS_ARM_NEON)
    # Probe whether SSE code compiles through sse2neon. The result is read from
    # COMPILER_SUPPORTS_SSE_WITH_SSE2NEON right after the module is included.
    include(CheckSupportSSEUsingSSE2NEON)
    if(COMPILER_SUPPORTS_SSE_WITH_SSE2NEON)
        if(MSVC)
            # Enable the "new" preprocessor, to more closely match Clang/GCC,
            # required for sse2neon. The switch is MSVC-specific, so it must
            # not be handed to other compilers.
            list(APPEND PLATFORM_COMPILE_OPTIONS "/Zc:preprocessor")
        endif()
    else()
        # sse2neon cannot be used with this compiler: disable the option so
        # the rest of the build does not rely on it.
        set(OCIO_USE_SSE2NEON OFF)
    endif()
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/OpenColorIO/CPUInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ CPUInfo::CPUInfo()
}
}

#elif defined(__aarch64__) // ARM Processor or Apple ARM.
#elif defined(__aarch64__) || defined(_M_ARM64) // ARM Processor or Apple ARM.
anthony-linaro marked this conversation as resolved.
Show resolved Hide resolved

CPUInfo::CPUInfo()
{
Expand Down
4 changes: 2 additions & 2 deletions src/OpenColorIO/CPUInfoConfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#cmakedefine01 OCIO_ARCH_X86_32

// Relevant only for arm64 architecture.
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#cmakedefine01 OCIO_USE_SSE2NEON
#else
#define OCIO_USE_SSE2NEON 0
Expand All @@ -23,7 +23,7 @@

// Building for x86_64 processor on a non-ARM host architecture
// OR Building on/for an ARM architecture and using SSE2NEON.
#if (OCIO_ARCH_X86 && !defined(__aarch64__)) || (defined(__aarch64__) && OCIO_USE_SSE2NEON)
#if (OCIO_ARCH_X86 && !defined(__aarch64__)) || ((defined(__aarch64__) || defined(_M_ARM64)) && OCIO_USE_SSE2NEON)
#cmakedefine01 OCIO_USE_SSE2
#cmakedefine01 OCIO_USE_SSE3
#cmakedefine01 OCIO_USE_SSSE3
Expand Down
6 changes: 3 additions & 3 deletions src/OpenColorIO/SSE.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#if OCIO_USE_SSE2

// Include the appropriate SIMD intrinsics header based on the architecture (Intel vs. ARM).
#if !defined(__aarch64__)
#if !defined(__aarch64__) && !defined(_M_ARM64)
#if OCIO_USE_SSE2
#include <emmintrin.h>
#endif
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
// ARM architecture A64 (ARM64)
#if OCIO_USE_SSE2NEON
#include <sse2neon.h>
Expand All @@ -30,7 +30,7 @@ namespace OCIO_NAMESPACE
// Note that it is important for the code below this ifdef stays in the OCIO_NAMESPACE since
// it is redefining two of the functions from sse2neon.

#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#if OCIO_USE_SSE2NEON
// Using vmaxnmq_f32 and vminnmq_f32 rather than sse2neon's vmaxq_f32 and vminq_f32 due to
// NaN handling. This doesn't seem to be significantly slower than the default sse2neon behavior.
Expand Down
6 changes: 3 additions & 3 deletions src/OpenColorIO/SSE2.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
#if OCIO_USE_SSE2

// Include the appropriate SIMD intrinsics header based on the architecture (Intel vs. ARM).
#if !defined(__aarch64__)
#if !defined(__aarch64__) && !defined(_M_ARM64)
#include <emmintrin.h>
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
// ARM architecture A64 (ARM64)
#if OCIO_USE_SSE2NEON
#include <sse2neon.h>
Expand All @@ -30,7 +30,7 @@ namespace OCIO_NAMESPACE
// Note that it is important for the code below this ifdef stays in the OCIO_NAMESPACE since
// it is redefining two of the functions from sse2neon.

#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#if OCIO_USE_SSE2NEON
// Using vmaxnmq_f32 and vminnmq_f32 rather than sse2neon's vmaxq_f32 and vminq_f32 due to
// NaN handling. This doesn't seem to be significantly slower than the default sse2neon behavior.
Expand Down
Loading