Skip to content

Commit

Permalink
ARM 32-bit support for Windows (will probably work on Linux/Android t…
Browse files Browse the repository at this point in the history
…oo) (#332)
  • Loading branch information
jrouwe authored Nov 12, 2022
1 parent 84b751b commit 69fcdfe
Show file tree
Hide file tree
Showing 20 changed files with 69 additions and 35 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,24 @@ jobs:
- name: Build
run: msbuild Build\VS2022_CL_ARM\JoltPhysics.sln /property:Configuration=${{matrix.build_type}}

msvc_cl_arm_32_bit:
runs-on: windows-latest
name: Visual Studio CL ARM 32-bit
strategy:
fail-fast: false
matrix:
build_type: [Debug, Release]

steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1.1
- name: Configure CMake
run: cmake -B ${{github.workspace}}/Build/VS2022_CL_ARM_32_BIT -G "Visual Studio 17 2022" -A ARM Build
- name: Build
run: msbuild Build\VS2022_CL_ARM_32_BIT\JoltPhysics.sln /property:Configuration=${{matrix.build_type}}

macos:
runs-on: macos-latest
name: macOS
Expand Down
11 changes: 8 additions & 3 deletions Build/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ if (("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUA
# Set compiler flag for disabling RTTI
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-")

if ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM")
	# For the 32-bit ARM platform no exception handling flag is present, which produces compiler warnings, so add /EHsc explicitly
	string(APPEND CMAKE_CXX_FLAGS " /EHsc")
endif()

# Set compiler flags for various configurations
set(CMAKE_CXX_FLAGS_DEBUG "/GS /Od /Ob0 /RTC1")
set(CMAKE_CXX_FLAGS_RELEASE "/GS- /Gy /O2 /Oi /Ot")
Expand All @@ -69,8 +74,8 @@ if (("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" OR "${CMAKE_SYSTEM_NAME}" STREQUA
set(CMAKE_CXX_FLAGS_RELEASEUBSAN "-fsanitize=undefined,implicit-conversion,float-divide-by-zero,local-bounds -fno-sanitize-recover=all")
set(CMAKE_CXX_FLAGS_RELEASECOVERAGE "-fprofile-instr-generate -fcoverage-mapping")

if (NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64"))
# On ARM64, whole program optimization triggers an internal compiler error during code gen, so we don't turn it on
if (NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM64") AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM"))
# On ARM, whole program optimization triggers an internal compiler error during code gen, so we don't turn it on
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL")
set(CMAKE_CXX_FLAGS_DISTRIBUTION "${CMAKE_CXX_FLAGS_DISTRIBUTION} /GL")
endif()
Expand Down Expand Up @@ -276,7 +281,7 @@ if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
endif()
endif()

if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" AND NOT ("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "ARM")) # ARM 32-bit is missing dinput8.lib
# Windows only targets
if (TARGET_SAMPLES OR TARGET_VIEWER)
include(${PHYSICS_REPO_ROOT}/TestFramework/TestFramework.cmake)
Expand Down
3 changes: 3 additions & 0 deletions Build/cmake_vs2022_cl_arm_32bit.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@echo off
rem Generates a Visual Studio 2022 solution for the ARM platform (-A ARM).
rem -S . uses the current directory as the CMake source directory, so run this from the folder containing CMakeLists.txt.
rem -B VS2022_CL_ARM_32BIT places the generated solution in that sub folder.
rem %* forwards any extra command line arguments of this script to cmake.
cmake -S . -B VS2022_CL_ARM_32BIT -G "Visual Studio 17 2022" -A ARM %*
rem Tell the user where to find the generated solution.
echo Open VS2022_CL_ARM_32BIT\JoltPhysics.sln to build the project.
20 changes: 14 additions & 6 deletions Jolt/Core/Core.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#define JPH_CPU_ADDRESS_BITS 32
#endif
#define JPH_USE_SSE
#define JPH_VECTOR_ALIGNMENT 16

// Detect enabled instruction sets
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && !defined(JPH_USE_AVX512)
Expand Down Expand Up @@ -95,15 +96,22 @@
#error Undefined compiler
#endif
#endif
#elif defined(__aarch64__) || defined(_M_ARM64)
// ARM64 CPU architecture
#define JPH_CPU_ARM64
#define JPH_USE_NEON
#define JPH_CPU_ADDRESS_BITS 64
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
// ARM CPU architecture
#define JPH_CPU_ARM
#if defined(__aarch64__) || defined(_M_ARM64)
#define JPH_CPU_ADDRESS_BITS 64
#define JPH_USE_NEON
#define JPH_VECTOR_ALIGNMENT 16
#else
#define JPH_CPU_ADDRESS_BITS 32
#define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries
#endif
#elif defined(JPH_PLATFORM_WASM)
// WebAssembly CPU architecture
#define JPH_CPU_WASM
#define JPH_CPU_ADDRESS_BITS 32
#define JPH_VECTOR_ALIGNMENT 16
#define JPH_DISABLE_CUSTOM_ALLOCATOR
#else
#error Unsupported CPU architecture
Expand Down Expand Up @@ -197,7 +205,7 @@
#elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS)
#if defined(JPH_CPU_X86)
#define JPH_BREAKPOINT __asm volatile ("int $0x3")
#elif defined(JPH_CPU_ARM64)
#elif defined(JPH_CPU_ARM)
#define JPH_BREAKPOINT __builtin_trap()
#endif
#elif defined(JPH_PLATFORM_WASM)
Expand Down
4 changes: 2 additions & 2 deletions Jolt/Core/FPControlWord.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class FPControlWord : public NonCopyable
uint mPrevState;
};

#elif defined(JPH_USE_NEON) && defined(JPH_COMPILER_MSVC)
#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)

/// Helper class that needs to be put on the stack to update the state of the floating point control word.
/// This state is kept per thread.
Expand Down Expand Up @@ -59,7 +59,7 @@ class FPControlWord : public NonCopyable
unsigned int mPrevState;
};

#elif defined(JPH_USE_NEON)
#elif defined(JPH_CPU_ARM)

/// Helper class that needs to be put on the stack to update the state of the floating point control word.
/// This state is kept per thread.
Expand Down
4 changes: 2 additions & 2 deletions Jolt/Core/FPException.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class FPExceptionDisableInvalid : public FPControlWord<_MM_MASK_INVALID, _MM_MAS
/// Disable division by zero floating point exceptions
class FPExceptionDisableDivByZero : public FPControlWord<_MM_MASK_DIV_ZERO, _MM_MASK_DIV_ZERO> { };

#elif defined(JPH_USE_NEON) && defined(JPH_COMPILER_MSVC)
#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)

/// Enable floating point divide by zero exception and exceptions on invalid numbers
class FPExceptionsEnable : public FPControlWord<0, _EM_INVALID | _EM_ZERODIVIDE> { };
Expand All @@ -31,7 +31,7 @@ class FPExceptionDisableInvalid : public FPControlWord<_EM_INVALID, _EM_INVALID>
/// Disable division by zero floating point exceptions
class FPExceptionDisableDivByZero : public FPControlWord<_EM_ZERODIVIDE, _EM_ZERODIVIDE> { };

#elif defined(JPH_USE_NEON)
#elif defined(JPH_CPU_ARM)

/// Invalid operation exception bit
static constexpr uint64 FP_IOE = 1 << 8;
Expand Down
4 changes: 2 additions & 2 deletions Jolt/Core/FPFlushDenormals.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ JPH_NAMESPACE_BEGIN
/// This can make floating point operations much faster when working with very small numbers
class FPFlushDenormals : public FPControlWord<_MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_MASK> { };

#elif defined(JPH_USE_NEON) && defined(JPH_COMPILER_MSVC)
#elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)

class FPFlushDenormals : public FPControlWord<_DN_FLUSH, _MCW_DN> { };

#elif defined(JPH_USE_NEON)
#elif defined(JPH_CPU_ARM)

/// Flush denormals to zero bit
static constexpr uint64 FP_FZ = 1 << 24;
Expand Down
8 changes: 4 additions & 4 deletions Jolt/Core/TickCounter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

JPH_NAMESPACE_BEGIN

#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM64))
#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM))

uint64 GetProcessorTickCount()
{
Expand All @@ -33,10 +33,10 @@ uint64 GetProcessorTickCount()
return uint64(count.QuadPart);
}

#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM64)
#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM)

static const uint64 sProcessorTicksPerSecond = []() {
#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM64))
#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM))
LARGE_INTEGER frequency { };
QueryPerformanceFrequency(&frequency);
return uint64(frequency.QuadPart);
Expand Down Expand Up @@ -71,7 +71,7 @@ static const uint64 sProcessorTicksPerSecond = []() {

#if defined(JPH_CPU_X86)
const char *cpu_str = "cpu MHz";
#elif defined(JPH_CPU_ARM64)
#elif defined(JPH_CPU_ARM)
const char *cpu_str = "BogoMIPS";
#else
#error Unsupported CPU architecture
Expand Down
6 changes: 3 additions & 3 deletions Jolt/Core/TickCounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

JPH_NAMESPACE_BEGIN

#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM64))
#if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM))

/// Functionality to get the processor's cycle counter
uint64 GetProcessorTickCount(); // Not inline to avoid having to include Windows.h
Expand All @@ -26,7 +26,7 @@ JPH_INLINE uint64 GetProcessorTickCount()
return JPH_PLATFORM_BLUE_GET_TICKS();
#elif defined(JPH_CPU_X86)
return __rdtsc();
#elif defined(JPH_CPU_ARM64)
#elif defined(JPH_CPU_ARM)
uint64 val;
asm volatile("mrs %0, cntvct_el0" : "=r" (val));
return val;
Expand All @@ -37,7 +37,7 @@ JPH_INLINE uint64 GetProcessorTickCount()
#endif
}

#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM64)
#endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM)

/// Get the amount of ticks per second, note that this number will never be fully accurate as the amount of ticks per second may vary with CPU load, so this number is only to be used to give an indication of time for profiling purposes
uint64 GetProcessorTicksPerSecond();
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Math/Mat44.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
JPH_NAMESPACE_BEGIN

/// Holds a 4x4 matrix of floats, but supports also operations on the 3x3 upper left part of the matrix.
class [[nodiscard]] alignas(16) Mat44
class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Mat44
{
public:
JPH_OVERRIDE_NEW_DELETE
Expand Down
4 changes: 2 additions & 2 deletions Jolt/Math/Math.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ inline uint CountTrailingZeros(uint32 inValue)
return 32;
return __builtin_ctz(inValue);
#endif
#elif defined(JPH_CPU_ARM64)
#elif defined(JPH_CPU_ARM)
#if defined(JPH_COMPILER_MSVC)
if (inValue == 0)
return 32;
Expand Down Expand Up @@ -139,7 +139,7 @@ inline uint CountLeadingZeros(uint32 inValue)
return 32;
return __builtin_clz(inValue);
#endif
#elif defined(JPH_CPU_ARM64)
#elif defined(JPH_CPU_ARM)
#if defined(JPH_COMPILER_MSVC)
return _CountLeadingZeros(inValue);
#else
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Math/Quat.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ JPH_NAMESPACE_BEGIN
/// it easy to extract the rotation axis of the quaternion:
///
/// q = [cos(angle / 2), sin(angle / 2) * rotation_axis]
class [[nodiscard]] alignas(16) Quat
class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Quat
{
public:
JPH_OVERRIDE_NEW_DELETE
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Math/UVec4.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

JPH_NAMESPACE_BEGIN

class [[nodiscard]] alignas(16) UVec4
class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) UVec4
{
public:
JPH_OVERRIDE_NEW_DELETE
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Math/Vec3.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ JPH_NAMESPACE_BEGIN

/// 3 component vector (stored as 4 vectors).
/// Note that we keep the 4th component the same as the 3rd component to avoid divisions by zero when JPH_FLOATING_POINT_EXCEPTIONS_ENABLED defined
class [[nodiscard]] alignas(16) Vec3
class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Vec3
{
public:
JPH_OVERRIDE_NEW_DELETE
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Math/Vec4.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

JPH_NAMESPACE_BEGIN

class [[nodiscard]] alignas(16) Vec4
class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) Vec4
{
public:
JPH_OVERRIDE_NEW_DELETE
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Physics/Body/Body.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ class Body : public NonCopyable
};

static_assert(sizeof(Body) == 128, "Body should be 128 bytes");
static_assert(alignof(Body) == 16, "Body should align to 16 bytes");
static_assert(alignof(Body) == JPH_VECTOR_ALIGNMENT, "Body should properly align");

JPH_NAMESPACE_END

Expand Down
2 changes: 1 addition & 1 deletion Jolt/Physics/Collision/BroadPhase/QuadTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ QuadTree::NodeID QuadTree::BuildTree(const BodyVector &inBodies, TrackingVector

// Calculate centers of all bodies that are to be inserted
Vec3 *centers = new Vec3 [inNumber];
JPH_ASSERT(IsAligned(centers, 16));
JPH_ASSERT(IsAligned(centers, JPH_VECTOR_ALIGNMENT));
Vec3 *c = centers;
for (const NodeID *n = ioNodeIDs, *n_end = ioNodeIDs + inNumber; n < n_end; ++n, ++c)
*c = GetNodeOrBodyBounds(inBodies, *n).GetCenter();
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Physics/Collision/Shape/ConvexHullShape.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class ConvexHullShape final : public ConvexShape
};

static_assert(sizeof(Point) == 32, "Unexpected size");
static_assert(alignof(Point) == 16, "Unexpected alignment");
static_assert(alignof(Point) == JPH_VECTOR_ALIGNMENT, "Unexpected alignment");

Vec3 mCenterOfMass; ///< Center of mass of this convex hull
Mat44 mInertia; ///< Inertia matrix assuming density is 1 (needs to be multiplied by density)
Expand Down
2 changes: 1 addition & 1 deletion Jolt/Physics/Collision/TransformedShape.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,6 @@ class TransformedShape
};

static_assert(sizeof(TransformedShape) == 64, "Not properly packed");
static_assert(alignof(TransformedShape) == 16, "Not properly aligned");
static_assert(alignof(TransformedShape) == JPH_VECTOR_ALIGNMENT, "Not properly aligned");

JPH_NAMESPACE_END
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ For more information see the [Architecture and API documentation](https://jrouwe

## Supported Platforms

* Windows (VS2019, VS2022) x64/x86/ARM64 (Desktop/UWP)
* Linux (tested on Ubuntu 20.04) x64/ARM64
* Windows (VS2019, VS2022) x86/x64/ARM/ARM64 (Desktop/UWP)
* Linux (tested on Ubuntu 22.04) x64/ARM64
* Android (tested on Android 10) x64/ARM64
* Platform Blue (a popular game console) x64
* macOS (tested on Monterey) x64/ARM64
Expand Down

0 comments on commit 69fcdfe

Please sign in to comment.