virtualx-engine/thirdparty/embree/common/simd/arm/emulation.h

// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

/* Make precision match SSE, at the cost of some performance */
#if !defined(__aarch64__)
#  define SSE2NEON_PRECISE_DIV 1
#  define SSE2NEON_PRECISE_SQRT 1
#endif

#include "sse2neon.h"

__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
   __m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c)));
  return _mm_fmadd_ps(a, b, neg_c);
}

__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
#if defined(__aarch64__)
    return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c),
                                            vreinterpretq_f32_m128(b),
                                            vreinterpretq_f32_m128(a)));
#else
    return _mm_sub_ps(c, _mm_mul_ps(a, b));
#endif
}

__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
  return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c))));
}


/* Dummy defines for floating point control */
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_DIV_ZERO 0x200
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_MASK_DENORM 0x100
#define _MM_SET_EXCEPTION_MASK(x)
#define _MM_SET_FLUSH_ZERO_MODE(x)

__forceinline int _mm_getcsr()
{
  return 0;
}

__forceinline void _mm_mfence()
{
  __sync_synchronize();
}
Upgrade Embree to the latest official release. Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library. 2021-05-20 12:49:33 +02:00			`// Copyright 2009-2020 Intel Corporation`
			`// SPDX-License-Identifier: Apache-2.0`

			`#pragma once`

			`/* Make precision match SSE, at the cost of some performance */`
			`#if !defined(__aarch64__)`
			`# define SSE2NEON_PRECISE_DIV 1`
			`# define SSE2NEON_PRECISE_SQRT 1`
			`#endif`

			`#include "sse2neon.h"`

			`__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {`
			`__m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c)));`
			`return _mm_fmadd_ps(a, b, neg_c);`
			`}`

			`__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {`
			`#if defined(__aarch64__)`
			`return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c),`
			`vreinterpretq_f32_m128(b),`
			`vreinterpretq_f32_m128(a)));`
			`#else`
			`return _mm_sub_ps(c, _mm_mul_ps(a, b));`
			`#endif`
			`}`

			`__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {`
			`return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c))));`
			`}`


			`/* Dummy defines for floating point control */`
			`#define _MM_MASK_MASK 0x1f80`
			`#define _MM_MASK_DIV_ZERO 0x200`
			`#define _MM_FLUSH_ZERO_ON 0x8000`
			`#define _MM_MASK_DENORM 0x100`
			`#define _MM_SET_EXCEPTION_MASK(x)`
			`#define _MM_SET_FLUSH_ZERO_MODE(x)`

			`__forceinline int _mm_getcsr()`
			`{`
			`return 0;`
			`}`

			`__forceinline void _mm_mfence()`
			`{`
			`__sync_synchronize();`
			`}`