2021-05-20 12:49:33 +02:00
|
|
|
// Copyright 2009-2021 Intel Corporation
|
2020-12-19 14:50:20 +01:00
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "node_intersector.h"
|
|
|
|
|
|
|
|
namespace embree
|
|
|
|
{
|
|
|
|
namespace isa
|
|
|
|
{
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Ray packet structure used in hybrid traversal
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int K, bool robust>
|
|
|
|
struct TravRayK;
|
|
|
|
|
|
|
|
/* Fast variant */
|
|
|
|
template<int K>
|
|
|
|
struct TravRayK<K, false>
|
|
|
|
{
|
|
|
|
__forceinline TravRayK() {}
|
|
|
|
|
|
|
|
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
|
|
|
{
|
|
|
|
init(ray_org, ray_dir, N);
|
|
|
|
}
|
|
|
|
|
|
|
|
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
|
|
|
|
{
|
|
|
|
init(ray_org, ray_dir, N);
|
|
|
|
tnear = ray_tnear;
|
|
|
|
tfar = ray_tfar;
|
|
|
|
}
|
|
|
|
|
|
|
|
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
|
|
|
{
|
|
|
|
org = ray_org;
|
|
|
|
dir = ray_dir;
|
|
|
|
rdir = rcp_safe(ray_dir);
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
neg_org_rdir = -(org * rdir);
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
org_rdir = org * rdir;
|
|
|
|
#endif
|
2021-05-20 12:49:33 +02:00
|
|
|
|
2020-12-19 14:50:20 +01:00
|
|
|
if (N)
|
|
|
|
{
|
|
|
|
const int size = sizeof(float)*N;
|
|
|
|
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
|
|
|
|
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
|
|
|
|
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Vec3vf<K> org;
|
|
|
|
Vec3vf<K> dir;
|
|
|
|
Vec3vf<K> rdir;
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
Vec3vf<K> neg_org_rdir;
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
Vec3vf<K> org_rdir;
|
|
|
|
#endif
|
|
|
|
Vec3vi<K> nearXYZ;
|
|
|
|
vfloat<K> tnear;
|
|
|
|
vfloat<K> tfar;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int K>
|
|
|
|
using TravRayKFast = TravRayK<K, false>;
|
|
|
|
|
|
|
|
/* Robust variant */
|
|
|
|
template<int K>
|
|
|
|
struct TravRayK<K, true>
|
|
|
|
{
|
|
|
|
__forceinline TravRayK() {}
|
|
|
|
|
|
|
|
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
|
|
|
{
|
|
|
|
init(ray_org, ray_dir, N);
|
|
|
|
}
|
|
|
|
|
|
|
|
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
|
|
|
|
{
|
|
|
|
init(ray_org, ray_dir, N);
|
|
|
|
tnear = ray_tnear;
|
|
|
|
tfar = ray_tfar;
|
|
|
|
}
|
|
|
|
|
|
|
|
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
|
|
|
|
{
|
|
|
|
org = ray_org;
|
|
|
|
dir = ray_dir;
|
|
|
|
rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
|
|
|
|
|
|
|
|
if (N)
|
|
|
|
{
|
|
|
|
const int size = sizeof(float)*N;
|
|
|
|
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
|
|
|
|
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
|
|
|
|
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Vec3vf<K> org;
|
|
|
|
Vec3vf<K> dir;
|
|
|
|
Vec3vf<K> rdir;
|
|
|
|
Vec3vi<K> nearXYZ;
|
|
|
|
vfloat<K> tnear;
|
|
|
|
vfloat<K> tfar;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int K>
|
|
|
|
using TravRayKRobust = TravRayK<K, true>;
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Fast AABBNode intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, vfloat<K>& dist)
|
|
|
|
|
|
|
|
{
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
|
|
|
|
#else
|
|
|
|
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
#endif
|
|
|
|
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
if (K == 16)
|
|
|
|
{
|
|
|
|
/* use mixed float/int min/max */
|
|
|
|
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
|
|
|
#else
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
#endif
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Robust AABBNode intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
// FIXME: use per instruction rounding for AVX512
|
|
|
|
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
const float round_up = 1.0f+3.0f*float(ulp);
|
|
|
|
const float round_down = 1.0f-3.0f*float(ulp);
|
|
|
|
const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Fast AABBNodeMB intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
|
|
|
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
|
|
|
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
|
|
|
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
|
|
|
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
|
|
|
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
|
|
|
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
|
|
|
|
#else
|
|
|
|
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
|
|
|
#endif
|
|
|
|
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
if (K == 16)
|
|
|
|
{
|
|
|
|
/* use mixed float/int min/max */
|
|
|
|
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
|
|
|
#else
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
#endif
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Robust AABBNodeMB intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
|
|
|
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
|
|
|
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
|
|
|
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
|
|
|
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
|
|
|
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
|
|
|
|
|
|
|
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
|
|
|
|
|
|
|
const float round_up = 1.0f+3.0f*float(ulp);
|
|
|
|
const float round_down = 1.0f-3.0f*float(ulp);
|
|
|
|
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
if (K == 16)
|
|
|
|
{
|
|
|
|
const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Fast AABBNodeMB4D intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
|
|
|
|
|
|
|
|
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
|
|
|
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
|
|
|
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
|
|
|
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
|
|
|
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
|
|
|
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
|
|
|
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
|
|
|
|
#else
|
|
|
|
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
|
|
|
|
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
if (unlikely(ref.isAABBNodeMB4D())) {
|
|
|
|
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
|
|
|
|
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
|
|
|
|
}
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Robust AABBNodeMB4D intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
|
|
|
|
|
|
|
|
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
|
|
|
|
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
|
|
|
|
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
|
|
|
|
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
|
|
|
|
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
|
|
|
|
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
|
|
|
|
|
|
|
|
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
|
|
|
|
|
|
|
|
const float round_up = 1.0f+3.0f*float(ulp);
|
|
|
|
const float round_down = 1.0f-3.0f*float(ulp);
|
|
|
|
const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
|
|
|
|
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
|
|
|
|
if (unlikely(ref.isAABBNodeMB4D())) {
|
|
|
|
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
|
|
|
|
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
|
|
|
|
}
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Fast OBBNode intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K, bool robust>
|
|
|
|
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
|
|
|
|
const TravRayK<K,robust>& ray, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
|
|
|
|
Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
|
|
|
|
Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
|
|
|
|
Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
|
|
|
|
|
|
|
|
const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
|
|
|
|
const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
|
|
|
|
const Vec3vf<K> org = xfmPoint(naabb, ray.org);
|
|
|
|
|
|
|
|
const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
|
|
|
|
const vfloat<K> lclipMinY = org.y * nrdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = org.z * nrdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
|
|
|
|
const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
|
|
|
|
|
|
|
|
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
|
|
|
if (robust) {
|
|
|
|
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
|
|
|
|
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
|
|
|
|
}
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Fast OBBNodeMB intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K, bool robust>
|
|
|
|
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
|
|
|
|
const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
|
|
|
|
Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
|
|
|
|
Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
|
|
|
|
Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
|
|
|
|
|
|
|
|
const Vec3vf<K> b0_lower = zero;
|
|
|
|
const Vec3vf<K> b0_upper = one;
|
|
|
|
const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
|
|
|
|
const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
|
|
|
|
const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
|
|
|
|
const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
|
|
|
|
|
|
|
|
const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
|
|
|
|
const Vec3vf<K> rdir = rcp_safe(dir);
|
|
|
|
const Vec3vf<K> org = xfmPoint(xfm, ray.org);
|
|
|
|
|
|
|
|
const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
|
|
|
|
|
|
|
|
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
|
|
|
if (robust) {
|
|
|
|
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
|
|
|
|
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
|
|
|
|
}
|
|
|
|
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// QuantizedBaseNode intersection
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
|
|
|
|
const TravRayK<K,false>& ray, vfloat<K>& dist)
|
|
|
|
|
|
|
|
{
|
|
|
|
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
|
|
|
const vfloat<N> lower_x = node->dequantizeLowerX();
|
|
|
|
const vfloat<N> upper_x = node->dequantizeUpperX();
|
|
|
|
const vfloat<N> lower_y = node->dequantizeLowerY();
|
|
|
|
const vfloat<N> upper_y = node->dequantizeUpperY();
|
|
|
|
const vfloat<N> lower_z = node->dequantizeLowerZ();
|
|
|
|
const vfloat<N> upper_z = node->dequantizeUpperZ();
|
|
|
|
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
|
|
|
|
#else
|
|
|
|
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
#endif
|
|
|
|
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
if (K == 16)
|
|
|
|
{
|
|
|
|
/* use mixed float/int min/max */
|
|
|
|
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
|
2021-05-20 12:49:33 +02:00
|
|
|
#if defined(__AVX512F__) // SKX
|
2020-12-19 14:50:20 +01:00
|
|
|
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
|
|
|
|
#else
|
|
|
|
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
|
|
|
|
#endif
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
|
|
|
|
const TravRayK<K,true>& ray, vfloat<K>& dist)
|
|
|
|
|
|
|
|
{
|
|
|
|
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
|
|
|
const vfloat<N> lower_x = node->dequantizeLowerX();
|
|
|
|
const vfloat<N> upper_x = node->dequantizeUpperX();
|
|
|
|
const vfloat<N> lower_y = node->dequantizeLowerY();
|
|
|
|
const vfloat<N> upper_y = node->dequantizeUpperY();
|
|
|
|
const vfloat<N> lower_z = node->dequantizeLowerZ();
|
|
|
|
const vfloat<N> upper_z = node->dequantizeUpperZ();
|
|
|
|
|
|
|
|
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
|
|
|
|
|
|
|
|
const float round_up = 1.0f+3.0f*float(ulp);
|
|
|
|
const float round_down = 1.0f-3.0f*float(ulp);
|
|
|
|
|
|
|
|
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
|
|
|
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
|
|
|
|
{
|
|
|
|
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
|
|
|
|
2021-05-20 12:49:33 +02:00
|
|
|
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
|
|
|
|
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
|
|
|
|
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
|
|
|
|
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
|
|
|
|
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
|
|
|
|
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
|
2020-12-19 14:50:20 +01:00
|
|
|
|
2022-11-24 15:45:59 +01:00
|
|
|
#if defined(__aarch64__)
|
|
|
|
const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
|
|
|
|
#elif defined(__AVX2__)
|
2020-12-19 14:50:20 +01:00
|
|
|
const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
|
|
|
|
const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
|
|
|
|
const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
|
|
|
|
const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
|
|
|
|
#else
|
|
|
|
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
|
|
|
|
#endif
|
|
|
|
const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
|
|
|
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
|
|
|
|
{
|
|
|
|
assert(movemask(node->validMask()) & ((size_t)1 << i));
|
|
|
|
|
2021-05-20 12:49:33 +02:00
|
|
|
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
|
|
|
|
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
|
|
|
|
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
|
|
|
|
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
|
|
|
|
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
|
|
|
|
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
|
2020-12-19 14:50:20 +01:00
|
|
|
|
|
|
|
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
|
|
|
|
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
|
|
|
|
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
|
|
|
|
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
|
|
|
|
|
|
|
|
const float round_up = 1.0f+3.0f*float(ulp);
|
|
|
|
const float round_down = 1.0f-3.0f*float(ulp);
|
|
|
|
|
|
|
|
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
|
|
|
|
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
|
|
|
|
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
|
|
|
|
dist = lnearP;
|
|
|
|
return lhit;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Node intersectors used in hybrid traversal
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/*! Intersects N nodes with K rays */
|
|
|
|
template<int N, int K, int types, bool robust>
|
|
|
|
struct BVHNNodeIntersectorK;
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
|
|
|
|
{
|
|
|
|
/* vmask is both an input and an output parameter! Its initial value should be the parent node
|
|
|
|
hit mask, which is used for correctly computing the current hit mask. The parent hit mask
|
|
|
|
is actually required only for motion blur node intersections (because different rays may
|
|
|
|
have different times), so for regular nodes vmask is simply overwritten. */
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
|
|
|
|
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
|
|
|
|
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
|
|
|
else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
|
|
|
|
else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
|
|
|
|
vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
|
|
|
|
} else /*if (unlikely(node.isOBBNodeMB()))*/ {
|
|
|
|
assert(node.isOBBNodeMB());
|
|
|
|
vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
|
|
|
|
{
|
|
|
|
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
|
|
|
|
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
|
|
|
|
{
|
|
|
|
if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
|
|
|
|
vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
|
|
|
|
} else /*if (unlikely(node.isOBBNodeMB()))*/ {
|
|
|
|
assert(node.isOBBNodeMB());
|
|
|
|
vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*! Intersects N nodes with K rays */
|
|
|
|
template<int N, int K, bool robust>
|
|
|
|
struct BVHNQuantizedBaseNodeIntersectorK;
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
|
|
|
|
{
|
|
|
|
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
|
|
|
|
const TravRayK<K,false>& ray, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
|
|
|
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
template<int N, int K>
|
|
|
|
struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
|
|
|
|
{
|
|
|
|
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
|
|
|
|
const TravRayK<K,true>& ray, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
|
|
|
|
}
|
|
|
|
|
|
|
|
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
|
|
|
|
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
|
|
|
|
{
|
|
|
|
return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|