virtualx-engine/thirdparty/embree/kernels/bvh/bvh_intersector_stream_filters.cpp
2024-02-18 13:31:05 -07:00

658 lines
24 KiB
C++

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_stream_filters.h"
#include "bvh_intersector_stream.h"
namespace embree
{
namespace isa
{
template<int K, bool intersect>
__noinline void RayStreamFilter::filterAOS(Scene* scene, void* _rayN, size_t N, size_t stride, IntersectContext* context)
{
RayStreamAOS rayN(_rayN);
/* use fast path for coherent ray mode */
if (unlikely(context->isCoherent()))
{
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
{
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
/* convert from AOS to SOA */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const vint<K> offset = vij * int(stride);
const size_t packetIndex = j / K;
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
rays[packetIndex] = ray;
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
}
/* trace stream */
scene->intersectors.intersectN(rayPtrs, size, context);
/* convert from SOA to AOS */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const vint<K> offset = vij * int(stride);
const size_t packetIndex = j / K;
rayN.setHitByOffset(valid, offset, rays[packetIndex]);
}
}
}
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
size_t inputRayID = 0;
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N;)
{
const Ray& ray = rayN.getRayByOffset(inputRayID * stride);
/* skip invalid rays */
if (unlikely(ray.tnear() > ray.tfar || ray.tfar < 0.0f)) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
const unsigned int octantID = movemask(vfloat4(Vec3fa(ray.dir)) < 0.0f) & 0x7;
assert(octantID < 8);
octants[octantID][raysInOctant[octantID]++] = (unsigned int)inputRayID;
inputRayID++;
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayIDs = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayIDs[j] * int(stride);
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByOffset<K>(valid, offset);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayIDs[j] * int(stride);
rayN.setHitByOffset<K>(valid, offset, rays[j/K]);
}
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < N; i += K)
{
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
vbool<K> valid = vi < vint<K>(int(N));
const vint<K> offset = vi * int(stride);
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByOffset<K>(valid, offset, ray);
}
}
}
template<int K, bool intersect>
__noinline void RayStreamFilter::filterAOP(Scene* scene, void** _rayN, size_t N, IntersectContext* context)
{
RayStreamAOP rayN(_rayN);
/* use fast path for coherent ray mode */
if (unlikely(context->isCoherent()))
{
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
{
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
/* convert from AOP to SOA */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const size_t packetIndex = j / K;
RayTypeK<K, intersect> ray = rayN.getRayByIndex<K>(valid, vij);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
rays[packetIndex] = ray;
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
}
/* trace stream */
scene->intersectors.intersectN(rayPtrs, size, context);
/* convert from SOA to AOP */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const size_t packetIndex = j / K;
rayN.setHitByIndex<K>(valid, vij, rays[packetIndex]);
}
}
}
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
size_t inputRayID = 0;
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N;)
{
const Ray& ray = rayN.getRayByIndex(inputRayID);
/* skip invalid rays */
if (unlikely(ray.tnear() > ray.tfar || ray.tfar < 0.0f)) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
const unsigned int octantID = movemask(lt_mask(ray.dir,Vec3fa(0.0f)));
assert(octantID < 8);
octants[octantID][raysInOctant[octantID]++] = (unsigned int)inputRayID;
inputRayID++;
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayIDs = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> index = *(vint<K>*)&rayIDs[j];
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByIndex<K>(valid, index);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> index = *(vint<K>*)&rayIDs[j];
rayN.setHitByIndex<K>(valid, index, rays[j/K]);
}
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < N; i += K)
{
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
vbool<K> valid = vi < vint<K>(int(N));
RayTypeK<K, intersect> ray = rayN.getRayByIndex<K>(valid, vi);
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByIndex<K>(valid, vi, ray);
}
}
}
template<int K, bool intersect>
__noinline void RayStreamFilter::filterSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context)
{
const size_t rayDataAlignment = (size_t)rayData % (K*sizeof(float));
const size_t offsetAlignment = (size_t)stride % (K*sizeof(float));
/* fast path for packets with the correct width and data alignment */
if (likely(N == K &&
!rayDataAlignment &&
!offsetAlignment))
{
if (unlikely(context->isCoherent()))
{
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
size_t packetIndex = 0;
for (size_t i = 0; i < numPackets; i++)
{
const size_t offset = i * stride;
RayTypeK<K, intersect>& ray = *(RayTypeK<K, intersect>*)(rayData + offset);
rayPtrs[packetIndex++] = &ray;
/* trace as stream */
if (unlikely(packetIndex == MAX_INTERNAL_STREAM_SIZE / K))
{
const size_t size = packetIndex*K;
scene->intersectors.intersectN(rayPtrs, size, context);
packetIndex = 0;
}
}
/* flush remaining packets */
if (unlikely(packetIndex > 0))
{
const size_t size = packetIndex*K;
scene->intersectors.intersectN(rayPtrs, size, context);
}
}
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
RayStreamSOA rayN(rayData, K);
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
size_t inputRayID = 0;
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N*numPackets;)
{
const size_t offset = (inputRayID / K) * stride + (inputRayID % K) * sizeof(float);
/* skip invalid rays */
if (unlikely(!rayN.isValidByOffset(offset))) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
__aligned(64) Ray ray = rayN.getRayByOffset(offset);
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
const unsigned int octantID = (unsigned int)rayN.getOctantByOffset(offset);
assert(octantID < 8);
octants[octantID][raysInOctant[octantID]++] = (unsigned int)offset;
inputRayID++;
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayOffsets = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByOffset<K>(valid, offset);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
rayN.setHitByOffset(valid, offset, rays[j/K]);
}
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < numPackets; i++)
{
const size_t offset = i * stride;
RayTypeK<K, intersect>& ray = *(RayTypeK<K, intersect>*)(rayData + offset);
const vbool<K> valid = ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
}
}
}
else
{
/* fallback to packets for arbitrary packet size and alignment */
for (size_t i = 0; i < numPackets; i++)
{
const size_t offsetN = i * stride;
RayStreamSOA rayN(rayData + offsetN, N);
for (size_t j = 0; j < N; j += K)
{
const size_t offset = j * sizeof(float);
vbool<K> valid = (vint<K>(int(j)) + vint<K>(step)) < vint<K>(int(N));
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByOffset(valid, offset, ray);
}
}
}
}
template<int K, bool intersect>
__noinline void RayStreamFilter::filterSOP(Scene* scene, const void* _rayN, size_t N, IntersectContext* context)
{
RayStreamSOP& rayN = *(RayStreamSOP*)_rayN;
/* use fast path for coherent ray mode */
if (unlikely(context->isCoherent()))
{
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
{
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
/* convert from SOP to SOA */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const size_t offset = (i+j) * sizeof(float);
const size_t packetIndex = j / K;
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
rays[packetIndex] = ray;
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
}
/* trace stream */
scene->intersectors.intersectN(rayPtrs, size, context);
/* convert from SOA to SOP */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const size_t offset = (i+j) * sizeof(float);
const size_t packetIndex = j / K;
rayN.setHitByOffset(valid, offset, rays[packetIndex]);
}
}
}
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
size_t inputRayID = 0;
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N;)
{
const size_t offset = inputRayID * sizeof(float);
/* skip invalid rays */
if (unlikely(!rayN.isValidByOffset(offset))) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
__aligned(64) Ray ray = rayN.getRayByOffset(offset);
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
const unsigned int octantID = (unsigned int)rayN.getOctantByOffset(offset);
assert(octantID < 8);
octants[octantID][raysInOctant[octantID]++] = (unsigned int)offset;
inputRayID++;
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayOffsets = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByOffset<K>(valid, offset);
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
rayN.setHitByOffset(valid, offset, rays[j/K]);
}
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < N; i += K)
{
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
vbool<K> valid = vi < vint<K>(int(N));
const size_t offset = i * sizeof(float);
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByOffset(valid, offset, ray);
}
}
}
void RayStreamFilter::intersectAOS(Scene* scene, RTCRayHit* _rayN, size_t N, size_t stride, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterAOS<VSIZEL, true>(scene, _rayN, N, stride, context);
else
filterAOS<VSIZEX, true>(scene, _rayN, N, stride, context);
}
void RayStreamFilter::occludedAOS(Scene* scene, RTCRay* _rayN, size_t N, size_t stride, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterAOS<VSIZEL, false>(scene, _rayN, N, stride, context);
else
filterAOS<VSIZEX, false>(scene, _rayN, N, stride, context);
}
void RayStreamFilter::intersectAOP(Scene* scene, RTCRayHit** _rayN, size_t N, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterAOP<VSIZEL, true>(scene, (void**)_rayN, N, context);
else
filterAOP<VSIZEX, true>(scene, (void**)_rayN, N, context);
}
void RayStreamFilter::occludedAOP(Scene* scene, RTCRay** _rayN, size_t N, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterAOP<VSIZEL, false>(scene, (void**)_rayN, N, context);
else
filterAOP<VSIZEX, false>(scene, (void**)_rayN, N, context);
}
void RayStreamFilter::intersectSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterSOA<VSIZEL, true>(scene, rayData, N, numPackets, stride, context);
else
filterSOA<VSIZEX, true>(scene, rayData, N, numPackets, stride, context);
}
void RayStreamFilter::occludedSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterSOA<VSIZEL, false>(scene, rayData, N, numPackets, stride, context);
else
filterSOA<VSIZEX, false>(scene, rayData, N, numPackets, stride, context);
}
void RayStreamFilter::intersectSOP(Scene* scene, const RTCRayHitNp* _rayN, size_t N, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterSOP<VSIZEL, true>(scene, _rayN, N, context);
else
filterSOP<VSIZEX, true>(scene, _rayN, N, context);
}
void RayStreamFilter::occludedSOP(Scene* scene, const RTCRayNp* _rayN, size_t N, IntersectContext* context) {
if (unlikely(context->isCoherent()))
filterSOP<VSIZEL, false>(scene, _rayN, N, context);
else
filterSOP<VSIZEX, false>(scene, _rayN, N, context);
}
RayStreamFilterFuncs rayStreamFilterFuncs() {
return RayStreamFilterFuncs(RayStreamFilter::intersectAOS, RayStreamFilter::intersectAOP, RayStreamFilter::intersectSOA, RayStreamFilter::intersectSOP,
RayStreamFilter::occludedAOS, RayStreamFilter::occludedAOP, RayStreamFilter::occludedSOA, RayStreamFilter::occludedSOP);
}
};
};