Upgrade Embree and enable ray packets

Minor patch upgrade. Enabling ray packets results in faster
processing of ray streams (i.e. occlusion culling buffer
updates) at the cost of slightly larger binary sizes.
This commit is contained in:
Joan Fons 2021-09-13 15:05:21 +02:00
parent dde48ebed6
commit 595cbacdf1
10 changed files with 2216 additions and 6 deletions

View file

@ -55,6 +55,9 @@ if env["builtin_embree"]:
"kernels/bvh/bvh_builder_sah_mb.cpp",
"kernels/bvh/bvh_builder_twolevel.cpp",
"kernels/bvh/bvh_intersector1_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_filters.cpp",
]
thirdparty_sources = [thirdparty_dir + file for file in embree_src]

View file

@ -61,6 +61,11 @@ cpp_files = [
"kernels/bvh/bvh_builder_twolevel.cpp",
"kernels/bvh/bvh_intersector1.cpp",
"kernels/bvh/bvh_intersector1_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_filters.cpp",
"kernels/bvh/bvh_intersector_hybrid.cpp",
"kernels/bvh/bvh_intersector_stream.cpp",
]
os.chdir("../../thirdparty")
@ -117,7 +122,7 @@ with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file:
/* #undef EMBREE_GEOMETRY_INSTANCE */
/* #undef EMBREE_GEOMETRY_GRID */
/* #undef EMBREE_GEOMETRY_POINT */
/* #undef EMBREE_RAY_PACKETS */
#define EMBREE_RAY_PACKETS
/* #undef EMBREE_COMPACT_POLYS */
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
@ -249,3 +254,8 @@ with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as con
# Step one directory up and delete the "embree-tmp" directory
# (presumably the temporary upstream checkout used earlier in this script -- confirm).
os.chdir("..")
shutil.rmtree("embree-tmp")
# Restore the tracked contents of embree/patches from git, then apply every
# file found in that directory as a patch with `git apply`.
# NOTE(review): subprocess.run is called without check=True, so a failing
# `git restore` / `git apply` does not stop the script; os.listdir order is
# unspecified, so patches must not depend on each other's order -- confirm.
subprocess.run(["git", "restore", "embree/patches"])
for patch in os.listdir("embree/patches"):
subprocess.run(["git", "apply", "embree/patches/" + patch])

View file

@ -6,9 +6,9 @@
#define RTC_VERSION_MAJOR 3
#define RTC_VERSION_MINOR 13
#define RTC_VERSION_PATCH 0
#define RTC_VERSION 31300
#define RTC_VERSION_STRING "3.13.0"
#define RTC_VERSION_PATCH 1
#define RTC_VERSION 31301
#define RTC_VERSION_STRING "3.13.1"
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1

View file

@ -0,0 +1,917 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.h"
#include "bvh_traverser1.h"
#include "node_intersector1.h"
#include "node_intersector_packet.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"
#define SWITCH_DURING_DOWN_TRAVERSAL 1
#define FORCE_SINGLE_MODE 0
#define ENABLE_FAST_COHERENT_CODEPATHS 1
namespace embree
{
namespace isa
{
/*! Closest-hit traversal of a single ray (lane \p k of the packet \p ray),
 *  starting at the subtree \p root.
 *
 *  The packet traversal state \p tray is converted into a single-ray
 *  TravRay for lane k; hits are written back into lane k of \p ray by the
 *  primitive intersector. \p bvh is not read by this function itself. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
                                                                                             const BVH* bvh,
                                                                                             NodeRef root,
                                                                                             size_t k,
                                                                                             Precalculations& pre,
                                                                                             RayHitK<K>& ray,
                                                                                             const TravRayK<K, robust>& tray,
                                                                                             IntersectContext* context)
{
  /* traversal stack; slot 0 is seeded with the start node */
  StackItemT<NodeRef> nodeStack[stackSizeSingle];
  StackItemT<NodeRef>* stackLimit = nodeStack + stackSizeSingle; // one past the last usable slot
  StackItemT<NodeRef>* top = nodeStack + 1;                      // next free slot
  nodeStack[0].dist = neg_inf;
  nodeStack[0].ptr = root;

  /* extract lane k of the packet into a single-ray traversal structure */
  TravRay<N,robust> singleRay;
  singleRay.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);

  /* outer loop: one iteration per popped stack entry; `goto pop` restarts it */
  while (true) pop:
  {
    /* done once the stack has been drained */
    if (unlikely(top == nodeStack))
      break;

    --top;
    NodeRef node = NodeRef(top->ptr);

    /* skip entries that start beyond the current closest hit of lane k */
    const float entryDist = *(float*)&top->dist;
    if (unlikely(entryDist > ray.tfar[k]))
      continue;

    /* descend until the node intersector reports a non-inner node */
    for (;;)
    {
      size_t hitMask;
      vfloat<N> childNear;
      STAT3(normal.trav_nodes, 1, 1, 1);
      bool isInnerNode = BVHNNodeIntersector1<N, types, robust>::intersect(node, singleRay, ray.time()[k], childNear, hitMask);
      if (unlikely(!isInnerNode))
      {
        /* undo the node count, this one is handled as a leaf below */
        STAT3(normal.trav_nodes,-1,-1,-1);
        break;
      }

      /* nothing hit below this node: fetch the next stack entry */
      if (unlikely(hitMask == 0))
        goto pop;

      /* continue with one hit child, the remaining ones go onto the stack */
      BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(node, hitMask, childNear, top, stackLimit);
    }

    /* leaf: hand its primitives to the primitive intersector */
    assert(node != BVH::emptyNode);
    STAT3(normal.trav_leaves, 1, 1, 1);
    size_t primCount;
    Primitive* prims = (Primitive*)node.leaf(primCount);
    size_t lazyNode = 0;
    PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prims, primCount, singleRay, lazyNode);

    /* shrink the traversal interval to the (possibly updated) hit distance */
    singleRay.tfar = ray.tfar[k];

    /* a node handed back by the primitive intersector is traversed next */
    if (unlikely(lazyNode))
    {
      top->ptr = lazyNode;
      top->dist = neg_inf;
      ++top;
    }
  }
}
/*! Closest-hit query for a packet of K rays.
 *
 *  \param valid_i  per-lane activity mask; a lane is processed when its entry equals -1
 *  \param This     acceleration structure wrapper; This->ptr is cast to the BVH
 *  \param ray      ray/hit packet; the primitive intersectors write hits into it
 *  \param context  intersection context (asserted non-null when the coherent
 *                  fast path is compiled in)
 *
 *  Dispatch order as visible in this function:
 *   1. empty BVH root                      -> return immediately
 *   2. types == BVH_AN1 and coherent ctx   -> intersectCoherent()
 *   3. `single` == true                    -> every valid lane is traced on its own with intersect1()
 *   4. otherwise                           -> packet ("chunk") traversal with a stack of per-lane distances
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayHitK<K>& __restrict__ ray,
IntersectContext* __restrict__ context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
assert(context);
/* frustum based fast path: only for BVH_AN1 trees and contexts flagged as coherent */
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
{
intersectCoherent(valid_i, This, ray, context);
return;
}
#endif
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
#if defined(__AVX__)
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
#endif
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp the ray interval to non-negative distances */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
if (single)
{
/* inactive lanes get the empty interval [+inf, -inf] */
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
/* trace every valid lane individually from the root, then we are done */
for (; valid_bits!=0; ) {
const size_t i = bscf(valid_bits);
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
}
return;
}
/* NOTE: the `single` branch above always returns, so from here on `single`
   is false; the later `if (single)` tests are therefore never taken while
   FORCE_SINGLE_MODE is 0. */
/* determine switch threshold based on flags */
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
vint<K> octant = ray.octant();
/* invalid lanes are tagged with the sentinel octant 0xffffffff */
octant = select(valid, octant, vint<K>(0xffffffff));
/* test whether we have ray with opposing direction signs in the packet */
bool split = false;
{
size_t bits = valid_bits;
vbool<K> vsplit( false );
do
{
/* take the first remaining lane as reference and drop all lanes sharing its octant */
const size_t valid_index = bsf(bits);
vbool<K> octant_valid = octant[valid_index] == octant;
bits &= ~(size_t)movemask(octant_valid);
/* a lane whose octant is the reference octant with all three bits flipped is "opposing" */
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
} while (bits);
if (any(vsplit)) split = true;
}
/* one iteration per group of rays that is traversed together */
do
{
const size_t valid_index = bsf(valid_bits);
/* number of octant bits in which each lane differs from the reference lane */
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
const vint<K> count_diff_octant = \
((diff_octant >> 2) & 1) +
((diff_octant >> 1) & 1) +
((diff_octant >> 0) & 1);
/* group = not yet processed lanes that differ in at most one octant bit */
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
/* mark the group as processed and remove it from the pending set */
octant = select(octant_valid,vint<K>(0xffffffff),octant);
valid_bits &= ~(size_t)movemask(octant_valid);
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
/* allocate stack and push root node */
/* slot 0 holds an invalidNode sentinel that terminates the pop loop, slot 1 the root */
vfloat<K> stack_near[stackSizeChunk];
NodeRef stack_node[stackSizeChunk];
stack_node[0] = BVH::invalidNode;
stack_near[0] = inf;
stack_node[1] = bvh->root;
stack_near[1] = tray.tnear;
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
NodeRef* __restrict__ sptr_node = stack_node + 2;
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
/* pop loop; `goto pop` jumps back to the start of the loop body */
while (1) pop:
{
/* pop next node from stack */
assert(sptr_node > stack_node);
sptr_node--;
sptr_near--;
NodeRef cur = *sptr_node;
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* cull node if behind closest hit point */
vfloat<K> curDist = *sptr_near;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active)))
continue;
/* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
#if FORCE_SINGLE_MODE == 0
if (single)
#endif
{
size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
if (unlikely(popcnt(bits) <= switchThreshold))
#endif
{
/* few active lanes left: finish this subtree ray by ray */
for (; bits!=0; ) {
const size_t i = bscf(bits);
intersect1(This, bvh, cur, i, pre, ray, tray, context);
}
/* pick up hit distances found by the single ray traversal */
tray.tfar = min(tray.tfar, ray.tfar);
continue;
}
}
#endif
/* down traversal: follow one hit child per step until a leaf is reached */
while (likely(!cur.isLeaf()))
{
/* process nodes */
const vbool<K> valid_node = tray.tfar > curDist;
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const BaseNode* __restrict__ const node = nodeRef.baseNode();
/* set cur to invalid */
cur = BVH::emptyNode;
curDist = pos_inf;
/* counts the children pushed onto the stack for this node */
size_t num_child_hits = 0;
for (unsigned i = 0; i < N; i++)
{
const NodeRef child = node->children[i];
/* the first empty child slot ends the scan of this node */
if (unlikely(child == BVH::emptyNode)) break;
vfloat<K> lnearP;
vbool<K> lhit = valid_node;
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
/* if we hit the child we choose to continue with that child if it
is closer than the current next child, or we push it onto the stack */
if (likely(any(lhit)))
{
assert(sptr_node < stackEnd);
assert(child != BVH::emptyNode);
/* lanes that miss the child get distance inf */
const vfloat<K> childDist = select(lhit, lnearP, inf);
/* push cur node onto stack and continue with hit child */
if (any(childDist < curDist))
{
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
*sptr_node = cur; sptr_node++;
*sptr_near = curDist; sptr_near++;
}
curDist = childDist;
cur = child;
}
/* push hit child onto stack */
else {
num_child_hits++;
*sptr_node = child; sptr_node++;
*sptr_near = childDist; sptr_near++;
}
}
}
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
#endif
/* no child was hit by any lane */
if (unlikely(cur == BVH::emptyNode))
goto pop;
/* improved distance sorting for 3 or more hits */
/* reorders the top two or three stack entries: an entry that is nearer
   in at least one lane is swapped towards the top of the stack */
if (unlikely(num_child_hits >= 2))
{
if (any(sptr_near[-2] < sptr_near[-1]))
{
std::swap(sptr_near[-2],sptr_near[-1]);
std::swap(sptr_node[-2],sptr_node[-1]);
}
if (unlikely(num_child_hits >= 3))
{
if (any(sptr_near[-3] < sptr_near[-1]))
{
std::swap(sptr_near[-3],sptr_near[-1]);
std::swap(sptr_node[-3],sptr_node[-1]);
}
if (any(sptr_near[-3] < sptr_near[-2]))
{
std::swap(sptr_near[-3],sptr_near[-2]);
std::swap(sptr_node[-3],sptr_node[-2]);
}
}
}
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
if (single)
{
// seems to be the best place for testing utilization
/* too few lanes still reach `cur`: push it back so that the pop loop
   can hand it over to the single ray traversal */
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
{
*sptr_node++ = cur;
*sptr_near++ = curDist;
goto pop;
}
}
#endif
}
/* return if stack is empty */
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* intersect leaf */
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
/* take over the hit distances of the lanes that tested this leaf */
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
/* a node returned through lazy_node is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
*sptr_node = lazy_node; sptr_node++;
*sptr_near = neg_inf; sptr_near++;
}
}
} while(valid_bits);
}
/*! Closest-hit query for a packet of K rays using a shared frustum per octant group.
 *
 *  Within this file it is only called from intersect(), which has already
 *  rejected an empty BVH root and only dispatches here for types == BVH_AN1
 *  with a coherent context. Rays are grouped by identical octant; for each
 *  group a Frustum is built and used to pre-select the children of a node
 *  before the per-lane box test.
 *
 *  \param valid_i  per-lane activity mask; a lane is processed when its entry equals -1
 *  \param This     acceleration structure wrapper; This->ptr is cast to the BVH
 *  \param ray      ray/hit packet updated by the primitive intersector
 *  \param context  intersection context forwarded to the primitive intersector
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayHitK<K>& __restrict__ ray,
IntersectContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp the ray interval to non-negative distances */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
vint<K> octant = ray.octant();
/* invalid lanes get the sentinel octant 0xffffffff */
octant = select(valid, octant, vint<K>(0xffffffff));
/* one iteration per group of valid lanes that share the same octant */
do
{
const size_t valid_index = bsf(valid_bits);
const vbool<K> octant_valid = octant[valid_index] == octant;
valid_bits &= ~(size_t)movemask(octant_valid);
/* lanes outside the group get the empty interval [+inf, -inf] */
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
Frustum<robust> frustum;
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
stack[0].ptr = bvh->root;
stack[0].dist = neg_inf;
/* pop loop; `goto pop` jumps back to the start of the loop body */
while (1) pop:
{
/* pop next node from stack */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* cull node if behind closest hit point */
/* the stack stores one scalar distance per entry; it is expanded to all lanes here */
vfloat<K> curDist = *(float*)&stackPtr->dist;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active))) continue;
while (likely(!cur.isLeaf()))
{
/* process nodes */
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
/* frustum test: bit i of the result selects child i for the per-lane test below;
   fmin[i] is used as that child's distance */
vfloat<N> fmin;
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
if (unlikely(!m_frustum_node)) goto pop;
cur = BVH::emptyNode;
curDist = pos_inf;
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
/* counts the children pushed onto the stack for this node */
size_t num_child_hits = 0;
do {
const size_t i = bscf(m_frustum_node);
vfloat<K> lnearP;
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
STAT3(normal.trav_nodes, 1, 1, 1);
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
if (likely(any(lhit)))
{
/* the frustum distance, not the per-lane lnearP, is used for ordering */
const vfloat<K> childDist = fmin[i];
const NodeRef child = node->child(i);
BVHN<N>::prefetch(child);
/* continue with the nearer child, push the previously selected one */
if (any(childDist < curDist))
{
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
stackPtr->ptr = cur;
*(float*)&stackPtr->dist = toScalar(curDist);
stackPtr++;
}
curDist = childDist;
cur = child;
}
/* push hit child onto stack */
else {
num_child_hits++;
stackPtr->ptr = child;
*(float*)&stackPtr->dist = toScalar(childDist);
stackPtr++;
}
}
} while(m_frustum_node);
if (unlikely(cur == BVH::emptyNode)) goto pop;
/* improved distance sorting for 3 or more hits */
/* NOTE(review): `dist` is compared directly here while it is read and written
   through `*(float*)&...->dist` elsewhere in this function; the declared type
   of `dist` is not visible in this file -- confirm the comparison orders the
   stored float values as intended. */
if (unlikely(num_child_hits >= 2))
{
if (stackPtr[-2].dist < stackPtr[-1].dist)
std::swap(stackPtr[-2],stackPtr[-1]);
if (unlikely(num_child_hits >= 3))
{
if (stackPtr[-3].dist < stackPtr[-1].dist)
std::swap(stackPtr[-3],stackPtr[-1]);
if (stackPtr[-3].dist < stackPtr[-2].dist)
std::swap(stackPtr[-3],stackPtr[-2]);
}
}
}
/* intersect leaf */
assert(cur != BVH::invalidNode);
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
/* reduce max distance interval on successful intersection */
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
{
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
/* keep the frustum's far distance in sync with the shortened rays */
frustum.template updateMaxDist<K>(tray.tfar);
}
/* a node returned through lazy_node is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->dist = neg_inf;
stackPtr++;
}
}
} while(valid_bits);
}
// ===================================================================================================================================================================
// ===================================================================================================================================================================
// ===================================================================================================================================================================
/*! Any-hit (occlusion) traversal of a single ray (lane \p k of the packet
 *  \p ray), starting at the subtree \p root.
 *
 *  \return true as soon as the primitive intersector reports an occluder;
 *          in that case lane k of ray.tfar is set to neg_inf. Returns false
 *          when the stack runs empty without a hit.
 *
 *  \p bvh is not read by this function itself. */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
                                                                                            const BVH* bvh,
                                                                                            NodeRef root,
                                                                                            size_t k,
                                                                                            Precalculations& pre,
                                                                                            RayK<K>& ray,
                                                                                            const TravRayK<K, robust>& tray,
                                                                                            IntersectContext* context)
{
  /* stack of nodes that are still to be visited; slot 0 holds the start node */
  NodeRef nodeStack[stackSizeSingle];
  NodeRef* stackLimit = nodeStack + stackSizeSingle; // one past the last usable slot
  NodeRef* top = nodeStack + 1;                      // next free slot
  nodeStack[0] = root;

  /* extract lane k of the packet into a single-ray traversal structure */
  TravRay<N,robust> singleRay;
  singleRay.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);

  /* outer loop: one iteration per popped stack entry; `goto pop` restarts it */
  while (true) pop:
  {
    /* stack drained without finding an occluder */
    if (unlikely(top == nodeStack))
      break;

    --top;
    NodeRef node = *top;

    /* descend until the node intersector reports a non-inner node */
    for (;;)
    {
      size_t hitMask;
      vfloat<N> childNear;
      STAT3(shadow.trav_nodes, 1, 1, 1);
      bool isInnerNode = BVHNNodeIntersector1<N, types, robust>::intersect(node, singleRay, ray.time()[k], childNear, hitMask);
      if (unlikely(!isInnerNode))
      {
        /* undo the node count, this one is handled as a leaf below */
        STAT3(shadow.trav_nodes,-1,-1,-1);
        break;
      }

      /* nothing hit below this node: fetch the next stack entry */
      if (unlikely(hitMask == 0))
        goto pop;

      /* continue with one hit child, the remaining ones go onto the stack */
      BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(node, hitMask, childNear, top, stackLimit);
    }

    /* leaf: ask the primitive intersector whether anything blocks the ray */
    assert(node != BVH::emptyNode);
    STAT3(shadow.trav_leaves, 1, 1, 1);
    size_t primCount;
    Primitive* prims = (Primitive*)node.leaf(primCount);
    size_t lazyNode = 0;
    const bool blocked = PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prims, primCount, singleRay, lazyNode);
    if (blocked)
    {
      /* flag the lane as occluded and stop immediately */
      ray.tfar[k] = neg_inf;
      return true;
    }

    /* a node handed back by the primitive intersector is traversed next */
    if (unlikely(lazyNode))
    {
      *top = lazyNode;
      ++top;
    }
  }
  return false;
}
/*! Occlusion (any-hit) query for a packet of K rays.
 *
 *  \param valid_i  per-lane activity mask; a lane is processed when its entry equals -1
 *  \param This     acceleration structure wrapper; This->ptr is cast to the BVH
 *  \param ray      ray packet; lanes found to be occluded get tfar = neg_inf on exit
 *  \param context  intersection context (asserted non-null when the coherent
 *                  fast path is compiled in)
 *
 *  Dispatches to occludedCoherent() for types == BVH_AN1 with a coherent
 *  context; otherwise performs a packet traversal that, when `single` is
 *  set and the switch block is compiled in, hands sparsely populated
 *  subtrees to occluded1(). Unlike the closest-hit traversal, hit children
 *  are not ordered by distance here.
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayK<K>& __restrict__ ray,
IntersectContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
assert(context);
/* frustum based fast path: only for BVH_AN1 trees and contexts flagged as coherent */
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
{
occludedCoherent(valid_i, This, ray, context);
return;
}
#endif
/* filter out already occluded and invalid rays */
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
const size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
/* clamp the ray interval to non-negative distances; inactive lanes get [+inf, -inf] */
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
/* lanes that need no further work: initially all invalid lanes */
vbool<K> terminated = !valid;
const vfloat<K> inf = vfloat<K>(pos_inf);
/* determine switch threshold based on flags */
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
/* allocate stack and push root node */
/* slot 0 holds an invalidNode sentinel that terminates the pop loop, slot 1 the root */
vfloat<K> stack_near[stackSizeChunk];
NodeRef stack_node[stackSizeChunk];
stack_node[0] = BVH::invalidNode;
stack_near[0] = inf;
stack_node[1] = bvh->root;
stack_near[1] = tray.tnear;
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
NodeRef* __restrict__ sptr_node = stack_node + 2;
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
/* pop loop; `goto pop` jumps back to the start of the loop body */
while (1) pop:
{
/* pop next node from stack */
assert(sptr_node > stack_node);
sptr_node--;
sptr_near--;
NodeRef cur = *sptr_node;
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* cull node if behind closest hit point */
vfloat<K> curDist = *sptr_near;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active)))
continue;
/* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
#if FORCE_SINGLE_MODE == 0
if (single)
#endif
{
size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
if (unlikely(popcnt(bits) <= switchThreshold))
#endif
{
/* few active lanes left: finish this subtree ray by ray */
for (; bits!=0; ) {
const size_t i = bscf(bits);
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
set(terminated, i);
}
if (all(terminated)) break;
/* terminated lanes get tfar = neg_inf so they fail all later distance tests */
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
continue;
}
}
#endif
/* down traversal: follow one hit child per step until a leaf is reached */
while (likely(!cur.isLeaf()))
{
/* process nodes */
const vbool<K> valid_node = tray.tfar > curDist;
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const BaseNode* __restrict__ const node = nodeRef.baseNode();
/* set cur to invalid */
cur = BVH::emptyNode;
curDist = pos_inf;
for (unsigned i = 0; i < N; i++)
{
const NodeRef child = node->children[i];
/* the first empty child slot ends the scan of this node */
if (unlikely(child == BVH::emptyNode)) break;
vfloat<K> lnearP;
vbool<K> lhit = valid_node;
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
if (likely(any(lhit)))
{
assert(sptr_node < stackEnd);
assert(child != BVH::emptyNode);
/* lanes that miss the child get distance inf */
const vfloat<K> childDist = select(lhit, lnearP, inf);
/* push 'cur' node onto stack and continue with hit child */
if (likely(cur != BVH::emptyNode)) {
*sptr_node = cur; sptr_node++;
*sptr_near = curDist; sptr_near++;
}
curDist = childDist;
cur = child;
}
}
/* no child was hit by any lane */
if (unlikely(cur == BVH::emptyNode))
goto pop;
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
if (single)
{
// seems to be the best place for testing utilization
/* too few lanes still reach `cur`: push it back so that the pop loop
   can hand it over to the single ray traversal */
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
{
*sptr_node++ = cur;
*sptr_near++ = curDist;
goto pop;
}
}
#endif
}
/* return if stack is empty */
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* intersect leaf */
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
size_t lazy_node = 0;
/* only lanes that are not yet terminated are passed to the primitive test */
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
if (all(terminated)) break;
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
/* a node returned through lazy_node is pushed with distance neg_inf */
if (unlikely(lazy_node)) {
*sptr_node = lazy_node; sptr_node++;
*sptr_near = neg_inf; sptr_near++;
}
}
/* report occlusion: valid lanes that terminated get tfar = neg_inf */
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
/*! Occlusion (any-hit) query for a packet of K rays using a shared frustum
 *  per octant group.
 *
 *  Within this file it is only called from occluded(), which has already
 *  rejected an empty BVH root and only dispatches here for types == BVH_AN1
 *  with a coherent context. Rays are grouped by identical octant; for each
 *  group a Frustum pre-selects the children of a node before the per-lane
 *  box test. Each stack entry carries the bit mask of lanes that hit it.
 *
 *  \param valid_i  per-lane activity mask; a lane is processed when its entry equals -1
 *  \param This     acceleration structure wrapper; This->ptr is cast to the BVH
 *  \param ray      ray packet; lanes found to be occluded get tfar = neg_inf on exit
 *  \param context  intersection context forwarded to the primitive intersector
 *
 *  Traversal statistics are recorded under the `shadow` counters, matching
 *  occluded() and occluded1().
 *
 *  NOTE(review): unlike occluded(), lanes with ray.tfar < 0 are not removed
 *  from `valid` here; they are traced with tfar clamped to 0 -- confirm this
 *  is intended.
 */
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
                                                                                                   Accel::Intersectors* __restrict__ This,
                                                                                                   RayK<K>& __restrict__ ray,
                                                                                                   IntersectContext* context)
{
  BVH* __restrict__ bvh = (BVH*)This->ptr;

  /* filter out invalid rays */
  vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
  valid &= ray.valid();
#endif

  /* return if there are no valid rays */
  size_t valid_bits = movemask(valid);
  if (unlikely(valid_bits == 0)) return;

  /* verify correct input */
  assert(all(valid, ray.valid()));
  assert(all(valid, ray.tnear() >= 0.0f));
  assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));

  Precalculations pre(valid,ray);

  /* load ray; the ray interval is clamped to non-negative distances */
  TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);

  /* lanes that need no further work: initially all invalid lanes */
  vbool<K> terminated = !valid;

  /* invalid lanes get the sentinel octant 0xffffffff */
  vint<K> octant = ray.octant();
  octant = select(valid, octant, vint<K>(0xffffffff));

  /* one iteration per group of valid lanes that share the same octant */
  do
  {
    const size_t valid_index = bsf(valid_bits);
    vbool<K> octant_valid = octant[valid_index] == octant;
    valid_bits &= ~(size_t)movemask(octant_valid);

    /* lanes outside the group get the empty interval [+inf, -inf] */
    tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
    tray.tfar  = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));

    Frustum<robust> frustum;
    frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);

    StackItemMaskT<NodeRef> stack[stackSizeSingle];  // stack of nodes
    StackItemMaskT<NodeRef>* stackPtr = stack + 1;   // current stack pointer
    stack[0].ptr = bvh->root;
    stack[0].mask = movemask(octant_valid);

    /* pop loop; `goto pop` jumps back to the start of the loop body */
    while (1) pop:
    {
      /* pop next node from stack */
      if (unlikely(stackPtr == stack)) break;
      stackPtr--;
      NodeRef cur = NodeRef(stackPtr->ptr);

      /* cull node if all of its active rays have already been terminated */
      size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
      if (unlikely(m_active == 0)) continue;

      while (likely(!cur.isLeaf()))
      {
        /* process nodes */
        const NodeRef nodeRef = cur;
        const AABBNode* __restrict__ const node = nodeRef.getAABBNode();

        /* frustum test: bit i of the result selects child i for the per-lane test */
        vfloat<N> fmin;
        size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
        if (unlikely(!m_frustum_node)) goto pop;

        cur = BVH::emptyNode;
        m_active = 0;

        do {
          const size_t i = bscf(m_frustum_node);
          vfloat<K> lnearP;
          vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
          STAT3(shadow.trav_nodes, 1, 1, 1);
          BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);

          if (likely(any(lhit)))
          {
            const NodeRef child = node->child(i);
            assert(child != BVH::emptyNode);
            BVHN<N>::prefetch(child);

            /* push the previously selected child together with its lane mask
               and continue with the child that was hit last (no distance ordering) */
            if (likely(cur != BVH::emptyNode)) {
              stackPtr->ptr = cur;
              stackPtr->mask = m_active;
              stackPtr++;
            }
            cur = child;
            m_active = movemask(lhit);
          }
        } while(m_frustum_node);

        if (unlikely(cur == BVH::emptyNode)) goto pop;
      }

      /* intersect leaf */
      assert(cur != BVH::invalidNode);
      assert(cur != BVH::emptyNode);
#if defined(__AVX__)
      STAT3(shadow.trav_leaves, 1, popcnt(m_active), K);
#endif
      if (unlikely(!m_active)) continue;

      size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
      size_t lazy_node = 0;

      /* only lanes that are not yet terminated are passed to the primitive test */
      terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);

      /* stop this octant group once every one of its lanes is terminated */
      octant_valid &= !terminated;
      if (unlikely(none(octant_valid))) break;
      tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays

      /* a node returned through lazy_node is pushed for the remaining lanes of the group */
      if (unlikely(lazy_node)) {
        stackPtr->ptr = lazy_node;
        stackPtr->mask = movemask(octant_valid);
        stackPtr++;
      }
    }
  } while(valid_bits);

  /* report occlusion: valid lanes that terminated get tfar = neg_inf */
  vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
}
}

View file

@ -0,0 +1,59 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.cpp"
namespace embree
{
namespace isa
{
////////////////////////////////////////////////////////////////////////////////
/// BVH4Intersector4 Definitions
////////////////////////////////////////////////////////////////////////////////
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
// Tail of the BVH4 4-wide packet intersector definitions. Each line defines
// one named intersector symbol through DEFINE_INTERSECTOR4 and is wrapped in
// an IF_ENABLED_<GEOMETRY> macro (presumably compiling the line out when that
// geometry type is disabled in kernels/config.h -- confirm against the macro
// definitions).

// --- quads ---
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
// Motion-blur ("MB") quad variants use the BVH_AN2_AN4D node-type mask.
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
// --- curves / points (all four share VirtualCurveIntersectorK<4>) ---
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));
// --- subdivision patches ---
// The next commented-out line is textually identical to the active line that follows it.
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
// The next commented-out line is textually identical to the active line above it.
//IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
// --- user geometry and instances: these use BVHNIntersectorKChunk rather than BVHNIntersectorKHybrid ---
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
// --- grids ---
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
// Commented-out alternative for the same symbol that would use BVHNIntersectorKChunk instead of BVHNIntersectorKHybrid.
//IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
// NOTE(review): the symbol below is named "...HybridMoeller" but instantiates
// SubGridMBIntersectorKPluecker with the fourth template argument set to true,
// exactly like the Pluecker line that follows. Confirm whether this is intended.
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
}
}

View file

@ -0,0 +1,528 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_stream.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../common/scene.h"
#include <bitset>
namespace embree
{
namespace isa
{
/*! Single-bit lookup table: entry i is an int in which only bit i is set.
 *  It is handed to traverseIncoherentStream() by occludedIncoherent() further
 *  down in this file.
 *
 *  The shift is performed on an unsigned operand and the result converted to
 *  int afterwards, so that entry 31 never shifts a 1 into the sign bit of a
 *  signed int (undefined behaviour in C++11 and a -Wshift-sign-overflow
 *  warning in clang). The stored bit patterns are unchanged. */
__aligned(64) static const int shiftTable[32] = {
  (int)(1u <<  0), (int)(1u <<  1), (int)(1u <<  2), (int)(1u <<  3), (int)(1u <<  4), (int)(1u <<  5), (int)(1u <<  6), (int)(1u <<  7),
  (int)(1u <<  8), (int)(1u <<  9), (int)(1u << 10), (int)(1u << 11), (int)(1u << 12), (int)(1u << 13), (int)(1u << 14), (int)(1u << 15),
  (int)(1u << 16), (int)(1u << 17), (int)(1u << 18), (int)(1u << 19), (int)(1u << 20), (int)(1u << 21), (int)(1u << 22), (int)(1u << 23),
  (int)(1u << 24), (int)(1u << 25), (int)(1u << 26), (int)(1u << 27), (int)(1u << 28), (int)(1u << 29), (int)(1u << 30), (int)(1u << 31)
};
/*! Entry point for closest-hit stream queries.
 *
 *  Returns immediately when the BVH root is the empty node; otherwise the
 *  stream is reinterpreted as VSIZEL-wide packets and forwarded to
 *  intersectCoherent(). Only coherent contexts are supported here, which is
 *  checked by an assertion.
 *
 *  \param This          accelerator whose `ptr` member is the BVH
 *  \param inputPackets  array of packet pointers making up the stream
 *  \param numOctantRays number of rays in the stream
 *  \param context       intersection context (must be coherent)
 */
template<int N, int types, bool robust, typename PrimitiveIntersector>
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersect(Accel::Intersectors* __restrict__ This,
                                                                                             RayHitN** inputPackets,
                                                                                             size_t numOctantRays,
                                                                                             IntersectContext* context)
{
  BVH* __restrict__ tree = (BVH*) This->ptr;

  /* an empty tree can occur when all geometry was invalid; nothing to trace then */
  const bool treeIsEmpty = (tree->root == BVH::emptyNode);
  if (!treeIsEmpty)
  {
    /* there is no incoherent closest-hit path in this intersector */
    assert(context->isCoherent());
    intersectCoherent(This, (RayHitK<VSIZEL>**)inputPackets, numOctantRays, context);
  }
}
/*! Closest-hit BVH traversal for a coherent stream of K-wide ray/hit packets.
 *
 *  \param This          accelerator whose `ptr` member is the BVH to traverse;
 *                       also used for the per-packet fallback (This->intersect)
 *  \param inputPackets  array of packet pointers; the primitive intersector is
 *                       given these packets to record hits in
 *  \param numOctantRays number of rays in the stream (asserted to be at most
 *                       MAX_INTERNAL_STREAM_SIZE)
 *  \param context       intersection context (asserted to be coherent)
 *
 *  Active rays are tracked in size_t bit masks in which packet i owns the K
 *  consecutive bits starting at bit i*K (see the leaf loop at the end).
 */
template<int N, int types, bool robust, typename PrimitiveIntersector>
template<int K>
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersectCoherent(Accel::Intersectors* __restrict__ This,
RayHitK<K>** inputPackets,
size_t numOctantRays,
IntersectContext* context)
{
assert(context->isCoherent());
BVH* __restrict__ bvh = (BVH*) This->ptr;
__aligned(64) StackItemMaskCoherent stack[stackSizeSingle]; // stack of nodes
assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
/* per-packet traversal copies of the rays plus a frustum describing the whole stream */
__aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
__aligned(64) Frustum<robust> frustum;
bool commonOctant = true;
/* fills packets/frustum, may clear commonOctant, and returns the mask of active rays */
const size_t m_active = initPacketsAndFrustum((RayK<K>**)inputPackets, numOctantRays, packets, frustum, commonOctant);
if (unlikely(m_active == 0)) return;
/* case of non-common origin */
/* (the flag tested is commonOctant: when it was cleared, each packet is traced individually instead) */
if (unlikely(!commonOctant))
{
const size_t numPackets = (numOctantRays+K-1)/K;
for (size_t i = 0; i < numPackets; i++)
This->intersect(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
return;
}
/* seed the stack with the root node, active for all valid rays and without a parent */
stack[0].mask = m_active;
stack[0].parent = 0;
stack[0].child = bvh->root;
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
StackItemMaskCoherent* stackPtr = stack + 1;
/* `pop` labels the loop body: `goto pop` from the inner loop abandons the
   current subtree and resumes with the stack-empty check below */
while (1) pop:
{
if (unlikely(stackPtr == stack)) break;
STAT3(normal.trav_stack_pop,1,1,1);
stackPtr--;
/*! pop next node */
NodeRef cur = NodeRef(stackPtr->child);
size_t m_trav_active = stackPtr->mask;
assert(m_trav_active);
NodeRef parent = stackPtr->parent;
/* descend through inner nodes until a leaf is reached */
while (1)
{
if (unlikely(cur.isLeaf())) break;
const AABBNode* __restrict__ const node = cur.getAABBNode();
parent = cur;
/* one ray mask per child, initialised to the rays active at this node */
__aligned(64) size_t maskK[N];
for (size_t i = 0; i < N; i++)
maskK[i] = m_trav_active;
vfloat<N> dist;
const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
/* no child was hit: drop this subtree */
if (unlikely(m_node_hit == 0)) goto pop;
/* receives cur, m_trav_active and stackPtr; presumably selects the child to
   continue with and pushes the remaining hit children -- see the traverser */
BVHNNodeTraverserStreamHitCoherent<N, types>::traverseClosestHit(cur, m_trav_active, vbool<N>((int)m_node_hit), dist, (size_t*)maskK, stackPtr);
assert(m_trav_active);
}
/* non-root and leaf => full culling test for all rays */
if (unlikely(parent != 0 && cur.isLeaf()))
{
const AABBNode* __restrict__ const node = parent.getAABBNode();
/* find which child slot of the parent refers to this leaf */
size_t boxID = 0xff;
for (size_t i = 0; i < N; i++)
if (node->child(i) == cur) { boxID = i; break; }
assert(boxID < N);
assert(cur == node->child(boxID));
/* re-test the packets against that child's box and narrow the active mask */
m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
}
/*! this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(normal.trav_leaves, 1, 1, 1);
size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
size_t bits = m_trav_active;
/*! intersect stream of rays with all primitives */
/* lazy_node is handed to intersectK but not consumed afterwards in this function */
size_t lazy_node = 0;
#if defined(__SSE4_2__)
STAT_USER(1,(popcnt(bits)+K-1)/K*4);
#endif
while(bits)
{
/* packet index of the lowest still-set ray bit */
size_t i = bsf(bits) / K;
/* all K bits belonging to packet i; the whole packet is handled in one step */
const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
assert(m_isec & bits);
bits &= ~m_isec;
TravRayKStream<K, robust>& p = packets[i];
vbool<K> m_valid = p.tnear <= p.tfar;
PrimitiveIntersectorK<K>::intersectK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
/* shrink the traversal ray's far distance to the (possibly updated) packet tfar */
p.tfar = min(p.tfar, inputPackets[i]->tfar);
};
} // traversal + intersection
}
/*! Entry point for occlusion (any-hit) stream queries.
 *
 *  Returns immediately when the BVH root is the empty node. Otherwise the
 *  stream is dispatched on the context's coherence flag: coherent streams are
 *  viewed as VSIZEL-wide packets and go to occludedCoherent(), all others are
 *  viewed as VSIZEX-wide packets and go to occludedIncoherent().
 *
 *  \param This          accelerator whose `ptr` member is the BVH
 *  \param inputPackets  array of packet pointers making up the stream
 *  \param numOctantRays number of rays in the stream
 *  \param context       intersection context
 */
template<int N, int types, bool robust, typename PrimitiveIntersector>
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occluded(Accel::Intersectors* __restrict__ This,
                                                                                            RayN** inputPackets,
                                                                                            size_t numOctantRays,
                                                                                            IntersectContext* context)
{
  BVH* __restrict__ tree = (BVH*) This->ptr;

  /* an empty tree can occur when all geometry was invalid; nothing can occlude */
  if (tree->root == BVH::emptyNode)
    return;

  /* the incoherent path is the expected one */
  if (likely(!context->isCoherent()))
  {
    occludedIncoherent(This, (RayK<VSIZEX>**)inputPackets, numOctantRays, context);
    return;
  }

  occludedCoherent(This, (RayK<VSIZEL>**)inputPackets, numOctantRays, context);
}
/*! Occlusion (any-hit) BVH traversal for a coherent stream of K-wide packets.
 *
 *  \param This          accelerator whose `ptr` member is the BVH to traverse;
 *                       also used for the per-packet fallback (This->occluded)
 *  \param inputPackets  array of packet pointers; lanes found occluded get
 *                       their tfar set to neg_inf
 *  \param numOctantRays number of rays in the stream (asserted to be at most
 *                       MAX_INTERNAL_STREAM_SIZE)
 *  \param context       intersection context (asserted to be coherent)
 *
 *  Rays are tracked in size_t bit masks in which packet i owns the K bits
 *  starting at bit i*K. m_active is the set of rays not yet found occluded;
 *  bits are removed from it as hits are found.
 */
template<int N, int types, bool robust, typename PrimitiveIntersector>
template<int K>
__noinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedCoherent(Accel::Intersectors* __restrict__ This,
RayK<K>** inputPackets,
size_t numOctantRays,
IntersectContext* context)
{
assert(context->isCoherent());
BVH* __restrict__ bvh = (BVH*)This->ptr;
__aligned(64) StackItemMaskCoherent stack[stackSizeSingle]; // stack of nodes
assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
/* inactive rays should have been filtered out before */
__aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
__aligned(64) Frustum<robust> frustum;
bool commonOctant = true;
/* fills packets/frustum, may clear commonOctant, and returns the mask of active rays */
size_t m_active = initPacketsAndFrustum(inputPackets, numOctantRays, packets, frustum, commonOctant);
/* valid rays */
if (unlikely(m_active == 0)) return;
/* case of non-common origin */
/* (the flag tested is commonOctant: when it was cleared, each packet is traced individually instead) */
if (unlikely(!commonOctant))
{
const size_t numPackets = (numOctantRays+K-1)/K;
for (size_t i = 0; i < numPackets; i++)
This->occluded(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
return;
}
/* seed the stack with the root node, active for all valid rays and without a parent */
stack[0].mask = m_active;
stack[0].parent = 0;
stack[0].child = bvh->root;
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
StackItemMaskCoherent* stackPtr = stack + 1;
/* `pop` labels the loop body: `goto pop` from the inner loop abandons the
   current subtree and resumes with the stack-empty check below */
while (1) pop:
{
if (unlikely(stackPtr == stack)) break;
STAT3(normal.trav_stack_pop,1,1,1);
stackPtr--;
/*! pop next node */
NodeRef cur = NodeRef(stackPtr->child);
/* drop rays that were found occluded since this entry was pushed */
size_t m_trav_active = stackPtr->mask & m_active;
if (unlikely(!m_trav_active)) continue;
assert(m_trav_active);
NodeRef parent = stackPtr->parent;
/* descend through inner nodes until a leaf is reached */
while (1)
{
if (unlikely(cur.isLeaf())) break;
const AABBNode* __restrict__ const node = cur.getAABBNode();
parent = cur;
/* one ray mask per child, initialised to the rays active at this node */
__aligned(64) size_t maskK[N];
for (size_t i = 0; i < N; i++)
maskK[i] = m_trav_active;
vfloat<N> dist;
const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
/* no child was hit: drop this subtree */
if (unlikely(m_node_hit == 0)) goto pop;
/* receives cur, m_trav_active and stackPtr; unlike the closest-hit variant
   no distances are passed -- presumably no ordering is needed for any-hit */
BVHNNodeTraverserStreamHitCoherent<N, types>::traverseAnyHit(cur, m_trav_active, vbool<N>((int)m_node_hit), (size_t*)maskK, stackPtr);
assert(m_trav_active);
}
/* non-root and leaf => full culling test for all rays */
if (unlikely(parent != 0 && cur.isLeaf()))
{
const AABBNode* __restrict__ const node = parent.getAABBNode();
/* find which child slot of the parent refers to this leaf */
size_t boxID = 0xff;
for (size_t i = 0; i < N; i++)
if (node->child(i) == cur) { boxID = i; break; }
assert(boxID < N);
assert(cur == node->child(boxID));
/* re-test the packets against that child's box and narrow the active mask */
m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
}
/*! this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(normal.trav_leaves, 1, 1, 1);
size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
size_t bits = m_trav_active & m_active;
/*! intersect stream of rays with all primitives */
/* lazy_node is handed to occludedK but not consumed afterwards in this function */
size_t lazy_node = 0;
#if defined(__SSE4_2__)
STAT_USER(1,(popcnt(bits)+K-1)/K*4);
#endif
while (bits)
{
/* packet index of the lowest still-set ray bit */
size_t i = bsf(bits) / K;
/* all K bits belonging to packet i; the whole packet is handled in one step */
const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
assert(m_isec & bits);
bits &= ~m_isec;
TravRayKStream<K, robust>& p = packets[i];
vbool<K> m_valid = p.tnear <= p.tfar;
vbool<K> m_hit = PrimitiveIntersectorK<K>::occludedK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
/* mark occluded lanes in the caller's packet by setting tfar to -inf */
inputPackets[i]->tfar = select(m_hit & m_valid, vfloat<K>(neg_inf), inputPackets[i]->tfar);
/* and retire those rays from all further traversal */
m_active &= ~((size_t)movemask(m_hit) << (i*K));
}
} // traversal + intersection
}
/*! Occlusion (any-hit) BVH traversal for an incoherent stream of K-wide packets.
 *
 *  \param This          accelerator whose `ptr` member is the BVH to traverse
 *  \param inputPackets  array of packet pointers; rays found occluded get
 *                       their tfar lane set to neg_inf
 *  \param numOctantRays number of rays in the stream (asserted to be <= 32)
 *  \param context       intersection context (asserted to be non-coherent)
 *
 *  Rays are tracked individually: bit r of every size_t mask refers to lane
 *  r % K of packet r / K. `terminated` collects the rays that need no further
 *  work; it starts out as the complement of the valid-ray mask.
 */
template<int N, int types, bool robust, typename PrimitiveIntersector>
template<int K>
__forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedIncoherent(Accel::Intersectors* __restrict__ This,
RayK<K>** inputPackets,
size_t numOctantRays,
IntersectContext* context)
{
assert(!context->isCoherent());
assert(types & BVH_FLAG_ALIGNED_NODE);
__aligned(64) TravRayKStream<K,robust> packet[MAX_INTERNAL_STREAM_SIZE/K];
/* NOTE(review): the limit asserted here is 32 while the array above is sized
   from MAX_INTERNAL_STREAM_SIZE; shiftTable has 32 entries, which is
   presumably the reason -- confirm */
assert(numOctantRays <= 32);
const size_t numPackets = (numOctantRays+K-1)/K;
size_t m_active = 0;
/* build the traversal rays and the mask of valid input rays */
for (size_t i = 0; i < numPackets; i++)
{
const vfloat<K> tnear = inputPackets[i]->tnear();
const vfloat<K> tfar = inputPackets[i]->tfar;
/* a lane is valid when its interval is non-empty and tnear is non-negative */
vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f);
m_active |= (size_t)movemask(m_valid) << (K*i);
const Vec3vf<K>& org = inputPackets[i]->org;
const Vec3vf<K>& dir = inputPackets[i]->dir;
vfloat<K> packet_min_dist = max(tnear, 0.0f);
/* invalid lanes get a far distance of -inf */
vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf);
new (&packet[i]) TravRayKStream<K,robust>(org, dir, packet_min_dist, packet_max_dist);
}
BVH* __restrict__ bvh = (BVH*)This->ptr;
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
stack[0].ptr = bvh->root;
stack[0].mask = m_active;
size_t terminated = ~m_active;
/* near/far offsets based on first ray */
const NearFarPrecalculations nf(Vec3fa(packet[0].rdir.x[0], packet[0].rdir.y[0], packet[0].rdir.z[0]), N);
/* `pop` labels the loop body: `goto pop` from the inner loop abandons the
   current subtree and resumes with the stack-empty check below */
while (1) pop:
{
if (unlikely(stackPtr == stack)) break;
STAT3(shadow.trav_stack_pop,1,1,1);
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* ignore rays that terminated since this entry was pushed */
size_t cur_mask = stackPtr->mask & (~terminated);
if (unlikely(cur_mask == 0)) continue;
while (true)
{
/*! stop if we found a leaf node */
if (unlikely(cur.isLeaf())) break;
const AABBNode* __restrict__ const node = cur.getAABBNode();
/* one int per child; each is used below as the ray mask for that child */
const vint<N> vmask = traverseIncoherentStream(cur_mask, packet, node, nf, shiftTable);
/* one bit per child that was hit by at least one ray */
size_t mask = movemask(vmask != vint<N>(zero));
if (unlikely(mask == 0)) goto pop;
__aligned(64) unsigned int child_mask[N];
vint<N>::storeu(child_mask, vmask); // this explicit store here causes much better code generation
/*! one child is hit, continue with that child */
/* bscf yields a set bit index; `mask` is re-tested for zero afterwards, so it
   presumably also clears that bit */
size_t r = bscf(mask);
assert(r < N);
cur = node->child(r);
BVHN<N>::prefetch(cur,types);
cur_mask = child_mask[r];
/* simple in order sequence */
assert(cur != BVH::emptyNode);
if (likely(mask == 0)) continue;
/* more children were hit: push the first one ... */
stackPtr->ptr = cur;
stackPtr->mask = cur_mask;
stackPtr++;
/* ... then push every further child except the last, which becomes `cur` */
for (; ;)
{
r = bscf(mask);
assert(r < N);
cur = node->child(r);
BVHN<N>::prefetch(cur,types);
cur_mask = child_mask[r];
assert(cur != BVH::emptyNode);
if (likely(mask == 0)) break;
stackPtr->ptr = cur;
stackPtr->mask = cur_mask;
stackPtr++;
}
}
/*! this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(shadow.trav_leaves,1,1,1);
size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
size_t bits = cur_mask;
size_t lazy_node = 0;
/* test every active ray of this leaf individually */
for (; bits != 0;)
{
const size_t rayID = bscf(bits);
RayK<K> &ray = *inputPackets[rayID / K];
const size_t k = rayID % K;
if (PrimitiveIntersectorK<K>::occluded(This, ray, k, context, prim, num, lazy_node))
{
/* mark the ray as occluded in the caller's packet and retire it */
ray.tfar[k] = neg_inf;
terminated |= (size_t)1 << rayID;
}
/* lazy node */
/* a non-zero lazy_node set by the primitive intersector is pushed for
   traversal with the full leaf mask; this check runs once per tested ray */
if (unlikely(lazy_node))
{
stackPtr->ptr = lazy_node;
stackPtr->mask = cur_mask;
stackPtr++;
}
}
/* every bit set: all rays are either invalid or occluded, nothing left to do */
if (unlikely(terminated == (size_t)-1)) break;
}
}
////////////////////////////////////////////////////////////////////////////////
/// ArrayIntersectorKStream Definitions
////////////////////////////////////////////////////////////////////////////////
/*! Primitive-intersector selectors for BVHNIntersectorStream.
 *
 *  Each selector exposes a member alias template `Type<K>` that wraps the
 *  K-wide primitive intersector for one primitive layout / intersection
 *  algorithm in ArrayIntersectorKStream. The stream traversal instantiates
 *  the alias with the packet width it needs.
 *
 *  The `filter` template argument is forwarded as the last argument of the
 *  primitive intersector. Previously `true` was hard-coded there, so the
 *  `<false>` instantiations used by the *NoFilter stream intersectors in
 *  bvh_intersector_stream_bvh4.cpp were indistinguishable from the filtered
 *  ones. The 4-wide hybrid NoFilter intersectors already pass `false` in this
 *  position (e.g. BVH4Quad4vIntersector4HybridMoellerNoFilter); forwarding
 *  `filter` makes the stream variants consistent with them.
 */

/*! Triangle4 layout, Moeller intersection. */
template<bool filter>
struct Triangle4IntersectorStreamMoeller {
  template<int K> using Type = ArrayIntersectorKStream<K,TriangleMIntersectorKMoeller<4 COMMA K COMMA filter>>;
};

/*! Triangle4v layout, Pluecker intersection. */
template<bool filter>
struct Triangle4vIntersectorStreamPluecker {
  template<int K> using Type = ArrayIntersectorKStream<K,TriangleMvIntersectorKPluecker<4 COMMA K COMMA filter>>;
};

/*! Triangle4i layout, Moeller intersection. */
template<bool filter>
struct Triangle4iIntersectorStreamMoeller {
  template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKMoeller<4 COMMA K COMMA filter>>;
};

/*! Triangle4i layout, Pluecker intersection. */
template<bool filter>
struct Triangle4iIntersectorStreamPluecker {
  template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKPluecker<4 COMMA K COMMA filter>>;
};

/*! Quad4v layout, Moeller intersection. */
template<bool filter>
struct Quad4vIntersectorStreamMoeller {
  template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKMoeller<4 COMMA K COMMA filter>>;
};

/*! Quad4i layout, Moeller intersection. */
template<bool filter>
struct Quad4iIntersectorStreamMoeller {
  template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKMoeller<4 COMMA K COMMA filter>>;
};

/*! Quad4v layout, Pluecker intersection. */
template<bool filter>
struct Quad4vIntersectorStreamPluecker {
  template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKPluecker<4 COMMA K COMMA filter>>;
};

/*! Quad4i layout, Pluecker intersection. */
template<bool filter>
struct Quad4iIntersectorStreamPluecker {
  template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKPluecker<4 COMMA K COMMA filter>>;
};
/*! Primitive-intersector selector used for the BVH4VirtualIntersectorStream
 *  definition: `Type<K>` wraps ObjectIntersectorK<K, false> in
 *  ArrayIntersectorKStream. The meaning of the `false` argument is defined by
 *  ObjectIntersectorK and is not visible here. */
struct ObjectIntersectorStream {
template<int K> using Type = ArrayIntersectorKStream<K,ObjectIntersectorK<K COMMA false>>;
};
/*! Primitive-intersector selector used for the BVH4InstanceIntersectorStream
 *  definition: `Type<K>` wraps InstanceIntersectorK<K> in
 *  ArrayIntersectorKStream. */
struct InstanceIntersectorStream {
template<int K> using Type = ArrayIntersectorKStream<K,InstanceIntersectorK<K>>;
};
// =====================================================================================================
// =====================================================================================================
// =====================================================================================================
/*! Closest-hit entry point of the packet fallback: the stream is traced one
 *  packet at a time through intersectK(). The context's coherence flag only
 *  selects the packet width the stream is viewed with (VSIZEL when coherent,
 *  VSIZEX otherwise).
 *
 *  \param This          accelerator used to trace the individual packets
 *  \param inputRays     array of packet pointers making up the stream
 *  \param numTotalRays  number of rays in the stream
 *  \param context       intersection context
 */
template<int N>
void BVHNIntersectorStreamPacketFallback<N>::intersect(Accel::Intersectors* __restrict__ This,
                                                        RayHitN** inputRays,
                                                        size_t numTotalRays,
                                                        IntersectContext* context)
{
  /* incoherent streams are the expected case */
  if (likely(!context->isCoherent()))
  {
    intersectK(This, (RayHitK<VSIZEX>**)inputRays, numTotalRays, context);
    return;
  }

  intersectK(This, (RayHitK<VSIZEL>**)inputRays, numTotalRays, context);
}
/*! Occlusion entry point of the packet fallback: the stream is traced one
 *  packet at a time through occludedK(). The context's coherence flag only
 *  selects the packet width the stream is viewed with (VSIZEL when coherent,
 *  VSIZEX otherwise).
 *
 *  \param This          accelerator used to trace the individual packets
 *  \param inputRays     array of packet pointers making up the stream
 *  \param numTotalRays  number of rays in the stream
 *  \param context       intersection context
 */
template<int N>
void BVHNIntersectorStreamPacketFallback<N>::occluded(Accel::Intersectors* __restrict__ This,
                                                       RayN** inputRays,
                                                       size_t numTotalRays,
                                                       IntersectContext* context)
{
  /* incoherent streams are the expected case */
  if (likely(!context->isCoherent()))
  {
    occludedK(This, (RayK<VSIZEX>**)inputRays, numTotalRays, context);
    return;
  }

  occludedK(This, (RayK<VSIZEL>**)inputRays, numTotalRays, context);
}
/*! Traces a stream as a sequence of independent K-wide closest-hit packet
 *  queries.
 *
 *  A lane is traced when its global ray index is below numTotalRays and its
 *  ray interval is non-empty (tnear <= tfar).
 *
 *  \param This          accelerator providing the packet intersect() call
 *  \param inputRays     array of packet pointers; packet p holds rays
 *                       [p*K, p*K + K)
 *  \param numTotalRays  number of rays in the stream
 *  \param context       intersection context passed through unchanged
 */
template<int N>
template<int K>
__noinline void BVHNIntersectorStreamPacketFallback<N>::intersectK(Accel::Intersectors* __restrict__ This,
                                                                    RayHitK<K>** inputRays,
                                                                    size_t numTotalRays,
                                                                    IntersectContext* context)
{
  const vint<K> laneOffsets(step);
  const vint<K> rayCount(int(numTotalRays));

  /* fallback to packets: `firstRay` is the global index of lane 0 of `packetID` */
  for (size_t firstRay = 0, packetID = 0; firstRay < numTotalRays; firstRay += K, ++packetID)
  {
    RayHitK<K>& packet = *inputRays[packetID];

    const vint<K> rayIndex = vint<K>(int(firstRay)) + laneOffsets;
    const vbool<K> inRange = rayIndex < rayCount;
    const vbool<K> active = inRange & (packet.tnear() <= packet.tfar);

    This->intersect(active, packet, context);
  }
}
/*! Traces a stream as a sequence of independent K-wide occlusion packet
 *  queries.
 *
 *  A lane is traced when its global ray index is below numTotalRays and its
 *  ray interval is non-empty (tnear <= tfar).
 *
 *  \param This          accelerator providing the packet occluded() call
 *  \param inputRays     array of packet pointers; packet p holds rays
 *                       [p*K, p*K + K)
 *  \param numTotalRays  number of rays in the stream
 *  \param context       intersection context passed through unchanged
 */
template<int N>
template<int K>
__noinline void BVHNIntersectorStreamPacketFallback<N>::occludedK(Accel::Intersectors* __restrict__ This,
                                                                   RayK<K>** inputRays,
                                                                   size_t numTotalRays,
                                                                   IntersectContext* context)
{
  const vint<K> laneOffsets(step);
  const vint<K> rayCount(int(numTotalRays));

  /* fallback to packets: `firstRay` is the global index of lane 0 of `packetID` */
  for (size_t firstRay = 0, packetID = 0; firstRay < numTotalRays; firstRay += K, ++packetID)
  {
    RayK<K>& packet = *inputRays[packetID];

    const vint<K> rayIndex = vint<K>(int(firstRay)) + laneOffsets;
    const vbool<K> inRange = rayIndex < rayCount;
    const vbool<K> active = inRange & (packet.tnear() <= packet.tfar);

    This->occluded(active, packet, context);
  }
}
}
}

View file

@ -0,0 +1,36 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_stream.cpp"
namespace embree
{
namespace isa
{
////////////////////////////////////////////////////////////////////////////////
/// General BVHIntersectorStreamPacketFallback Intersector
////////////////////////////////////////////////////////////////////////////////
// Generic stream intersector for BVH4 that traces the stream packet by packet.
DEFINE_INTERSECTORN(BVH4IntersectorStreamPacketFallback,BVHNIntersectorStreamPacketFallback<4>);
////////////////////////////////////////////////////////////////////////////////
/// BVH4IntersectorStream Definitions
////////////////////////////////////////////////////////////////////////////////
// Each line below defines one named stream intersector as an instantiation of
// BVHNIntersectorStream<N, types, robust, PrimitiveIntersector> with N = 4 and
// types = BVH_AN1. In these lines the Pluecker variants pass robust = true and
// the Moeller variants robust = false. The last argument is one of the
// selector structs from bvh_intersector_stream.cpp; the *NoFilter symbols
// instantiate their selector with <false>, all others with <true>.
// --- triangles ---
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4iIntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Triangle4iIntersectorStreamMoeller<true>>));
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4vIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Triangle4vIntersectorStreamPluecker<true>>));
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4iIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Triangle4iIntersectorStreamPluecker<true>>));
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4IntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Triangle4IntersectorStreamMoeller<true>>));
IF_ENABLED_TRIS(DEFINE_INTERSECTORN(BVH4Triangle4IntersectorStreamMoellerNoFilter, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Triangle4IntersectorStreamMoeller<false>>));
// --- quads ---
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4vIntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Quad4vIntersectorStreamMoeller<true>>));
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4vIntersectorStreamMoellerNoFilter,BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Quad4vIntersectorStreamMoeller<false>>));
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4iIntersectorStreamMoeller, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA Quad4iIntersectorStreamMoeller<true>>));
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4vIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Quad4vIntersectorStreamPluecker<true>>));
IF_ENABLED_QUADS(DEFINE_INTERSECTORN(BVH4Quad4iIntersectorStreamPluecker, BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA true COMMA Quad4iIntersectorStreamPluecker<true>>));
// --- user geometry and instances ---
IF_ENABLED_USER(DEFINE_INTERSECTORN(BVH4VirtualIntersectorStream,BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA ObjectIntersectorStream>));
IF_ENABLED_INSTANCE(DEFINE_INTERSECTORN(BVH4InstanceIntersectorStream,BVHNIntersectorStream<4 COMMA BVH_AN1 COMMA false COMMA InstanceIntersectorStream>));
}
}

View file

@ -0,0 +1,657 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_stream_filters.h"
#include "bvh_intersector_stream.h"
namespace embree
{
namespace isa
{
/*! Traces a stream of N rays accessed through RayStreamAOS (rays addressed by
 *  the offset `index * stride`; presumably a byte stride -- confirm against
 *  RayStreamAOS).
 *
 *  \tparam K         packet width used internally
 *  \tparam intersect selects the packet type RayTypeK<K, intersect> and, when
 *                    false, enables the octant-sorting occlusion path
 *  \param scene      scene whose intersectors are used for tracing
 *  \param _rayN      start of the ray data
 *  \param N          number of rays
 *  \param stride     distance between consecutive rays
 *  \param context    intersection context
 *
 *  Three paths exist:
 *   1. coherent context: rays are gathered into packets in input order, in
 *      chunks of at most MAX_INTERNAL_STREAM_SIZE, and traced with intersectN;
 *   2. non-coherent occlusion query: rays are bucketed by direction octant and
 *      each bucket is traced with occludedN;
 *   3. otherwise: plain K-wide packet tracing.
 */
template<int K, bool intersect>
__noinline void RayStreamFilter::filterAOS(Scene* scene, void* _rayN, size_t N, size_t stride, IntersectContext* context)
{
RayStreamAOS rayN(_rayN);
/* use fast path for coherent ray mode */
if (unlikely(context->isCoherent()))
{
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
/* process the input in chunks of at most MAX_INTERNAL_STREAM_SIZE rays */
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
{
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
/* convert from AOS to SOA */
for (size_t j = 0; j < size; j += K)
{
/* global ray indices of the K lanes; lanes at or beyond N are invalid */
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const vint<K> offset = vij * int(stride);
const size_t packetIndex = j / K;
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
/* give invalid lanes the empty interval [0, -inf] */
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
rays[packetIndex] = ray;
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
}
/* trace stream */
scene->intersectors.intersectN(rayPtrs, size, context);
/* convert from SOA to AOS */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const vint<K> offset = vij * int(stride);
const size_t packetIndex = j / K;
/* results are read from rays[], not through rayPtrs[] */
rayN.setHitByOffset(valid, offset, rays[packetIndex]);
}
}
}
/* `intersect` is a template constant, so this branch is decided per instantiation */
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
/* octants[o] collects the input indices of rays falling into octant o */
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
size_t inputRayID = 0;
/* each iteration traces exactly one bucket: either one that just became
   full, or -- once the input is exhausted -- the first non-empty one */
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N;)
{
const Ray& ray = rayN.getRayByOffset(inputRayID * stride);
/* skip invalid rays */
if (unlikely(ray.tnear() > ray.tfar || ray.tfar < 0.0f)) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
/* octant index = the three "component < 0" bits of the direction; the
   `& 0x7` discards the bit of the fourth lane */
const unsigned int octantID = movemask(vfloat4(Vec3fa(ray.dir)) < 0.0f) & 0x7;
assert(octantID < 8);
octants[octantID][raysInOctant[octantID]++] = (unsigned int)inputRayID;
inputRayID++;
/* bucket full: stop sorting and trace it now */
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayIDs = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
/* gather the bucket's rays into K-wide packets */
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
/* loads K consecutive ray IDs; lanes beyond numOctantRays are excluded via `valid` */
const vint<K> offset = *(vint<K>*)&rayIDs[j] * int(stride);
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByOffset<K>(valid, offset);
/* give invalid lanes the empty interval [0, -inf] */
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
/* scatter the results back to the original ray locations */
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayIDs[j] * int(stride);
rayN.setHitByOffset<K>(valid, offset, rays[j/K]);
}
/* the bucket is empty again */
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < N; i += K)
{
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
vbool<K> valid = vi < vint<K>(int(N));
const vint<K> offset = vi * int(stride);
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
/* additionally drop lanes with an empty ray interval */
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByOffset<K>(valid, offset, ray);
}
}
}
/*! Traces a stream of N rays accessed through RayStreamAOP (rays addressed by
 *  index).
 *
 *  \tparam K         packet width used internally
 *  \tparam intersect selects the packet type RayTypeK<K, intersect> and, when
 *                    false, enables the octant-sorting occlusion path
 *  \param scene      scene whose intersectors are used for tracing
 *  \param _rayN      ray pointer array handed to RayStreamAOP
 *  \param N          number of rays
 *  \param context    intersection context
 *
 *  Three paths exist:
 *   1. coherent context: rays are gathered into packets in input order, in
 *      chunks of at most MAX_INTERNAL_STREAM_SIZE, and traced with intersectN;
 *   2. non-coherent occlusion query: rays are bucketed by direction octant and
 *      each bucket is traced with occludedN;
 *   3. otherwise: plain K-wide packet tracing.
 */
template<int K, bool intersect>
__noinline void RayStreamFilter::filterAOP(Scene* scene, void** _rayN, size_t N, IntersectContext* context)
{
  RayStreamAOP rayN(_rayN);

  /* use fast path for coherent ray mode */
  if (unlikely(context->isCoherent()))
  {
    __aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
    __aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];

    /* process the input in chunks of at most MAX_INTERNAL_STREAM_SIZE rays */
    for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
    {
      const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);

      /* convert from AOP to SOA */
      for (size_t j = 0; j < size; j += K)
      {
        /* global ray indices of the K lanes; lanes at or beyond N are invalid */
        const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
        const vbool<K> valid = vij < vint<K>(int(N));
        const size_t packetIndex = j / K;

        RayTypeK<K, intersect> ray = rayN.getRayByIndex<K>(valid, vij);
        /* give invalid lanes the empty interval [0, -inf] */
        ray.tnear() = select(valid, ray.tnear(), zero);
        ray.tfar = select(valid, ray.tfar, neg_inf);

        rays[packetIndex] = ray;
        rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
      }

      /* trace stream */
      scene->intersectors.intersectN(rayPtrs, size, context);

      /* convert from SOA to AOP */
      for (size_t j = 0; j < size; j += K)
      {
        const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
        const vbool<K> valid = vij < vint<K>(int(N));
        const size_t packetIndex = j / K;

        /* results are read from rays[], not through rayPtrs[] */
        rayN.setHitByIndex<K>(valid, vij, rays[packetIndex]);
      }
    }
  }
  /* `intersect` is a template constant, so this branch is decided per instantiation */
  else if (unlikely(!intersect))
  {
    /* octant sorting for occlusion rays */
    /* octants[o] collects the input indices of rays falling into octant o */
    __aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
    __aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
    __aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];

    unsigned int raysInOctant[8];
    for (unsigned int i = 0; i < 8; i++)
      raysInOctant[i] = 0;

    size_t inputRayID = 0;

    /* each iteration traces exactly one bucket: either one that just became
       full, or -- once the input is exhausted -- the first non-empty one */
    for (;;)
    {
      int curOctant = -1;

      /* sort rays into octants */
      for (; inputRayID < N;)
      {
        const Ray& ray = rayN.getRayByIndex(inputRayID);

        /* skip invalid rays */
        if (unlikely(ray.tnear() > ray.tfar || ray.tfar < 0.0f)) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
        if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif

        /* Octant index = the "component < 0" bits of the direction. Keep only
           the three x/y/z bits, as filterAOS does: the index is used unchecked
           (the assert vanishes in release builds) to address octants[8] and
           raysInOctant[8], so a stray bit from the fourth lane of the
           comparison must never get through. */
        const unsigned int octantID = movemask(lt_mask(ray.dir,Vec3fa(0.0f))) & 0x7;

        assert(octantID < 8);
        octants[octantID][raysInOctant[octantID]++] = (unsigned int)inputRayID;
        inputRayID++;

        /* bucket full: stop sorting and trace it now */
        if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
        {
          curOctant = octantID;
          break;
        }
      }

      /* need to flush rays in octant? */
      if (unlikely(curOctant == -1))
      {
        for (unsigned int i = 0; i < 8; i++)
          if (raysInOctant[i]) { curOctant = i; break; }
      }

      /* all rays traced? */
      if (unlikely(curOctant == -1))
        break;

      unsigned int* const rayIDs = &octants[curOctant][0];
      const unsigned int numOctantRays = raysInOctant[curOctant];
      assert(numOctantRays);

      /* gather the bucket's rays into K-wide packets */
      for (unsigned int j = 0; j < numOctantRays; j += K)
      {
        const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
        const vbool<K> valid = vi < vint<K>(int(numOctantRays));
        /* loads K consecutive ray IDs; lanes beyond numOctantRays are excluded via `valid` */
        const vint<K> index = *(vint<K>*)&rayIDs[j];

        RayK<K>& ray = rays[j/K];
        rayPtrs[j/K] = &ray;
        ray = rayN.getRayByIndex<K>(valid, index);
        /* give invalid lanes the empty interval [0, -inf] */
        ray.tnear() = select(valid, ray.tnear(), zero);
        ray.tfar = select(valid, ray.tfar, neg_inf);
      }

      scene->intersectors.occludedN(rayPtrs, numOctantRays, context);

      /* scatter the results back to the original ray slots */
      for (unsigned int j = 0; j < numOctantRays; j += K)
      {
        const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
        const vbool<K> valid = vi < vint<K>(int(numOctantRays));
        const vint<K> index = *(vint<K>*)&rayIDs[j];

        rayN.setHitByIndex<K>(valid, index, rays[j/K]);
      }

      /* the bucket is empty again */
      raysInOctant[curOctant] = 0;
    }
  }
  else
  {
    /* fallback to packets */
    for (size_t i = 0; i < N; i += K)
    {
      const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
      vbool<K> valid = vi < vint<K>(int(N));

      RayTypeK<K, intersect> ray = rayN.getRayByIndex<K>(valid, vi);
      /* additionally drop lanes with an empty ray interval */
      valid &= ray.tnear() <= ray.tfar;

      scene->intersectors.intersect(valid, ray, context);

      rayN.setHitByIndex<K>(valid, vi, ray);
    }
  }
}
/* Traces a stream of SOA ray packets against the scene.
 *
 * Template parameters:
 *   K         - packet width used for tracing (rays per RayTypeK / RayK packet).
 *   intersect - selects the ray type RayTypeK<K, intersect>; the !intersect
 *               branch below treats the rays as occlusion rays.
 *
 * Parameters:
 *   scene      - scene whose intersectors are invoked.
 *   rayData    - base address of the stream; packet i starts at rayData + i*stride.
 *   N          - number of rays per input packet.
 *   numPackets - number of input packets.
 *   stride     - byte distance between consecutive input packets.
 *   context    - intersection context; isCoherent() selects the tracing strategy.
 *
 * Strategy:
 *   - N == K with rayData and stride both multiples of K*sizeof(float):
 *       coherent        -> packets are handed to intersectN by pointer, in batches;
 *       occlusion rays  -> rays are bucketed by octant and traced with occludedN;
 *       otherwise       -> each packet is traced on its own with intersect().
 *   - any other layout: each input packet is processed in K-wide chunks that are
 *     loaded into a local packet, traced, and written back.
 */
template<int K, bool intersect>
__noinline void RayStreamFilter::filterSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context)
{
/* both values are zero when the address / stride is a multiple of the packet size in bytes */
const size_t rayDataAlignment = (size_t)rayData % (K*sizeof(float));
const size_t offsetAlignment = (size_t)stride % (K*sizeof(float));
/* fast path for packets with the correct width and data alignment */
if (likely(N == K &&
!rayDataAlignment &&
!offsetAlignment))
{
if (unlikely(context->isCoherent()))
{
/* coherent mode: reinterpret the input packets in place and collect pointers to them */
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
size_t packetIndex = 0;
for (size_t i = 0; i < numPackets; i++)
{
const size_t offset = i * stride;
RayTypeK<K, intersect>& ray = *(RayTypeK<K, intersect>*)(rayData + offset);
rayPtrs[packetIndex++] = &ray;
/* trace as stream */
/* the pointer array is full: trace this batch and start filling it again */
if (unlikely(packetIndex == MAX_INTERNAL_STREAM_SIZE / K))
{
const size_t size = packetIndex*K;
scene->intersectors.intersectN(rayPtrs, size, context);
packetIndex = 0;
}
}
/* flush remaining packets */
if (unlikely(packetIndex > 0))
{
const size_t size = packetIndex*K;
scene->intersectors.intersectN(rayPtrs, size, context);
}
}
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
/* rayN addresses single rays of the whole stream by byte offset relative to rayData */
RayStreamSOA rayN(rayData, K);
/* octants[o] holds the byte offsets of the rays currently queued for octant o */
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
/* index of the next input ray to classify; it persists across iterations of the outer loop */
size_t inputRayID = 0;
/* each iteration of this loop traces exactly one octant bucket */
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N*numPackets;)
{
/* byte offset of the ray: start of its packet plus its lane within the packet */
const size_t offset = (inputRayID / K) * stride + (inputRayID % K) * sizeof(float);
/* skip invalid rays */
if (unlikely(!rayN.isValidByOffset(offset))) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
__aligned(64) Ray ray = rayN.getRayByOffset(offset);
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
const unsigned int octantID = (unsigned int)rayN.getOctantByOffset(offset);
assert(octantID < 8);
/* NOTE(review): the offset is narrowed to 32 bits here - confirm streams never exceed that range */
octants[octantID][raysInOctant[octantID]++] = (unsigned int)offset;
inputRayID++;
/* a full bucket has to be traced before more rays can be queued for it */
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
/* no bucket filled up (input exhausted): pick the first non-empty bucket, if any */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayOffsets = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
/* load the queued rays by offset into local K-wide packets */
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
/* lanes at or beyond numOctantRays are masked out in the last packet */
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
/* reads K offsets at once; assumes MAX_INTERNAL_STREAM_SIZE is a multiple of K so the read stays inside the bucket - TODO confirm */
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByOffset<K>(valid, offset);
/* masked-out lanes get tnear = zero and tfar = neg_inf */
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
/* write the results of the active lanes back to their original positions */
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
rayN.setHitByOffset(valid, offset, rays[j/K]);
}
/* the bucket is empty again and can be refilled by the sorting loop */
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < numPackets; i++)
{
const size_t offset = i * stride;
/* the packet is traced in place, no copy is made */
RayTypeK<K, intersect>& ray = *(RayTypeK<K, intersect>*)(rayData + offset);
/* only lanes with tnear <= tfar are traced */
const vbool<K> valid = ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
}
}
}
else
{
/* fallback to packets for arbitrary packet size and alignment */
for (size_t i = 0; i < numPackets; i++)
{
const size_t offsetN = i * stride;
/* view over the i-th input packet, which holds N rays */
RayStreamSOA rayN(rayData + offsetN, N);
for (size_t j = 0; j < N; j += K)
{
const size_t offset = j * sizeof(float);
/* mask off the lanes past the end of the input packet */
vbool<K> valid = (vint<K>(int(j)) + vint<K>(step)) < vint<K>(int(N));
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
/* additionally drop lanes whose interval is empty (tnear > tfar) */
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByOffset(valid, offset, ray);
}
}
}
}
/* Traces a stream of N rays that is accessed through a RayStreamSOP view.
 *
 * Template parameters:
 *   K         - packet width used for tracing (rays per RayTypeK / RayK packet).
 *   intersect - selects the ray type RayTypeK<K, intersect>; the !intersect
 *               branch below treats the rays as occlusion rays.
 *
 * Parameters:
 *   scene   - scene whose intersectors are invoked.
 *   _rayN   - pointer that is reinterpreted as a RayStreamSOP.
 *   N       - number of rays in the stream.
 *   context - intersection context; isCoherent() selects the tracing strategy.
 *
 * Ray r is addressed with the byte offset r * sizeof(float). In every branch
 * the rays are loaded into local packets, traced, and the results are written
 * back with RayStreamSOP::setHitByOffset.
 */
template<int K, bool intersect>
__noinline void RayStreamFilter::filterSOP(Scene* scene, const void* _rayN, size_t N, IntersectContext* context)
{
/* the cast drops the const qualifier of the incoming pointer */
RayStreamSOP& rayN = *(RayStreamSOP*)_rayN;
/* use fast path for coherent ray mode */
if (unlikely(context->isCoherent()))
{
__aligned(64) RayTypeK<K, intersect> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayTypeK<K, intersect>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
/* process the stream in batches of at most MAX_INTERNAL_STREAM_SIZE rays */
for (size_t i = 0; i < N; i += MAX_INTERNAL_STREAM_SIZE)
{
const size_t size = min(N - i, MAX_INTERNAL_STREAM_SIZE);
/* convert from SOP to SOA */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
/* lanes whose global ray index is >= N are masked out */
const vbool<K> valid = vij < vint<K>(int(N));
const size_t offset = (i+j) * sizeof(float);
const size_t packetIndex = j / K;
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
/* masked-out lanes get tnear = zero and tfar = neg_inf */
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
rays[packetIndex] = ray;
rayPtrs[packetIndex] = &rays[packetIndex]; // rayPtrs might get reordered for occludedN
}
/* trace stream */
scene->intersectors.intersectN(rayPtrs, size, context);
/* convert from SOA to SOP */
for (size_t j = 0; j < size; j += K)
{
const vint<K> vij = vint<K>(int(i+j)) + vint<K>(step);
const vbool<K> valid = vij < vint<K>(int(N));
const size_t offset = (i+j) * sizeof(float);
const size_t packetIndex = j / K;
/* indexes rays[] directly, so the write-back does not depend on the order of rayPtrs */
rayN.setHitByOffset(valid, offset, rays[packetIndex]);
}
}
}
else if (unlikely(!intersect))
{
/* octant sorting for occlusion rays */
/* octants[o] holds the byte offsets of the rays currently queued for octant o */
__aligned(64) unsigned int octants[8][MAX_INTERNAL_STREAM_SIZE];
__aligned(64) RayK<K> rays[MAX_INTERNAL_STREAM_SIZE / K];
__aligned(64) RayK<K>* rayPtrs[MAX_INTERNAL_STREAM_SIZE / K];
unsigned int raysInOctant[8];
for (unsigned int i = 0; i < 8; i++)
raysInOctant[i] = 0;
/* index of the next input ray to classify; it persists across iterations of the outer loop */
size_t inputRayID = 0;
/* each iteration of this loop traces exactly one octant bucket */
for (;;)
{
int curOctant = -1;
/* sort rays into octants */
for (; inputRayID < N;)
{
const size_t offset = inputRayID * sizeof(float);
/* skip invalid rays */
if (unlikely(!rayN.isValidByOffset(offset))) { inputRayID++; continue; } // ignore invalid or already occluded rays
#if defined(EMBREE_IGNORE_INVALID_RAYS)
__aligned(64) Ray ray = rayN.getRayByOffset(offset);
if (unlikely(!ray.valid())) { inputRayID++; continue; }
#endif
const unsigned int octantID = (unsigned int)rayN.getOctantByOffset(offset);
assert(octantID < 8);
/* NOTE(review): the offset is narrowed to 32 bits here - confirm streams never exceed that range */
octants[octantID][raysInOctant[octantID]++] = (unsigned int)offset;
inputRayID++;
/* a full bucket has to be traced before more rays can be queued for it */
if (unlikely(raysInOctant[octantID] == MAX_INTERNAL_STREAM_SIZE))
{
curOctant = octantID;
break;
}
}
/* need to flush rays in octant? */
/* no bucket filled up (input exhausted): pick the first non-empty bucket, if any */
if (unlikely(curOctant == -1))
{
for (unsigned int i = 0; i < 8; i++)
if (raysInOctant[i]) { curOctant = i; break; }
}
/* all rays traced? */
if (unlikely(curOctant == -1))
break;
unsigned int* const rayOffsets = &octants[curOctant][0];
const unsigned int numOctantRays = raysInOctant[curOctant];
assert(numOctantRays);
/* load the queued rays by offset into local K-wide packets */
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
/* lanes at or beyond numOctantRays are masked out in the last packet */
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
/* reads K offsets at once; assumes MAX_INTERNAL_STREAM_SIZE is a multiple of K so the read stays inside the bucket - TODO confirm */
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
RayK<K>& ray = rays[j/K];
rayPtrs[j/K] = &ray;
ray = rayN.getRayByOffset<K>(valid, offset);
/* masked-out lanes get tnear = zero and tfar = neg_inf */
ray.tnear() = select(valid, ray.tnear(), zero);
ray.tfar = select(valid, ray.tfar, neg_inf);
}
scene->intersectors.occludedN(rayPtrs, numOctantRays, context);
/* write the results of the active lanes back to their original positions */
for (unsigned int j = 0; j < numOctantRays; j += K)
{
const vint<K> vi = vint<K>(int(j)) + vint<K>(step);
const vbool<K> valid = vi < vint<K>(int(numOctantRays));
const vint<K> offset = *(vint<K>*)&rayOffsets[j];
rayN.setHitByOffset(valid, offset, rays[j/K]);
}
/* the bucket is empty again and can be refilled by the sorting loop */
raysInOctant[curOctant] = 0;
}
}
else
{
/* fallback to packets */
for (size_t i = 0; i < N; i += K)
{
const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
/* mask off the lanes past the end of the stream */
vbool<K> valid = vi < vint<K>(int(N));
const size_t offset = i * sizeof(float);
RayTypeK<K, intersect> ray = rayN.getRayByOffset<K>(valid, offset);
/* additionally drop lanes whose interval is empty (tnear > tfar) */
valid &= ray.tnear() <= ray.tfar;
scene->intersectors.intersect(valid, ray, context);
rayN.setHitByOffset(valid, offset, ray);
}
}
}
/* Intersection entry point for AOS ray streams: forwards to filterAOS with
 * intersect = true, using the VSIZEL instantiation for coherent contexts and
 * the VSIZEX instantiation otherwise. */
void RayStreamFilter::intersectAOS(Scene* scene, RTCRayHit* _rayN, size_t N, size_t stride, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterAOS<VSIZEL, true>(scene, _rayN, N, stride, context);
    return;
  }

  filterAOS<VSIZEX, true>(scene, _rayN, N, stride, context);
}
/* Occlusion entry point for AOS ray streams: forwards to filterAOS with
 * intersect = false, using the VSIZEL instantiation for coherent contexts and
 * the VSIZEX instantiation otherwise. */
void RayStreamFilter::occludedAOS(Scene* scene, RTCRay* _rayN, size_t N, size_t stride, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterAOS<VSIZEL, false>(scene, _rayN, N, stride, context);
    return;
  }

  filterAOS<VSIZEX, false>(scene, _rayN, N, stride, context);
}
/* Intersection entry point for streams given as an array of ray pointers:
 * forwards to filterAOP with intersect = true, using the VSIZEL instantiation
 * for coherent contexts and the VSIZEX instantiation otherwise. */
void RayStreamFilter::intersectAOP(Scene* scene, RTCRayHit** _rayN, size_t N, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterAOP<VSIZEL, true>(scene, (void**)_rayN, N, context);
    return;
  }

  filterAOP<VSIZEX, true>(scene, (void**)_rayN, N, context);
}
/* Occlusion entry point for streams given as an array of ray pointers:
 * forwards to filterAOP with intersect = false, using the VSIZEL instantiation
 * for coherent contexts and the VSIZEX instantiation otherwise. */
void RayStreamFilter::occludedAOP(Scene* scene, RTCRay** _rayN, size_t N, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterAOP<VSIZEL, false>(scene, (void**)_rayN, N, context);
    return;
  }

  filterAOP<VSIZEX, false>(scene, (void**)_rayN, N, context);
}
/* Intersection entry point for SOA packet streams: forwards to filterSOA with
 * intersect = true, using the VSIZEL instantiation for coherent contexts and
 * the VSIZEX instantiation otherwise. */
void RayStreamFilter::intersectSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterSOA<VSIZEL, true>(scene, rayData, N, numPackets, stride, context);
    return;
  }

  filterSOA<VSIZEX, true>(scene, rayData, N, numPackets, stride, context);
}
/* Occlusion entry point for SOA packet streams: forwards to filterSOA with
 * intersect = false, using the VSIZEL instantiation for coherent contexts and
 * the VSIZEX instantiation otherwise. */
void RayStreamFilter::occludedSOA(Scene* scene, char* rayData, size_t N, size_t numPackets, size_t stride, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterSOA<VSIZEL, false>(scene, rayData, N, numPackets, stride, context);
    return;
  }

  filterSOA<VSIZEX, false>(scene, rayData, N, numPackets, stride, context);
}
/* Intersection entry point for SOP ray streams: forwards to filterSOP with
 * intersect = true, using the VSIZEL instantiation for coherent contexts and
 * the VSIZEX instantiation otherwise. */
void RayStreamFilter::intersectSOP(Scene* scene, const RTCRayHitNp* _rayN, size_t N, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterSOP<VSIZEL, true>(scene, _rayN, N, context);
    return;
  }

  filterSOP<VSIZEX, true>(scene, _rayN, N, context);
}
/* Occlusion entry point for SOP ray streams: forwards to filterSOP with
 * intersect = false, using the VSIZEL instantiation for coherent contexts and
 * the VSIZEX instantiation otherwise. */
void RayStreamFilter::occludedSOP(Scene* scene, const RTCRayNp* _rayN, size_t N, IntersectContext* context)
{
  const bool coherent = context->isCoherent();

  /* coherent streams are the rare case and are handled first */
  if (unlikely(coherent))
  {
    filterSOP<VSIZEL, false>(scene, _rayN, N, context);
    return;
  }

  filterSOP<VSIZEX, false>(scene, _rayN, N, context);
}
/* Builds the table of stream filter entry points: the four intersect
 * functions first, followed by the four occluded functions, each group in
 * the order AOS, AOP, SOA, SOP. */
RayStreamFilterFuncs rayStreamFilterFuncs()
{
  return RayStreamFilterFuncs(
    /* intersect */
    RayStreamFilter::intersectAOS,
    RayStreamFilter::intersectAOP,
    RayStreamFilter::intersectSOA,
    RayStreamFilter::intersectSOP,
    /* occluded */
    RayStreamFilter::occludedAOS,
    RayStreamFilter::occludedAOP,
    RayStreamFilter::occludedSOA,
    RayStreamFilter::occludedSOP);
}
};
};

View file

@ -16,7 +16,7 @@
/* #undef EMBREE_GEOMETRY_INSTANCE */
/* #undef EMBREE_GEOMETRY_GRID */
/* #undef EMBREE_GEOMETRY_POINT */
/* #undef EMBREE_RAY_PACKETS */
#define EMBREE_RAY_PACKETS
/* #undef EMBREE_COMPACT_POLYS */
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0

View file

@ -2,4 +2,4 @@
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTC_HASH "7c53133eb21424f7f0ae1e25bf357e358feaf6ab"
#define RTC_HASH "12b99393438a4cc9e478e33459eed78bec6233fd"