a69cc9f13d
Since Embree v3.13.0 supports AARCH64, switch back to the
official repo instead of using Embree-aarch64.
`thirdparty/embree/patches/godot-changes.patch` should now contain
an accurate diff of the changes done to the library.
(cherry picked from commit 767e374dce)
692 lines
28 KiB
C++
692 lines
28 KiB
C++
// Copyright 2009-2021 Intel Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
#pragma once
|
|
|
|
#define MBLUR_NUM_TEMPORAL_BINS 2
|
|
#define MBLUR_NUM_OBJECT_BINS 32
|
|
|
|
#include "../bvh/bvh.h"
|
|
#include "../common/primref_mb.h"
|
|
#include "heuristic_binning_array_aligned.h"
|
|
#include "heuristic_timesplit_array.h"
|
|
|
|
namespace embree
|
|
{
|
|
namespace isa
|
|
{
|
|
template<typename T>
|
|
struct SharedVector
|
|
{
|
|
__forceinline SharedVector() {}
|
|
|
|
__forceinline SharedVector(T* ptr, size_t refCount = 1)
|
|
: prims(ptr), refCount(refCount) {}
|
|
|
|
__forceinline void incRef() {
|
|
refCount++;
|
|
}
|
|
|
|
__forceinline void decRef()
|
|
{
|
|
if (--refCount == 0)
|
|
delete prims;
|
|
}
|
|
|
|
T* prims;
|
|
size_t refCount;
|
|
};
|
|
|
|
template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
|
|
struct LocalChildListT
|
|
{
|
|
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
|
|
|
|
__forceinline LocalChildListT (const BuildRecord& record)
|
|
: numChildren(1), numSharedPrimVecs(1)
|
|
{
|
|
/* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
|
|
children[0] = record;
|
|
primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
|
|
}
|
|
|
|
__forceinline ~LocalChildListT()
|
|
{
|
|
for (size_t i = 0; i < numChildren; i++)
|
|
primvecs[i]->decRef();
|
|
}
|
|
|
|
__forceinline BuildRecord& operator[] ( const size_t i ) {
|
|
return children[i];
|
|
}
|
|
|
|
__forceinline size_t size() const {
|
|
return numChildren;
|
|
}
|
|
|
|
__forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
|
|
{
|
|
SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
|
|
if (lrecord.prims.prims == bsharedPrimVec->prims) {
|
|
primvecs[bestChild] = bsharedPrimVec;
|
|
bsharedPrimVec->incRef();
|
|
}
|
|
else {
|
|
primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
|
|
}
|
|
|
|
if (rrecord.prims.prims == bsharedPrimVec->prims) {
|
|
primvecs[numChildren] = bsharedPrimVec;
|
|
bsharedPrimVec->incRef();
|
|
}
|
|
else {
|
|
primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
|
|
}
|
|
bsharedPrimVec->decRef();
|
|
new_vector.release();
|
|
|
|
children[bestChild] = lrecord;
|
|
children[numChildren] = rrecord;
|
|
numChildren++;
|
|
}
|
|
|
|
public:
|
|
array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;
|
|
array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs;
|
|
size_t numChildren;
|
|
|
|
array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
|
|
size_t numSharedPrimVecs;
|
|
};
|
|
|
|
template<typename Mesh>
|
|
struct RecalculatePrimRef
|
|
{
|
|
Scene* scene;
|
|
|
|
__forceinline RecalculatePrimRef (Scene* scene)
|
|
: scene(scene) {}
|
|
|
|
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
|
|
{
|
|
const unsigned geomID = prim.geomID();
|
|
const unsigned primID = prim.primID();
|
|
const Mesh* mesh = scene->get<Mesh>(geomID);
|
|
const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
|
|
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
|
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
|
}
|
|
|
|
// __noinline is workaround for ICC16 bug under MacOSX
|
|
__noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
|
|
{
|
|
const unsigned geomID = prim.geomID();
|
|
const unsigned primID = prim.primID();
|
|
const Mesh* mesh = scene->get<Mesh>(geomID);
|
|
const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
|
|
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
|
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
|
}
|
|
|
|
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
|
|
return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
|
|
}
|
|
|
|
// __noinline is workaround for ICC16 bug under MacOSX
|
|
__noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
|
|
return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
|
|
}
|
|
};
|
|
|
|
struct VirtualRecalculatePrimRef
|
|
{
|
|
Scene* scene;
|
|
|
|
__forceinline VirtualRecalculatePrimRef (Scene* scene)
|
|
: scene(scene) {}
|
|
|
|
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
|
|
{
|
|
const unsigned geomID = prim.geomID();
|
|
const unsigned primID = prim.primID();
|
|
const Geometry* mesh = scene->get(geomID);
|
|
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
|
|
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
|
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
|
}
|
|
|
|
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
|
|
{
|
|
const unsigned geomID = prim.geomID();
|
|
const unsigned primID = prim.primID();
|
|
const Geometry* mesh = scene->get(geomID);
|
|
const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
|
|
const range<int> tbounds = mesh->timeSegmentRange(time_range);
|
|
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
|
|
}
|
|
|
|
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
|
|
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
|
|
}
|
|
|
|
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
|
|
return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
|
|
}
|
|
};
|
|
|
|
struct BVHBuilderMSMBlur
|
|
{
|
|
/*! settings for msmblur builder */
|
|
struct Settings
|
|
{
|
|
/*! default settings */
|
|
Settings ()
|
|
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
|
|
travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
|
|
singleThreadThreshold(1024) {}
|
|
|
|
|
|
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
|
|
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
|
|
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
|
|
{
|
|
minLeafSize = min(minLeafSize,maxLeafSize);
|
|
}
|
|
|
|
public:
|
|
size_t branchingFactor; //!< branching factor of BVH to build
|
|
size_t maxDepth; //!< maximum depth of BVH to build
|
|
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
|
|
size_t minLeafSize; //!< minimum size of a leaf
|
|
size_t maxLeafSize; //!< maximum size of a leaf
|
|
float travCost; //!< estimated cost of one traversal step
|
|
float intCost; //!< estimated cost of one primitive intersection
|
|
bool singleLeafTimeSegment; //!< split time to single time range
|
|
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
|
|
};
|
|
|
|
struct BuildRecord
|
|
{
|
|
public:
|
|
__forceinline BuildRecord () {}
|
|
|
|
__forceinline BuildRecord (size_t depth)
|
|
: depth(depth) {}
|
|
|
|
__forceinline BuildRecord (const SetMB& prims, size_t depth)
|
|
: depth(depth), prims(prims) {}
|
|
|
|
__forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
|
|
return a.prims.size() < b.prims.size();
|
|
}
|
|
|
|
__forceinline size_t size() const {
|
|
return prims.size();
|
|
}
|
|
|
|
public:
|
|
size_t depth; //!< Depth of the root of this subtree.
|
|
SetMB prims; //!< The list of primitives.
|
|
};
|
|
|
|
struct BuildRecordSplit : public BuildRecord
|
|
{
|
|
__forceinline BuildRecordSplit () {}
|
|
|
|
__forceinline BuildRecordSplit (size_t depth)
|
|
: BuildRecord(depth) {}
|
|
|
|
__forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
|
|
: BuildRecord(record), split(split) {}
|
|
|
|
BinSplit<MBLUR_NUM_OBJECT_BINS> split;
|
|
};
|
|
|
|
template<
|
|
typename NodeRef,
|
|
typename RecalculatePrimRef,
|
|
typename Allocator,
|
|
typename CreateAllocFunc,
|
|
typename CreateNodeFunc,
|
|
typename SetNodeFunc,
|
|
typename CreateLeafFunc,
|
|
typename ProgressMonitor>
|
|
|
|
class BuilderT
|
|
{
|
|
ALIGNED_CLASS_(16);
|
|
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
|
|
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
|
|
|
|
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
|
|
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
|
|
typedef mvector<PrimRefMB>* PrimRefVector;
|
|
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
|
|
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
|
|
typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
|
|
|
|
public:
|
|
|
|
/*! Stores the settings and all callbacks by value and sets up the two split
 *  heuristics. Raises an RTC_ERROR_UNKNOWN error when the configured
 *  branching factor exceeds MAX_BRANCHING_FACTOR. */
BuilderT (MemoryMonitorInterface* device,
          const RecalculatePrimRef recalculatePrimRef,
          const CreateAllocFunc createAlloc,
          const CreateNodeFunc createNode,
          const SetNodeFunc setNode,
          const CreateLeafFunc createLeaf,
          const ProgressMonitor progressMonitor,
          const Settings& settings)
  : cfg(settings),
    heuristicObjectSplit(),
    heuristicTemporalSplit(device, recalculatePrimRef),
    recalculatePrimRef(recalculatePrimRef),
    createAlloc(createAlloc),
    createNode(createNode),
    setNode(setNode),
    createLeaf(createLeaf),
    progressMonitor(progressMonitor)
{
  const bool branchingFactorSupported = cfg.branchingFactor <= MAX_BRANCHING_FACTOR;
  if (!branchingFactorSupported)
    throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
}
|
|
|
|
/*! finds the best split */
|
|
const Split find(const SetMB& set)
|
|
{
|
|
/* first try standard object split */
|
|
const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
|
|
const float object_split_sah = object_split.splitSAH();
|
|
|
|
/* test temporal splits only when object split was bad */
|
|
const float leaf_sah = set.leafSAH(cfg.logBlockSize);
|
|
if (object_split_sah < 0.50f*leaf_sah)
|
|
return object_split;
|
|
|
|
/* do temporal splits only if the time range is big enough */
|
|
if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
|
|
{
|
|
const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
|
|
const float temporal_split_sah = temporal_split.splitSAH();
|
|
|
|
/* take temporal split if it improved SAH */
|
|
if (temporal_split_sah < object_split_sah)
|
|
return temporal_split;
|
|
}
|
|
|
|
return object_split;
|
|
}
|
|
|
|
/*! array partitioning */
|
|
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
|
|
{
|
|
/* perform object split */
|
|
if (likely(split.data == Split::SPLIT_OBJECT)) {
|
|
heuristicObjectSplit.split(split,set,lset,rset);
|
|
}
|
|
/* perform temporal split */
|
|
else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
|
|
return heuristicTemporalSplit.split(split,set,lset,rset);
|
|
}
|
|
/* perform fallback split */
|
|
else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
|
|
set.deterministic_order();
|
|
splitFallback(set,lset,rset);
|
|
}
|
|
/* split by geometry */
|
|
else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
|
|
set.deterministic_order();
|
|
splitByGeometry(set,lset,rset);
|
|
}
|
|
else
|
|
assert(false);
|
|
|
|
return std::unique_ptr<mvector<PrimRefMB>>();
|
|
}
|
|
|
|
/*! finds the best fallback split */
|
|
__noinline Split findFallback(const SetMB& set)
|
|
{
|
|
/* split if primitives are not from same geometry */
|
|
if (!sameGeometry(set))
|
|
return Split(0.0f,Split::SPLIT_GEOMID);
|
|
|
|
/* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
|
|
if (cfg.singleLeafTimeSegment)
|
|
{
|
|
/* test if one primitive has more than one time segment in time range, if so split time */
|
|
for (size_t i=set.begin(); i<set.end(); i++)
|
|
{
|
|
const PrimRefMB& prim = (*set.prims)[i];
|
|
const range<int> itime_range = prim.timeSegmentRange(set.time_range);
|
|
const int localTimeSegments = itime_range.size();
|
|
assert(localTimeSegments > 0);
|
|
if (localTimeSegments > 1) {
|
|
const int icenter = (itime_range.begin() + itime_range.end())/2;
|
|
const float splitTime = prim.timeStep(icenter);
|
|
return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* otherwise return fallback split */
|
|
return Split(0.0f,Split::SPLIT_FALLBACK);
|
|
}
|
|
|
|
/*! performs fallback split */
|
|
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
|
|
{
|
|
mvector<PrimRefMB>& prims = *set.prims;
|
|
|
|
const size_t begin = set.begin();
|
|
const size_t end = set.end();
|
|
const size_t center = (begin + end)/2;
|
|
|
|
PrimInfoMB linfo = empty;
|
|
for (size_t i=begin; i<center; i++)
|
|
linfo.add_primref(prims[i]);
|
|
|
|
PrimInfoMB rinfo = empty;
|
|
for (size_t i=center; i<end; i++)
|
|
rinfo.add_primref(prims[i]);
|
|
|
|
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
|
|
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
|
|
}
|
|
|
|
/*! checks if all primitives are from the same geometry */
|
|
__forceinline bool sameGeometry(const SetMB& set)
|
|
{
|
|
if (set.size() == 0) return true;
|
|
mvector<PrimRefMB>& prims = *set.prims;
|
|
const size_t begin = set.begin();
|
|
const size_t end = set.end();
|
|
unsigned int firstGeomID = prims[begin].geomID();
|
|
for (size_t i=begin+1; i<end; i++) {
|
|
if (prims[i].geomID() != firstGeomID){
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* split by geometry ID */
|
|
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
|
|
{
|
|
assert(set.size() > 1);
|
|
|
|
mvector<PrimRefMB>& prims = *set.prims;
|
|
const size_t begin = set.begin();
|
|
const size_t end = set.end();
|
|
|
|
PrimInfoMB left(empty);
|
|
PrimInfoMB right(empty);
|
|
unsigned int geomID = prims[begin].geomID();
|
|
size_t center = serial_partitioning(prims.data(),begin,end,left,right,
|
|
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
|
|
[ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });
|
|
|
|
new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
|
|
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
|
|
}
|
|
|
|
/*! Turns the given record into a leaf via createLeaf, or - when it holds
 *  more than cfg.maxLeafSize primitives, its fallback split has to be
 *  enforced, or a split is forced at the root - into a node whose children
 *  are produced with fallback splits and handled recursively.
 *  Raises an RTC_ERROR_UNKNOWN error if the record is deeper than cfg.maxDepth. */
const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
{
  /* this should never occur but is a fatal error */
  if (in.depth > cfg.maxDepth)
    throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

  /* replace already found split by fallback split */
  const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));

  /* special case when directly creating leaf without any splits that could shrink time_range:
   * at depth 1 a split is forced if the merged time ranges of the primitives
   * do not cover the time range of the set */
  bool force_split = false;
  if (current.depth == 1 && current.size() > 0)
  {
    BBox1f c = empty;
    const BBox1f p = current.prims.time_range;
    mvector<PrimRefMB>& prims = *current.prims.prims;
    for (size_t i=current.prims.begin(); i<current.prims.end(); i++)
      c.extend(prims[i].time_range);

    force_split = c.lower > p.lower || c.upper < p.upper;
  }

  /* create leaf for few primitives */
  if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
    return createLeaf(current,alloc);

  /* fill all children by always splitting the largest one */
  bool hasTimeSplits = false;
  NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
  LocalChildListSplit children(current);

  do {
    /* find the splittable child with the largest number of primitives;
     * -1 means no child can be split (signed, like in recurse() and like
     * the index parameter of the child list's split) */
    ssize_t bestChild = -1;
    size_t bestSize = 0;
    for (size_t i=0; i<children.size(); i++)
    {
      /* ignore leaves as they cannot get split */
      if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
        continue;

      /* the forced split only applies to the first child that gets here */
      force_split = false;

      /* remember child with largest size */
      if (children[i].size() > bestSize) {
        bestSize = children[i].size();
        bestChild = ssize_t(i);
      }
    }
    if (bestChild == -1) break;

    /* perform best found split */
    BuildRecordSplit& brecord = children[bestChild];
    BuildRecordSplit lrecord(current.depth+1);
    BuildRecordSplit rrecord(current.depth+1);
    std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
    hasTimeSplits |= new_vector != nullptr;

    /* find new splits */
    lrecord.split = findFallback(lrecord.prims);
    rrecord.split = findFallback(rrecord.prims);
    children.split(bestChild,lrecord,rrecord,std::move(new_vector));

  } while (children.size() < cfg.branchingFactor);

  /* detect time_ranges that have shrunken */
  for (size_t i=0; i<children.size(); i++) {
    const BBox1f c = children[i].prims.time_range;
    const BBox1f p = in.prims.time_range;
    hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
  }

  /* create node */
  auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);

  /* recurse into each child and perform reduction */
  LBBox3fa gbounds = empty;
  for (size_t i=0; i<children.size(); i++) {
    values[i] = createLargeLeaf(children[i],alloc);
    gbounds.extend(values[i].lbounds);
  }

  setNode(current,children.children.data(),node,values,children.numChildren);

  /* calculate geometry bounds of this node: with time splits they are
   * recomputed from the primitives, otherwise the merged child bounds are used */
  if (hasTimeSplits)
    return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
  else
    return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}
|
|
|
|
/*! Builds the subtree for the given record. Either hands the record to
 *  createLargeLeaf (size/depth thresholds reached or the leaf SAH is not
 *  worse than the split SAH), or splits it into up to cfg.branchingFactor
 *  children, creates an inner node and recurses into the children - through
 *  parallel_for if the record holds more than cfg.singleThreadThreshold
 *  primitives, in place otherwise.
 *  A null alloc is replaced by the result of createAlloc(); toplevel
 *  controls whether progress is reported for this record. */
const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
{
  /* get thread local allocator */
  if (!alloc)
    alloc = createAlloc();

  /* call progress monitor function to signal progress; only done for
   * toplevel records that are not split up into parallel sub-builds */
  if (toplevel && current.size() <= cfg.singleThreadThreshold)
    progressMonitor(current.size());

  /*! find best split */
  const Split csplit = find(current.prims);

  /*! compute leaf and split cost */
  const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
  const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
  assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

  /*! create a leaf node when threshold reached or SAH tells us to stop */
  if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
    current.prims.deterministic_order();
    return createLargeLeaf(current,alloc);
  }

  /*! perform initial split */
  SetMB lprims,rprims;
  std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
  bool hasTimeSplits = new_vector != nullptr;
  NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
  LocalChildList children(current);
  {
    BuildRecord lrecord(lprims,current.depth+1);
    BuildRecord rrecord(rprims,current.depth+1);
    children.split(0,lrecord,rrecord,std::move(new_vector));
  }

  /*! split until node is full or SAH tells us to stop */
  while (children.size() < cfg.branchingFactor)
  {
    /*! find best child to split: the one with the largest expected
     *  approximate half area among those above the minimum leaf size */
    float bestArea = neg_inf;
    ssize_t bestChild = -1;
    for (size_t i=0; i<children.size(); i++)
    {
      if (children[i].size() <= cfg.minLeafSize) continue;

      /* evaluate the area once per candidate */
      const float area = expectedApproxHalfArea(children[i].prims.geomBounds);
      if (area > bestArea) {
        bestChild = ssize_t(i);
        bestArea = area;
      }
    }
    if (bestChild == -1) break;

    /* perform split (own names, so the split and vector of the initial
     * split above are not shadowed) */
    BuildRecord& brecord = children[bestChild];
    BuildRecord lrecord(current.depth+1);
    BuildRecord rrecord(current.depth+1);
    const Split childSplit = find(brecord.prims);
    std::unique_ptr<mvector<PrimRefMB>> childVector = split(childSplit,brecord.prims,lrecord.prims,rrecord.prims);
    hasTimeSplits |= childVector != nullptr;
    children.split(bestChild,lrecord,rrecord,std::move(childVector));
  }

  /* detect time_ranges that have shrunken */
  for (size_t i=0; i<children.size(); i++) {
    const BBox1f c = children[i].prims.time_range;
    const BBox1f p = current.prims.time_range;
    hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
  }

  /* sort buildrecords for simpler shadow ray traversal */
  //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !!

  /*! create an inner node */
  auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
  LBBox3fa gbounds = empty;

  /* spawn tasks */
  if (unlikely(current.size() > cfg.singleThreadThreshold))
  {
    /*! parallel_for is faster than spawning sub-tasks; each child gets a
     *  null allocator and therefore fetches its own via createAlloc() */
    parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
      for (size_t i=r.begin(); i<r.end(); i++) {
        values[i] = recurse(children[i],nullptr,true);
        _mm_mfence(); // to allow non-temporal stores during build
      }
    });

    /*! merge bounding boxes */
    for (size_t i=0; i<children.size(); i++)
      gbounds.extend(values[i].lbounds);
  }
  /* recurse into each child */
  else
  {
    /* children are processed from last to first */
    //for (size_t i=0; i<children.size(); i++)
    for (ssize_t i=children.size()-1; i>=0; i--) {
      values[i] = recurse(children[i],alloc,false);
      gbounds.extend(values[i].lbounds);
    }
  }

  setNode(current,children.children.data(),node,values,children.numChildren);

  /* calculate geometry bounds of this node: with time splits they are
   * recomputed from the primitives, otherwise the merged child bounds are used */
  if (unlikely(hasTimeSplits))
    return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
  else
    return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}
|
|
|
|
/*! builder entry function */
|
|
__forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
|
|
{
|
|
const SetMB set(pinfo,&prims);
|
|
auto ret = recurse(BuildRecord(set,1),nullptr,true);
|
|
_mm_mfence(); // to allow non-temporal stores during build
|
|
return ret;
|
|
}
|
|
|
|
private:
|
|
Settings cfg;
|
|
HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
|
|
HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
|
|
const RecalculatePrimRef recalculatePrimRef;
|
|
const CreateAllocFunc createAlloc;
|
|
const CreateNodeFunc createNode;
|
|
const SetNodeFunc setNode;
|
|
const CreateLeafFunc createLeaf;
|
|
const ProgressMonitor progressMonitor;
|
|
};
|
|
|
|
template<typename NodeRef,
|
|
typename RecalculatePrimRef,
|
|
typename CreateAllocFunc,
|
|
typename CreateNodeFunc,
|
|
typename SetNodeFunc,
|
|
typename CreateLeafFunc,
|
|
typename ProgressMonitorFunc>
|
|
|
|
static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
|
|
const PrimInfoMB& pinfo,
|
|
MemoryMonitorInterface* device,
|
|
const RecalculatePrimRef recalculatePrimRef,
|
|
const CreateAllocFunc createAlloc,
|
|
const CreateNodeFunc createNode,
|
|
const SetNodeFunc setNode,
|
|
const CreateLeafFunc createLeaf,
|
|
const ProgressMonitorFunc progressMonitor,
|
|
const Settings& settings)
|
|
{
|
|
typedef BuilderT<
|
|
NodeRef,
|
|
RecalculatePrimRef,
|
|
decltype(createAlloc()),
|
|
CreateAllocFunc,
|
|
CreateNodeFunc,
|
|
SetNodeFunc,
|
|
CreateLeafFunc,
|
|
ProgressMonitorFunc> Builder;
|
|
|
|
Builder builder(device,
|
|
recalculatePrimRef,
|
|
createAlloc,
|
|
createNode,
|
|
setNode,
|
|
createLeaf,
|
|
progressMonitor,
|
|
settings);
|
|
|
|
|
|
return builder(prims,pinfo);
|
|
}
|
|
};
|
|
}
|
|
}
|