// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "grid_soa.h" #include "../common/ray.h" #include "triangle_intersector_pluecker.h" namespace embree { namespace isa { template struct MapUV0 { const float* const grid_uv; size_t ofs00, ofs01, ofs10, ofs11; __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} __forceinline void operator() (vfloat& u, vfloat& v, Vec3vf& Ng) const { const vfloat uv00(grid_uv[ofs00]); const vfloat uv01(grid_uv[ofs01]); const vfloat uv10(grid_uv[ofs10]); const vfloat uv11(grid_uv[ofs11]); const Vec2vf uv0 = GridSOA::decodeUV(uv00); const Vec2vf uv1 = GridSOA::decodeUV(uv01); const Vec2vf uv2 = GridSOA::decodeUV(uv10); const Vec2vf uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); u = uv[0]; v = uv[1]; } }; template struct MapUV1 { const float* const grid_uv; size_t ofs00, ofs01, ofs10, ofs11; __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} __forceinline void operator() (vfloat& u, vfloat& v, Vec3vf& Ng) const { const vfloat uv00(grid_uv[ofs00]); const vfloat uv01(grid_uv[ofs01]); const vfloat uv10(grid_uv[ofs10]); const vfloat uv11(grid_uv[ofs11]); const Vec2vf uv0 = GridSOA::decodeUV(uv10); const Vec2vf uv1 = GridSOA::decodeUV(uv01); const Vec2vf uv2 = GridSOA::decodeUV(uv11); const Vec2vf uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0)); u = uv[0]; v = uv[1]; } }; template class GridSOAIntersectorK { public: typedef void Primitive; class Precalculations { #if defined(__AVX__) static const int M = 8; #else static const int M = 4; #endif public: __forceinline Precalculations (const vbool& valid, const RayK& ray) : grid(nullptr), intersector(valid,ray) {} public: GridSOA* grid; PlueckerIntersectorK intersector; // FIXME: use quad intersector }; /*! Intersect a ray with the primitive. */ static __forceinline void intersect(const vbool& valid_i, Precalculations& pre, RayHitK& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { const size_t dim_offset = pre.grid->dim_offset; const size_t line_offset = pre.grid->width; const float* const grid_x = pre.grid->decodeLeaf(0,prim); const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; const size_t max_x = pre.grid->width == 2 ? 1 : 2; const size_t max_y = pre.grid->height == 2 ? 1 : 2; for (size_t y=0; y p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); const Vec3vf p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); const Vec3vf p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); const Vec3vf p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); } } } /*! Test if the ray is occluded by the primitive */ static __forceinline vbool occluded(const vbool& valid_i, Precalculations& pre, RayK& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { const size_t dim_offset = pre.grid->dim_offset; const size_t line_offset = pre.grid->width; const float* const grid_x = pre.grid->decodeLeaf(0,prim); const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; vbool valid = valid_i; const size_t max_x = pre.grid->width == 2 ? 1 : 2; const size_t max_y = pre.grid->height == 2 ? 1 : 2; for (size_t y=0; y p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); const Vec3vf p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); const Vec3vf p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); const Vec3vf p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); if (none(valid)) break; pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); if (none(valid)) break; } } return !valid; } template static __forceinline void intersect(RayHitK& ray, size_t k, RayQueryContext* context, const float* const grid_x, const size_t line_offset, const size_t lines, Precalculations& pre) { typedef typename Loader::vfloat vfloat; const size_t dim_offset = pre.grid->dim_offset; const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; Vec3 v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV(grid_uv,line_offset,lines),Intersect1KEpilogMU(ray,k,context,pre.grid->geomID(),pre.grid->primID())); }; template static __forceinline bool occluded(RayK& ray, size_t k, RayQueryContext* context, const float* const grid_x, const size_t line_offset, const size_t lines, Precalculations& pre) { typedef typename Loader::vfloat vfloat; const size_t dim_offset = pre.grid->dim_offset; const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; Vec3 v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2); return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV(grid_uv,line_offset,lines),Occluded1KEpilogMU(ray,k,context,pre.grid->geomID(),pre.grid->primID())); } /*! Intersect a ray with the primitive. */ static __forceinline void intersect(Precalculations& pre, RayHitK& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { const size_t line_offset = pre.grid->width; const size_t lines = pre.grid->height; const float* const grid_x = pre.grid->decodeLeaf(0,prim); #if defined(__AVX__) intersect( ray, k, context, grid_x, line_offset, lines, pre); #else intersect(ray, k, context, grid_x , line_offset, lines, pre); if (likely(lines > 2)) intersect(ray, k, context, grid_x+line_offset, line_offset, lines, pre); #endif } /*! Test if the ray is occluded by the primitive */ static __forceinline bool occluded(Precalculations& pre, RayK& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { const size_t line_offset = pre.grid->width; const size_t lines = pre.grid->height; const float* const grid_x = pre.grid->decodeLeaf(0,prim); #if defined(__AVX__) return occluded( ray, k, context, grid_x, line_offset, lines, pre); #else if (occluded(ray, k, context, grid_x , line_offset, lines, pre)) return true; if (likely(lines > 2)) if (occluded(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true; #endif return false; } }; template class GridSOAMBIntersectorK { public: typedef void Primitive; typedef typename GridSOAIntersectorK::Precalculations Precalculations; /*! Intersect a ray with the primitive. */ static __forceinline void intersect(const vbool& valid_i, Precalculations& pre, RayHitK& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { vfloat vftime; vint vitime = getTimeSegment(ray.time(), vfloat((float)(pre.grid->time_steps-1)), vftime); vbool valid1 = valid_i; while (any(valid1)) { const size_t j = bsf(movemask(valid1)); const int itime = vitime[j]; const vbool valid2 = valid1 & (itime == vitime); valid1 = valid1 & !valid2; intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node); } } /*! Intersect a ray with the primitive. */ static __forceinline void intersect(const vbool& valid_i, Precalculations& pre, RayHitK& ray, const vfloat& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { const size_t grid_offset = pre.grid->gridBytes >> 2; const size_t dim_offset = pre.grid->dim_offset; const size_t line_offset = pre.grid->width; const float* const grid_x = pre.grid->decodeLeaf(itime,prim); const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; const size_t max_x = pre.grid->width == 2 ? 1 : 2; const size_t max_y = pre.grid->height == 2 ? 1 : 2; for (size_t y=0; y a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); const Vec3vf a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); const Vec3vf a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); const Vec3vf a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); ofs00 += grid_offset; ofs01 += grid_offset; ofs10 += grid_offset; ofs11 += grid_offset; const Vec3vf b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); const Vec3vf b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); const Vec3vf b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); const Vec3vf b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); const Vec3vf p00 = lerp(a00,b00,ftime); const Vec3vf p01 = lerp(a01,b01,ftime); const Vec3vf p10 = lerp(a10,b10,ftime); const Vec3vf p11 = lerp(a11,b11,ftime); pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID())); } } } /*! Test if the ray is occluded by the primitive */ static __forceinline vbool occluded(const vbool& valid_i, Precalculations& pre, RayK& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { vfloat vftime; vint vitime = getTimeSegment(ray.time(), vfloat((float)(pre.grid->time_steps-1)), vftime); vbool valid_o = valid_i; vbool valid1 = valid_i; while (any(valid1)) { const int j = int(bsf(movemask(valid1))); const int itime = vitime[j]; const vbool valid2 = valid1 & (itime == vitime); valid1 = valid1 & !valid2; valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node); } return !valid_o; } /*! Test if the ray is occluded by the primitive */ static __forceinline vbool occluded(const vbool& valid_i, Precalculations& pre, RayK& ray, const vfloat& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { const size_t grid_offset = pre.grid->gridBytes >> 2; const size_t dim_offset = pre.grid->dim_offset; const size_t line_offset = pre.grid->width; const float* const grid_x = pre.grid->decodeLeaf(itime,prim); const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; vbool valid = valid_i; const size_t max_x = pre.grid->width == 2 ? 1 : 2; const size_t max_y = pre.grid->height == 2 ? 1 : 2; for (size_t y=0; y a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); const Vec3vf a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); const Vec3vf a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); const Vec3vf a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); ofs00 += grid_offset; ofs01 += grid_offset; ofs10 += grid_offset; ofs11 += grid_offset; const Vec3vf b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]); const Vec3vf b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]); const Vec3vf b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]); const Vec3vf b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]); const Vec3vf p00 = lerp(a00,b00,ftime); const Vec3vf p01 = lerp(a01,b01,ftime); const Vec3vf p10 = lerp(a10,b10,ftime); const Vec3vf p11 = lerp(a11,b11,ftime); pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); if (none(valid)) break; pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID())); if (none(valid)) break; } } return valid; } template static __forceinline void intersect(RayHitK& ray, size_t k, const float ftime, RayQueryContext* context, const float* const grid_x, const size_t line_offset, const size_t lines, Precalculations& pre) { typedef typename Loader::vfloat vfloat; const size_t grid_offset = pre.grid->gridBytes >> 2; const size_t dim_offset = pre.grid->dim_offset; const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; Vec3 a0, a1, a2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); Vec3 b0, b1, b2; Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); Vec3 v0 = lerp(a0,b0,vfloat(ftime)); Vec3 v1 = lerp(a1,b1,vfloat(ftime)); Vec3 v2 = lerp(a2,b2,vfloat(ftime)); pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV(grid_uv,line_offset,lines),Intersect1KEpilogMU(ray,k,context,pre.grid->geomID(),pre.grid->primID())); }; template static __forceinline bool occluded(RayK& ray, size_t k, const float ftime, RayQueryContext* context, const float* const grid_x, const size_t line_offset, const size_t lines, Precalculations& pre) { typedef typename Loader::vfloat vfloat; const size_t grid_offset = pre.grid->gridBytes >> 2; const size_t dim_offset = pre.grid->dim_offset; const float* const grid_y = grid_x + 1 * dim_offset; const float* const grid_z = grid_x + 2 * dim_offset; const float* const grid_uv = grid_x + 3 * dim_offset; Vec3 a0, a1, a2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2); Vec3 b0, b1, b2; Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2); Vec3 v0 = lerp(a0,b0,vfloat(ftime)); Vec3 v1 = lerp(a1,b1,vfloat(ftime)); Vec3 v2 = lerp(a2,b2,vfloat(ftime)); return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV(grid_uv,line_offset,lines),Occluded1KEpilogMU(ray,k,context,pre.grid->geomID(),pre.grid->primID())); } /*! Intersect a ray with the primitive. */ static __forceinline void intersect(Precalculations& pre, RayHitK& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { float ftime; int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); const size_t line_offset = pre.grid->width; const size_t lines = pre.grid->height; const float* const grid_x = pre.grid->decodeLeaf(itime,prim); #if defined(__AVX__) intersect( ray, k, ftime, context, grid_x, line_offset, lines, pre); #else intersect(ray, k, ftime, context, grid_x, line_offset, lines, pre); if (likely(lines > 2)) intersect(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre); #endif } /*! Test if the ray is occluded by the primitive */ static __forceinline bool occluded(Precalculations& pre, RayK& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node) { float ftime; int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime); const size_t line_offset = pre.grid->width; const size_t lines = pre.grid->height; const float* const grid_x = pre.grid->decodeLeaf(itime,prim); #if defined(__AVX__) return occluded( ray, k, ftime, context, grid_x, line_offset, lines, pre); #else if (occluded(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true; if (likely(lines > 2)) if (occluded(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true; #endif return false; } }; } }