diff --git a/modules/bullet/SCsub b/modules/bullet/SCsub index 194ed2030ee..0ecb6ceb07c 100644 --- a/modules/bullet/SCsub +++ b/modules/bullet/SCsub @@ -183,8 +183,6 @@ if env["builtin_bullet"]: "BulletSoftBody/BulletReducedDeformableBody/btReducedDeformableBodySolver.cpp", "BulletSoftBody/BulletReducedDeformableBody/btReducedDeformableContactConstraint.cpp", "BulletSoftBody/poly34.cpp", - # clew - "clew/clew.c", # LinearMath "LinearMath/btAlignedAllocator.cpp", "LinearMath/btConvexHull.cpp", @@ -206,7 +204,7 @@ if env["builtin_bullet"]: env_bullet.Prepend(CPPPATH=[thirdparty_dir]) - env_bullet.Append(CPPDEFINES=["BT_USE_OLD_DAMPING_METHOD", "BT_THREADSAFE"]) + env_bullet.Append(CPPDEFINES=["BT_USE_OLD_DAMPING_METHOD", "BT_THREADSAFE", "BT_USE_INVERSE_DYNAMICS_WITH_BULLET2"]) env_thirdparty = env_bullet.Clone() env_thirdparty.disable_warnings() diff --git a/thirdparty/README.md b/thirdparty/README.md index 28c3e42ee89..e4c5d14b458 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -25,7 +25,7 @@ Files extracted from upstream source: Files extracted from upstream source: -- `src/*` apart from CMakeLists.txt and premake4.lua files +- `src/*` minus `Bullet3*` and `clew` folders, and CMakeLists.txt and premake4.lua files - `LICENSE.txt`, and `VERSION` as `VERSION.txt` Includes some patches in the `patches` folder which have been sent upstream. diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h deleted file mode 100644 index bec0800a6ff..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_BROADPHASE_CALLBACK_H -#define B3_BROADPHASE_CALLBACK_H - -#include "Bullet3Common/b3Vector3.h" -struct b3BroadphaseProxy; - -struct b3BroadphaseAabbCallback -{ - virtual ~b3BroadphaseAabbCallback() {} - virtual bool process(const b3BroadphaseProxy* proxy) = 0; -}; - -struct b3BroadphaseRayCallback : public b3BroadphaseAabbCallback -{ - ///added some cached data to accelerate ray-AABB tests - b3Vector3 m_rayDirectionInverse; - unsigned int m_signs[3]; - b3Scalar m_lambda_max; - - virtual ~b3BroadphaseRayCallback() {} -}; - -#endif //B3_BROADPHASE_CALLBACK_H diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp deleted file mode 100644 index a0dc1da95d8..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp +++ /dev/null @@ -1,1352 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -///b3DynamicBvh implementation by Nathanael Presson - -#include "b3DynamicBvh.h" - -// -typedef b3AlignedObjectArray b3NodeArray; -typedef b3AlignedObjectArray b3ConstNodeArray; - -// -struct b3DbvtNodeEnumerator : b3DynamicBvh::ICollide -{ - b3ConstNodeArray nodes; - void Process(const b3DbvtNode* n) { nodes.push_back(n); } -}; - -// -static B3_DBVT_INLINE int b3IndexOf(const b3DbvtNode* node) -{ - return (node->parent->childs[1] == node); -} - -// -static B3_DBVT_INLINE b3DbvtVolume b3Merge(const b3DbvtVolume& a, - const b3DbvtVolume& b) -{ -#if (B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE) - B3_ATTRIBUTE_ALIGNED16(char locals[sizeof(b3DbvtAabbMm)]); - b3DbvtVolume& res = *(b3DbvtVolume*)locals; -#else - b3DbvtVolume res; -#endif - b3Merge(a, b, res); - return (res); -} - -// volume+edge lengths -static B3_DBVT_INLINE b3Scalar b3Size(const b3DbvtVolume& a) -{ - const b3Vector3 edges = a.Lengths(); - return (edges.x * edges.y * edges.z + - edges.x + edges.y + edges.z); -} - -// -static void b3GetMaxDepth(const b3DbvtNode* node, int depth, int& maxdepth) -{ - if (node->isinternal()) - { - b3GetMaxDepth(node->childs[0], depth + 1, maxdepth); - b3GetMaxDepth(node->childs[1], depth + 1, maxdepth); - } - else - maxdepth = b3Max(maxdepth, depth); -} - -// -static B3_DBVT_INLINE void b3DeleteNode(b3DynamicBvh* pdbvt, - b3DbvtNode* node) -{ - b3AlignedFree(pdbvt->m_free); - pdbvt->m_free = node; -} - -// -static void b3RecurseDeleteNode(b3DynamicBvh* pdbvt, - b3DbvtNode* node) -{ - if (!node->isleaf()) - { - b3RecurseDeleteNode(pdbvt, node->childs[0]); - b3RecurseDeleteNode(pdbvt, node->childs[1]); - } - if (node == pdbvt->m_root) pdbvt->m_root = 0; - b3DeleteNode(pdbvt, node); -} - -// -static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt, - b3DbvtNode* parent, - void* data) -{ - b3DbvtNode* node; - if (pdbvt->m_free) - { - node = pdbvt->m_free; - pdbvt->m_free = 0; - } - else - { - node = new (b3AlignedAlloc(sizeof(b3DbvtNode), 16)) b3DbvtNode(); - } - node->parent = parent; - node->data = data; - node->childs[1] = 0; - return (node); -} - -// -static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt, - b3DbvtNode* parent, - const b3DbvtVolume& volume, - void* data) -{ - b3DbvtNode* node = b3CreateNode(pdbvt, parent, data); - node->volume = volume; - return (node); -} - -// -static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt, - b3DbvtNode* parent, - const b3DbvtVolume& volume0, - const b3DbvtVolume& volume1, - void* data) -{ - b3DbvtNode* node = b3CreateNode(pdbvt, parent, data); - b3Merge(volume0, volume1, node->volume); - return (node); -} - -// -static void b3InsertLeaf(b3DynamicBvh* pdbvt, - b3DbvtNode* root, - b3DbvtNode* leaf) -{ - if (!pdbvt->m_root) - { - pdbvt->m_root = leaf; - leaf->parent = 0; - } - else - { - if (!root->isleaf()) - { - do - { - root = root->childs[b3Select(leaf->volume, - root->childs[0]->volume, - root->childs[1]->volume)]; - } while (!root->isleaf()); - } - b3DbvtNode* prev = root->parent; - b3DbvtNode* node = b3CreateNode(pdbvt, prev, leaf->volume, root->volume, 0); - if (prev) - { - prev->childs[b3IndexOf(root)] = node; - node->childs[0] = root; - root->parent = node; - node->childs[1] = leaf; - leaf->parent = node; - do - { - if (!prev->volume.Contain(node->volume)) - b3Merge(prev->childs[0]->volume, prev->childs[1]->volume, prev->volume); - else - break; - node = prev; - } while (0 != (prev = node->parent)); - } - else - { - node->childs[0] = root; - root->parent = node; - node->childs[1] = leaf; - leaf->parent = node; - pdbvt->m_root = node; - } - } -} - -// -static b3DbvtNode* b3RemoveLeaf(b3DynamicBvh* pdbvt, - b3DbvtNode* leaf) -{ - if (leaf == pdbvt->m_root) - { - pdbvt->m_root = 0; - return (0); - } - else - { - b3DbvtNode* parent = leaf->parent; - b3DbvtNode* prev = parent->parent; - b3DbvtNode* sibling = parent->childs[1 - b3IndexOf(leaf)]; - if (prev) - { - prev->childs[b3IndexOf(parent)] = sibling; - sibling->parent = prev; - b3DeleteNode(pdbvt, parent); - while (prev) - { - const b3DbvtVolume pb = prev->volume; - b3Merge(prev->childs[0]->volume, prev->childs[1]->volume, prev->volume); - if (b3NotEqual(pb, prev->volume)) - { - prev = prev->parent; - } - else - break; - } - return (prev ? prev : pdbvt->m_root); - } - else - { - pdbvt->m_root = sibling; - sibling->parent = 0; - b3DeleteNode(pdbvt, parent); - return (pdbvt->m_root); - } - } -} - -// -static void b3FetchLeaves(b3DynamicBvh* pdbvt, - b3DbvtNode* root, - b3NodeArray& leaves, - int depth = -1) -{ - if (root->isinternal() && depth) - { - b3FetchLeaves(pdbvt, root->childs[0], leaves, depth - 1); - b3FetchLeaves(pdbvt, root->childs[1], leaves, depth - 1); - b3DeleteNode(pdbvt, root); - } - else - { - leaves.push_back(root); - } -} - -static bool b3LeftOfAxis(const b3DbvtNode* node, - const b3Vector3& org, - const b3Vector3& axis) -{ - return b3Dot(axis, node->volume.Center() - org) <= 0; -} - -// Partitions leaves such that leaves[0, n) are on the -// left of axis, and leaves[n, count) are on the right -// of axis. returns N. -static int b3Split(b3DbvtNode** leaves, - int count, - const b3Vector3& org, - const b3Vector3& axis) -{ - int begin = 0; - int end = count; - for (;;) - { - while (begin != end && b3LeftOfAxis(leaves[begin], org, axis)) - { - ++begin; - } - - if (begin == end) - { - break; - } - - while (begin != end && !b3LeftOfAxis(leaves[end - 1], org, axis)) - { - --end; - } - - if (begin == end) - { - break; - } - - // swap out of place nodes - --end; - b3DbvtNode* temp = leaves[begin]; - leaves[begin] = leaves[end]; - leaves[end] = temp; - ++begin; - } - - return begin; -} - -// -static b3DbvtVolume b3Bounds(b3DbvtNode** leaves, - int count) -{ -#if B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE - B3_ATTRIBUTE_ALIGNED16(char locals[sizeof(b3DbvtVolume)]); - b3DbvtVolume& volume = *(b3DbvtVolume*)locals; - volume = leaves[0]->volume; -#else - b3DbvtVolume volume = leaves[0]->volume; -#endif - for (int i = 1, ni = count; i < ni; ++i) - { - b3Merge(volume, leaves[i]->volume, volume); - } - return (volume); -} - -// -static void b3BottomUp(b3DynamicBvh* pdbvt, - b3DbvtNode** leaves, - int count) -{ - while (count > 1) - { - b3Scalar minsize = B3_INFINITY; - int minidx[2] = {-1, -1}; - for (int i = 0; i < count; ++i) - { - for (int j = i + 1; j < count; ++j) - { - const b3Scalar sz = b3Size(b3Merge(leaves[i]->volume, leaves[j]->volume)); - if (sz < minsize) - { - minsize = sz; - minidx[0] = i; - minidx[1] = j; - } - } - } - b3DbvtNode* n[] = {leaves[minidx[0]], leaves[minidx[1]]}; - b3DbvtNode* p = b3CreateNode(pdbvt, 0, n[0]->volume, n[1]->volume, 0); - p->childs[0] = n[0]; - p->childs[1] = n[1]; - n[0]->parent = p; - n[1]->parent = p; - leaves[minidx[0]] = p; - leaves[minidx[1]] = leaves[count - 1]; - --count; - } -} - -// -static b3DbvtNode* b3TopDown(b3DynamicBvh* pdbvt, - b3DbvtNode** leaves, - int count, - int bu_treshold) -{ - static const b3Vector3 axis[] = {b3MakeVector3(1, 0, 0), - b3MakeVector3(0, 1, 0), - b3MakeVector3(0, 0, 1)}; - b3Assert(bu_treshold > 1); - if (count > 1) - { - if (count > bu_treshold) - { - const b3DbvtVolume vol = b3Bounds(leaves, count); - const b3Vector3 org = vol.Center(); - int partition; - int bestaxis = -1; - int bestmidp = count; - int splitcount[3][2] = {{0, 0}, {0, 0}, {0, 0}}; - int i; - for (i = 0; i < count; ++i) - { - const b3Vector3 x = leaves[i]->volume.Center() - org; - for (int j = 0; j < 3; ++j) - { - ++splitcount[j][b3Dot(x, axis[j]) > 0 ? 1 : 0]; - } - } - for (i = 0; i < 3; ++i) - { - if ((splitcount[i][0] > 0) && (splitcount[i][1] > 0)) - { - const int midp = (int)b3Fabs(b3Scalar(splitcount[i][0] - splitcount[i][1])); - if (midp < bestmidp) - { - bestaxis = i; - bestmidp = midp; - } - } - } - if (bestaxis >= 0) - { - partition = b3Split(leaves, count, org, axis[bestaxis]); - b3Assert(partition != 0 && partition != count); - } - else - { - partition = count / 2 + 1; - } - b3DbvtNode* node = b3CreateNode(pdbvt, 0, vol, 0); - node->childs[0] = b3TopDown(pdbvt, &leaves[0], partition, bu_treshold); - node->childs[1] = b3TopDown(pdbvt, &leaves[partition], count - partition, bu_treshold); - node->childs[0]->parent = node; - node->childs[1]->parent = node; - return (node); - } - else - { - b3BottomUp(pdbvt, leaves, count); - return (leaves[0]); - } - } - return (leaves[0]); -} - -// -static B3_DBVT_INLINE b3DbvtNode* b3Sort(b3DbvtNode* n, b3DbvtNode*& r) -{ - b3DbvtNode* p = n->parent; - b3Assert(n->isinternal()); - if (p > n) - { - const int i = b3IndexOf(n); - const int j = 1 - i; - b3DbvtNode* s = p->childs[j]; - b3DbvtNode* q = p->parent; - b3Assert(n == p->childs[i]); - if (q) - q->childs[b3IndexOf(p)] = n; - else - r = n; - s->parent = n; - p->parent = n; - n->parent = q; - p->childs[0] = n->childs[0]; - p->childs[1] = n->childs[1]; - n->childs[0]->parent = p; - n->childs[1]->parent = p; - n->childs[i] = p; - n->childs[j] = s; - b3Swap(p->volume, n->volume); - return (p); - } - return (n); -} - -#if 0 -static B3_DBVT_INLINE b3DbvtNode* walkup(b3DbvtNode* n,int count) -{ - while(n&&(count--)) n=n->parent; - return(n); -} -#endif - -// -// Api -// - -// -b3DynamicBvh::b3DynamicBvh() -{ - m_root = 0; - m_free = 0; - m_lkhd = -1; - m_leaves = 0; - m_opath = 0; -} - -// -b3DynamicBvh::~b3DynamicBvh() -{ - clear(); -} - -// -void b3DynamicBvh::clear() -{ - if (m_root) - b3RecurseDeleteNode(this, m_root); - b3AlignedFree(m_free); - m_free = 0; - m_lkhd = -1; - m_stkStack.clear(); - m_opath = 0; -} - -// -void b3DynamicBvh::optimizeBottomUp() -{ - if (m_root) - { - b3NodeArray leaves; - leaves.reserve(m_leaves); - b3FetchLeaves(this, m_root, leaves); - b3BottomUp(this, &leaves[0], leaves.size()); - m_root = leaves[0]; - } -} - -// -void b3DynamicBvh::optimizeTopDown(int bu_treshold) -{ - if (m_root) - { - b3NodeArray leaves; - leaves.reserve(m_leaves); - b3FetchLeaves(this, m_root, leaves); - m_root = b3TopDown(this, &leaves[0], leaves.size(), bu_treshold); - } -} - -// -void b3DynamicBvh::optimizeIncremental(int passes) -{ - if (passes < 0) passes = m_leaves; - if (m_root && (passes > 0)) - { - do - { - b3DbvtNode* node = m_root; - unsigned bit = 0; - while (node->isinternal()) - { - node = b3Sort(node, m_root)->childs[(m_opath >> bit) & 1]; - bit = (bit + 1) & (sizeof(unsigned) * 8 - 1); - } - update(node); - ++m_opath; - } while (--passes); - } -} - -// -b3DbvtNode* b3DynamicBvh::insert(const b3DbvtVolume& volume, void* data) -{ - b3DbvtNode* leaf = b3CreateNode(this, 0, volume, data); - b3InsertLeaf(this, m_root, leaf); - ++m_leaves; - return (leaf); -} - -// -void b3DynamicBvh::update(b3DbvtNode* leaf, int lookahead) -{ - b3DbvtNode* root = b3RemoveLeaf(this, leaf); - if (root) - { - if (lookahead >= 0) - { - for (int i = 0; (i < lookahead) && root->parent; ++i) - { - root = root->parent; - } - } - else - root = m_root; - } - b3InsertLeaf(this, root, leaf); -} - -// -void b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume) -{ - b3DbvtNode* root = b3RemoveLeaf(this, leaf); - if (root) - { - if (m_lkhd >= 0) - { - for (int i = 0; (i < m_lkhd) && root->parent; ++i) - { - root = root->parent; - } - } - else - root = m_root; - } - leaf->volume = volume; - b3InsertLeaf(this, root, leaf); -} - -// -bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity, b3Scalar margin) -{ - if (leaf->volume.Contain(volume)) return (false); - volume.Expand(b3MakeVector3(margin, margin, margin)); - volume.SignedExpand(velocity); - update(leaf, volume); - return (true); -} - -// -bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity) -{ - if (leaf->volume.Contain(volume)) return (false); - volume.SignedExpand(velocity); - update(leaf, volume); - return (true); -} - -// -bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, b3Scalar margin) -{ - if (leaf->volume.Contain(volume)) return (false); - volume.Expand(b3MakeVector3(margin, margin, margin)); - update(leaf, volume); - return (true); -} - -// -void b3DynamicBvh::remove(b3DbvtNode* leaf) -{ - b3RemoveLeaf(this, leaf); - b3DeleteNode(this, leaf); - --m_leaves; -} - -// -void b3DynamicBvh::write(IWriter* iwriter) const -{ - b3DbvtNodeEnumerator nodes; - nodes.nodes.reserve(m_leaves * 2); - enumNodes(m_root, nodes); - iwriter->Prepare(m_root, nodes.nodes.size()); - for (int i = 0; i < nodes.nodes.size(); ++i) - { - const b3DbvtNode* n = nodes.nodes[i]; - int p = -1; - if (n->parent) p = nodes.nodes.findLinearSearch(n->parent); - if (n->isinternal()) - { - const int c0 = nodes.nodes.findLinearSearch(n->childs[0]); - const int c1 = nodes.nodes.findLinearSearch(n->childs[1]); - iwriter->WriteNode(n, i, p, c0, c1); - } - else - { - iwriter->WriteLeaf(n, i, p); - } - } -} - -// -void b3DynamicBvh::clone(b3DynamicBvh& dest, IClone* iclone) const -{ - dest.clear(); - if (m_root != 0) - { - b3AlignedObjectArray stack; - stack.reserve(m_leaves); - stack.push_back(sStkCLN(m_root, 0)); - do - { - const int i = stack.size() - 1; - const sStkCLN e = stack[i]; - b3DbvtNode* n = b3CreateNode(&dest, e.parent, e.node->volume, e.node->data); - stack.pop_back(); - if (e.parent != 0) - e.parent->childs[i & 1] = n; - else - dest.m_root = n; - if (e.node->isinternal()) - { - stack.push_back(sStkCLN(e.node->childs[0], n)); - stack.push_back(sStkCLN(e.node->childs[1], n)); - } - else - { - iclone->CloneLeaf(n); - } - } while (stack.size() > 0); - } -} - -// -int b3DynamicBvh::maxdepth(const b3DbvtNode* node) -{ - int depth = 0; - if (node) b3GetMaxDepth(node, 1, depth); - return (depth); -} - -// -int b3DynamicBvh::countLeaves(const b3DbvtNode* node) -{ - if (node->isinternal()) - return (countLeaves(node->childs[0]) + countLeaves(node->childs[1])); - else - return (1); -} - -// -void b3DynamicBvh::extractLeaves(const b3DbvtNode* node, b3AlignedObjectArray& leaves) -{ - if (node->isinternal()) - { - extractLeaves(node->childs[0], leaves); - extractLeaves(node->childs[1], leaves); - } - else - { - leaves.push_back(node); - } -} - -// -#if B3_DBVT_ENABLE_BENCHMARK - -#include -#include - -/* -q6600,2.4ghz - -/Ox /Ob2 /Oi /Ot /I "." /I "..\.." /I "..\..\src" /D "NDEBUG" /D "_LIB" /D "_WINDOWS" /D "_CRT_SECURE_NO_DEPRECATE" /D "_CRT_NONSTDC_NO_DEPRECATE" /D "WIN32" -/GF /FD /MT /GS- /Gy /arch:SSE2 /Zc:wchar_t- /Fp"..\..\out\release8\build\libbulletcollision\libbulletcollision.pch" -/Fo"..\..\out\release8\build\libbulletcollision\\" -/Fd"..\..\out\release8\build\libbulletcollision\bulletcollision.pdb" -/W3 /nologo /c /Wp64 /Zi /errorReport:prompt - -Benchmarking dbvt... -World scale: 100.000000 -Extents base: 1.000000 -Extents range: 4.000000 -Leaves: 8192 -sizeof(b3DbvtVolume): 32 bytes -sizeof(b3DbvtNode): 44 bytes -[1] b3DbvtVolume intersections: 3499 ms (-1%) -[2] b3DbvtVolume merges: 1934 ms (0%) -[3] b3DynamicBvh::collideTT: 5485 ms (-21%) -[4] b3DynamicBvh::collideTT self: 2814 ms (-20%) -[5] b3DynamicBvh::collideTT xform: 7379 ms (-1%) -[6] b3DynamicBvh::collideTT xform,self: 7270 ms (-2%) -[7] b3DynamicBvh::rayTest: 6314 ms (0%),(332143 r/s) -[8] insert/remove: 2093 ms (0%),(1001983 ir/s) -[9] updates (teleport): 1879 ms (-3%),(1116100 u/s) -[10] updates (jitter): 1244 ms (-4%),(1685813 u/s) -[11] optimize (incremental): 2514 ms (0%),(1668000 o/s) -[12] b3DbvtVolume notequal: 3659 ms (0%) -[13] culling(OCL+fullsort): 2218 ms (0%),(461 t/s) -[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s) -[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s) -[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s) -[17] b3DbvtVolume select: 3419 ms (0%) -*/ - -struct b3DbvtBenchmark -{ - struct NilPolicy : b3DynamicBvh::ICollide - { - NilPolicy() : m_pcount(0), m_depth(-B3_INFINITY), m_checksort(true) {} - void Process(const b3DbvtNode*, const b3DbvtNode*) { ++m_pcount; } - void Process(const b3DbvtNode*) { ++m_pcount; } - void Process(const b3DbvtNode*, b3Scalar depth) - { - ++m_pcount; - if (m_checksort) - { - if (depth >= m_depth) - m_depth = depth; - else - printf("wrong depth: %f (should be >= %f)\r\n", depth, m_depth); - } - } - int m_pcount; - b3Scalar m_depth; - bool m_checksort; - }; - struct P14 : b3DynamicBvh::ICollide - { - struct Node - { - const b3DbvtNode* leaf; - b3Scalar depth; - }; - void Process(const b3DbvtNode* leaf, b3Scalar depth) - { - Node n; - n.leaf = leaf; - n.depth = depth; - } - static int sortfnc(const Node& a, const Node& b) - { - if (a.depth < b.depth) return (+1); - if (a.depth > b.depth) return (-1); - return (0); - } - b3AlignedObjectArray m_nodes; - }; - struct P15 : b3DynamicBvh::ICollide - { - struct Node - { - const b3DbvtNode* leaf; - b3Scalar depth; - }; - void Process(const b3DbvtNode* leaf) - { - Node n; - n.leaf = leaf; - n.depth = dot(leaf->volume.Center(), m_axis); - } - static int sortfnc(const Node& a, const Node& b) - { - if (a.depth < b.depth) return (+1); - if (a.depth > b.depth) return (-1); - return (0); - } - b3AlignedObjectArray m_nodes; - b3Vector3 m_axis; - }; - static b3Scalar RandUnit() - { - return (rand() / (b3Scalar)RAND_MAX); - } - static b3Vector3 RandVector3() - { - return (b3Vector3(RandUnit(), RandUnit(), RandUnit())); - } - static b3Vector3 RandVector3(b3Scalar cs) - { - return (RandVector3() * cs - b3Vector3(cs, cs, cs) / 2); - } - static b3DbvtVolume RandVolume(b3Scalar cs, b3Scalar eb, b3Scalar es) - { - return (b3DbvtVolume::FromCE(RandVector3(cs), b3Vector3(eb, eb, eb) + RandVector3() * es)); - } - static b3Transform RandTransform(b3Scalar cs) - { - b3Transform t; - t.setOrigin(RandVector3(cs)); - t.setRotation(b3Quaternion(RandUnit() * B3_PI * 2, RandUnit() * B3_PI * 2, RandUnit() * B3_PI * 2).normalized()); - return (t); - } - static void RandTree(b3Scalar cs, b3Scalar eb, b3Scalar es, int leaves, b3DynamicBvh& dbvt) - { - dbvt.clear(); - for (int i = 0; i < leaves; ++i) - { - dbvt.insert(RandVolume(cs, eb, es), 0); - } - } -}; - -void b3DynamicBvh::benchmark() -{ - static const b3Scalar cfgVolumeCenterScale = 100; - static const b3Scalar cfgVolumeExentsBase = 1; - static const b3Scalar cfgVolumeExentsScale = 4; - static const int cfgLeaves = 8192; - static const bool cfgEnable = true; - - //[1] b3DbvtVolume intersections - bool cfgBenchmark1_Enable = cfgEnable; - static const int cfgBenchmark1_Iterations = 8; - static const int cfgBenchmark1_Reference = 3499; - //[2] b3DbvtVolume merges - bool cfgBenchmark2_Enable = cfgEnable; - static const int cfgBenchmark2_Iterations = 4; - static const int cfgBenchmark2_Reference = 1945; - //[3] b3DynamicBvh::collideTT - bool cfgBenchmark3_Enable = cfgEnable; - static const int cfgBenchmark3_Iterations = 512; - static const int cfgBenchmark3_Reference = 5485; - //[4] b3DynamicBvh::collideTT self - bool cfgBenchmark4_Enable = cfgEnable; - static const int cfgBenchmark4_Iterations = 512; - static const int cfgBenchmark4_Reference = 2814; - //[5] b3DynamicBvh::collideTT xform - bool cfgBenchmark5_Enable = cfgEnable; - static const int cfgBenchmark5_Iterations = 512; - static const b3Scalar cfgBenchmark5_OffsetScale = 2; - static const int cfgBenchmark5_Reference = 7379; - //[6] b3DynamicBvh::collideTT xform,self - bool cfgBenchmark6_Enable = cfgEnable; - static const int cfgBenchmark6_Iterations = 512; - static const b3Scalar cfgBenchmark6_OffsetScale = 2; - static const int cfgBenchmark6_Reference = 7270; - //[7] b3DynamicBvh::rayTest - bool cfgBenchmark7_Enable = cfgEnable; - static const int cfgBenchmark7_Passes = 32; - static const int cfgBenchmark7_Iterations = 65536; - static const int cfgBenchmark7_Reference = 6307; - //[8] insert/remove - bool cfgBenchmark8_Enable = cfgEnable; - static const int cfgBenchmark8_Passes = 32; - static const int cfgBenchmark8_Iterations = 65536; - static const int cfgBenchmark8_Reference = 2105; - //[9] updates (teleport) - bool cfgBenchmark9_Enable = cfgEnable; - static const int cfgBenchmark9_Passes = 32; - static const int cfgBenchmark9_Iterations = 65536; - static const int cfgBenchmark9_Reference = 1879; - //[10] updates (jitter) - bool cfgBenchmark10_Enable = cfgEnable; - static const b3Scalar cfgBenchmark10_Scale = cfgVolumeCenterScale / 10000; - static const int cfgBenchmark10_Passes = 32; - static const int cfgBenchmark10_Iterations = 65536; - static const int cfgBenchmark10_Reference = 1244; - //[11] optimize (incremental) - bool cfgBenchmark11_Enable = cfgEnable; - static const int cfgBenchmark11_Passes = 64; - static const int cfgBenchmark11_Iterations = 65536; - static const int cfgBenchmark11_Reference = 2510; - //[12] b3DbvtVolume notequal - bool cfgBenchmark12_Enable = cfgEnable; - static const int cfgBenchmark12_Iterations = 32; - static const int cfgBenchmark12_Reference = 3677; - //[13] culling(OCL+fullsort) - bool cfgBenchmark13_Enable = cfgEnable; - static const int cfgBenchmark13_Iterations = 1024; - static const int cfgBenchmark13_Reference = 2231; - //[14] culling(OCL+qsort) - bool cfgBenchmark14_Enable = cfgEnable; - static const int cfgBenchmark14_Iterations = 8192; - static const int cfgBenchmark14_Reference = 3500; - //[15] culling(KDOP+qsort) - bool cfgBenchmark15_Enable = cfgEnable; - static const int cfgBenchmark15_Iterations = 8192; - static const int cfgBenchmark15_Reference = 1151; - //[16] insert/remove batch - bool cfgBenchmark16_Enable = cfgEnable; - static const int cfgBenchmark16_BatchCount = 256; - static const int cfgBenchmark16_Passes = 16384; - static const int cfgBenchmark16_Reference = 5138; - //[17] select - bool cfgBenchmark17_Enable = cfgEnable; - static const int cfgBenchmark17_Iterations = 4; - static const int cfgBenchmark17_Reference = 3390; - - b3Clock wallclock; - printf("Benchmarking dbvt...\r\n"); - printf("\tWorld scale: %f\r\n", cfgVolumeCenterScale); - printf("\tExtents base: %f\r\n", cfgVolumeExentsBase); - printf("\tExtents range: %f\r\n", cfgVolumeExentsScale); - printf("\tLeaves: %u\r\n", cfgLeaves); - printf("\tsizeof(b3DbvtVolume): %u bytes\r\n", sizeof(b3DbvtVolume)); - printf("\tsizeof(b3DbvtNode): %u bytes\r\n", sizeof(b3DbvtNode)); - if (cfgBenchmark1_Enable) - { // Benchmark 1 - srand(380843); - b3AlignedObjectArray volumes; - b3AlignedObjectArray results; - volumes.resize(cfgLeaves); - results.resize(cfgLeaves); - for (int i = 0; i < cfgLeaves; ++i) - { - volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale); - } - printf("[1] b3DbvtVolume intersections: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark1_Iterations; ++i) - { - for (int j = 0; j < cfgLeaves; ++j) - { - for (int k = 0; k < cfgLeaves; ++k) - { - results[k] = Intersect(volumes[j], volumes[k]); - } - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark1_Reference) * 100 / time); - } - if (cfgBenchmark2_Enable) - { // Benchmark 2 - srand(380843); - b3AlignedObjectArray volumes; - b3AlignedObjectArray results; - volumes.resize(cfgLeaves); - results.resize(cfgLeaves); - for (int i = 0; i < cfgLeaves; ++i) - { - volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale); - } - printf("[2] b3DbvtVolume merges: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark2_Iterations; ++i) - { - for (int j = 0; j < cfgLeaves; ++j) - { - for (int k = 0; k < cfgLeaves; ++k) - { - Merge(volumes[j], volumes[k], results[k]); - } - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark2_Reference) * 100 / time); - } - if (cfgBenchmark3_Enable) - { // Benchmark 3 - srand(380843); - b3DynamicBvh dbvt[2]; - b3DbvtBenchmark::NilPolicy policy; - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]); - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]); - dbvt[0].optimizeTopDown(); - dbvt[1].optimizeTopDown(); - printf("[3] b3DynamicBvh::collideTT: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark3_Iterations; ++i) - { - b3DynamicBvh::collideTT(dbvt[0].m_root, dbvt[1].m_root, policy); - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark3_Reference) * 100 / time); - } - if (cfgBenchmark4_Enable) - { // Benchmark 4 - srand(380843); - b3DynamicBvh dbvt; - b3DbvtBenchmark::NilPolicy policy; - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - printf("[4] b3DynamicBvh::collideTT self: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark4_Iterations; ++i) - { - b3DynamicBvh::collideTT(dbvt.m_root, dbvt.m_root, policy); - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark4_Reference) * 100 / time); - } - if (cfgBenchmark5_Enable) - { // Benchmark 5 - srand(380843); - b3DynamicBvh dbvt[2]; - b3AlignedObjectArray transforms; - b3DbvtBenchmark::NilPolicy policy; - transforms.resize(cfgBenchmark5_Iterations); - for (int i = 0; i < transforms.size(); ++i) - { - transforms[i] = b3DbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark5_OffsetScale); - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]); - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]); - dbvt[0].optimizeTopDown(); - dbvt[1].optimizeTopDown(); - printf("[5] b3DynamicBvh::collideTT xform: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark5_Iterations; ++i) - { - b3DynamicBvh::collideTT(dbvt[0].m_root, dbvt[1].m_root, transforms[i], policy); - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark5_Reference) * 100 / time); - } - if (cfgBenchmark6_Enable) - { // Benchmark 6 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray transforms; - b3DbvtBenchmark::NilPolicy policy; - transforms.resize(cfgBenchmark6_Iterations); - for (int i = 0; i < transforms.size(); ++i) - { - transforms[i] = b3DbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark6_OffsetScale); - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - printf("[6] b3DynamicBvh::collideTT xform,self: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark6_Iterations; ++i) - { - b3DynamicBvh::collideTT(dbvt.m_root, dbvt.m_root, transforms[i], policy); - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark6_Reference) * 100 / time); - } - if (cfgBenchmark7_Enable) - { // Benchmark 7 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray rayorg; - b3AlignedObjectArray raydir; - b3DbvtBenchmark::NilPolicy policy; - rayorg.resize(cfgBenchmark7_Iterations); - raydir.resize(cfgBenchmark7_Iterations); - for (int i = 0; i < rayorg.size(); ++i) - { - rayorg[i] = b3DbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2); - raydir[i] = b3DbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2); - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - printf("[7] b3DynamicBvh::rayTest: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark7_Passes; ++i) - { - for (int j = 0; j < cfgBenchmark7_Iterations; ++j) - { - b3DynamicBvh::rayTest(dbvt.m_root, rayorg[j], rayorg[j] + raydir[j], policy); - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - unsigned rays = cfgBenchmark7_Passes * cfgBenchmark7_Iterations; - printf("%u ms (%i%%),(%u r/s)\r\n", time, (time - cfgBenchmark7_Reference) * 100 / time, (rays * 1000) / time); - } - if (cfgBenchmark8_Enable) - { // Benchmark 8 - srand(380843); - b3DynamicBvh dbvt; - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - printf("[8] insert/remove: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark8_Passes; ++i) - { - for (int j = 0; j < cfgBenchmark8_Iterations; ++j) - { - dbvt.remove(dbvt.insert(b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0)); - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int ir = cfgBenchmark8_Passes * cfgBenchmark8_Iterations; - printf("%u ms (%i%%),(%u ir/s)\r\n", time, (time - cfgBenchmark8_Reference) * 100 / time, ir * 1000 / time); - } - if (cfgBenchmark9_Enable) - { // Benchmark 9 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray leaves; - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - dbvt.extractLeaves(dbvt.m_root, leaves); - printf("[9] updates (teleport): "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark9_Passes; ++i) - { - for (int j = 0; j < cfgBenchmark9_Iterations; ++j) - { - dbvt.update(const_cast(leaves[rand() % cfgLeaves]), - b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale)); - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int up = cfgBenchmark9_Passes * cfgBenchmark9_Iterations; - printf("%u ms (%i%%),(%u u/s)\r\n", time, (time - cfgBenchmark9_Reference) * 100 / time, up * 1000 / time); - } - if (cfgBenchmark10_Enable) - { // Benchmark 10 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray leaves; - b3AlignedObjectArray vectors; - vectors.resize(cfgBenchmark10_Iterations); - for (int i = 0; i < vectors.size(); ++i) - { - vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)) * cfgBenchmark10_Scale; - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - dbvt.extractLeaves(dbvt.m_root, leaves); - printf("[10] updates (jitter): "); - wallclock.reset(); - - for (int i = 0; i < cfgBenchmark10_Passes; ++i) - { - for (int j = 0; j < cfgBenchmark10_Iterations; ++j) - { - const b3Vector3& d = vectors[j]; - b3DbvtNode* l = const_cast(leaves[rand() % cfgLeaves]); - b3DbvtVolume v = b3DbvtVolume::FromMM(l->volume.Mins() + d, l->volume.Maxs() + d); - dbvt.update(l, v); - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int up = cfgBenchmark10_Passes * cfgBenchmark10_Iterations; - printf("%u ms (%i%%),(%u u/s)\r\n", time, (time - cfgBenchmark10_Reference) * 100 / time, up * 1000 / time); - } - if (cfgBenchmark11_Enable) - { // Benchmark 11 - srand(380843); - b3DynamicBvh dbvt; - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - printf("[11] optimize (incremental): "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark11_Passes; ++i) - { - dbvt.optimizeIncremental(cfgBenchmark11_Iterations); - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int op = cfgBenchmark11_Passes * cfgBenchmark11_Iterations; - printf("%u ms (%i%%),(%u o/s)\r\n", time, (time - cfgBenchmark11_Reference) * 100 / time, op / time * 1000); - } - if (cfgBenchmark12_Enable) - { // Benchmark 12 - srand(380843); - b3AlignedObjectArray volumes; - b3AlignedObjectArray results; - volumes.resize(cfgLeaves); - results.resize(cfgLeaves); - for (int i = 0; i < cfgLeaves; ++i) - { - volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale); - } - printf("[12] b3DbvtVolume notequal: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark12_Iterations; ++i) - { - for (int j = 0; j < cfgLeaves; ++j) - { - for (int k = 0; k < cfgLeaves; ++k) - { - results[k] = NotEqual(volumes[j], volumes[k]); - } - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark12_Reference) * 100 / time); - } - if (cfgBenchmark13_Enable) - { // Benchmark 13 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray vectors; - b3DbvtBenchmark::NilPolicy policy; - vectors.resize(cfgBenchmark13_Iterations); - for (int i = 0; i < vectors.size(); ++i) - { - vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized(); - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - printf("[13] culling(OCL+fullsort): "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark13_Iterations; ++i) - { - static const b3Scalar offset = 0; - policy.m_depth = -B3_INFINITY; - dbvt.collideOCL(dbvt.m_root, &vectors[i], &offset, vectors[i], 1, policy); - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int t = cfgBenchmark13_Iterations; - printf("%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark13_Reference) * 100 / time, (t * 1000) / time); - } - if (cfgBenchmark14_Enable) - { // Benchmark 14 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray vectors; - b3DbvtBenchmark::P14 policy; - vectors.resize(cfgBenchmark14_Iterations); - for (int i = 0; i < vectors.size(); ++i) - { - vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized(); - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - policy.m_nodes.reserve(cfgLeaves); - printf("[14] culling(OCL+qsort): "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark14_Iterations; ++i) - { - static const b3Scalar offset = 0; - policy.m_nodes.resize(0); - dbvt.collideOCL(dbvt.m_root, &vectors[i], &offset, vectors[i], 1, policy, false); - policy.m_nodes.quickSort(b3DbvtBenchmark::P14::sortfnc); - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int t = cfgBenchmark14_Iterations; - printf("%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark14_Reference) * 100 / time, (t * 1000) / time); - } - if (cfgBenchmark15_Enable) - { // Benchmark 15 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray vectors; - b3DbvtBenchmark::P15 policy; - vectors.resize(cfgBenchmark15_Iterations); - for (int i = 0; i < vectors.size(); ++i) - { - vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized(); - } - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - policy.m_nodes.reserve(cfgLeaves); - printf("[15] culling(KDOP+qsort): "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark15_Iterations; ++i) - { - static const b3Scalar offset = 0; - policy.m_nodes.resize(0); - policy.m_axis = vectors[i]; - dbvt.collideKDOP(dbvt.m_root, &vectors[i], &offset, 1, policy); - policy.m_nodes.quickSort(b3DbvtBenchmark::P15::sortfnc); - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int t = cfgBenchmark15_Iterations; - printf("%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark15_Reference) * 100 / time, (t * 1000) / time); - } - if (cfgBenchmark16_Enable) - { // Benchmark 16 - srand(380843); - b3DynamicBvh dbvt; - b3AlignedObjectArray batch; - b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt); - dbvt.optimizeTopDown(); - batch.reserve(cfgBenchmark16_BatchCount); - printf("[16] insert/remove batch(%u): ", cfgBenchmark16_BatchCount); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark16_Passes; ++i) - { - for (int j = 0; j < cfgBenchmark16_BatchCount; ++j) - { - batch.push_back(dbvt.insert(b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0)); - } - for (int j = 0; j < cfgBenchmark16_BatchCount; ++j) - { - dbvt.remove(batch[j]); - } - batch.resize(0); - } - const int time = (int)wallclock.getTimeMilliseconds(); - const int ir = cfgBenchmark16_Passes * cfgBenchmark16_BatchCount; - printf("%u ms (%i%%),(%u bir/s)\r\n", time, (time - cfgBenchmark16_Reference) * 100 / time, int(ir * 1000.0 / time)); - } - if (cfgBenchmark17_Enable) - { // Benchmark 17 - srand(380843); - b3AlignedObjectArray volumes; - b3AlignedObjectArray results; - b3AlignedObjectArray indices; - volumes.resize(cfgLeaves); - results.resize(cfgLeaves); - indices.resize(cfgLeaves); - for (int i = 0; i < cfgLeaves; ++i) - { - indices[i] = i; - volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale); - } - for (int i = 0; i < cfgLeaves; ++i) - { - b3Swap(indices[i], indices[rand() % cfgLeaves]); - } - printf("[17] b3DbvtVolume select: "); - wallclock.reset(); - for (int i = 0; i < cfgBenchmark17_Iterations; ++i) - { - for (int j = 0; j < cfgLeaves; ++j) - { - for (int k = 0; k < cfgLeaves; ++k) - { - const int idx = indices[k]; - results[idx] = Select(volumes[idx], volumes[j], volumes[k]); - } - } - } - const int time = (int)wallclock.getTimeMilliseconds(); - printf("%u ms (%i%%)\r\n", time, (time - cfgBenchmark17_Reference) * 100 / time); - } - printf("\r\n\r\n"); -} -#endif diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h deleted file mode 100644 index f44e3377fe0..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h +++ /dev/null @@ -1,1332 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -///b3DynamicBvh implementation by Nathanael Presson - -#ifndef B3_DYNAMIC_BOUNDING_VOLUME_TREE_H -#define B3_DYNAMIC_BOUNDING_VOLUME_TREE_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -// -// Compile time configuration -// - -// Implementation profiles -#define B3_DBVT_IMPL_GENERIC 0 // Generic implementation -#define B3_DBVT_IMPL_SSE 1 // SSE - -// Template implementation of ICollide -#ifdef _WIN32 -#if (defined(_MSC_VER) && _MSC_VER >= 1400) -#define B3_DBVT_USE_TEMPLATE 1 -#else -#define B3_DBVT_USE_TEMPLATE 0 -#endif -#else -#define B3_DBVT_USE_TEMPLATE 0 -#endif - -// Use only intrinsics instead of inline asm -#define B3_DBVT_USE_INTRINSIC_SSE 1 - -// Using memmov for collideOCL -#define B3_DBVT_USE_MEMMOVE 1 - -// Enable benchmarking code -#define B3_DBVT_ENABLE_BENCHMARK 0 - -// Inlining -#define B3_DBVT_INLINE B3_FORCE_INLINE - -// Specific methods implementation - -//SSE gives errors on a MSVC 7.1 -#if defined(B3_USE_SSE) //&& defined (_WIN32) -#define B3_DBVT_SELECT_IMPL B3_DBVT_IMPL_SSE -#define B3_DBVT_MERGE_IMPL B3_DBVT_IMPL_SSE -#define B3_DBVT_INT0_IMPL B3_DBVT_IMPL_SSE -#else -#define B3_DBVT_SELECT_IMPL B3_DBVT_IMPL_GENERIC -#define B3_DBVT_MERGE_IMPL B3_DBVT_IMPL_GENERIC -#define B3_DBVT_INT0_IMPL B3_DBVT_IMPL_GENERIC -#endif - -#if (B3_DBVT_SELECT_IMPL == B3_DBVT_IMPL_SSE) || \ - (B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE) || \ - (B3_DBVT_INT0_IMPL == B3_DBVT_IMPL_SSE) -#include -#endif - -// -// Auto config and checks -// - -#if B3_DBVT_USE_TEMPLATE -#define B3_DBVT_VIRTUAL -#define B3_DBVT_VIRTUAL_DTOR(a) -#define B3_DBVT_PREFIX template -#define B3_DBVT_IPOLICY T& policy -#define B3_DBVT_CHECKTYPE \ - static const ICollide& typechecker = *(T*)1; \ - (void)typechecker; -#else -#define B3_DBVT_VIRTUAL_DTOR(a) \ - virtual ~a() {} -#define B3_DBVT_VIRTUAL virtual -#define B3_DBVT_PREFIX -#define B3_DBVT_IPOLICY ICollide& policy -#define B3_DBVT_CHECKTYPE -#endif - -#if B3_DBVT_USE_MEMMOVE -#if !defined(__CELLOS_LV2__) && !defined(__MWERKS__) -#include -#endif -#include -#endif - -#ifndef B3_DBVT_USE_TEMPLATE -#error "B3_DBVT_USE_TEMPLATE undefined" -#endif - -#ifndef B3_DBVT_USE_MEMMOVE -#error "B3_DBVT_USE_MEMMOVE undefined" -#endif - -#ifndef B3_DBVT_ENABLE_BENCHMARK -#error "B3_DBVT_ENABLE_BENCHMARK undefined" -#endif - -#ifndef B3_DBVT_SELECT_IMPL -#error "B3_DBVT_SELECT_IMPL undefined" -#endif - -#ifndef B3_DBVT_MERGE_IMPL -#error "B3_DBVT_MERGE_IMPL undefined" -#endif - -#ifndef B3_DBVT_INT0_IMPL -#error "B3_DBVT_INT0_IMPL undefined" -#endif - -// -// Defaults volumes -// - -/* b3DbvtAabbMm */ -struct b3DbvtAabbMm -{ - B3_DBVT_INLINE b3Vector3 Center() const { return ((mi + mx) / 2); } - B3_DBVT_INLINE b3Vector3 Lengths() const { return (mx - mi); } - B3_DBVT_INLINE b3Vector3 Extents() const { return ((mx - mi) / 2); } - B3_DBVT_INLINE const b3Vector3& Mins() const { return (mi); } - B3_DBVT_INLINE const b3Vector3& Maxs() const { return (mx); } - static inline b3DbvtAabbMm FromCE(const b3Vector3& c, const b3Vector3& e); - static inline b3DbvtAabbMm FromCR(const b3Vector3& c, b3Scalar r); - static inline b3DbvtAabbMm FromMM(const b3Vector3& mi, const b3Vector3& mx); - static inline b3DbvtAabbMm FromPoints(const b3Vector3* pts, int n); - static inline b3DbvtAabbMm FromPoints(const b3Vector3** ppts, int n); - B3_DBVT_INLINE void Expand(const b3Vector3& e); - B3_DBVT_INLINE void SignedExpand(const b3Vector3& e); - B3_DBVT_INLINE bool Contain(const b3DbvtAabbMm& a) const; - B3_DBVT_INLINE int Classify(const b3Vector3& n, b3Scalar o, int s) const; - B3_DBVT_INLINE b3Scalar ProjectMinimum(const b3Vector3& v, unsigned signs) const; - B3_DBVT_INLINE friend bool b3Intersect(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b); - - B3_DBVT_INLINE friend bool b3Intersect(const b3DbvtAabbMm& a, - const b3Vector3& b); - - B3_DBVT_INLINE friend b3Scalar b3Proximity(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b); - B3_DBVT_INLINE friend int b3Select(const b3DbvtAabbMm& o, - const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b); - B3_DBVT_INLINE friend void b3Merge(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b, - b3DbvtAabbMm& r); - B3_DBVT_INLINE friend bool b3NotEqual(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b); - - B3_DBVT_INLINE b3Vector3& tMins() { return (mi); } - B3_DBVT_INLINE b3Vector3& tMaxs() { return (mx); } - -private: - B3_DBVT_INLINE void AddSpan(const b3Vector3& d, b3Scalar& smi, b3Scalar& smx) const; - -private: - b3Vector3 mi, mx; -}; - -// Types -typedef b3DbvtAabbMm b3DbvtVolume; - -/* b3DbvtNode */ -struct b3DbvtNode -{ - b3DbvtVolume volume; - b3DbvtNode* parent; - B3_DBVT_INLINE bool isleaf() const { return (childs[1] == 0); } - B3_DBVT_INLINE bool isinternal() const { return (!isleaf()); } - union { - b3DbvtNode* childs[2]; - void* data; - int dataAsInt; - }; -}; - -///The b3DynamicBvh class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree). -///This b3DynamicBvh is used for soft body collision detection and for the b3DynamicBvhBroadphase. It has a fast insert, remove and update of nodes. -///Unlike the b3QuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure. -struct b3DynamicBvh -{ - /* Stack element */ - struct sStkNN - { - const b3DbvtNode* a; - const b3DbvtNode* b; - sStkNN() {} - sStkNN(const b3DbvtNode* na, const b3DbvtNode* nb) : a(na), b(nb) {} - }; - struct sStkNP - { - const b3DbvtNode* node; - int mask; - sStkNP(const b3DbvtNode* n, unsigned m) : node(n), mask(m) {} - }; - struct sStkNPS - { - const b3DbvtNode* node; - int mask; - b3Scalar value; - sStkNPS() {} - sStkNPS(const b3DbvtNode* n, unsigned m, b3Scalar v) : node(n), mask(m), value(v) {} - }; - struct sStkCLN - { - const b3DbvtNode* node; - b3DbvtNode* parent; - sStkCLN(const b3DbvtNode* n, b3DbvtNode* p) : node(n), parent(p) {} - }; - // Policies/Interfaces - - /* ICollide */ - struct ICollide - { - B3_DBVT_VIRTUAL_DTOR(ICollide) - B3_DBVT_VIRTUAL void Process(const b3DbvtNode*, const b3DbvtNode*) {} - B3_DBVT_VIRTUAL void Process(const b3DbvtNode*) {} - B3_DBVT_VIRTUAL void Process(const b3DbvtNode* n, b3Scalar) { Process(n); } - B3_DBVT_VIRTUAL bool Descent(const b3DbvtNode*) { return (true); } - B3_DBVT_VIRTUAL bool AllLeaves(const b3DbvtNode*) { return (true); } - }; - /* IWriter */ - struct IWriter - { - virtual ~IWriter() {} - virtual void Prepare(const b3DbvtNode* root, int numnodes) = 0; - virtual void WriteNode(const b3DbvtNode*, int index, int parent, int child0, int child1) = 0; - virtual void WriteLeaf(const b3DbvtNode*, int index, int parent) = 0; - }; - /* IClone */ - struct IClone - { - virtual ~IClone() {} - virtual void CloneLeaf(b3DbvtNode*) {} - }; - - // Constants - enum - { - B3_SIMPLE_STACKSIZE = 64, - B3_DOUBLE_STACKSIZE = B3_SIMPLE_STACKSIZE * 2 - }; - - // Fields - b3DbvtNode* m_root; - b3DbvtNode* m_free; - int m_lkhd; - int m_leaves; - unsigned m_opath; - - b3AlignedObjectArray m_stkStack; - mutable b3AlignedObjectArray m_rayTestStack; - - // Methods - b3DynamicBvh(); - ~b3DynamicBvh(); - void clear(); - bool empty() const { return (0 == m_root); } - void optimizeBottomUp(); - void optimizeTopDown(int bu_treshold = 128); - void optimizeIncremental(int passes); - b3DbvtNode* insert(const b3DbvtVolume& box, void* data); - void update(b3DbvtNode* leaf, int lookahead = -1); - void update(b3DbvtNode* leaf, b3DbvtVolume& volume); - bool update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity, b3Scalar margin); - bool update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity); - bool update(b3DbvtNode* leaf, b3DbvtVolume& volume, b3Scalar margin); - void remove(b3DbvtNode* leaf); - void write(IWriter* iwriter) const; - void clone(b3DynamicBvh& dest, IClone* iclone = 0) const; - static int maxdepth(const b3DbvtNode* node); - static int countLeaves(const b3DbvtNode* node); - static void extractLeaves(const b3DbvtNode* node, b3AlignedObjectArray& leaves); -#if B3_DBVT_ENABLE_BENCHMARK - static void benchmark(); -#else - static void benchmark() - { - } -#endif - // B3_DBVT_IPOLICY must support ICollide policy/interface - B3_DBVT_PREFIX - static void enumNodes(const b3DbvtNode* root, - B3_DBVT_IPOLICY); - B3_DBVT_PREFIX - static void enumLeaves(const b3DbvtNode* root, - B3_DBVT_IPOLICY); - B3_DBVT_PREFIX - void collideTT(const b3DbvtNode* root0, - const b3DbvtNode* root1, - B3_DBVT_IPOLICY); - - B3_DBVT_PREFIX - void collideTTpersistentStack(const b3DbvtNode* root0, - const b3DbvtNode* root1, - B3_DBVT_IPOLICY); -#if 0 - B3_DBVT_PREFIX - void collideTT( const b3DbvtNode* root0, - const b3DbvtNode* root1, - const b3Transform& xform, - B3_DBVT_IPOLICY); - B3_DBVT_PREFIX - void collideTT( const b3DbvtNode* root0, - const b3Transform& xform0, - const b3DbvtNode* root1, - const b3Transform& xform1, - B3_DBVT_IPOLICY); -#endif - - B3_DBVT_PREFIX - void collideTV(const b3DbvtNode* root, - const b3DbvtVolume& volume, - B3_DBVT_IPOLICY) const; - ///rayTest is a re-entrant ray test, and can be called in parallel as long as the b3AlignedAlloc is thread-safe (uses locking etc) - ///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time - B3_DBVT_PREFIX - static void rayTest(const b3DbvtNode* root, - const b3Vector3& rayFrom, - const b3Vector3& rayTo, - B3_DBVT_IPOLICY); - ///rayTestInternal is faster than rayTest, because it uses a persistent stack (to reduce dynamic memory allocations to a minimum) and it uses precomputed signs/rayInverseDirections - ///rayTestInternal is used by b3DynamicBvhBroadphase to accelerate world ray casts - B3_DBVT_PREFIX - void rayTestInternal(const b3DbvtNode* root, - const b3Vector3& rayFrom, - const b3Vector3& rayTo, - const b3Vector3& rayDirectionInverse, - unsigned int signs[3], - b3Scalar lambda_max, - const b3Vector3& aabbMin, - const b3Vector3& aabbMax, - B3_DBVT_IPOLICY) const; - - B3_DBVT_PREFIX - static void collideKDOP(const b3DbvtNode* root, - const b3Vector3* normals, - const b3Scalar* offsets, - int count, - B3_DBVT_IPOLICY); - B3_DBVT_PREFIX - static void collideOCL(const b3DbvtNode* root, - const b3Vector3* normals, - const b3Scalar* offsets, - const b3Vector3& sortaxis, - int count, - B3_DBVT_IPOLICY, - bool fullsort = true); - B3_DBVT_PREFIX - static void collideTU(const b3DbvtNode* root, - B3_DBVT_IPOLICY); - // Helpers - static B3_DBVT_INLINE int nearest(const int* i, const b3DynamicBvh::sStkNPS* a, b3Scalar v, int l, int h) - { - int m = 0; - while (l < h) - { - m = (l + h) >> 1; - if (a[i[m]].value >= v) - l = m + 1; - else - h = m; - } - return (h); - } - static B3_DBVT_INLINE int allocate(b3AlignedObjectArray& ifree, - b3AlignedObjectArray& stock, - const sStkNPS& value) - { - int i; - if (ifree.size() > 0) - { - i = ifree[ifree.size() - 1]; - ifree.pop_back(); - stock[i] = value; - } - else - { - i = stock.size(); - stock.push_back(value); - } - return (i); - } - // -private: - b3DynamicBvh(const b3DynamicBvh&) {} -}; - -// -// Inline's -// - -// -inline b3DbvtAabbMm b3DbvtAabbMm::FromCE(const b3Vector3& c, const b3Vector3& e) -{ - b3DbvtAabbMm box; - box.mi = c - e; - box.mx = c + e; - return (box); -} - -// -inline b3DbvtAabbMm b3DbvtAabbMm::FromCR(const b3Vector3& c, b3Scalar r) -{ - return (FromCE(c, b3MakeVector3(r, r, r))); -} - -// -inline b3DbvtAabbMm b3DbvtAabbMm::FromMM(const b3Vector3& mi, const b3Vector3& mx) -{ - b3DbvtAabbMm box; - box.mi = mi; - box.mx = mx; - return (box); -} - -// -inline b3DbvtAabbMm b3DbvtAabbMm::FromPoints(const b3Vector3* pts, int n) -{ - b3DbvtAabbMm box; - box.mi = box.mx = pts[0]; - for (int i = 1; i < n; ++i) - { - box.mi.setMin(pts[i]); - box.mx.setMax(pts[i]); - } - return (box); -} - -// -inline b3DbvtAabbMm b3DbvtAabbMm::FromPoints(const b3Vector3** ppts, int n) -{ - b3DbvtAabbMm box; - box.mi = box.mx = *ppts[0]; - for (int i = 1; i < n; ++i) - { - box.mi.setMin(*ppts[i]); - box.mx.setMax(*ppts[i]); - } - return (box); -} - -// -B3_DBVT_INLINE void b3DbvtAabbMm::Expand(const b3Vector3& e) -{ - mi -= e; - mx += e; -} - -// -B3_DBVT_INLINE void b3DbvtAabbMm::SignedExpand(const b3Vector3& e) -{ - if (e.x > 0) - mx.setX(mx.x + e[0]); - else - mi.setX(mi.x + e[0]); - if (e.y > 0) - mx.setY(mx.y + e[1]); - else - mi.setY(mi.y + e[1]); - if (e.z > 0) - mx.setZ(mx.z + e[2]); - else - mi.setZ(mi.z + e[2]); -} - -// -B3_DBVT_INLINE bool b3DbvtAabbMm::Contain(const b3DbvtAabbMm& a) const -{ - return ((mi.x <= a.mi.x) && - (mi.y <= a.mi.y) && - (mi.z <= a.mi.z) && - (mx.x >= a.mx.x) && - (mx.y >= a.mx.y) && - (mx.z >= a.mx.z)); -} - -// -B3_DBVT_INLINE int b3DbvtAabbMm::Classify(const b3Vector3& n, b3Scalar o, int s) const -{ - b3Vector3 pi, px; - switch (s) - { - case (0 + 0 + 0): - px = b3MakeVector3(mi.x, mi.y, mi.z); - pi = b3MakeVector3(mx.x, mx.y, mx.z); - break; - case (1 + 0 + 0): - px = b3MakeVector3(mx.x, mi.y, mi.z); - pi = b3MakeVector3(mi.x, mx.y, mx.z); - break; - case (0 + 2 + 0): - px = b3MakeVector3(mi.x, mx.y, mi.z); - pi = b3MakeVector3(mx.x, mi.y, mx.z); - break; - case (1 + 2 + 0): - px = b3MakeVector3(mx.x, mx.y, mi.z); - pi = b3MakeVector3(mi.x, mi.y, mx.z); - break; - case (0 + 0 + 4): - px = b3MakeVector3(mi.x, mi.y, mx.z); - pi = b3MakeVector3(mx.x, mx.y, mi.z); - break; - case (1 + 0 + 4): - px = b3MakeVector3(mx.x, mi.y, mx.z); - pi = b3MakeVector3(mi.x, mx.y, mi.z); - break; - case (0 + 2 + 4): - px = b3MakeVector3(mi.x, mx.y, mx.z); - pi = b3MakeVector3(mx.x, mi.y, mi.z); - break; - case (1 + 2 + 4): - px = b3MakeVector3(mx.x, mx.y, mx.z); - pi = b3MakeVector3(mi.x, mi.y, mi.z); - break; - } - if ((b3Dot(n, px) + o) < 0) return (-1); - if ((b3Dot(n, pi) + o) >= 0) return (+1); - return (0); -} - -// -B3_DBVT_INLINE b3Scalar b3DbvtAabbMm::ProjectMinimum(const b3Vector3& v, unsigned signs) const -{ - const b3Vector3* b[] = {&mx, &mi}; - const b3Vector3 p = b3MakeVector3(b[(signs >> 0) & 1]->x, - b[(signs >> 1) & 1]->y, - b[(signs >> 2) & 1]->z); - return (b3Dot(p, v)); -} - -// -B3_DBVT_INLINE void b3DbvtAabbMm::AddSpan(const b3Vector3& d, b3Scalar& smi, b3Scalar& smx) const -{ - for (int i = 0; i < 3; ++i) - { - if (d[i] < 0) - { - smi += mx[i] * d[i]; - smx += mi[i] * d[i]; - } - else - { - smi += mi[i] * d[i]; - smx += mx[i] * d[i]; - } - } -} - -// -B3_DBVT_INLINE bool b3Intersect(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b) -{ -#if B3_DBVT_INT0_IMPL == B3_DBVT_IMPL_SSE - const __m128 rt(_mm_or_ps(_mm_cmplt_ps(_mm_load_ps(b.mx), _mm_load_ps(a.mi)), - _mm_cmplt_ps(_mm_load_ps(a.mx), _mm_load_ps(b.mi)))); -#if defined(_WIN32) - const __int32* pu((const __int32*)&rt); -#else - const int* pu((const int*)&rt); -#endif - return ((pu[0] | pu[1] | pu[2]) == 0); -#else - return ((a.mi.x <= b.mx.x) && - (a.mx.x >= b.mi.x) && - (a.mi.y <= b.mx.y) && - (a.mx.y >= b.mi.y) && - (a.mi.z <= b.mx.z) && - (a.mx.z >= b.mi.z)); -#endif -} - -// -B3_DBVT_INLINE bool b3Intersect(const b3DbvtAabbMm& a, - const b3Vector3& b) -{ - return ((b.x >= a.mi.x) && - (b.y >= a.mi.y) && - (b.z >= a.mi.z) && - (b.x <= a.mx.x) && - (b.y <= a.mx.y) && - (b.z <= a.mx.z)); -} - -////////////////////////////////////// - -// -B3_DBVT_INLINE b3Scalar b3Proximity(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b) -{ - const b3Vector3 d = (a.mi + a.mx) - (b.mi + b.mx); - return (b3Fabs(d.x) + b3Fabs(d.y) + b3Fabs(d.z)); -} - -// -B3_DBVT_INLINE int b3Select(const b3DbvtAabbMm& o, - const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b) -{ -#if B3_DBVT_SELECT_IMPL == B3_DBVT_IMPL_SSE - -#if defined(_WIN32) - static B3_ATTRIBUTE_ALIGNED16(const unsigned __int32) mask[] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; -#else - static B3_ATTRIBUTE_ALIGNED16(const unsigned int) mask[] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x00000000 /*0x7fffffff*/}; -#endif - ///@todo: the intrinsic version is 11% slower -#if B3_DBVT_USE_INTRINSIC_SSE - - union b3SSEUnion ///NOTE: if we use more intrinsics, move b3SSEUnion into the LinearMath directory - { - __m128 ssereg; - float floats[4]; - int ints[4]; - }; - - __m128 omi(_mm_load_ps(o.mi)); - omi = _mm_add_ps(omi, _mm_load_ps(o.mx)); - __m128 ami(_mm_load_ps(a.mi)); - ami = _mm_add_ps(ami, _mm_load_ps(a.mx)); - ami = _mm_sub_ps(ami, omi); - ami = _mm_and_ps(ami, _mm_load_ps((const float*)mask)); - __m128 bmi(_mm_load_ps(b.mi)); - bmi = _mm_add_ps(bmi, _mm_load_ps(b.mx)); - bmi = _mm_sub_ps(bmi, omi); - bmi = _mm_and_ps(bmi, _mm_load_ps((const float*)mask)); - __m128 t0(_mm_movehl_ps(ami, ami)); - ami = _mm_add_ps(ami, t0); - ami = _mm_add_ss(ami, _mm_shuffle_ps(ami, ami, 1)); - __m128 t1(_mm_movehl_ps(bmi, bmi)); - bmi = _mm_add_ps(bmi, t1); - bmi = _mm_add_ss(bmi, _mm_shuffle_ps(bmi, bmi, 1)); - - b3SSEUnion tmp; - tmp.ssereg = _mm_cmple_ss(bmi, ami); - return tmp.ints[0] & 1; - -#else - B3_ATTRIBUTE_ALIGNED16(__int32 r[1]); - __asm - { - mov eax,o - mov ecx,a - mov edx,b - movaps xmm0,[eax] - movaps xmm5,mask - addps xmm0,[eax+16] - movaps xmm1,[ecx] - movaps xmm2,[edx] - addps xmm1,[ecx+16] - addps xmm2,[edx+16] - subps xmm1,xmm0 - subps xmm2,xmm0 - andps xmm1,xmm5 - andps xmm2,xmm5 - movhlps xmm3,xmm1 - movhlps xmm4,xmm2 - addps xmm1,xmm3 - addps xmm2,xmm4 - pshufd xmm3,xmm1,1 - pshufd xmm4,xmm2,1 - addss xmm1,xmm3 - addss xmm2,xmm4 - cmpless xmm2,xmm1 - movss r,xmm2 - } - return (r[0] & 1); -#endif -#else - return (b3Proximity(o, a) < b3Proximity(o, b) ? 0 : 1); -#endif -} - -// -B3_DBVT_INLINE void b3Merge(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b, - b3DbvtAabbMm& r) -{ -#if B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE - __m128 ami(_mm_load_ps(a.mi)); - __m128 amx(_mm_load_ps(a.mx)); - __m128 bmi(_mm_load_ps(b.mi)); - __m128 bmx(_mm_load_ps(b.mx)); - ami = _mm_min_ps(ami, bmi); - amx = _mm_max_ps(amx, bmx); - _mm_store_ps(r.mi, ami); - _mm_store_ps(r.mx, amx); -#else - for (int i = 0; i < 3; ++i) - { - if (a.mi[i] < b.mi[i]) - r.mi[i] = a.mi[i]; - else - r.mi[i] = b.mi[i]; - if (a.mx[i] > b.mx[i]) - r.mx[i] = a.mx[i]; - else - r.mx[i] = b.mx[i]; - } -#endif -} - -// -B3_DBVT_INLINE bool b3NotEqual(const b3DbvtAabbMm& a, - const b3DbvtAabbMm& b) -{ - return ((a.mi.x != b.mi.x) || - (a.mi.y != b.mi.y) || - (a.mi.z != b.mi.z) || - (a.mx.x != b.mx.x) || - (a.mx.y != b.mx.y) || - (a.mx.z != b.mx.z)); -} - -// -// Inline's -// - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::enumNodes(const b3DbvtNode* root, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - policy.Process(root); - if (root->isinternal()) - { - enumNodes(root->childs[0], policy); - enumNodes(root->childs[1], policy); - } -} - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::enumLeaves(const b3DbvtNode* root, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if (root->isinternal()) - { - enumLeaves(root->childs[0], policy); - enumLeaves(root->childs[1], policy); - } - else - { - policy.Process(root); - } -} - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideTT(const b3DbvtNode* root0, - const b3DbvtNode* root1, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if (root0 && root1) - { - int depth = 1; - int treshold = B3_DOUBLE_STACKSIZE - 4; - b3AlignedObjectArray stkStack; - stkStack.resize(B3_DOUBLE_STACKSIZE); - stkStack[0] = sStkNN(root0, root1); - do - { - sStkNN p = stkStack[--depth]; - if (depth > treshold) - { - stkStack.resize(stkStack.size() * 2); - treshold = stkStack.size() - 4; - } - if (p.a == p.b) - { - if (p.a->isinternal()) - { - stkStack[depth++] = sStkNN(p.a->childs[0], p.a->childs[0]); - stkStack[depth++] = sStkNN(p.a->childs[1], p.a->childs[1]); - stkStack[depth++] = sStkNN(p.a->childs[0], p.a->childs[1]); - } - } - else if (b3Intersect(p.a->volume, p.b->volume)) - { - if (p.a->isinternal()) - { - if (p.b->isinternal()) - { - stkStack[depth++] = sStkNN(p.a->childs[0], p.b->childs[0]); - stkStack[depth++] = sStkNN(p.a->childs[1], p.b->childs[0]); - stkStack[depth++] = sStkNN(p.a->childs[0], p.b->childs[1]); - stkStack[depth++] = sStkNN(p.a->childs[1], p.b->childs[1]); - } - else - { - stkStack[depth++] = sStkNN(p.a->childs[0], p.b); - stkStack[depth++] = sStkNN(p.a->childs[1], p.b); - } - } - else - { - if (p.b->isinternal()) - { - stkStack[depth++] = sStkNN(p.a, p.b->childs[0]); - stkStack[depth++] = sStkNN(p.a, p.b->childs[1]); - } - else - { - policy.Process(p.a, p.b); - } - } - } - } while (depth); - } -} - -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideTTpersistentStack(const b3DbvtNode* root0, - const b3DbvtNode* root1, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if (root0 && root1) - { - int depth = 1; - int treshold = B3_DOUBLE_STACKSIZE - 4; - - m_stkStack.resize(B3_DOUBLE_STACKSIZE); - m_stkStack[0] = sStkNN(root0, root1); - do - { - sStkNN p = m_stkStack[--depth]; - if (depth > treshold) - { - m_stkStack.resize(m_stkStack.size() * 2); - treshold = m_stkStack.size() - 4; - } - if (p.a == p.b) - { - if (p.a->isinternal()) - { - m_stkStack[depth++] = sStkNN(p.a->childs[0], p.a->childs[0]); - m_stkStack[depth++] = sStkNN(p.a->childs[1], p.a->childs[1]); - m_stkStack[depth++] = sStkNN(p.a->childs[0], p.a->childs[1]); - } - } - else if (b3Intersect(p.a->volume, p.b->volume)) - { - if (p.a->isinternal()) - { - if (p.b->isinternal()) - { - m_stkStack[depth++] = sStkNN(p.a->childs[0], p.b->childs[0]); - m_stkStack[depth++] = sStkNN(p.a->childs[1], p.b->childs[0]); - m_stkStack[depth++] = sStkNN(p.a->childs[0], p.b->childs[1]); - m_stkStack[depth++] = sStkNN(p.a->childs[1], p.b->childs[1]); - } - else - { - m_stkStack[depth++] = sStkNN(p.a->childs[0], p.b); - m_stkStack[depth++] = sStkNN(p.a->childs[1], p.b); - } - } - else - { - if (p.b->isinternal()) - { - m_stkStack[depth++] = sStkNN(p.a, p.b->childs[0]); - m_stkStack[depth++] = sStkNN(p.a, p.b->childs[1]); - } - else - { - policy.Process(p.a, p.b); - } - } - } - } while (depth); - } -} - -#if 0 -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideTT( const b3DbvtNode* root0, - const b3DbvtNode* root1, - const b3Transform& xform, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if(root0&&root1) - { - int depth=1; - int treshold=B3_DOUBLE_STACKSIZE-4; - b3AlignedObjectArray stkStack; - stkStack.resize(B3_DOUBLE_STACKSIZE); - stkStack[0]=sStkNN(root0,root1); - do { - sStkNN p=stkStack[--depth]; - if(b3Intersect(p.a->volume,p.b->volume,xform)) - { - if(depth>treshold) - { - stkStack.resize(stkStack.size()*2); - treshold=stkStack.size()-4; - } - if(p.a->isinternal()) - { - if(p.b->isinternal()) - { - stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]); - stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]); - stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]); - stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]); - } - else - { - stkStack[depth++]=sStkNN(p.a->childs[0],p.b); - stkStack[depth++]=sStkNN(p.a->childs[1],p.b); - } - } - else - { - if(p.b->isinternal()) - { - stkStack[depth++]=sStkNN(p.a,p.b->childs[0]); - stkStack[depth++]=sStkNN(p.a,p.b->childs[1]); - } - else - { - policy.Process(p.a,p.b); - } - } - } - } while(depth); - } -} -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideTT( const b3DbvtNode* root0, - const b3Transform& xform0, - const b3DbvtNode* root1, - const b3Transform& xform1, - B3_DBVT_IPOLICY) -{ - const b3Transform xform=xform0.inverse()*xform1; - collideTT(root0,root1,xform,policy); -} -#endif - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideTV(const b3DbvtNode* root, - const b3DbvtVolume& vol, - B3_DBVT_IPOLICY) const -{ - B3_DBVT_CHECKTYPE - if (root) - { - B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) - volume(vol); - b3AlignedObjectArray stack; - stack.resize(0); - stack.reserve(B3_SIMPLE_STACKSIZE); - stack.push_back(root); - do - { - const b3DbvtNode* n = stack[stack.size() - 1]; - stack.pop_back(); - if (b3Intersect(n->volume, volume)) - { - if (n->isinternal()) - { - stack.push_back(n->childs[0]); - stack.push_back(n->childs[1]); - } - else - { - policy.Process(n); - } - } - } while (stack.size() > 0); - } -} - -B3_DBVT_PREFIX -inline void b3DynamicBvh::rayTestInternal(const b3DbvtNode* root, - const b3Vector3& rayFrom, - const b3Vector3& rayTo, - const b3Vector3& rayDirectionInverse, - unsigned int signs[3], - b3Scalar lambda_max, - const b3Vector3& aabbMin, - const b3Vector3& aabbMax, - B3_DBVT_IPOLICY) const -{ - (void)rayTo; - B3_DBVT_CHECKTYPE - if (root) - { - int depth = 1; - int treshold = B3_DOUBLE_STACKSIZE - 2; - b3AlignedObjectArray& stack = m_rayTestStack; - stack.resize(B3_DOUBLE_STACKSIZE); - stack[0] = root; - b3Vector3 bounds[2]; - do - { - const b3DbvtNode* node = stack[--depth]; - bounds[0] = node->volume.Mins() - aabbMax; - bounds[1] = node->volume.Maxs() - aabbMin; - b3Scalar tmin = 1.f, lambda_min = 0.f; - unsigned int result1 = false; - result1 = b3RayAabb2(rayFrom, rayDirectionInverse, signs, bounds, tmin, lambda_min, lambda_max); - if (result1) - { - if (node->isinternal()) - { - if (depth > treshold) - { - stack.resize(stack.size() * 2); - treshold = stack.size() - 2; - } - stack[depth++] = node->childs[0]; - stack[depth++] = node->childs[1]; - } - else - { - policy.Process(node); - } - } - } while (depth); - } -} - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::rayTest(const b3DbvtNode* root, - const b3Vector3& rayFrom, - const b3Vector3& rayTo, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if (root) - { - b3Vector3 rayDir = (rayTo - rayFrom); - rayDir.normalize(); - - ///what about division by zero? --> just set rayDirection[i] to INF/B3_LARGE_FLOAT - b3Vector3 rayDirectionInverse; - rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0]; - rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1]; - rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; - unsigned int signs[3] = {rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; - - b3Scalar lambda_max = rayDir.dot(rayTo - rayFrom); -#ifdef COMPARE_BTRAY_AABB2 - b3Vector3 resultNormal; -#endif //COMPARE_BTRAY_AABB2 - - b3AlignedObjectArray stack; - - int depth = 1; - int treshold = B3_DOUBLE_STACKSIZE - 2; - - stack.resize(B3_DOUBLE_STACKSIZE); - stack[0] = root; - b3Vector3 bounds[2]; - do - { - const b3DbvtNode* node = stack[--depth]; - - bounds[0] = node->volume.Mins(); - bounds[1] = node->volume.Maxs(); - - b3Scalar tmin = 1.f, lambda_min = 0.f; - unsigned int result1 = b3RayAabb2(rayFrom, rayDirectionInverse, signs, bounds, tmin, lambda_min, lambda_max); - -#ifdef COMPARE_BTRAY_AABB2 - b3Scalar param = 1.f; - bool result2 = b3RayAabb(rayFrom, rayTo, node->volume.Mins(), node->volume.Maxs(), param, resultNormal); - b3Assert(result1 == result2); -#endif //TEST_BTRAY_AABB2 - - if (result1) - { - if (node->isinternal()) - { - if (depth > treshold) - { - stack.resize(stack.size() * 2); - treshold = stack.size() - 2; - } - stack[depth++] = node->childs[0]; - stack[depth++] = node->childs[1]; - } - else - { - policy.Process(node); - } - } - } while (depth); - } -} - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideKDOP(const b3DbvtNode* root, - const b3Vector3* normals, - const b3Scalar* offsets, - int count, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if (root) - { - const int inside = (1 << count) - 1; - b3AlignedObjectArray stack; - int signs[sizeof(unsigned) * 8]; - b3Assert(count < int(sizeof(signs) / sizeof(signs[0]))); - for (int i = 0; i < count; ++i) - { - signs[i] = ((normals[i].x >= 0) ? 1 : 0) + - ((normals[i].y >= 0) ? 2 : 0) + - ((normals[i].z >= 0) ? 4 : 0); - } - stack.reserve(B3_SIMPLE_STACKSIZE); - stack.push_back(sStkNP(root, 0)); - do - { - sStkNP se = stack[stack.size() - 1]; - bool out = false; - stack.pop_back(); - for (int i = 0, j = 1; (!out) && (i < count); ++i, j <<= 1) - { - if (0 == (se.mask & j)) - { - const int side = se.node->volume.Classify(normals[i], offsets[i], signs[i]); - switch (side) - { - case -1: - out = true; - break; - case +1: - se.mask |= j; - break; - } - } - } - if (!out) - { - if ((se.mask != inside) && (se.node->isinternal())) - { - stack.push_back(sStkNP(se.node->childs[0], se.mask)); - stack.push_back(sStkNP(se.node->childs[1], se.mask)); - } - else - { - if (policy.AllLeaves(se.node)) enumLeaves(se.node, policy); - } - } - } while (stack.size()); - } -} - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideOCL(const b3DbvtNode* root, - const b3Vector3* normals, - const b3Scalar* offsets, - const b3Vector3& sortaxis, - int count, - B3_DBVT_IPOLICY, - bool fsort) -{ - B3_DBVT_CHECKTYPE - if (root) - { - const unsigned srtsgns = (sortaxis[0] >= 0 ? 1 : 0) + - (sortaxis[1] >= 0 ? 2 : 0) + - (sortaxis[2] >= 0 ? 4 : 0); - const int inside = (1 << count) - 1; - b3AlignedObjectArray stock; - b3AlignedObjectArray ifree; - b3AlignedObjectArray stack; - int signs[sizeof(unsigned) * 8]; - b3Assert(count < int(sizeof(signs) / sizeof(signs[0]))); - for (int i = 0; i < count; ++i) - { - signs[i] = ((normals[i].x >= 0) ? 1 : 0) + - ((normals[i].y >= 0) ? 2 : 0) + - ((normals[i].z >= 0) ? 4 : 0); - } - stock.reserve(B3_SIMPLE_STACKSIZE); - stack.reserve(B3_SIMPLE_STACKSIZE); - ifree.reserve(B3_SIMPLE_STACKSIZE); - stack.push_back(allocate(ifree, stock, sStkNPS(root, 0, root->volume.ProjectMinimum(sortaxis, srtsgns)))); - do - { - const int id = stack[stack.size() - 1]; - sStkNPS se = stock[id]; - stack.pop_back(); - ifree.push_back(id); - if (se.mask != inside) - { - bool out = false; - for (int i = 0, j = 1; (!out) && (i < count); ++i, j <<= 1) - { - if (0 == (se.mask & j)) - { - const int side = se.node->volume.Classify(normals[i], offsets[i], signs[i]); - switch (side) - { - case -1: - out = true; - break; - case +1: - se.mask |= j; - break; - } - } - } - if (out) continue; - } - if (policy.Descent(se.node)) - { - if (se.node->isinternal()) - { - const b3DbvtNode* pns[] = {se.node->childs[0], se.node->childs[1]}; - sStkNPS nes[] = {sStkNPS(pns[0], se.mask, pns[0]->volume.ProjectMinimum(sortaxis, srtsgns)), - sStkNPS(pns[1], se.mask, pns[1]->volume.ProjectMinimum(sortaxis, srtsgns))}; - const int q = nes[0].value < nes[1].value ? 1 : 0; - int j = stack.size(); - if (fsort && (j > 0)) - { - /* Insert 0 */ - j = nearest(&stack[0], &stock[0], nes[q].value, 0, stack.size()); - stack.push_back(0); -#if B3_DBVT_USE_MEMMOVE - memmove(&stack[j + 1], &stack[j], sizeof(int) * (stack.size() - j - 1)); -#else - for (int k = stack.size() - 1; k > j; --k) stack[k] = stack[k - 1]; -#endif - stack[j] = allocate(ifree, stock, nes[q]); - /* Insert 1 */ - j = nearest(&stack[0], &stock[0], nes[1 - q].value, j, stack.size()); - stack.push_back(0); -#if B3_DBVT_USE_MEMMOVE - memmove(&stack[j + 1], &stack[j], sizeof(int) * (stack.size() - j - 1)); -#else - for (int k = stack.size() - 1; k > j; --k) stack[k] = stack[k - 1]; -#endif - stack[j] = allocate(ifree, stock, nes[1 - q]); - } - else - { - stack.push_back(allocate(ifree, stock, nes[q])); - stack.push_back(allocate(ifree, stock, nes[1 - q])); - } - } - else - { - policy.Process(se.node, se.value); - } - } - } while (stack.size()); - } -} - -// -B3_DBVT_PREFIX -inline void b3DynamicBvh::collideTU(const b3DbvtNode* root, - B3_DBVT_IPOLICY) -{ - B3_DBVT_CHECKTYPE - if (root) - { - b3AlignedObjectArray stack; - stack.reserve(B3_SIMPLE_STACKSIZE); - stack.push_back(root); - do - { - const b3DbvtNode* n = stack[stack.size() - 1]; - stack.pop_back(); - if (policy.Descent(n)) - { - if (n->isinternal()) - { - stack.push_back(n->childs[0]); - stack.push_back(n->childs[1]); - } - else - { - policy.Process(n); - } - } - } while (stack.size() > 0); - } -} - -// -// PP Cleanup -// - -#undef B3_DBVT_USE_MEMMOVE -#undef B3_DBVT_USE_TEMPLATE -#undef B3_DBVT_VIRTUAL_DTOR -#undef B3_DBVT_VIRTUAL -#undef B3_DBVT_PREFIX -#undef B3_DBVT_IPOLICY -#undef B3_DBVT_CHECKTYPE -#undef B3_DBVT_IMPL_GENERIC -#undef B3_DBVT_IMPL_SSE -#undef B3_DBVT_USE_INTRINSIC_SSE -#undef B3_DBVT_SELECT_IMPL -#undef B3_DBVT_MERGE_IMPL -#undef B3_DBVT_INT0_IMPL - -#endif diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp deleted file mode 100644 index dea2ddb0f2f..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp +++ /dev/null @@ -1,808 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -///b3DynamicBvhBroadphase implementation by Nathanael Presson - -#include "b3DynamicBvhBroadphase.h" -#include "b3OverlappingPair.h" - -// -// Profiling -// - -#if B3_DBVT_BP_PROFILE || B3_DBVT_BP_ENABLE_BENCHMARK -#include -#endif - -#if B3_DBVT_BP_PROFILE -struct b3ProfileScope -{ - __forceinline b3ProfileScope(b3Clock& clock, unsigned long& value) : m_clock(&clock), m_value(&value), m_base(clock.getTimeMicroseconds()) - { - } - __forceinline ~b3ProfileScope() - { - (*m_value) += m_clock->getTimeMicroseconds() - m_base; - } - b3Clock* m_clock; - unsigned long* m_value; - unsigned long m_base; -}; -#define b3SPC(_value_) b3ProfileScope spc_scope(m_clock, _value_) -#else -#define b3SPC(_value_) -#endif - -// -// Helpers -// - -// -template -static inline void b3ListAppend(T* item, T*& list) -{ - item->links[0] = 0; - item->links[1] = list; - if (list) list->links[0] = item; - list = item; -} - -// -template -static inline void b3ListRemove(T* item, T*& list) -{ - if (item->links[0]) - item->links[0]->links[1] = item->links[1]; - else - list = item->links[1]; - if (item->links[1]) item->links[1]->links[0] = item->links[0]; -} - -// -template -static inline int b3ListCount(T* root) -{ - int n = 0; - while (root) - { - ++n; - root = root->links[1]; - } - return (n); -} - -// -template -static inline void b3Clear(T& value) -{ - static const struct ZeroDummy : T - { - } zerodummy; - value = zerodummy; -} - -// -// Colliders -// - -/* Tree collider */ -struct b3DbvtTreeCollider : b3DynamicBvh::ICollide -{ - b3DynamicBvhBroadphase* pbp; - b3DbvtProxy* proxy; - b3DbvtTreeCollider(b3DynamicBvhBroadphase* p) : pbp(p) {} - void Process(const b3DbvtNode* na, const b3DbvtNode* nb) - { - if (na != nb) - { - b3DbvtProxy* pa = (b3DbvtProxy*)na->data; - b3DbvtProxy* pb = (b3DbvtProxy*)nb->data; -#if B3_DBVT_BP_SORTPAIRS - if (pa->m_uniqueId > pb->m_uniqueId) - b3Swap(pa, pb); -#endif - pbp->m_paircache->addOverlappingPair(pa->getUid(), pb->getUid()); - ++pbp->m_newpairs; - } - } - void Process(const b3DbvtNode* n) - { - Process(n, proxy->leaf); - } -}; - -// -// b3DynamicBvhBroadphase -// - -// -b3DynamicBvhBroadphase::b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache) -{ - m_deferedcollide = false; - m_needcleanup = true; - m_releasepaircache = (paircache != 0) ? false : true; - m_prediction = 0; - m_stageCurrent = 0; - m_fixedleft = 0; - m_fupdates = 1; - m_dupdates = 0; - m_cupdates = 10; - m_newpairs = 1; - m_updates_call = 0; - m_updates_done = 0; - m_updates_ratio = 0; - m_paircache = paircache ? paircache : new (b3AlignedAlloc(sizeof(b3HashedOverlappingPairCache), 16)) b3HashedOverlappingPairCache(); - - m_pid = 0; - m_cid = 0; - for (int i = 0; i <= STAGECOUNT; ++i) - { - m_stageRoots[i] = 0; - } -#if B3_DBVT_BP_PROFILE - b3Clear(m_profiling); -#endif - m_proxies.resize(proxyCapacity); -} - -// -b3DynamicBvhBroadphase::~b3DynamicBvhBroadphase() -{ - if (m_releasepaircache) - { - m_paircache->~b3OverlappingPairCache(); - b3AlignedFree(m_paircache); - } -} - -// -b3BroadphaseProxy* b3DynamicBvhBroadphase::createProxy(const b3Vector3& aabbMin, - const b3Vector3& aabbMax, - int objectId, - void* userPtr, - int collisionFilterGroup, - int collisionFilterMask) -{ - b3DbvtProxy* mem = &m_proxies[objectId]; - b3DbvtProxy* proxy = new (mem) b3DbvtProxy(aabbMin, aabbMax, userPtr, - collisionFilterGroup, - collisionFilterMask); - - b3DbvtAabbMm aabb = b3DbvtVolume::FromMM(aabbMin, aabbMax); - - //bproxy->aabb = b3DbvtVolume::FromMM(aabbMin,aabbMax); - proxy->stage = m_stageCurrent; - proxy->m_uniqueId = objectId; - proxy->leaf = m_sets[0].insert(aabb, proxy); - b3ListAppend(proxy, m_stageRoots[m_stageCurrent]); - if (!m_deferedcollide) - { - b3DbvtTreeCollider collider(this); - collider.proxy = proxy; - m_sets[0].collideTV(m_sets[0].m_root, aabb, collider); - m_sets[1].collideTV(m_sets[1].m_root, aabb, collider); - } - return (proxy); -} - -// -void b3DynamicBvhBroadphase::destroyProxy(b3BroadphaseProxy* absproxy, - b3Dispatcher* dispatcher) -{ - b3DbvtProxy* proxy = (b3DbvtProxy*)absproxy; - if (proxy->stage == STAGECOUNT) - m_sets[1].remove(proxy->leaf); - else - m_sets[0].remove(proxy->leaf); - b3ListRemove(proxy, m_stageRoots[proxy->stage]); - m_paircache->removeOverlappingPairsContainingProxy(proxy->getUid(), dispatcher); - - m_needcleanup = true; -} - -void b3DynamicBvhBroadphase::getAabb(int objectId, b3Vector3& aabbMin, b3Vector3& aabbMax) const -{ - const b3DbvtProxy* proxy = &m_proxies[objectId]; - aabbMin = proxy->m_aabbMin; - aabbMax = proxy->m_aabbMax; -} -/* -void b3DynamicBvhBroadphase::getAabb(b3BroadphaseProxy* absproxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const -{ - b3DbvtProxy* proxy=(b3DbvtProxy*)absproxy; - aabbMin = proxy->m_aabbMin; - aabbMax = proxy->m_aabbMax; -} -*/ - -struct BroadphaseRayTester : b3DynamicBvh::ICollide -{ - b3BroadphaseRayCallback& m_rayCallback; - BroadphaseRayTester(b3BroadphaseRayCallback& orgCallback) - : m_rayCallback(orgCallback) - { - } - void Process(const b3DbvtNode* leaf) - { - b3DbvtProxy* proxy = (b3DbvtProxy*)leaf->data; - m_rayCallback.process(proxy); - } -}; - -void b3DynamicBvhBroadphase::rayTest(const b3Vector3& rayFrom, const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) -{ - BroadphaseRayTester callback(rayCallback); - - m_sets[0].rayTestInternal(m_sets[0].m_root, - rayFrom, - rayTo, - rayCallback.m_rayDirectionInverse, - rayCallback.m_signs, - rayCallback.m_lambda_max, - aabbMin, - aabbMax, - callback); - - m_sets[1].rayTestInternal(m_sets[1].m_root, - rayFrom, - rayTo, - rayCallback.m_rayDirectionInverse, - rayCallback.m_signs, - rayCallback.m_lambda_max, - aabbMin, - aabbMax, - callback); -} - -struct BroadphaseAabbTester : b3DynamicBvh::ICollide -{ - b3BroadphaseAabbCallback& m_aabbCallback; - BroadphaseAabbTester(b3BroadphaseAabbCallback& orgCallback) - : m_aabbCallback(orgCallback) - { - } - void Process(const b3DbvtNode* leaf) - { - b3DbvtProxy* proxy = (b3DbvtProxy*)leaf->data; - m_aabbCallback.process(proxy); - } -}; - -void b3DynamicBvhBroadphase::aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& aabbCallback) -{ - BroadphaseAabbTester callback(aabbCallback); - - const B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) bounds = b3DbvtVolume::FromMM(aabbMin, aabbMax); - //process all children, that overlap with the given AABB bounds - m_sets[0].collideTV(m_sets[0].m_root, bounds, callback); - m_sets[1].collideTV(m_sets[1].m_root, bounds, callback); -} - -// -void b3DynamicBvhBroadphase::setAabb(int objectId, - const b3Vector3& aabbMin, - const b3Vector3& aabbMax, - b3Dispatcher* /*dispatcher*/) -{ - b3DbvtProxy* proxy = &m_proxies[objectId]; - // b3DbvtProxy* proxy=(b3DbvtProxy*)absproxy; - B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) - aabb = b3DbvtVolume::FromMM(aabbMin, aabbMax); -#if B3_DBVT_BP_PREVENTFALSEUPDATE - if (b3NotEqual(aabb, proxy->leaf->volume)) -#endif - { - bool docollide = false; - if (proxy->stage == STAGECOUNT) - { /* fixed -> dynamic set */ - m_sets[1].remove(proxy->leaf); - proxy->leaf = m_sets[0].insert(aabb, proxy); - docollide = true; - } - else - { /* dynamic set */ - ++m_updates_call; - if (b3Intersect(proxy->leaf->volume, aabb)) - { /* Moving */ - - const b3Vector3 delta = aabbMin - proxy->m_aabbMin; - b3Vector3 velocity(((proxy->m_aabbMax - proxy->m_aabbMin) / 2) * m_prediction); - if (delta[0] < 0) velocity[0] = -velocity[0]; - if (delta[1] < 0) velocity[1] = -velocity[1]; - if (delta[2] < 0) velocity[2] = -velocity[2]; - if ( -#ifdef B3_DBVT_BP_MARGIN - m_sets[0].update(proxy->leaf, aabb, velocity, B3_DBVT_BP_MARGIN) -#else - m_sets[0].update(proxy->leaf, aabb, velocity) -#endif - ) - { - ++m_updates_done; - docollide = true; - } - } - else - { /* Teleporting */ - m_sets[0].update(proxy->leaf, aabb); - ++m_updates_done; - docollide = true; - } - } - b3ListRemove(proxy, m_stageRoots[proxy->stage]); - proxy->m_aabbMin = aabbMin; - proxy->m_aabbMax = aabbMax; - proxy->stage = m_stageCurrent; - b3ListAppend(proxy, m_stageRoots[m_stageCurrent]); - if (docollide) - { - m_needcleanup = true; - if (!m_deferedcollide) - { - b3DbvtTreeCollider collider(this); - m_sets[1].collideTTpersistentStack(m_sets[1].m_root, proxy->leaf, collider); - m_sets[0].collideTTpersistentStack(m_sets[0].m_root, proxy->leaf, collider); - } - } - } -} - -// -void b3DynamicBvhBroadphase::setAabbForceUpdate(b3BroadphaseProxy* absproxy, - const b3Vector3& aabbMin, - const b3Vector3& aabbMax, - b3Dispatcher* /*dispatcher*/) -{ - b3DbvtProxy* proxy = (b3DbvtProxy*)absproxy; - B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) - aabb = b3DbvtVolume::FromMM(aabbMin, aabbMax); - bool docollide = false; - if (proxy->stage == STAGECOUNT) - { /* fixed -> dynamic set */ - m_sets[1].remove(proxy->leaf); - proxy->leaf = m_sets[0].insert(aabb, proxy); - docollide = true; - } - else - { /* dynamic set */ - ++m_updates_call; - /* Teleporting */ - m_sets[0].update(proxy->leaf, aabb); - ++m_updates_done; - docollide = true; - } - b3ListRemove(proxy, m_stageRoots[proxy->stage]); - proxy->m_aabbMin = aabbMin; - proxy->m_aabbMax = aabbMax; - proxy->stage = m_stageCurrent; - b3ListAppend(proxy, m_stageRoots[m_stageCurrent]); - if (docollide) - { - m_needcleanup = true; - if (!m_deferedcollide) - { - b3DbvtTreeCollider collider(this); - m_sets[1].collideTTpersistentStack(m_sets[1].m_root, proxy->leaf, collider); - m_sets[0].collideTTpersistentStack(m_sets[0].m_root, proxy->leaf, collider); - } - } -} - -// -void b3DynamicBvhBroadphase::calculateOverlappingPairs(b3Dispatcher* dispatcher) -{ - collide(dispatcher); -#if B3_DBVT_BP_PROFILE - if (0 == (m_pid % B3_DBVT_BP_PROFILING_RATE)) - { - printf("fixed(%u) dynamics(%u) pairs(%u)\r\n", m_sets[1].m_leaves, m_sets[0].m_leaves, m_paircache->getNumOverlappingPairs()); - unsigned int total = m_profiling.m_total; - if (total <= 0) total = 1; - printf("ddcollide: %u%% (%uus)\r\n", (50 + m_profiling.m_ddcollide * 100) / total, m_profiling.m_ddcollide / B3_DBVT_BP_PROFILING_RATE); - printf("fdcollide: %u%% (%uus)\r\n", (50 + m_profiling.m_fdcollide * 100) / total, m_profiling.m_fdcollide / B3_DBVT_BP_PROFILING_RATE); - printf("cleanup: %u%% (%uus)\r\n", (50 + m_profiling.m_cleanup * 100) / total, m_profiling.m_cleanup / B3_DBVT_BP_PROFILING_RATE); - printf("total: %uus\r\n", total / B3_DBVT_BP_PROFILING_RATE); - const unsigned long sum = m_profiling.m_ddcollide + - m_profiling.m_fdcollide + - m_profiling.m_cleanup; - printf("leaked: %u%% (%uus)\r\n", 100 - ((50 + sum * 100) / total), (total - sum) / B3_DBVT_BP_PROFILING_RATE); - printf("job counts: %u%%\r\n", (m_profiling.m_jobcount * 100) / ((m_sets[0].m_leaves + m_sets[1].m_leaves) * B3_DBVT_BP_PROFILING_RATE)); - b3Clear(m_profiling); - m_clock.reset(); - } -#endif - - performDeferredRemoval(dispatcher); -} - -void b3DynamicBvhBroadphase::performDeferredRemoval(b3Dispatcher* dispatcher) -{ - if (m_paircache->hasDeferredRemoval()) - { - b3BroadphasePairArray& overlappingPairArray = m_paircache->getOverlappingPairArray(); - - //perform a sort, to find duplicates and to sort 'invalid' pairs to the end - overlappingPairArray.quickSort(b3BroadphasePairSortPredicate()); - - int invalidPair = 0; - - int i; - - b3BroadphasePair previousPair = b3MakeBroadphasePair(-1, -1); - - for (i = 0; i < overlappingPairArray.size(); i++) - { - b3BroadphasePair& pair = overlappingPairArray[i]; - - bool isDuplicate = (pair == previousPair); - - previousPair = pair; - - bool needsRemoval = false; - - if (!isDuplicate) - { - //important to perform AABB check that is consistent with the broadphase - b3DbvtProxy* pa = &m_proxies[pair.x]; - b3DbvtProxy* pb = &m_proxies[pair.y]; - bool hasOverlap = b3Intersect(pa->leaf->volume, pb->leaf->volume); - - if (hasOverlap) - { - needsRemoval = false; - } - else - { - needsRemoval = true; - } - } - else - { - //remove duplicate - needsRemoval = true; - //should have no algorithm - } - - if (needsRemoval) - { - m_paircache->cleanOverlappingPair(pair, dispatcher); - - pair.x = -1; - pair.y = -1; - invalidPair++; - } - } - - //perform a sort, to sort 'invalid' pairs to the end - overlappingPairArray.quickSort(b3BroadphasePairSortPredicate()); - overlappingPairArray.resize(overlappingPairArray.size() - invalidPair); - } -} - -// -void b3DynamicBvhBroadphase::collide(b3Dispatcher* dispatcher) -{ - /*printf("---------------------------------------------------------\n"); - printf("m_sets[0].m_leaves=%d\n",m_sets[0].m_leaves); - printf("m_sets[1].m_leaves=%d\n",m_sets[1].m_leaves); - printf("numPairs = %d\n",getOverlappingPairCache()->getNumOverlappingPairs()); - { - int i; - for (i=0;igetNumOverlappingPairs();i++) - { - printf("pair[%d]=(%d,%d),",i,getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy0->getUid(), - getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy1->getUid()); - } - printf("\n"); - } -*/ - - b3SPC(m_profiling.m_total); - /* optimize */ - m_sets[0].optimizeIncremental(1 + (m_sets[0].m_leaves * m_dupdates) / 100); - if (m_fixedleft) - { - const int count = 1 + (m_sets[1].m_leaves * m_fupdates) / 100; - m_sets[1].optimizeIncremental(1 + (m_sets[1].m_leaves * m_fupdates) / 100); - m_fixedleft = b3Max(0, m_fixedleft - count); - } - /* dynamic -> fixed set */ - m_stageCurrent = (m_stageCurrent + 1) % STAGECOUNT; - b3DbvtProxy* current = m_stageRoots[m_stageCurrent]; - if (current) - { - b3DbvtTreeCollider collider(this); - do - { - b3DbvtProxy* next = current->links[1]; - b3ListRemove(current, m_stageRoots[current->stage]); - b3ListAppend(current, m_stageRoots[STAGECOUNT]); -#if B3_DBVT_BP_ACCURATESLEEPING - m_paircache->removeOverlappingPairsContainingProxy(current, dispatcher); - collider.proxy = current; - b3DynamicBvh::collideTV(m_sets[0].m_root, current->aabb, collider); - b3DynamicBvh::collideTV(m_sets[1].m_root, current->aabb, collider); -#endif - m_sets[0].remove(current->leaf); - B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) - curAabb = b3DbvtVolume::FromMM(current->m_aabbMin, current->m_aabbMax); - current->leaf = m_sets[1].insert(curAabb, current); - current->stage = STAGECOUNT; - current = next; - } while (current); - m_fixedleft = m_sets[1].m_leaves; - m_needcleanup = true; - } - /* collide dynamics */ - { - b3DbvtTreeCollider collider(this); - if (m_deferedcollide) - { - b3SPC(m_profiling.m_fdcollide); - m_sets[0].collideTTpersistentStack(m_sets[0].m_root, m_sets[1].m_root, collider); - } - if (m_deferedcollide) - { - b3SPC(m_profiling.m_ddcollide); - m_sets[0].collideTTpersistentStack(m_sets[0].m_root, m_sets[0].m_root, collider); - } - } - /* clean up */ - if (m_needcleanup) - { - b3SPC(m_profiling.m_cleanup); - b3BroadphasePairArray& pairs = m_paircache->getOverlappingPairArray(); - if (pairs.size() > 0) - { - int ni = b3Min(pairs.size(), b3Max(m_newpairs, (pairs.size() * m_cupdates) / 100)); - for (int i = 0; i < ni; ++i) - { - b3BroadphasePair& p = pairs[(m_cid + i) % pairs.size()]; - b3DbvtProxy* pa = &m_proxies[p.x]; - b3DbvtProxy* pb = &m_proxies[p.y]; - if (!b3Intersect(pa->leaf->volume, pb->leaf->volume)) - { -#if B3_DBVT_BP_SORTPAIRS - if (pa->m_uniqueId > pb->m_uniqueId) - b3Swap(pa, pb); -#endif - m_paircache->removeOverlappingPair(pa->getUid(), pb->getUid(), dispatcher); - --ni; - --i; - } - } - if (pairs.size() > 0) - m_cid = (m_cid + ni) % pairs.size(); - else - m_cid = 0; - } - } - ++m_pid; - m_newpairs = 1; - m_needcleanup = false; - if (m_updates_call > 0) - { - m_updates_ratio = m_updates_done / (b3Scalar)m_updates_call; - } - else - { - m_updates_ratio = 0; - } - m_updates_done /= 2; - m_updates_call /= 2; -} - -// -void b3DynamicBvhBroadphase::optimize() -{ - m_sets[0].optimizeTopDown(); - m_sets[1].optimizeTopDown(); -} - -// -b3OverlappingPairCache* b3DynamicBvhBroadphase::getOverlappingPairCache() -{ - return (m_paircache); -} - -// -const b3OverlappingPairCache* b3DynamicBvhBroadphase::getOverlappingPairCache() const -{ - return (m_paircache); -} - -// -void b3DynamicBvhBroadphase::getBroadphaseAabb(b3Vector3& aabbMin, b3Vector3& aabbMax) const -{ - B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) - bounds; - - if (!m_sets[0].empty()) - if (!m_sets[1].empty()) - b3Merge(m_sets[0].m_root->volume, - m_sets[1].m_root->volume, bounds); - else - bounds = m_sets[0].m_root->volume; - else if (!m_sets[1].empty()) - bounds = m_sets[1].m_root->volume; - else - bounds = b3DbvtVolume::FromCR(b3MakeVector3(0, 0, 0), 0); - aabbMin = bounds.Mins(); - aabbMax = bounds.Maxs(); -} - -void b3DynamicBvhBroadphase::resetPool(b3Dispatcher* dispatcher) -{ - int totalObjects = m_sets[0].m_leaves + m_sets[1].m_leaves; - if (!totalObjects) - { - //reset internal dynamic tree data structures - m_sets[0].clear(); - m_sets[1].clear(); - - m_deferedcollide = false; - m_needcleanup = true; - m_stageCurrent = 0; - m_fixedleft = 0; - m_fupdates = 1; - m_dupdates = 0; - m_cupdates = 10; - m_newpairs = 1; - m_updates_call = 0; - m_updates_done = 0; - m_updates_ratio = 0; - - m_pid = 0; - m_cid = 0; - for (int i = 0; i <= STAGECOUNT; ++i) - { - m_stageRoots[i] = 0; - } - } -} - -// -void b3DynamicBvhBroadphase::printStats() -{ -} - -// -#if B3_DBVT_BP_ENABLE_BENCHMARK - -struct b3BroadphaseBenchmark -{ - struct Experiment - { - const char* name; - int object_count; - int update_count; - int spawn_count; - int iterations; - b3Scalar speed; - b3Scalar amplitude; - }; - struct Object - { - b3Vector3 center; - b3Vector3 extents; - b3BroadphaseProxy* proxy; - b3Scalar time; - void update(b3Scalar speed, b3Scalar amplitude, b3BroadphaseInterface* pbi) - { - time += speed; - center[0] = b3Cos(time * (b3Scalar)2.17) * amplitude + - b3Sin(time) * amplitude / 2; - center[1] = b3Cos(time * (b3Scalar)1.38) * amplitude + - b3Sin(time) * amplitude; - center[2] = b3Sin(time * (b3Scalar)0.777) * amplitude; - pbi->setAabb(proxy, center - extents, center + extents, 0); - } - }; - static int UnsignedRand(int range = RAND_MAX - 1) { return (rand() % (range + 1)); } - static b3Scalar UnitRand() { return (UnsignedRand(16384) / (b3Scalar)16384); } - static void OutputTime(const char* name, b3Clock& c, unsigned count = 0) - { - const unsigned long us = c.getTimeMicroseconds(); - const unsigned long ms = (us + 500) / 1000; - const b3Scalar sec = us / (b3Scalar)(1000 * 1000); - if (count > 0) - printf("%s : %u us (%u ms), %.2f/s\r\n", name, us, ms, count / sec); - else - printf("%s : %u us (%u ms)\r\n", name, us, ms); - } -}; - -void b3DynamicBvhBroadphase::benchmark(b3BroadphaseInterface* pbi) -{ - static const b3BroadphaseBenchmark::Experiment experiments[] = - { - {"1024o.10%", 1024, 10, 0, 8192, (b3Scalar)0.005, (b3Scalar)100}, - /*{"4096o.10%",4096,10,0,8192,(b3Scalar)0.005,(b3Scalar)100}, - {"8192o.10%",8192,10,0,8192,(b3Scalar)0.005,(b3Scalar)100},*/ - }; - static const int nexperiments = sizeof(experiments) / sizeof(experiments[0]); - b3AlignedObjectArray objects; - b3Clock wallclock; - /* Begin */ - for (int iexp = 0; iexp < nexperiments; ++iexp) - { - const b3BroadphaseBenchmark::Experiment& experiment = experiments[iexp]; - const int object_count = experiment.object_count; - const int update_count = (object_count * experiment.update_count) / 100; - const int spawn_count = (object_count * experiment.spawn_count) / 100; - const b3Scalar speed = experiment.speed; - const b3Scalar amplitude = experiment.amplitude; - printf("Experiment #%u '%s':\r\n", iexp, experiment.name); - printf("\tObjects: %u\r\n", object_count); - printf("\tUpdate: %u\r\n", update_count); - printf("\tSpawn: %u\r\n", spawn_count); - printf("\tSpeed: %f\r\n", speed); - printf("\tAmplitude: %f\r\n", amplitude); - srand(180673); - /* Create objects */ - wallclock.reset(); - objects.reserve(object_count); - for (int i = 0; i < object_count; ++i) - { - b3BroadphaseBenchmark::Object* po = new b3BroadphaseBenchmark::Object(); - po->center[0] = b3BroadphaseBenchmark::UnitRand() * 50; - po->center[1] = b3BroadphaseBenchmark::UnitRand() * 50; - po->center[2] = b3BroadphaseBenchmark::UnitRand() * 50; - po->extents[0] = b3BroadphaseBenchmark::UnitRand() * 2 + 2; - po->extents[1] = b3BroadphaseBenchmark::UnitRand() * 2 + 2; - po->extents[2] = b3BroadphaseBenchmark::UnitRand() * 2 + 2; - po->time = b3BroadphaseBenchmark::UnitRand() * 2000; - po->proxy = pbi->createProxy(po->center - po->extents, po->center + po->extents, 0, po, 1, 1, 0, 0); - objects.push_back(po); - } - b3BroadphaseBenchmark::OutputTime("\tInitialization", wallclock); - /* First update */ - wallclock.reset(); - for (int i = 0; i < objects.size(); ++i) - { - objects[i]->update(speed, amplitude, pbi); - } - b3BroadphaseBenchmark::OutputTime("\tFirst update", wallclock); - /* Updates */ - wallclock.reset(); - for (int i = 0; i < experiment.iterations; ++i) - { - for (int j = 0; j < update_count; ++j) - { - objects[j]->update(speed, amplitude, pbi); - } - pbi->calculateOverlappingPairs(0); - } - b3BroadphaseBenchmark::OutputTime("\tUpdate", wallclock, experiment.iterations); - /* Clean up */ - wallclock.reset(); - for (int i = 0; i < objects.size(); ++i) - { - pbi->destroyProxy(objects[i]->proxy, 0); - delete objects[i]; - } - objects.resize(0); - b3BroadphaseBenchmark::OutputTime("\tRelease", wallclock); - } -} -#else -/*void b3DynamicBvhBroadphase::benchmark(b3BroadphaseInterface*) -{} -*/ -#endif - -#if B3_DBVT_BP_PROFILE -#undef b3SPC -#endif diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h deleted file mode 100644 index c235e40148c..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h +++ /dev/null @@ -1,197 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -///b3DynamicBvhBroadphase implementation by Nathanael Presson -#ifndef B3_DBVT_BROADPHASE_H -#define B3_DBVT_BROADPHASE_H - -#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h" -#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "b3BroadphaseCallback.h" - -// -// Compile time config -// - -#define B3_DBVT_BP_PROFILE 0 -//#define B3_DBVT_BP_SORTPAIRS 1 -#define B3_DBVT_BP_PREVENTFALSEUPDATE 0 -#define B3_DBVT_BP_ACCURATESLEEPING 0 -#define B3_DBVT_BP_ENABLE_BENCHMARK 0 -#define B3_DBVT_BP_MARGIN (b3Scalar)0.05 - -#if B3_DBVT_BP_PROFILE -#define B3_DBVT_BP_PROFILING_RATE 256 - -#endif - -B3_ATTRIBUTE_ALIGNED16(struct) -b3BroadphaseProxy -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - ///optional filtering to cull potential collisions - enum CollisionFilterGroups - { - DefaultFilter = 1, - StaticFilter = 2, - KinematicFilter = 4, - DebrisFilter = 8, - SensorTrigger = 16, - CharacterFilter = 32, - AllFilter = -1 //all bits sets: DefaultFilter | StaticFilter | KinematicFilter | DebrisFilter | SensorTrigger - }; - - //Usually the client b3CollisionObject or Rigidbody class - void* m_clientObject; - int m_collisionFilterGroup; - int m_collisionFilterMask; - int m_uniqueId; //m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc. - - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - - B3_FORCE_INLINE int getUid() const - { - return m_uniqueId; - } - - //used for memory pools - b3BroadphaseProxy() : m_clientObject(0) - { - } - - b3BroadphaseProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, void* userPtr, int collisionFilterGroup, int collisionFilterMask) - : m_clientObject(userPtr), - m_collisionFilterGroup(collisionFilterGroup), - m_collisionFilterMask(collisionFilterMask), - m_aabbMin(aabbMin), - m_aabbMax(aabbMax) - { - } -}; - -// -// b3DbvtProxy -// -struct b3DbvtProxy : b3BroadphaseProxy -{ - /* Fields */ - //b3DbvtAabbMm aabb; - b3DbvtNode* leaf; - b3DbvtProxy* links[2]; - int stage; - /* ctor */ - - explicit b3DbvtProxy() {} - b3DbvtProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, void* userPtr, int collisionFilterGroup, int collisionFilterMask) : b3BroadphaseProxy(aabbMin, aabbMax, userPtr, collisionFilterGroup, collisionFilterMask) - { - links[0] = links[1] = 0; - } -}; - -typedef b3AlignedObjectArray b3DbvtProxyArray; - -///The b3DynamicBvhBroadphase implements a broadphase using two dynamic AABB bounding volume hierarchies/trees (see b3DynamicBvh). -///One tree is used for static/non-moving objects, and another tree is used for dynamic objects. Objects can move from one tree to the other. -///This is a very fast broadphase, especially for very dynamic worlds where many objects are moving. Its insert/add and remove of objects is generally faster than the sweep and prune broadphases b3AxisSweep3 and b332BitAxisSweep3. -struct b3DynamicBvhBroadphase -{ - /* Config */ - enum - { - DYNAMIC_SET = 0, /* Dynamic set index */ - FIXED_SET = 1, /* Fixed set index */ - STAGECOUNT = 2 /* Number of stages */ - }; - /* Fields */ - b3DynamicBvh m_sets[2]; // Dbvt sets - b3DbvtProxy* m_stageRoots[STAGECOUNT + 1]; // Stages list - - b3AlignedObjectArray m_proxies; - b3OverlappingPairCache* m_paircache; // Pair cache - b3Scalar m_prediction; // Velocity prediction - int m_stageCurrent; // Current stage - int m_fupdates; // % of fixed updates per frame - int m_dupdates; // % of dynamic updates per frame - int m_cupdates; // % of cleanup updates per frame - int m_newpairs; // Number of pairs created - int m_fixedleft; // Fixed optimization left - unsigned m_updates_call; // Number of updates call - unsigned m_updates_done; // Number of updates done - b3Scalar m_updates_ratio; // m_updates_done/m_updates_call - int m_pid; // Parse id - int m_cid; // Cleanup index - bool m_releasepaircache; // Release pair cache on delete - bool m_deferedcollide; // Defere dynamic/static collision to collide call - bool m_needcleanup; // Need to run cleanup? -#if B3_DBVT_BP_PROFILE - b3Clock m_clock; - struct - { - unsigned long m_total; - unsigned long m_ddcollide; - unsigned long m_fdcollide; - unsigned long m_cleanup; - unsigned long m_jobcount; - } m_profiling; -#endif - /* Methods */ - b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache = 0); - virtual ~b3DynamicBvhBroadphase(); - void collide(b3Dispatcher* dispatcher); - void optimize(); - - /* b3BroadphaseInterface Implementation */ - b3BroadphaseProxy* createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int objectIndex, void* userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void destroyProxy(b3BroadphaseProxy* proxy, b3Dispatcher* dispatcher); - virtual void setAabb(int objectId, const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3Dispatcher* dispatcher); - virtual void rayTest(const b3Vector3& rayFrom, const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin = b3MakeVector3(0, 0, 0), const b3Vector3& aabbMax = b3MakeVector3(0, 0, 0)); - virtual void aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& callback); - - //virtual void getAabb(b3BroadphaseProxy* proxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const; - virtual void getAabb(int objectId, b3Vector3& aabbMin, b3Vector3& aabbMax) const; - virtual void calculateOverlappingPairs(b3Dispatcher* dispatcher = 0); - virtual b3OverlappingPairCache* getOverlappingPairCache(); - virtual const b3OverlappingPairCache* getOverlappingPairCache() const; - virtual void getBroadphaseAabb(b3Vector3& aabbMin, b3Vector3& aabbMax) const; - virtual void printStats(); - - ///reset broadphase internal structures, to ensure determinism/reproducability - virtual void resetPool(b3Dispatcher* dispatcher); - - void performDeferredRemoval(b3Dispatcher* dispatcher); - - void setVelocityPrediction(b3Scalar prediction) - { - m_prediction = prediction; - } - b3Scalar getVelocityPrediction() const - { - return m_prediction; - } - - ///this setAabbForceUpdate is similar to setAabb but always forces the aabb update. - ///it is not part of the b3BroadphaseInterface but specific to b3DynamicBvhBroadphase. - ///it bypasses certain optimizations that prevent aabb updates (when the aabb shrinks), see - ///http://code.google.com/p/bullet/issues/detail?id=223 - void setAabbForceUpdate(b3BroadphaseProxy* absproxy, const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3Dispatcher* /*dispatcher*/); - - //static void benchmark(b3BroadphaseInterface*); -}; - -#endif diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h deleted file mode 100644 index 4ff9ebae81f..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h +++ /dev/null @@ -1,70 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_OVERLAPPING_PAIR_H -#define B3_OVERLAPPING_PAIR_H - -#include "Bullet3Common/shared/b3Int4.h" - -#define B3_NEW_PAIR_MARKER -1 -#define B3_REMOVED_PAIR_MARKER -2 - -typedef b3Int4 b3BroadphasePair; - -inline b3Int4 b3MakeBroadphasePair(int xx, int yy) -{ - b3Int4 pair; - - if (xx < yy) - { - pair.x = xx; - pair.y = yy; - } - else - { - pair.x = yy; - pair.y = xx; - } - pair.z = B3_NEW_PAIR_MARKER; - pair.w = B3_NEW_PAIR_MARKER; - return pair; -} - -/*struct b3BroadphasePair : public b3Int4 -{ - explicit b3BroadphasePair(){} - -}; -*/ - -class b3BroadphasePairSortPredicate -{ -public: - bool operator()(const b3BroadphasePair& a, const b3BroadphasePair& b) const - { - const int uidA0 = a.x; - const int uidB0 = b.x; - const int uidA1 = a.y; - const int uidB1 = b.y; - return uidA0 > uidB0 || (uidA0 == uidB0 && uidA1 > uidB1); - } -}; - -B3_FORCE_INLINE bool operator==(const b3BroadphasePair& a, const b3BroadphasePair& b) -{ - return (a.x == b.x) && (a.y == b.y); -} - -#endif //B3_OVERLAPPING_PAIR_H diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp deleted file mode 100644 index 19773244bee..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp +++ /dev/null @@ -1,559 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3OverlappingPairCache.h" - -//#include "b3Dispatcher.h" -//#include "b3CollisionAlgorithm.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -#include - -int b3g_overlappingPairs = 0; -int b3g_removePairs = 0; -int b3g_addedPairs = 0; -int b3g_findPairs = 0; - -b3HashedOverlappingPairCache::b3HashedOverlappingPairCache() : m_overlapFilterCallback(0) -//, m_blockedForChanges(false) -{ - int initialAllocatedSize = 2; - m_overlappingPairArray.reserve(initialAllocatedSize); - growTables(); -} - -b3HashedOverlappingPairCache::~b3HashedOverlappingPairCache() -{ -} - -void b3HashedOverlappingPairCache::cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher) -{ - /* if (pair.m_algorithm) - { - { - pair.m_algorithm->~b3CollisionAlgorithm(); - dispatcher->freeCollisionAlgorithm(pair.m_algorithm); - pair.m_algorithm=0; - } - } - */ -} - -void b3HashedOverlappingPairCache::cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher) -{ - class CleanPairCallback : public b3OverlapCallback - { - int m_cleanProxy; - b3OverlappingPairCache* m_pairCache; - b3Dispatcher* m_dispatcher; - - public: - CleanPairCallback(int cleanProxy, b3OverlappingPairCache* pairCache, b3Dispatcher* dispatcher) - : m_cleanProxy(cleanProxy), - m_pairCache(pairCache), - m_dispatcher(dispatcher) - { - } - virtual bool processOverlap(b3BroadphasePair& pair) - { - if ((pair.x == m_cleanProxy) || - (pair.y == m_cleanProxy)) - { - m_pairCache->cleanOverlappingPair(pair, m_dispatcher); - } - return false; - } - }; - - CleanPairCallback cleanPairs(proxy, this, dispatcher); - - processAllOverlappingPairs(&cleanPairs, dispatcher); -} - -void b3HashedOverlappingPairCache::removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher) -{ - class RemovePairCallback : public b3OverlapCallback - { - int m_obsoleteProxy; - - public: - RemovePairCallback(int obsoleteProxy) - : m_obsoleteProxy(obsoleteProxy) - { - } - virtual bool processOverlap(b3BroadphasePair& pair) - { - return ((pair.x == m_obsoleteProxy) || - (pair.y == m_obsoleteProxy)); - } - }; - - RemovePairCallback removeCallback(proxy); - - processAllOverlappingPairs(&removeCallback, dispatcher); -} - -b3BroadphasePair* b3HashedOverlappingPairCache::findPair(int proxy0, int proxy1) -{ - b3g_findPairs++; - if (proxy0 > proxy1) - b3Swap(proxy0, proxy1); - int proxyId1 = proxy0; - int proxyId2 = proxy1; - - /*if (proxyId1 > proxyId2) - b3Swap(proxyId1, proxyId2);*/ - - int hash = static_cast(getHash(static_cast(proxyId1), static_cast(proxyId2)) & (m_overlappingPairArray.capacity() - 1)); - - if (hash >= m_hashTable.size()) - { - return NULL; - } - - int index = m_hashTable[hash]; - while (index != B3_NULL_PAIR && equalsPair(m_overlappingPairArray[index], proxyId1, proxyId2) == false) - { - index = m_next[index]; - } - - if (index == B3_NULL_PAIR) - { - return NULL; - } - - b3Assert(index < m_overlappingPairArray.size()); - - return &m_overlappingPairArray[index]; -} - -//#include - -void b3HashedOverlappingPairCache::growTables() -{ - int newCapacity = m_overlappingPairArray.capacity(); - - if (m_hashTable.size() < newCapacity) - { - //grow hashtable and next table - int curHashtableSize = m_hashTable.size(); - - m_hashTable.resize(newCapacity); - m_next.resize(newCapacity); - - int i; - - for (i = 0; i < newCapacity; ++i) - { - m_hashTable[i] = B3_NULL_PAIR; - } - for (i = 0; i < newCapacity; ++i) - { - m_next[i] = B3_NULL_PAIR; - } - - for (i = 0; i < curHashtableSize; i++) - { - const b3BroadphasePair& pair = m_overlappingPairArray[i]; - int proxyId1 = pair.x; - int proxyId2 = pair.y; - /*if (proxyId1 > proxyId2) - b3Swap(proxyId1, proxyId2);*/ - int hashValue = static_cast(getHash(static_cast(proxyId1), static_cast(proxyId2)) & (m_overlappingPairArray.capacity() - 1)); // New hash value with new mask - m_next[i] = m_hashTable[hashValue]; - m_hashTable[hashValue] = i; - } - } -} - -b3BroadphasePair* b3HashedOverlappingPairCache::internalAddPair(int proxy0, int proxy1) -{ - if (proxy0 > proxy1) - b3Swap(proxy0, proxy1); - int proxyId1 = proxy0; - int proxyId2 = proxy1; - - /*if (proxyId1 > proxyId2) - b3Swap(proxyId1, proxyId2);*/ - - int hash = static_cast(getHash(static_cast(proxyId1), static_cast(proxyId2)) & (m_overlappingPairArray.capacity() - 1)); // New hash value with new mask - - b3BroadphasePair* pair = internalFindPair(proxy0, proxy1, hash); - if (pair != NULL) - { - return pair; - } - /*for(int i=0;i%u\r\n",proxyId1,proxyId2); - internalFindPair(proxy0, proxy1, hash); - } - }*/ - int count = m_overlappingPairArray.size(); - int oldCapacity = m_overlappingPairArray.capacity(); - pair = &m_overlappingPairArray.expandNonInitializing(); - - //this is where we add an actual pair, so also call the 'ghost' - // if (m_ghostPairCallback) - // m_ghostPairCallback->addOverlappingPair(proxy0,proxy1); - - int newCapacity = m_overlappingPairArray.capacity(); - - if (oldCapacity < newCapacity) - { - growTables(); - //hash with new capacity - hash = static_cast(getHash(static_cast(proxyId1), static_cast(proxyId2)) & (m_overlappingPairArray.capacity() - 1)); - } - - *pair = b3MakeBroadphasePair(proxy0, proxy1); - - // pair->m_pProxy0 = proxy0; - // pair->m_pProxy1 = proxy1; - //pair->m_algorithm = 0; - //pair->m_internalTmpValue = 0; - - m_next[count] = m_hashTable[hash]; - m_hashTable[hash] = count; - - return pair; -} - -void* b3HashedOverlappingPairCache::removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher) -{ - b3g_removePairs++; - if (proxy0 > proxy1) - b3Swap(proxy0, proxy1); - int proxyId1 = proxy0; - int proxyId2 = proxy1; - - /*if (proxyId1 > proxyId2) - b3Swap(proxyId1, proxyId2);*/ - - int hash = static_cast(getHash(static_cast(proxyId1), static_cast(proxyId2)) & (m_overlappingPairArray.capacity() - 1)); - - b3BroadphasePair* pair = internalFindPair(proxy0, proxy1, hash); - if (pair == NULL) - { - return 0; - } - - cleanOverlappingPair(*pair, dispatcher); - - int pairIndex = int(pair - &m_overlappingPairArray[0]); - b3Assert(pairIndex < m_overlappingPairArray.size()); - - // Remove the pair from the hash table. - int index = m_hashTable[hash]; - b3Assert(index != B3_NULL_PAIR); - - int previous = B3_NULL_PAIR; - while (index != pairIndex) - { - previous = index; - index = m_next[index]; - } - - if (previous != B3_NULL_PAIR) - { - b3Assert(m_next[previous] == pairIndex); - m_next[previous] = m_next[pairIndex]; - } - else - { - m_hashTable[hash] = m_next[pairIndex]; - } - - // We now move the last pair into spot of the - // pair being removed. We need to fix the hash - // table indices to support the move. - - int lastPairIndex = m_overlappingPairArray.size() - 1; - - //if (m_ghostPairCallback) - // m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher); - - // If the removed pair is the last pair, we are done. - if (lastPairIndex == pairIndex) - { - m_overlappingPairArray.pop_back(); - return 0; - } - - // Remove the last pair from the hash table. - const b3BroadphasePair* last = &m_overlappingPairArray[lastPairIndex]; - /* missing swap here too, Nat. */ - int lastHash = static_cast(getHash(static_cast(last->x), static_cast(last->y)) & (m_overlappingPairArray.capacity() - 1)); - - index = m_hashTable[lastHash]; - b3Assert(index != B3_NULL_PAIR); - - previous = B3_NULL_PAIR; - while (index != lastPairIndex) - { - previous = index; - index = m_next[index]; - } - - if (previous != B3_NULL_PAIR) - { - b3Assert(m_next[previous] == lastPairIndex); - m_next[previous] = m_next[lastPairIndex]; - } - else - { - m_hashTable[lastHash] = m_next[lastPairIndex]; - } - - // Copy the last pair into the remove pair's spot. - m_overlappingPairArray[pairIndex] = m_overlappingPairArray[lastPairIndex]; - - // Insert the last pair into the hash table - m_next[pairIndex] = m_hashTable[lastHash]; - m_hashTable[lastHash] = pairIndex; - - m_overlappingPairArray.pop_back(); - - return 0; -} -//#include - -void b3HashedOverlappingPairCache::processAllOverlappingPairs(b3OverlapCallback* callback, b3Dispatcher* dispatcher) -{ - int i; - - // printf("m_overlappingPairArray.size()=%d\n",m_overlappingPairArray.size()); - for (i = 0; i < m_overlappingPairArray.size();) - { - b3BroadphasePair* pair = &m_overlappingPairArray[i]; - if (callback->processOverlap(*pair)) - { - removeOverlappingPair(pair->x, pair->y, dispatcher); - - b3g_overlappingPairs--; - } - else - { - i++; - } - } -} - -void b3HashedOverlappingPairCache::sortOverlappingPairs(b3Dispatcher* dispatcher) -{ - ///need to keep hashmap in sync with pair address, so rebuild all - b3BroadphasePairArray tmpPairs; - int i; - for (i = 0; i < m_overlappingPairArray.size(); i++) - { - tmpPairs.push_back(m_overlappingPairArray[i]); - } - - for (i = 0; i < tmpPairs.size(); i++) - { - removeOverlappingPair(tmpPairs[i].x, tmpPairs[i].y, dispatcher); - } - - for (i = 0; i < m_next.size(); i++) - { - m_next[i] = B3_NULL_PAIR; - } - - tmpPairs.quickSort(b3BroadphasePairSortPredicate()); - - for (i = 0; i < tmpPairs.size(); i++) - { - addOverlappingPair(tmpPairs[i].x, tmpPairs[i].y); - } -} - -void* b3SortedOverlappingPairCache::removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher) -{ - if (!hasDeferredRemoval()) - { - b3BroadphasePair findPair = b3MakeBroadphasePair(proxy0, proxy1); - - int findIndex = m_overlappingPairArray.findLinearSearch(findPair); - if (findIndex < m_overlappingPairArray.size()) - { - b3g_overlappingPairs--; - b3BroadphasePair& pair = m_overlappingPairArray[findIndex]; - - cleanOverlappingPair(pair, dispatcher); - //if (m_ghostPairCallback) - // m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher); - - m_overlappingPairArray.swap(findIndex, m_overlappingPairArray.capacity() - 1); - m_overlappingPairArray.pop_back(); - return 0; - } - } - - return 0; -} - -b3BroadphasePair* b3SortedOverlappingPairCache::addOverlappingPair(int proxy0, int proxy1) -{ - //don't add overlap with own - b3Assert(proxy0 != proxy1); - - if (!needsBroadphaseCollision(proxy0, proxy1)) - return 0; - - b3BroadphasePair* pair = &m_overlappingPairArray.expandNonInitializing(); - *pair = b3MakeBroadphasePair(proxy0, proxy1); - - b3g_overlappingPairs++; - b3g_addedPairs++; - - // if (m_ghostPairCallback) - // m_ghostPairCallback->addOverlappingPair(proxy0, proxy1); - return pair; -} - -///this findPair becomes really slow. Either sort the list to speedup the query, or -///use a different solution. It is mainly used for Removing overlapping pairs. Removal could be delayed. -///we could keep a linked list in each proxy, and store pair in one of the proxies (with lowest memory address) -///Also we can use a 2D bitmap, which can be useful for a future GPU implementation -b3BroadphasePair* b3SortedOverlappingPairCache::findPair(int proxy0, int proxy1) -{ - if (!needsBroadphaseCollision(proxy0, proxy1)) - return 0; - - b3BroadphasePair tmpPair = b3MakeBroadphasePair(proxy0, proxy1); - int findIndex = m_overlappingPairArray.findLinearSearch(tmpPair); - - if (findIndex < m_overlappingPairArray.size()) - { - //b3Assert(it != m_overlappingPairSet.end()); - b3BroadphasePair* pair = &m_overlappingPairArray[findIndex]; - return pair; - } - return 0; -} - -//#include - -void b3SortedOverlappingPairCache::processAllOverlappingPairs(b3OverlapCallback* callback, b3Dispatcher* dispatcher) -{ - int i; - - for (i = 0; i < m_overlappingPairArray.size();) - { - b3BroadphasePair* pair = &m_overlappingPairArray[i]; - if (callback->processOverlap(*pair)) - { - cleanOverlappingPair(*pair, dispatcher); - pair->x = -1; - pair->y = -1; - m_overlappingPairArray.swap(i, m_overlappingPairArray.size() - 1); - m_overlappingPairArray.pop_back(); - b3g_overlappingPairs--; - } - else - { - i++; - } - } -} - -b3SortedOverlappingPairCache::b3SortedOverlappingPairCache() : m_blockedForChanges(false), - m_hasDeferredRemoval(true), - m_overlapFilterCallback(0) - -{ - int initialAllocatedSize = 2; - m_overlappingPairArray.reserve(initialAllocatedSize); -} - -b3SortedOverlappingPairCache::~b3SortedOverlappingPairCache() -{ -} - -void b3SortedOverlappingPairCache::cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher) -{ - /* if (pair.m_algorithm) - { - { - pair.m_algorithm->~b3CollisionAlgorithm(); - dispatcher->freeCollisionAlgorithm(pair.m_algorithm); - pair.m_algorithm=0; - b3g_removePairs--; - } - } - */ -} - -void b3SortedOverlappingPairCache::cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher) -{ - class CleanPairCallback : public b3OverlapCallback - { - int m_cleanProxy; - b3OverlappingPairCache* m_pairCache; - b3Dispatcher* m_dispatcher; - - public: - CleanPairCallback(int cleanProxy, b3OverlappingPairCache* pairCache, b3Dispatcher* dispatcher) - : m_cleanProxy(cleanProxy), - m_pairCache(pairCache), - m_dispatcher(dispatcher) - { - } - virtual bool processOverlap(b3BroadphasePair& pair) - { - if ((pair.x == m_cleanProxy) || - (pair.y == m_cleanProxy)) - { - m_pairCache->cleanOverlappingPair(pair, m_dispatcher); - } - return false; - } - }; - - CleanPairCallback cleanPairs(proxy, this, dispatcher); - - processAllOverlappingPairs(&cleanPairs, dispatcher); -} - -void b3SortedOverlappingPairCache::removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher) -{ - class RemovePairCallback : public b3OverlapCallback - { - int m_obsoleteProxy; - - public: - RemovePairCallback(int obsoleteProxy) - : m_obsoleteProxy(obsoleteProxy) - { - } - virtual bool processOverlap(b3BroadphasePair& pair) - { - return ((pair.x == m_obsoleteProxy) || - (pair.y == m_obsoleteProxy)); - } - }; - - RemovePairCallback removeCallback(proxy); - - processAllOverlappingPairs(&removeCallback, dispatcher); -} - -void b3SortedOverlappingPairCache::sortOverlappingPairs(b3Dispatcher* dispatcher) -{ - //should already be sorted -} diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h deleted file mode 100644 index f1de1d94eb9..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h +++ /dev/null @@ -1,427 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_OVERLAPPING_PAIR_CACHE_H -#define B3_OVERLAPPING_PAIR_CACHE_H - -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -class b3Dispatcher; -#include "b3OverlappingPair.h" - -typedef b3AlignedObjectArray b3BroadphasePairArray; - -struct b3OverlapCallback -{ - virtual ~b3OverlapCallback() - { - } - //return true for deletion of the pair - virtual bool processOverlap(b3BroadphasePair& pair) = 0; -}; - -struct b3OverlapFilterCallback -{ - virtual ~b3OverlapFilterCallback() - { - } - // return true when pairs need collision - virtual bool needBroadphaseCollision(int proxy0, int proxy1) const = 0; -}; - -extern int b3g_removePairs; -extern int b3g_addedPairs; -extern int b3g_findPairs; - -const int B3_NULL_PAIR = 0xffffffff; - -///The b3OverlappingPairCache provides an interface for overlapping pair management (add, remove, storage), used by the b3BroadphaseInterface broadphases. -///The b3HashedOverlappingPairCache and b3SortedOverlappingPairCache classes are two implementations. -class b3OverlappingPairCache -{ -public: - virtual ~b3OverlappingPairCache() {} // this is needed so we can get to the derived class destructor - - virtual b3BroadphasePair* getOverlappingPairArrayPtr() = 0; - - virtual const b3BroadphasePair* getOverlappingPairArrayPtr() const = 0; - - virtual b3BroadphasePairArray& getOverlappingPairArray() = 0; - - virtual void cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher) = 0; - - virtual int getNumOverlappingPairs() const = 0; - - virtual void cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher) = 0; - - virtual void setOverlapFilterCallback(b3OverlapFilterCallback* callback) = 0; - - virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* dispatcher) = 0; - - virtual b3BroadphasePair* findPair(int proxy0, int proxy1) = 0; - - virtual bool hasDeferredRemoval() = 0; - - //virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)=0; - - virtual b3BroadphasePair* addOverlappingPair(int proxy0, int proxy1) = 0; - virtual void* removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher) = 0; - virtual void removeOverlappingPairsContainingProxy(int /*proxy0*/, b3Dispatcher* /*dispatcher*/) = 0; - - virtual void sortOverlappingPairs(b3Dispatcher* dispatcher) = 0; -}; - -/// Hash-space based Pair Cache, thanks to Erin Catto, Box2D, http://www.box2d.org, and Pierre Terdiman, Codercorner, http://codercorner.com -class b3HashedOverlappingPairCache : public b3OverlappingPairCache -{ - b3BroadphasePairArray m_overlappingPairArray; - b3OverlapFilterCallback* m_overlapFilterCallback; - // bool m_blockedForChanges; - -public: - b3HashedOverlappingPairCache(); - virtual ~b3HashedOverlappingPairCache(); - - virtual void removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher); - - virtual void* removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher); - - B3_FORCE_INLINE bool needsBroadphaseCollision(int proxy0, int proxy1) const - { - if (m_overlapFilterCallback) - return m_overlapFilterCallback->needBroadphaseCollision(proxy0, proxy1); - - bool collides = true; //(proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0; - //collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask); - - return collides; - } - - // Add a pair and return the new pair. If the pair already exists, - // no new pair is created and the old one is returned. - virtual b3BroadphasePair* addOverlappingPair(int proxy0, int proxy1) - { - b3g_addedPairs++; - - if (!needsBroadphaseCollision(proxy0, proxy1)) - return 0; - - return internalAddPair(proxy0, proxy1); - } - - void cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher); - - virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* dispatcher); - - virtual b3BroadphasePair* getOverlappingPairArrayPtr() - { - return &m_overlappingPairArray[0]; - } - - const b3BroadphasePair* getOverlappingPairArrayPtr() const - { - return &m_overlappingPairArray[0]; - } - - b3BroadphasePairArray& getOverlappingPairArray() - { - return m_overlappingPairArray; - } - - const b3BroadphasePairArray& getOverlappingPairArray() const - { - return m_overlappingPairArray; - } - - void cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher); - - b3BroadphasePair* findPair(int proxy0, int proxy1); - - int GetCount() const { return m_overlappingPairArray.size(); } - // b3BroadphasePair* GetPairs() { return m_pairs; } - - b3OverlapFilterCallback* getOverlapFilterCallback() - { - return m_overlapFilterCallback; - } - - void setOverlapFilterCallback(b3OverlapFilterCallback* callback) - { - m_overlapFilterCallback = callback; - } - - int getNumOverlappingPairs() const - { - return m_overlappingPairArray.size(); - } - -private: - b3BroadphasePair* internalAddPair(int proxy0, int proxy1); - - void growTables(); - - B3_FORCE_INLINE bool equalsPair(const b3BroadphasePair& pair, int proxyId1, int proxyId2) - { - return pair.x == proxyId1 && pair.y == proxyId2; - } - - /* - // Thomas Wang's hash, see: http://www.concentric.net/~Ttwang/tech/inthash.htm - // This assumes proxyId1 and proxyId2 are 16-bit. - B3_FORCE_INLINE int getHash(int proxyId1, int proxyId2) - { - int key = (proxyId2 << 16) | proxyId1; - key = ~key + (key << 15); - key = key ^ (key >> 12); - key = key + (key << 2); - key = key ^ (key >> 4); - key = key * 2057; - key = key ^ (key >> 16); - return key; - } - */ - - B3_FORCE_INLINE unsigned int getHash(unsigned int proxyId1, unsigned int proxyId2) - { - int key = static_cast(((unsigned int)proxyId1) | (((unsigned int)proxyId2) << 16)); - // Thomas Wang's hash - - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return static_cast(key); - } - - B3_FORCE_INLINE b3BroadphasePair* internalFindPair(int proxy0, int proxy1, int hash) - { - int proxyId1 = proxy0; - int proxyId2 = proxy1; -#if 0 // wrong, 'equalsPair' use unsorted uids, copy-past devil striked again. Nat. - if (proxyId1 > proxyId2) - b3Swap(proxyId1, proxyId2); -#endif - - int index = m_hashTable[hash]; - - while (index != B3_NULL_PAIR && equalsPair(m_overlappingPairArray[index], proxyId1, proxyId2) == false) - { - index = m_next[index]; - } - - if (index == B3_NULL_PAIR) - { - return NULL; - } - - b3Assert(index < m_overlappingPairArray.size()); - - return &m_overlappingPairArray[index]; - } - - virtual bool hasDeferredRemoval() - { - return false; - } - - /* virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback) - { - m_ghostPairCallback = ghostPairCallback; - } - */ - - virtual void sortOverlappingPairs(b3Dispatcher* dispatcher); - -protected: - b3AlignedObjectArray m_hashTable; - b3AlignedObjectArray m_next; - // b3OverlappingPairCallback* m_ghostPairCallback; -}; - -///b3SortedOverlappingPairCache maintains the objects with overlapping AABB -///Typically managed by the Broadphase, Axis3Sweep or b3SimpleBroadphase -class b3SortedOverlappingPairCache : public b3OverlappingPairCache -{ -protected: - //avoid brute-force finding all the time - b3BroadphasePairArray m_overlappingPairArray; - - //during the dispatch, check that user doesn't destroy/create proxy - bool m_blockedForChanges; - - ///by default, do the removal during the pair traversal - bool m_hasDeferredRemoval; - - //if set, use the callback instead of the built in filter in needBroadphaseCollision - b3OverlapFilterCallback* m_overlapFilterCallback; - - // b3OverlappingPairCallback* m_ghostPairCallback; - -public: - b3SortedOverlappingPairCache(); - virtual ~b3SortedOverlappingPairCache(); - - virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* dispatcher); - - void* removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher); - - void cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher); - - b3BroadphasePair* addOverlappingPair(int proxy0, int proxy1); - - b3BroadphasePair* findPair(int proxy0, int proxy1); - - void cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher); - - virtual void removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher); - - inline bool needsBroadphaseCollision(int proxy0, int proxy1) const - { - if (m_overlapFilterCallback) - return m_overlapFilterCallback->needBroadphaseCollision(proxy0, proxy1); - - bool collides = true; //(proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0; - //collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask); - - return collides; - } - - b3BroadphasePairArray& getOverlappingPairArray() - { - return m_overlappingPairArray; - } - - const b3BroadphasePairArray& getOverlappingPairArray() const - { - return m_overlappingPairArray; - } - - b3BroadphasePair* getOverlappingPairArrayPtr() - { - return &m_overlappingPairArray[0]; - } - - const b3BroadphasePair* getOverlappingPairArrayPtr() const - { - return &m_overlappingPairArray[0]; - } - - int getNumOverlappingPairs() const - { - return m_overlappingPairArray.size(); - } - - b3OverlapFilterCallback* getOverlapFilterCallback() - { - return m_overlapFilterCallback; - } - - void setOverlapFilterCallback(b3OverlapFilterCallback* callback) - { - m_overlapFilterCallback = callback; - } - - virtual bool hasDeferredRemoval() - { - return m_hasDeferredRemoval; - } - - /* virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback) - { - m_ghostPairCallback = ghostPairCallback; - } - */ - virtual void sortOverlappingPairs(b3Dispatcher* dispatcher); -}; - -///b3NullPairCache skips add/removal of overlapping pairs. Userful for benchmarking and unit testing. -class b3NullPairCache : public b3OverlappingPairCache -{ - b3BroadphasePairArray m_overlappingPairArray; - -public: - virtual b3BroadphasePair* getOverlappingPairArrayPtr() - { - return &m_overlappingPairArray[0]; - } - const b3BroadphasePair* getOverlappingPairArrayPtr() const - { - return &m_overlappingPairArray[0]; - } - b3BroadphasePairArray& getOverlappingPairArray() - { - return m_overlappingPairArray; - } - - virtual void cleanOverlappingPair(b3BroadphasePair& /*pair*/, b3Dispatcher* /*dispatcher*/) - { - } - - virtual int getNumOverlappingPairs() const - { - return 0; - } - - virtual void cleanProxyFromPairs(int /*proxy*/, b3Dispatcher* /*dispatcher*/) - { - } - - virtual void setOverlapFilterCallback(b3OverlapFilterCallback* /*callback*/) - { - } - - virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* /*dispatcher*/) - { - } - - virtual b3BroadphasePair* findPair(int /*proxy0*/, int /*proxy1*/) - { - return 0; - } - - virtual bool hasDeferredRemoval() - { - return true; - } - - // virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* /* ghostPairCallback */) - // { - // - // } - - virtual b3BroadphasePair* addOverlappingPair(int /*proxy0*/, int /*proxy1*/) - { - return 0; - } - - virtual void* removeOverlappingPair(int /*proxy0*/, int /*proxy1*/, b3Dispatcher* /*dispatcher*/) - { - return 0; - } - - virtual void removeOverlappingPairsContainingProxy(int /*proxy0*/, b3Dispatcher* /*dispatcher*/) - { - } - - virtual void sortOverlappingPairs(b3Dispatcher* dispatcher) - { - (void)dispatcher; - } -}; - -#endif //B3_OVERLAPPING_PAIR_CACHE_H diff --git a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h b/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h deleted file mode 100644 index 343a2c0e215..00000000000 --- a/thirdparty/bullet/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h +++ /dev/null @@ -1,56 +0,0 @@ - -#ifndef B3_AABB_H -#define B3_AABB_H - -#include "Bullet3Common/shared/b3Float4.h" -#include "Bullet3Common/shared/b3Mat3x3.h" - -typedef struct b3Aabb b3Aabb_t; - -struct b3Aabb -{ - union { - float m_min[4]; - b3Float4 m_minVec; - int m_minIndices[4]; - }; - union { - float m_max[4]; - b3Float4 m_maxVec; - int m_signedMaxIndices[4]; - }; -}; - -inline void b3TransformAabb2(b3Float4ConstArg localAabbMin, b3Float4ConstArg localAabbMax, float margin, - b3Float4ConstArg pos, - b3QuatConstArg orn, - b3Float4* aabbMinOut, b3Float4* aabbMaxOut) -{ - b3Float4 localHalfExtents = 0.5f * (localAabbMax - localAabbMin); - localHalfExtents += b3MakeFloat4(margin, margin, margin, 0.f); - b3Float4 localCenter = 0.5f * (localAabbMax + localAabbMin); - b3Mat3x3 m; - m = b3QuatGetRotationMatrix(orn); - b3Mat3x3 abs_b = b3AbsoluteMat3x3(m); - b3Float4 center = b3TransformPoint(localCenter, pos, orn); - - b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents, b3GetRow(abs_b, 0)), - b3Dot3F4(localHalfExtents, b3GetRow(abs_b, 1)), - b3Dot3F4(localHalfExtents, b3GetRow(abs_b, 2)), - 0.f); - *aabbMinOut = center - extent; - *aabbMaxOut = center + extent; -} - -/// conservative test for overlap between two aabbs -inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1, b3Float4ConstArg aabbMax1, - b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2) -{ - bool overlap = true; - overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap; - overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap; - overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap; - return overlap; -} - -#endif //B3_AABB_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3Config.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3Config.h deleted file mode 100644 index 518da89c541..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3Config.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef B3_CONFIG_H -#define B3_CONFIG_H - -struct b3Config -{ - int m_maxConvexBodies; - int m_maxConvexShapes; - int m_maxBroadphasePairs; - int m_maxContactCapacity; - int m_compoundPairCapacity; - - int m_maxVerticesPerFace; - int m_maxFacesPerShape; - int m_maxConvexVertices; - int m_maxConvexIndices; - int m_maxConvexUniqueEdges; - - int m_maxCompoundChildShapes; - - int m_maxTriConvexPairCapacity; - - b3Config() - : m_maxConvexBodies(128 * 1024), - m_maxVerticesPerFace(64), - m_maxFacesPerShape(12), - m_maxConvexVertices(8192), - m_maxConvexIndices(81920), - m_maxConvexUniqueEdges(8192), - m_maxCompoundChildShapes(8192), - m_maxTriConvexPairCapacity(256 * 1024) - { - m_maxConvexShapes = m_maxConvexBodies; - m_maxBroadphasePairs = 16 * m_maxConvexBodies; - m_maxContactCapacity = m_maxBroadphasePairs; - m_compoundPairCapacity = 1024 * 1024; - } -}; - -#endif //B3_CONFIG_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h deleted file mode 100644 index c2cd3c729b3..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_CONTACT4_H -#define B3_CONTACT4_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -B3_ATTRIBUTE_ALIGNED16(struct) -b3Contact4 : public b3Contact4Data -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int getBodyA() const { return abs(m_bodyAPtrAndSignBit); } - int getBodyB() const { return abs(m_bodyBPtrAndSignBit); } - bool isBodyAFixed() const { return m_bodyAPtrAndSignBit < 0; } - bool isBodyBFixed() const { return m_bodyBPtrAndSignBit < 0; } - // todo. make it safer - int& getBatchIdx() { return m_batchIdx; } - const int& getBatchIdx() const { return m_batchIdx; } - float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp / (float)0xffff); } - void setRestituitionCoeff(float c) - { - b3Assert(c >= 0.f && c <= 1.f); - m_restituitionCoeffCmp = (unsigned short)(c * 0xffff); - } - float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp / (float)0xffff); } - void setFrictionCoeff(float c) - { - b3Assert(c >= 0.f && c <= 1.f); - m_frictionCoeffCmp = (unsigned short)(c * 0xffff); - } - - //float& getNPoints() { return m_worldNormal[3]; } - int getNPoints() const { return (int)m_worldNormalOnB.w; } - - float getPenetration(int idx) const { return m_worldPosB[idx].w; } - - bool isInvalid() const { return (getBodyA() == 0 || getBodyB() == 0); } -}; - -#endif //B3_CONTACT4_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp deleted file mode 100644 index a5dab74a344..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp +++ /dev/null @@ -1,500 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3ConvexUtility.h" -#include "Bullet3Geometry/b3ConvexHullComputer.h" -#include "Bullet3Geometry/b3GrahamScan2dConvexHull.h" -#include "Bullet3Common/b3Quaternion.h" -#include "Bullet3Common/b3HashMap.h" - -b3ConvexUtility::~b3ConvexUtility() -{ -} - -bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices, int numPoints, bool mergeCoplanarTriangles) -{ - b3ConvexHullComputer conv; - conv.compute(&orgVertices[0].getX(), sizeof(b3Vector3), numPoints, 0.f, 0.f); - - b3AlignedObjectArray faceNormals; - int numFaces = conv.faces.size(); - faceNormals.resize(numFaces); - b3ConvexHullComputer* convexUtil = &conv; - - b3AlignedObjectArray tmpFaces; - tmpFaces.resize(numFaces); - - int numVertices = convexUtil->vertices.size(); - m_vertices.resize(numVertices); - for (int p = 0; p < numVertices; p++) - { - m_vertices[p] = convexUtil->vertices[p]; - } - - for (int i = 0; i < numFaces; i++) - { - int face = convexUtil->faces[i]; - //printf("face=%d\n",face); - const b3ConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face]; - const b3ConvexHullComputer::Edge* edge = firstEdge; - - b3Vector3 edges[3]; - int numEdges = 0; - //compute face normals - - do - { - int src = edge->getSourceVertex(); - tmpFaces[i].m_indices.push_back(src); - int targ = edge->getTargetVertex(); - b3Vector3 wa = convexUtil->vertices[src]; - - b3Vector3 wb = convexUtil->vertices[targ]; - b3Vector3 newEdge = wb - wa; - newEdge.normalize(); - if (numEdges < 2) - edges[numEdges++] = newEdge; - - edge = edge->getNextEdgeOfFace(); - } while (edge != firstEdge); - - b3Scalar planeEq = 1e30f; - - if (numEdges == 2) - { - faceNormals[i] = edges[0].cross(edges[1]); - faceNormals[i].normalize(); - tmpFaces[i].m_plane[0] = faceNormals[i].getX(); - tmpFaces[i].m_plane[1] = faceNormals[i].getY(); - tmpFaces[i].m_plane[2] = faceNormals[i].getZ(); - tmpFaces[i].m_plane[3] = planeEq; - } - else - { - b3Assert(0); //degenerate? - faceNormals[i].setZero(); - } - - for (int v = 0; v < tmpFaces[i].m_indices.size(); v++) - { - b3Scalar eq = m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]); - if (planeEq > eq) - { - planeEq = eq; - } - } - tmpFaces[i].m_plane[3] = -planeEq; - } - - //merge coplanar faces and copy them to m_polyhedron - - b3Scalar faceWeldThreshold = 0.999f; - b3AlignedObjectArray todoFaces; - for (int i = 0; i < tmpFaces.size(); i++) - todoFaces.push_back(i); - - while (todoFaces.size()) - { - b3AlignedObjectArray coplanarFaceGroup; - int refFace = todoFaces[todoFaces.size() - 1]; - - coplanarFaceGroup.push_back(refFace); - b3MyFace& faceA = tmpFaces[refFace]; - todoFaces.pop_back(); - - b3Vector3 faceNormalA = b3MakeVector3(faceA.m_plane[0], faceA.m_plane[1], faceA.m_plane[2]); - for (int j = todoFaces.size() - 1; j >= 0; j--) - { - int i = todoFaces[j]; - b3MyFace& faceB = tmpFaces[i]; - b3Vector3 faceNormalB = b3MakeVector3(faceB.m_plane[0], faceB.m_plane[1], faceB.m_plane[2]); - if (faceNormalA.dot(faceNormalB) > faceWeldThreshold) - { - coplanarFaceGroup.push_back(i); - todoFaces.remove(i); - } - } - - bool did_merge = false; - if (coplanarFaceGroup.size() > 1) - { - //do the merge: use Graham Scan 2d convex hull - - b3AlignedObjectArray orgpoints; - b3Vector3 averageFaceNormal = b3MakeVector3(0, 0, 0); - - for (int i = 0; i < coplanarFaceGroup.size(); i++) - { - // m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]); - - b3MyFace& face = tmpFaces[coplanarFaceGroup[i]]; - b3Vector3 faceNormal = b3MakeVector3(face.m_plane[0], face.m_plane[1], face.m_plane[2]); - averageFaceNormal += faceNormal; - for (int f = 0; f < face.m_indices.size(); f++) - { - int orgIndex = face.m_indices[f]; - b3Vector3 pt = m_vertices[orgIndex]; - - bool found = false; - - for (int i = 0; i < orgpoints.size(); i++) - { - //if ((orgpoints[i].m_orgIndex == orgIndex) || ((rotatedPt-orgpoints[i]).length2()<0.0001)) - if (orgpoints[i].m_orgIndex == orgIndex) - { - found = true; - break; - } - } - if (!found) - orgpoints.push_back(b3GrahamVector3(pt, orgIndex)); - } - } - - b3MyFace combinedFace; - for (int i = 0; i < 4; i++) - combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i]; - - b3AlignedObjectArray hull; - - averageFaceNormal.normalize(); - b3GrahamScanConvexHull2D(orgpoints, hull, averageFaceNormal); - - for (int i = 0; i < hull.size(); i++) - { - combinedFace.m_indices.push_back(hull[i].m_orgIndex); - for (int k = 0; k < orgpoints.size(); k++) - { - if (orgpoints[k].m_orgIndex == hull[i].m_orgIndex) - { - orgpoints[k].m_orgIndex = -1; // invalidate... - break; - } - } - } - - // are there rejected vertices? - bool reject_merge = false; - - for (int i = 0; i < orgpoints.size(); i++) - { - if (orgpoints[i].m_orgIndex == -1) - continue; // this is in the hull... - // this vertex is rejected -- is anybody else using this vertex? - for (int j = 0; j < tmpFaces.size(); j++) - { - b3MyFace& face = tmpFaces[j]; - // is this a face of the current coplanar group? - bool is_in_current_group = false; - for (int k = 0; k < coplanarFaceGroup.size(); k++) - { - if (coplanarFaceGroup[k] == j) - { - is_in_current_group = true; - break; - } - } - if (is_in_current_group) // ignore this face... - continue; - // does this face use this rejected vertex? - for (int v = 0; v < face.m_indices.size(); v++) - { - if (face.m_indices[v] == orgpoints[i].m_orgIndex) - { - // this rejected vertex is used in another face -- reject merge - reject_merge = true; - break; - } - } - if (reject_merge) - break; - } - if (reject_merge) - break; - } - - if (!reject_merge) - { - // do this merge! - did_merge = true; - m_faces.push_back(combinedFace); - } - } - if (!did_merge) - { - for (int i = 0; i < coplanarFaceGroup.size(); i++) - { - b3MyFace face = tmpFaces[coplanarFaceGroup[i]]; - m_faces.push_back(face); - } - } - } - - initialize(); - - return true; -} - -inline bool IsAlmostZero(const b3Vector3& v) -{ - if (fabsf(v.getX()) > 1e-6 || fabsf(v.getY()) > 1e-6 || fabsf(v.getZ()) > 1e-6) return false; - return true; -} - -struct b3InternalVertexPair -{ - b3InternalVertexPair(short int v0, short int v1) - : m_v0(v0), - m_v1(v1) - { - if (m_v1 > m_v0) - b3Swap(m_v0, m_v1); - } - short int m_v0; - short int m_v1; - int getHash() const - { - return m_v0 + (m_v1 << 16); - } - bool equals(const b3InternalVertexPair& other) const - { - return m_v0 == other.m_v0 && m_v1 == other.m_v1; - } -}; - -struct b3InternalEdge -{ - b3InternalEdge() - : m_face0(-1), - m_face1(-1) - { - } - short int m_face0; - short int m_face1; -}; - -// - -#ifdef TEST_INTERNAL_OBJECTS -bool b3ConvexUtility::testContainment() const -{ - for (int p = 0; p < 8; p++) - { - b3Vector3 LocalPt; - if (p == 0) - LocalPt = m_localCenter + b3Vector3(m_extents[0], m_extents[1], m_extents[2]); - else if (p == 1) - LocalPt = m_localCenter + b3Vector3(m_extents[0], m_extents[1], -m_extents[2]); - else if (p == 2) - LocalPt = m_localCenter + b3Vector3(m_extents[0], -m_extents[1], m_extents[2]); - else if (p == 3) - LocalPt = m_localCenter + b3Vector3(m_extents[0], -m_extents[1], -m_extents[2]); - else if (p == 4) - LocalPt = m_localCenter + b3Vector3(-m_extents[0], m_extents[1], m_extents[2]); - else if (p == 5) - LocalPt = m_localCenter + b3Vector3(-m_extents[0], m_extents[1], -m_extents[2]); - else if (p == 6) - LocalPt = m_localCenter + b3Vector3(-m_extents[0], -m_extents[1], m_extents[2]); - else if (p == 7) - LocalPt = m_localCenter + b3Vector3(-m_extents[0], -m_extents[1], -m_extents[2]); - - for (int i = 0; i < m_faces.size(); i++) - { - const b3Vector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]); - const b3Scalar d = LocalPt.dot(Normal) + m_faces[i].m_plane[3]; - if (d > 0.0f) - return false; - } - } - return true; -} -#endif - -void b3ConvexUtility::initialize() -{ - b3HashMap edges; - - b3Scalar TotalArea = 0.0f; - - m_localCenter.setValue(0, 0, 0); - for (int i = 0; i < m_faces.size(); i++) - { - int numVertices = m_faces[i].m_indices.size(); - int NbTris = numVertices; - for (int j = 0; j < NbTris; j++) - { - int k = (j + 1) % numVertices; - b3InternalVertexPair vp(m_faces[i].m_indices[j], m_faces[i].m_indices[k]); - b3InternalEdge* edptr = edges.find(vp); - b3Vector3 edge = m_vertices[vp.m_v1] - m_vertices[vp.m_v0]; - edge.normalize(); - - bool found = false; - b3Vector3 diff, diff2; - - for (int p = 0; p < m_uniqueEdges.size(); p++) - { - diff = m_uniqueEdges[p] - edge; - diff2 = m_uniqueEdges[p] + edge; - - // if ((diff.length2()==0.f) || - // (diff2.length2()==0.f)) - - if (IsAlmostZero(diff) || - IsAlmostZero(diff2)) - { - found = true; - break; - } - } - - if (!found) - { - m_uniqueEdges.push_back(edge); - } - - if (edptr) - { - //TBD: figure out why I added this assert - // b3Assert(edptr->m_face0>=0); - // b3Assert(edptr->m_face1<0); - edptr->m_face1 = i; - } - else - { - b3InternalEdge ed; - ed.m_face0 = i; - edges.insert(vp, ed); - } - } - } - -#ifdef USE_CONNECTED_FACES - for (int i = 0; i < m_faces.size(); i++) - { - int numVertices = m_faces[i].m_indices.size(); - m_faces[i].m_connectedFaces.resize(numVertices); - - for (int j = 0; j < numVertices; j++) - { - int k = (j + 1) % numVertices; - b3InternalVertexPair vp(m_faces[i].m_indices[j], m_faces[i].m_indices[k]); - b3InternalEdge* edptr = edges.find(vp); - b3Assert(edptr); - b3Assert(edptr->m_face0 >= 0); - b3Assert(edptr->m_face1 >= 0); - - int connectedFace = (edptr->m_face0 == i) ? edptr->m_face1 : edptr->m_face0; - m_faces[i].m_connectedFaces[j] = connectedFace; - } - } -#endif //USE_CONNECTED_FACES - - for (int i = 0; i < m_faces.size(); i++) - { - int numVertices = m_faces[i].m_indices.size(); - int NbTris = numVertices - 2; - - const b3Vector3& p0 = m_vertices[m_faces[i].m_indices[0]]; - for (int j = 1; j <= NbTris; j++) - { - int k = (j + 1) % numVertices; - const b3Vector3& p1 = m_vertices[m_faces[i].m_indices[j]]; - const b3Vector3& p2 = m_vertices[m_faces[i].m_indices[k]]; - b3Scalar Area = ((p0 - p1).cross(p0 - p2)).length() * 0.5f; - b3Vector3 Center = (p0 + p1 + p2) / 3.0f; - m_localCenter += Area * Center; - TotalArea += Area; - } - } - m_localCenter /= TotalArea; - -#ifdef TEST_INTERNAL_OBJECTS - if (1) - { - m_radius = FLT_MAX; - for (int i = 0; i < m_faces.size(); i++) - { - const b3Vector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]); - const b3Scalar dist = b3Fabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]); - if (dist < m_radius) - m_radius = dist; - } - - b3Scalar MinX = FLT_MAX; - b3Scalar MinY = FLT_MAX; - b3Scalar MinZ = FLT_MAX; - b3Scalar MaxX = -FLT_MAX; - b3Scalar MaxY = -FLT_MAX; - b3Scalar MaxZ = -FLT_MAX; - for (int i = 0; i < m_vertices.size(); i++) - { - const b3Vector3& pt = m_vertices[i]; - if (pt.getX() < MinX) MinX = pt.getX(); - if (pt.getX() > MaxX) MaxX = pt.getX(); - if (pt.getY() < MinY) MinY = pt.getY(); - if (pt.getY() > MaxY) MaxY = pt.getY(); - if (pt.getZ() < MinZ) MinZ = pt.getZ(); - if (pt.getZ() > MaxZ) MaxZ = pt.getZ(); - } - mC.setValue(MaxX + MinX, MaxY + MinY, MaxZ + MinZ); - mE.setValue(MaxX - MinX, MaxY - MinY, MaxZ - MinZ); - - // const b3Scalar r = m_radius / sqrtf(2.0f); - const b3Scalar r = m_radius / sqrtf(3.0f); - const int LargestExtent = mE.maxAxis(); - const b3Scalar Step = (mE[LargestExtent] * 0.5f - r) / 1024.0f; - m_extents[0] = m_extents[1] = m_extents[2] = r; - m_extents[LargestExtent] = mE[LargestExtent] * 0.5f; - bool FoundBox = false; - for (int j = 0; j < 1024; j++) - { - if (testContainment()) - { - FoundBox = true; - break; - } - - m_extents[LargestExtent] -= Step; - } - if (!FoundBox) - { - m_extents[0] = m_extents[1] = m_extents[2] = r; - } - else - { - // Refine the box - const b3Scalar Step = (m_radius - r) / 1024.0f; - const int e0 = (1 << LargestExtent) & 3; - const int e1 = (1 << e0) & 3; - - for (int j = 0; j < 1024; j++) - { - const b3Scalar Saved0 = m_extents[e0]; - const b3Scalar Saved1 = m_extents[e1]; - m_extents[e0] += Step; - m_extents[e1] += Step; - - if (!testContainment()) - { - m_extents[e0] = Saved0; - m_extents[e1] = Saved1; - break; - } - } - } - } -#endif -} diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h deleted file mode 100644 index 4c8a88cbda9..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h +++ /dev/null @@ -1,55 +0,0 @@ - -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef _BT_CONVEX_UTILITY_H -#define _BT_CONVEX_UTILITY_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Transform.h" - -struct b3MyFace -{ - b3AlignedObjectArray m_indices; - b3Scalar m_plane[4]; -}; - -B3_ATTRIBUTE_ALIGNED16(class) -b3ConvexUtility -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3Vector3 m_localCenter; - b3Vector3 m_extents; - b3Vector3 mC; - b3Vector3 mE; - b3Scalar m_radius; - - b3AlignedObjectArray m_vertices; - b3AlignedObjectArray m_faces; - b3AlignedObjectArray m_uniqueEdges; - - b3ConvexUtility() - { - } - virtual ~b3ConvexUtility(); - - bool initializePolyhedralFeatures(const b3Vector3* orgVertices, int numVertices, bool mergeCoplanarTriangles = true); - - void initialize(); - bool testContainment() const; -}; -#endif diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp deleted file mode 100644 index e0b2161100a..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp +++ /dev/null @@ -1,297 +0,0 @@ -#include "b3CpuNarrowPhase.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h" - -struct b3CpuNarrowPhaseInternalData -{ - b3AlignedObjectArray m_localShapeAABBCPU; - b3AlignedObjectArray m_collidablesCPU; - b3AlignedObjectArray m_convexData; - b3Config m_config; - - b3AlignedObjectArray m_convexPolyhedra; - b3AlignedObjectArray m_uniqueEdges; - b3AlignedObjectArray m_convexVertices; - b3AlignedObjectArray m_convexIndices; - b3AlignedObjectArray m_convexFaces; - - b3AlignedObjectArray m_contacts; - - int m_numAcceleratedShapes; -}; - -const b3AlignedObjectArray& b3CpuNarrowPhase::getContacts() const -{ - return m_data->m_contacts; -} - -b3Collidable& b3CpuNarrowPhase::getCollidableCpu(int collidableIndex) -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -const b3Collidable& b3CpuNarrowPhase::getCollidableCpu(int collidableIndex) const -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -b3CpuNarrowPhase::b3CpuNarrowPhase(const struct b3Config& config) -{ - m_data = new b3CpuNarrowPhaseInternalData; - m_data->m_config = config; - m_data->m_numAcceleratedShapes = 0; -} - -b3CpuNarrowPhase::~b3CpuNarrowPhase() -{ - delete m_data; -} - -void b3CpuNarrowPhase::computeContacts(b3AlignedObjectArray& pairs, b3AlignedObjectArray& aabbsWorldSpace, b3AlignedObjectArray& bodies) -{ - int nPairs = pairs.size(); - int numContacts = 0; - int maxContactCapacity = m_data->m_config.m_maxContactCapacity; - m_data->m_contacts.resize(maxContactCapacity); - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - int collidableIndexA = bodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = bodies[bodyIndexB].m_collidableIdx; - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_SPHERE && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - // computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - // computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - //printf("convex-sphere\n"); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - // computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - // printf("convex-plane\n"); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_PLANE && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - // computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - // printf("plane-convex\n"); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - // computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0], - // nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU); - // printf("convex-plane\n"); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - // computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - // printf("convex-plane\n"); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_PLANE && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - // computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0], - // &m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - // printf("plane-convex\n"); - } - - if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - //printf("pairs[i].z=%d\n",pairs[i].z); - //int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,bodies, - // m_data->m_collidablesCPU,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - int contactIndex = b3ContactConvexConvexSAT(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, bodies, - m_data->m_collidablesCPU, m_data->m_convexPolyhedra, m_data->m_convexVertices, m_data->m_uniqueEdges, m_data->m_convexIndices, m_data->m_convexFaces, m_data->m_contacts, numContacts, maxContactCapacity); - - if (contactIndex >= 0) - { - pairs[i].z = contactIndex; - } - // printf("plane-convex\n"); - } - } - - m_data->m_contacts.resize(numContacts); -} - -int b3CpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = m_data->m_collidablesCPU[collidableIndex]; - col.m_shapeType = SHAPE_CONVEX_HULL; - col.m_shapeIndex = -1; - - { - b3Vector3 localCenter = b3MakeVector3(0, 0, 0); - for (int i = 0; i < utilPtr->m_vertices.size(); i++) - localCenter += utilPtr->m_vertices[i]; - localCenter *= (1.f / utilPtr->m_vertices.size()); - utilPtr->m_localCenter = localCenter; - - col.m_shapeIndex = registerConvexHullShapeInternal(utilPtr, col); - } - - if (col.m_shapeIndex >= 0) - { - b3Aabb aabb; - - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - for (int i = 0; i < utilPtr->m_vertices.size(); i++) - { - myAabbMin.setMin(utilPtr->m_vertices[i]); - myAabbMax.setMax(utilPtr->m_vertices[i]); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1] = myAabbMax[1]; - aabb.m_max[2] = myAabbMax[2]; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU.push_back(aabb); - } - - return collidableIndex; -} - -int b3CpuNarrowPhase::allocateCollidable() -{ - int curSize = m_data->m_collidablesCPU.size(); - if (curSize < m_data->m_config.m_maxConvexShapes) - { - m_data->m_collidablesCPU.expand(); - return curSize; - } - else - { - b3Error("allocateCollidable out-of-range %d\n", m_data->m_config.m_maxConvexShapes); - } - return -1; -} - -int b3CpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling) -{ - b3AlignedObjectArray verts; - - unsigned char* vts = (unsigned char*)vertices; - for (int i = 0; i < numVertices; i++) - { - float* vertex = (float*)&vts[i * strideInBytes]; - verts.push_back(b3MakeVector3(vertex[0] * scaling[0], vertex[1] * scaling[1], vertex[2] * scaling[2])); - } - - b3ConvexUtility* utilPtr = new b3ConvexUtility(); - bool merge = true; - if (numVertices) - { - utilPtr->initializePolyhedralFeatures(&verts[0], verts.size(), merge); - } - - int collidableIndex = registerConvexHullShape(utilPtr); - - delete utilPtr; - return collidableIndex; -} - -int b3CpuNarrowPhase::registerConvexHullShapeInternal(b3ConvexUtility* convexPtr, b3Collidable& col) -{ - m_data->m_convexData.resize(m_data->m_numAcceleratedShapes + 1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes + 1); - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size() - 1); - convex.mC = convexPtr->mC; - convex.mE = convexPtr->mE; - convex.m_extents = convexPtr->m_extents; - convex.m_localCenter = convexPtr->m_localCenter; - convex.m_radius = convexPtr->m_radius; - - convex.m_numUniqueEdges = convexPtr->m_uniqueEdges.size(); - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - m_data->m_uniqueEdges.resize(edgeOffset + convex.m_numUniqueEdges); - - //convex data here - int i; - for (i = 0; i < convexPtr->m_uniqueEdges.size(); i++) - { - m_data->m_uniqueEdges[edgeOffset + i] = convexPtr->m_uniqueEdges[i]; - } - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - convex.m_numFaces = convexPtr->m_faces.size(); - - m_data->m_convexFaces.resize(faceOffset + convex.m_numFaces); - - for (i = 0; i < convexPtr->m_faces.size(); i++) - { - m_data->m_convexFaces[convex.m_faceOffset + i].m_plane = b3MakeVector3(convexPtr->m_faces[i].m_plane[0], - convexPtr->m_faces[i].m_plane[1], - convexPtr->m_faces[i].m_plane[2], - convexPtr->m_faces[i].m_plane[3]); - - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = convexPtr->m_faces[i].m_indices.size(); - m_data->m_convexFaces[convex.m_faceOffset + i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset + i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset + numIndices); - for (int p = 0; p < numIndices; p++) - { - m_data->m_convexIndices[indexOffset + p] = convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = convexPtr->m_vertices.size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset = vertexOffset; - - m_data->m_convexVertices.resize(vertexOffset + convex.m_numVertices); - for (int i = 0; i < convexPtr->m_vertices.size(); i++) - { - m_data->m_convexVertices[vertexOffset + i] = convexPtr->m_vertices[i]; - } - - (m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr; - - return m_data->m_numAcceleratedShapes++; -} - -const b3Aabb& b3CpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const -{ - return m_data->m_localShapeAABBCPU[collidableIndex]; -} diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h deleted file mode 100644 index f02353c2657..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef B3_CPU_NARROWPHASE_H -#define B3_CPU_NARROWPHASE_H - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -class b3CpuNarrowPhase -{ -protected: - struct b3CpuNarrowPhaseInternalData* m_data; - int m_acceleratedCompanionShapeIndex; - int m_planeBodyIndex; - int m_static0Index; - - int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr, b3Collidable& col); - int registerConcaveMeshShape(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, b3Collidable& col, const float* scaling); - -public: - b3CpuNarrowPhase(const struct b3Config& config); - - virtual ~b3CpuNarrowPhase(void); - - int registerSphereShape(float radius); - int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - int registerCompoundShape(b3AlignedObjectArray* childShapes); - int registerFace(const b3Vector3& faceNormal, float faceConstant); - - int registerConcaveMesh(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, const float* scaling); - - //do they need to be merged? - - int registerConvexHullShape(b3ConvexUtility* utilPtr); - int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - - //int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu); - void setObjectTransform(const float* position, const float* orientation, int bodyIndex); - - void writeAllBodiesToGpu(); - void reset(); - void readbackAllBodiesToCpu(); - bool getObjectTransformFromCpu(float* position, float* orientation, int bodyIndex) const; - - void setObjectTransformCpu(float* position, float* orientation, int bodyIndex); - void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex); - - //virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects); - virtual void computeContacts(b3AlignedObjectArray& pairs, b3AlignedObjectArray& aabbsWorldSpace, b3AlignedObjectArray& bodies); - - const struct b3RigidBodyData* getBodiesCpu() const; - //struct b3RigidBodyData* getBodiesCpu(); - - int getNumBodiesGpu() const; - - int getNumBodyInertiasGpu() const; - - const struct b3Collidable* getCollidablesCpu() const; - int getNumCollidablesGpu() const; - - /*const struct b3Contact4* getContactsCPU() const; - - - int getNumContactsGpu() const; - */ - - const b3AlignedObjectArray& getContacts() const; - - int getNumRigidBodies() const; - - int allocateCollidable(); - - int getStatic0Index() const - { - return m_static0Index; - } - b3Collidable& getCollidableCpu(int collidableIndex); - const b3Collidable& getCollidableCpu(int collidableIndex) const; - - const b3CpuNarrowPhaseInternalData* getInternalData() const - { - return m_data; - } - - const struct b3Aabb& getLocalSpaceAabb(int collidableIndex) const; -}; - -#endif //B3_CPU_NARROWPHASE_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h deleted file mode 100644 index b50c0eca4f9..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h +++ /dev/null @@ -1,25 +0,0 @@ - -#ifndef B3_RAYCAST_INFO_H -#define B3_RAYCAST_INFO_H - -#include "Bullet3Common/b3Vector3.h" - -B3_ATTRIBUTE_ALIGNED16(struct) -b3RayInfo -{ - b3Vector3 m_from; - b3Vector3 m_to; -}; - -B3_ATTRIBUTE_ALIGNED16(struct) -b3RayHit -{ - b3Scalar m_hitFraction; - int m_hitBody; - int m_hitResult1; - int m_hitResult2; - b3Vector3 m_hitPoint; - b3Vector3 m_hitNormal; -}; - -#endif //B3_RAYCAST_INFO_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h deleted file mode 100644 index be1be57f055..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_RIGID_BODY_CL -#define B3_RIGID_BODY_CL - -#include "Bullet3Common/b3Scalar.h" -#include "Bullet3Common/b3Matrix3x3.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -inline float b3GetInvMass(const b3RigidBodyData& body) -{ - return body.m_invMass; -} - -#endif //B3_RIGID_BODY_CL diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h deleted file mode 100644 index d6beb662b55..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h +++ /dev/null @@ -1,19 +0,0 @@ - -#ifndef B3_BVH_SUBTREE_INFO_DATA_H -#define B3_BVH_SUBTREE_INFO_DATA_H - -typedef struct b3BvhSubtreeInfoData b3BvhSubtreeInfoData_t; - -struct b3BvhSubtreeInfoData -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -}; - -#endif //B3_BVH_SUBTREE_INFO_DATA_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h deleted file mode 100644 index 7c2507cc98b..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h +++ /dev/null @@ -1,123 +0,0 @@ - - -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" - -// work-in-progress -void b3BvhTraversal(__global const b3Int4* pairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global b3Aabb* aabbs, - __global b3Int4* concavePairsOut, - __global volatile int* numConcavePairsOut, - __global const b3BvhSubtreeInfo* subtreeHeadersRoot, - __global const b3QuantizedBvhNode* quantizedNodesRoot, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumConcavePairsCapacity, - int id) -{ - int bodyIndexA = pairs[id].x; - int bodyIndexB = pairs[id].y; - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0)) - { - return; - } - - if (collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) - return; - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - if (shapeTypeB != SHAPE_CONVEX_HULL && - shapeTypeB != SHAPE_SPHERE && - shapeTypeB != SHAPE_COMPOUND_OF_CONVEX_HULLS) - return; - - b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes]; - - b3Float4 bvhAabbMin = bvhInfo.m_aabbMin; - b3Float4 bvhAabbMax = bvhInfo.m_aabbMax; - b3Float4 bvhQuantization = bvhInfo.m_quantization; - int numSubtreeHeaders = bvhInfo.m_numSubTrees; - __global const b3BvhSubtreeInfoData* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset]; - __global const b3QuantizedBvhNodeData* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset]; - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - b3QuantizeWithClamp(quantizedQueryAabbMin, aabbs[bodyIndexB].m_minVec, false, bvhAabbMin, bvhAabbMax, bvhQuantization); - b3QuantizeWithClamp(quantizedQueryAabbMax, aabbs[bodyIndexB].m_maxVec, true, bvhAabbMin, bvhAabbMax, bvhQuantization); - - for (int i = 0; i < numSubtreeHeaders; i++) - { - b3BvhSubtreeInfoData subtree = subtreeHeaders[i]; - - int overlap = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax); - if (overlap != 0) - { - int startNodeIndex = subtree.m_rootNodeIndex; - int endNodeIndex = subtree.m_rootNodeIndex + subtree.m_subtreeSize; - int curIndex = startNodeIndex; - int escapeIndex; - int isLeafNode; - int aabbOverlap; - while (curIndex < endNodeIndex) - { - b3QuantizedBvhNodeData rootNode = quantizedNodes[curIndex]; - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode.m_quantizedAabbMin, rootNode.m_quantizedAabbMax); - isLeafNode = b3IsLeaf(&rootNode); - if (aabbOverlap) - { - if (isLeafNode) - { - int triangleIndex = b3GetTriangleIndex(&rootNode); - if (shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - int pairIdx = b3AtomicAdd(numConcavePairsOut, numChildrenB); - for (int b = 0; b < numChildrenB; b++) - { - if ((pairIdx + b) < maxNumConcavePairsCapacity) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b; - b3Int4 newPair = b3MakeInt4(bodyIndexA, bodyIndexB, triangleIndex, childShapeIndexB); - concavePairsOut[pairIdx + b] = newPair; - } - } - } - else - { - int pairIdx = b3AtomicInc(numConcavePairsOut); - if (pairIdx < maxNumConcavePairsCapacity) - { - b3Int4 newPair = b3MakeInt4(bodyIndexA, bodyIndexB, triangleIndex, 0); - concavePairsOut[pairIdx] = newPair; - } - } - } - curIndex++; - } - else - { - if (isLeafNode) - { - curIndex++; - } - else - { - escapeIndex = b3GetEscapeIndex(&rootNode); - curIndex += escapeIndex; - } - } - } - } - } -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h deleted file mode 100644 index 0d9b13f1d69..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef B3_CLIP_FACES_H -#define B3_CLIP_FACES_H - -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - -inline b3Float4 b3Lerp3(b3Float4ConstArg a, b3Float4ConstArg b, float t) -{ - return b3MakeFloat4(a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFaceGlobal(__global const b3Float4* pVtxIn, int numVertsIn, b3Float4ConstArg planeNormalWS, float planeEqWS, __global b3Float4* ppVtxOut) -{ - int ve; - float ds, de; - int numVertsOut = 0; - //double-check next test - // if (numVertsIn < 2) - // return 0; - - b3Float4 firstVertex = pVtxIn[numVertsIn - 1]; - b3Float4 endVertex = pVtxIn[0]; - - ds = b3Dot(planeNormalWS, firstVertex) + planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex = pVtxIn[ve]; - de = b3Dot(planeNormalWS, endVertex) + planeEqWS; - if (ds < 0) - { - if (de < 0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - } - } - else - { - if (de < 0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - -__kernel void clipFacesAndFindContactsKernel(__global const b3Float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global b3Int4* clippingFacesOut, - __global b3Float4* worldVertsA1, - __global b3Float4* worldNormalsA1, - __global b3Float4* worldVertsB1, - __global b3Float4* worldVertsB2, - int vertexFaceCapacity, - int pairIndex) -{ - // int i = get_global_id(0); - //int pairIndex = i; - int i = pairIndex; - - float minDist = -1e30f; - float maxDist = 0.02f; - - // if (i= 0) - { - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - - for (int e0 = 0; e0 < numVertsInA; e0++) - { - const b3Float4 aw = worldVertsA1[pairIndex * capacityWorldVertsB2 + e0]; - const b3Float4 bw = worldVertsA1[pairIndex * capacityWorldVertsB2 + ((e0 + 1) % numVertsInA)]; - const b3Float4 WorldEdge0 = aw - bw; - b3Float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; - b3Float4 planeNormalWS1 = -b3Cross(WorldEdge0, worldPlaneAnormal1); - b3Float4 worldA1 = aw; - float planeEqWS1 = -b3Dot(worldA1, planeNormalWS1); - b3Float4 planeNormalWS = planeNormalWS1; - float planeEqWS = planeEqWS1; - numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS, planeEqWS, pVtxOut); - __global b3Float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsInB = numVertsOut; - numVertsOut = 0; - } - - b3Float4 planeNormalWS = worldNormalsA1[pairIndex]; - float planeEqWS = -b3Dot(planeNormalWS, worldVertsA1[pairIndex * capacityWorldVertsB2]); - - for (int i = 0; i < numVertsInB; i++) - { - float depth = b3Dot(planeNormalWS, pVtxIn[i]) + planeEqWS; - if (depth <= minDist) - { - depth = minDist; - } - /* - static float maxDepth = 0.f; - if (depth < maxDepth) - { - maxDepth = depth; - if (maxDepth < -10) - { - printf("error at framecount %d?\n",myframecount); - } - printf("maxDepth = %f\n", maxDepth); - - } -*/ - if (depth <= maxDist) - { - b3Float4 pointInWorld = pVtxIn[i]; - pVtxOut[numLocalContactsOut++] = b3MakeFloat4(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth); - } - } - } - clippingFaces[pairIndex].w = numLocalContactsOut; - } - - for (int i = 0; i < numLocalContactsOut; i++) - pVtxIn[i] = pVtxOut[i]; - - } // if (hasSeparatingAxis[i]) - } // if (im_worldNormalOnB.w; -}; - -inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints) -{ - contact->m_worldNormalOnB.w = (float)numPoints; -}; - -#endif //B3_CONTACT4DATA_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h deleted file mode 100644 index ca68f4bc4eb..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h +++ /dev/null @@ -1,486 +0,0 @@ - -#ifndef B3_CONTACT_CONVEX_CONVEX_SAT_H -#define B3_CONTACT_CONVEX_CONVEX_SAT_H - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h" - -#define B3_MAX_VERTS 1024 - -inline b3Float4 b3Lerp3(const b3Float4& a, const b3Float4& b, float t) -{ - return b3MakeVector3(a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -inline int b3ClipFace(const b3Float4* pVtxIn, int numVertsIn, b3Float4& planeNormalWS, float planeEqWS, b3Float4* ppVtxOut) -{ - int ve; - float ds, de; - int numVertsOut = 0; - if (numVertsIn < 2) - return 0; - - b3Float4 firstVertex = pVtxIn[numVertsIn - 1]; - b3Float4 endVertex = pVtxIn[0]; - - ds = b3Dot3F4(planeNormalWS, firstVertex) + planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex = pVtxIn[ve]; - - de = b3Dot3F4(planeNormalWS, endVertex) + planeEqWS; - - if (ds < 0) - { - if (de < 0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - } - } - else - { - if (de < 0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - -inline int b3ClipFaceAgainstHull(const b3Float4& separatingNormal, const b3ConvexPolyhedronData* hullA, - const b3Float4& posA, const b3Quaternion& ornA, b3Float4* worldVertsB1, int numWorldVertsB1, - b3Float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, - //const b3Float4* verticesB, const b3GpuFace* facesB, const int* indicesB, - b3Float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - b3Float4* pVtxIn = worldVertsB1; - b3Float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA = -1; - { - float dmin = FLT_MAX; - for (int face = 0; face < hullA->m_numFaces; face++) - { - const b3Float4 Normal = b3MakeVector3( - facesA[hullA->m_faceOffset + face].m_plane.x, - facesA[hullA->m_faceOffset + face].m_plane.y, - facesA[hullA->m_faceOffset + face].m_plane.z, 0.f); - const b3Float4 faceANormalWS = b3QuatRotate(ornA, Normal); - - float d = b3Dot3F4(faceANormalWS, separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA < 0) - return numContactsOut; - - b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - //int numContacts = numWorldVertsB1; - int numVerticesA = polyA.m_numIndices; - for (int e0 = 0; e0 < numVerticesA; e0++) - { - const b3Float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]]; - const b3Float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]]; - const b3Float4 edge0 = a - b; - const b3Float4 WorldEdge0 = b3QuatRotate(ornA, edge0); - b3Float4 planeNormalA = b3MakeFloat4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); - b3Float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA); - - b3Float4 planeNormalWS1 = -b3Cross3(WorldEdge0, worldPlaneAnormal1); - b3Float4 worldA1 = b3TransformPoint(a, posA, ornA); - float planeEqWS1 = -b3Dot3F4(worldA1, planeNormalWS1); - - b3Float4 planeNormalWS = planeNormalWS1; - float planeEqWS = planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = b3ClipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - b3Float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - // only keep points that are behind the witness face - { - b3Float4 localPlaneNormal = b3MakeFloat4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); - float localPlaneEq = polyA.m_plane.w; - b3Float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal); - float planeEqWS = localPlaneEq - b3Dot3F4(planeNormalWS, posA); - for (int i = 0; i < numVertsIn; i++) - { - float depth = b3Dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS; - if (depth <= minDist) - { - depth = minDist; - } - if (numContactsOut < contactCapacity) - { - if (depth <= maxDist) - { - b3Float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth); - //printf("depth=%f\n",depth); - } - } - else - { - b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity); - } - } - } - - return numContactsOut; -} - -inline int b3ClipHullAgainstHull(const b3Float4& separatingNormal, - const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const b3Float4& posA, const b3Quaternion& ornA, const b3Float4& posB, const b3Quaternion& ornB, - b3Float4* worldVertsB1, b3Float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, - const b3AlignedObjectArray& verticesB, const b3AlignedObjectArray& facesB, const b3AlignedObjectArray& indicesB, - - b3Float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1 = 0; - - B3_PROFILE("clipHullAgainstHull"); - - //float curMaxDist=maxDist; - int closestFaceB = -1; - float dmax = -FLT_MAX; - - { - //B3_PROFILE("closestFaceB"); - if (hullB.m_numFaces != 1) - { - //printf("wtf\n"); - } - static bool once = true; - //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); - - for (int face = 0; face < hullB.m_numFaces; face++) - { -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("face %d\n", face); - const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face]; - if (once) - { - for (int i = 0; i < faceB->m_numIndices; i++) - { - b3Float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]]; - printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z); - } - } -#endif //BT_DEBUG_SAT_FACE \ - //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) - { - const b3Float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x, - facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f); - const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal); -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z); -#endif - float d = b3Dot3F4(WorldNormal, separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - once = false; - } - - b3Assert(closestFaceB >= 0); - { - //B3_PROFILE("worldVertsB1"); - const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB]; - const int numVertices = polyB.m_numIndices; - for (int e0 = 0; e0 < numVertices; e0++) - { - const b3Float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]]; - worldVertsB1[numWorldVertsB1++] = b3TransformPoint(b, posB, ornB); - } - } - - if (closestFaceB >= 0) - { - //B3_PROFILE("clipFaceAgainstHull"); - numContactsOut = b3ClipFaceAgainstHull((b3Float4&)separatingNormal, &hullA, - posA, ornA, - worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist, - verticesA, facesA, indicesA, - contactsOut, contactCapacity); - } - - return numContactsOut; -} - -inline int b3ClipHullHullSingle( - int bodyIndexA, int bodyIndexB, - const b3Float4& posA, - const b3Quaternion& ornA, - const b3Float4& posB, - const b3Quaternion& ornB, - - int collidableIndexA, int collidableIndexB, - - const b3AlignedObjectArray* bodyBuf, - b3AlignedObjectArray* globalContactOut, - int& nContacts, - - const b3AlignedObjectArray& hostConvexDataA, - const b3AlignedObjectArray& hostConvexDataB, - - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& uniqueEdgesA, - const b3AlignedObjectArray& facesA, - const b3AlignedObjectArray& indicesA, - - const b3AlignedObjectArray& verticesB, - const b3AlignedObjectArray& uniqueEdgesB, - const b3AlignedObjectArray& facesB, - const b3AlignedObjectArray& indicesB, - - const b3AlignedObjectArray& hostCollidablesA, - const b3AlignedObjectArray& hostCollidablesB, - const b3Vector3& sepNormalWorldSpace, - int maxContactCapacity) -{ - int contactIndex = -1; - b3ConvexPolyhedronData hullA, hullB; - - b3Collidable colA = hostCollidablesA[collidableIndexA]; - hullA = hostConvexDataA[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - b3Collidable colB = hostCollidablesB[collidableIndexB]; - hullB = hostConvexDataB[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - b3Float4 contactsOut[B3_MAX_VERTS]; - int localContactCapacity = B3_MAX_VERTS; - -#ifdef _WIN32 - b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); - b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); -#endif - - { - b3Float4 worldVertsB1[B3_MAX_VERTS]; - b3Float4 worldVertsB2[B3_MAX_VERTS]; - int capacityWorldVerts = B3_MAX_VERTS; - - b3Float4 hostNormal = b3MakeFloat4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f); - int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; - int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; - - b3Scalar minDist = -1; - b3Scalar maxDist = 0.; - - b3Transform trA, trB; - { - //B3_PROFILE("b3TransformPoint computation"); - //trA.setIdentity(); - trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z)); - trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w)); - - //trB.setIdentity(); - trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z)); - trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w)); - } - - b3Quaternion trAorn = trA.getRotation(); - b3Quaternion trBorn = trB.getRotation(); - - int numContactsOut = b3ClipHullAgainstHull(hostNormal, - hostConvexDataA.at(shapeA), - hostConvexDataB.at(shapeB), - (b3Float4&)trA.getOrigin(), (b3Quaternion&)trAorn, - (b3Float4&)trB.getOrigin(), (b3Quaternion&)trBorn, - worldVertsB1, worldVertsB2, capacityWorldVerts, - minDist, maxDist, - verticesA, facesA, indicesA, - verticesB, facesB, indicesB, - - contactsOut, localContactCapacity); - - if (numContactsOut > 0) - { - B3_PROFILE("overlap"); - - b3Float4 normalOnSurfaceB = (b3Float4&)hostNormal; - // b3Float4 centerOut; - - b3Int4 contactIdx; - contactIdx.x = 0; - contactIdx.y = 1; - contactIdx.z = 2; - contactIdx.w = 3; - - int numPoints = 0; - - { - B3_PROFILE("extractManifold"); - numPoints = b3ReduceContacts(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); - } - - b3Assert(numPoints); - - if (nContacts < maxContactCapacity) - { - contactIndex = nContacts; - globalContactOut->expand(); - b3Contact4Data& contact = globalContactOut->at(nContacts); - contact.m_batchIdx = 0; //i; - contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; - contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; - - contact.m_frictionCoeffCmp = 45874; - contact.m_restituitionCoeffCmp = 0; - - // float distance = 0.f; - for (int p = 0; p < numPoints; p++) - { - contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]]; //check if it is actually on B - contact.m_worldNormalOnB = normalOnSurfaceB; - } - //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); - contact.m_worldNormalOnB.w = (b3Scalar)numPoints; - nContacts++; - } - else - { - b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity); - } - } - } - return contactIndex; -} - -inline int b3ContactConvexConvexSAT( - int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3AlignedObjectArray& rigidBodies, - const b3AlignedObjectArray& collidables, - const b3AlignedObjectArray& convexShapes, - const b3AlignedObjectArray& convexVertices, - const b3AlignedObjectArray& uniqueEdges, - const b3AlignedObjectArray& convexIndices, - const b3AlignedObjectArray& faces, - b3AlignedObjectArray& globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - int contactIndex = -1; - - b3Float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - b3Float4 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - - b3ConvexPolyhedronData hullA, hullB; - - b3Float4 sepNormalWorldSpace; - - b3Collidable colA = collidables[collidableIndexA]; - hullA = convexShapes[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - b3Collidable colB = collidables[collidableIndexB]; - hullB = convexShapes[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - -#ifdef _WIN32 - b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); - b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); -#endif - - bool foundSepAxis = b3FindSeparatingAxis(hullA, hullB, - posA, - ornA, - posB, - ornB, - - convexVertices, uniqueEdges, faces, convexIndices, - convexVertices, uniqueEdges, faces, convexIndices, - - sepNormalWorldSpace); - - if (foundSepAxis) - { - contactIndex = b3ClipHullHullSingle( - bodyIndexA, bodyIndexB, - posA, ornA, - posB, ornB, - collidableIndexA, collidableIndexB, - &rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - } - - return contactIndex; -} - -#endif //B3_CONTACT_CONVEX_CONVEX_SAT_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h deleted file mode 100644 index acf7c1b1802..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h +++ /dev/null @@ -1,153 +0,0 @@ - -#ifndef B3_CONTACT_SPHERE_SPHERE_H -#define B3_CONTACT_SPHERE_SPHERE_H - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - float radius = collidables[collidableIndexA].m_radius; - float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; - b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; - - float4 pos = rigidBodies[bodyIndexB].m_pos; - - b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; - - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(pos); - tr.setRotation(quat); - b3Transform trInv = tr.inverse(); - - float4 spherePos = trInv(spherePos1); - - int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; - int shapeIndex = collidables[collidableIndex].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = b3MakeVector3(0, 0, 0, 0); - float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); - float minDist = -1000000.f; // TODO: What is the largest/smallest float? - bool bCollide = true; - int region = -1; - float4 localHitNormal; - for (int f = 0; f < numFaces; f++) - { - b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f]; - float4 planeEqn; - float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f); - float4 n1 = localPlaneNormal; //quatRotate(quat,localPlaneNormal); - planeEqn = n1; - planeEqn[3] = face.m_plane.w; - - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - if (dist > radius) - { - bCollide = false; - break; - } - - if (dist > 0) - { - //might hit an edge or vertex - b3Vector3 out; - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist > minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region = 1; - } - } - else - { - b3Vector3 tmp = spherePos - out; - b3Scalar l2 = tmp.length2(); - if (l2 < radius * radius) - { - dist = b3Sqrt(l2); - if (dist > minDist) - { - minDist = dist; - closestPnt = out; - localHitNormal = tmp / dist; - region = 2; - } - } - else - { - bCollide = false; - break; - } - } - } - else - { - if (dist > minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region = 3; - } - } - } - static int numChecks = 0; - numChecks++; - - if (bCollide && minDist > -10000) - { - float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal; //-hitNormalWorld; - float4 pOnB1 = tr(closestPnt); - //printf("dist ,%f,",minDist); - float actualDepth = minDist - radius; - if (actualDepth < 0) - { - //printf("actualDepth = ,%f,", actualDepth); - //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); - //printf("region=,%d,\n", region); - pOnB1[3] = actualDepth; - - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB1; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; - c->m_worldPosB[0] = pOnB1; - int numPoints = 1; - c->m_worldNormalOnB.w = (b3Scalar)numPoints; - } //if (dstIdx < numPairs) - } - } //if (hasCollision) -} -#endif //B3_CONTACT_SPHERE_SPHERE_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h deleted file mode 100644 index d5a73bd4f57..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h +++ /dev/null @@ -1,38 +0,0 @@ - -#ifndef B3_CONVEX_POLYHEDRON_DATA_H -#define B3_CONVEX_POLYHEDRON_DATA_H - -#include "Bullet3Common/shared/b3Float4.h" -#include "Bullet3Common/shared/b3Quat.h" - -typedef struct b3GpuFace b3GpuFace_t; -struct b3GpuFace -{ - b3Float4 m_plane; - int m_indexOffset; - int m_numIndices; - int m_unusedPadding1; - int m_unusedPadding2; -}; - -typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t; - -struct b3ConvexPolyhedronData -{ - b3Float4 m_localCenter; - b3Float4 m_extents; - b3Float4 mC; - b3Float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -}; - -#endif //B3_CONVEX_POLYHEDRON_DATA_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h deleted file mode 100644 index 983554eb2e5..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h +++ /dev/null @@ -1,797 +0,0 @@ -#ifndef B3_FIND_CONCAVE_SEPARATING_AXIS_H -#define B3_FIND_CONCAVE_SEPARATING_AXIS_H - -#define B3_TRIANGLE_NUM_CONVEX_FACES 5 - -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - -inline void b3Project(__global const b3ConvexPolyhedronData* hull, b3Float4ConstArg pos, b3QuatConstArg orn, - const b3Float4* dir, __global const b3Float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn), *dir); - float offset = b3Dot(pos, *dir); - for (int i = 0; i < numVerts; i++) - { - float dp = b3Dot(vertices[hull->m_vertexOffset + i], localDir); - if (dp < min[0]) - min[0] = dp; - if (dp > max[0]) - max[0] = dp; - } - if (min[0] > max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool b3TestSepAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, - b3Float4ConstArg posA, b3QuatConstArg ornA, - b3Float4ConstArg posB, b3QuatConstArg ornB, - b3Float4* sep_axis, const b3Float4* verticesA, __global const b3Float4* verticesB, float* depth) -{ - float Min0, Max0; - float Min1, Max1; - b3Project(hullA, posA, ornA, sep_axis, verticesA, &Min0, &Max0); - b3Project(hullB, posB, ornB, sep_axis, verticesB, &Min1, &Max1); - - if (Max0 < Min1 || Max1 < Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0 < d1 ? d0 : d1; - return true; -} - -bool b3FindSeparatingAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, - b3Float4ConstArg posA1, - b3QuatConstArg ornA, - b3Float4ConstArg posB1, - b3QuatConstArg ornB, - b3Float4ConstArg DeltaC2, - - const b3Float4* verticesA, - const b3Float4* uniqueEdgesA, - const b3GpuFace* facesA, - const int* indicesA, - - __global const b3Float4* verticesB, - __global const b3Float4* uniqueEdgesB, - __global const b3GpuFace* facesB, - __global const int* indicesB, - b3Float4* sep, - float* dmin) -{ - b3Float4 posA = posA1; - posA.w = 0.f; - b3Float4 posB = posB1; - posB.w = 0.f; - /* - static int maxFaceVertex = 0; - - int curFaceVertexAB = hullA->m_numFaces*hullB->m_numVertices; - curFaceVertexAB+= hullB->m_numFaces*hullA->m_numVertices; - - if (curFaceVertexAB>maxFaceVertex) - { - maxFaceVertex = curFaceVertexAB; - printf("curFaceVertexAB = %d\n",curFaceVertexAB); - printf("hullA->m_numFaces = %d\n",hullA->m_numFaces); - printf("hullA->m_numVertices = %d\n",hullA->m_numVertices); - printf("hullB->m_numVertices = %d\n",hullB->m_numVertices); - } -*/ - - int curPlaneTests = 0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for (int i = 0; i < numFacesA; i++) - { - const b3Float4 normal = facesA[hullA->m_faceOffset + i].m_plane; - b3Float4 faceANormalWS = b3QuatRotate(ornA, normal); - if (b3Dot(DeltaC2, faceANormalWS) < 0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, &faceANormalWS, verticesA, verticesB, &d)) - return false; - if (d < *dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if ((b3Dot(-DeltaC2, *sep)) > 0.0f) - { - *sep = -(*sep); - } - return true; -} - -b3Vector3 unitSphere162[] = - { - b3MakeVector3(0.000000, -1.000000, 0.000000), - b3MakeVector3(0.203181, -0.967950, 0.147618), - b3MakeVector3(-0.077607, -0.967950, 0.238853), - b3MakeVector3(0.723607, -0.447220, 0.525725), - b3MakeVector3(0.609547, -0.657519, 0.442856), - b3MakeVector3(0.812729, -0.502301, 0.295238), - b3MakeVector3(-0.251147, -0.967949, 0.000000), - b3MakeVector3(-0.077607, -0.967950, -0.238853), - b3MakeVector3(0.203181, -0.967950, -0.147618), - b3MakeVector3(0.860698, -0.251151, 0.442858), - b3MakeVector3(-0.276388, -0.447220, 0.850649), - b3MakeVector3(-0.029639, -0.502302, 0.864184), - b3MakeVector3(-0.155215, -0.251152, 0.955422), - b3MakeVector3(-0.894426, -0.447216, 0.000000), - b3MakeVector3(-0.831051, -0.502299, 0.238853), - b3MakeVector3(-0.956626, -0.251149, 0.147618), - b3MakeVector3(-0.276388, -0.447220, -0.850649), - b3MakeVector3(-0.483971, -0.502302, -0.716565), - b3MakeVector3(-0.436007, -0.251152, -0.864188), - b3MakeVector3(0.723607, -0.447220, -0.525725), - b3MakeVector3(0.531941, -0.502302, -0.681712), - b3MakeVector3(0.687159, -0.251152, -0.681715), - b3MakeVector3(0.687159, -0.251152, 0.681715), - b3MakeVector3(-0.436007, -0.251152, 0.864188), - b3MakeVector3(-0.956626, -0.251149, -0.147618), - b3MakeVector3(-0.155215, -0.251152, -0.955422), - b3MakeVector3(0.860698, -0.251151, -0.442858), - b3MakeVector3(0.276388, 0.447220, 0.850649), - b3MakeVector3(0.483971, 0.502302, 0.716565), - b3MakeVector3(0.232822, 0.657519, 0.716563), - b3MakeVector3(-0.723607, 0.447220, 0.525725), - b3MakeVector3(-0.531941, 0.502302, 0.681712), - b3MakeVector3(-0.609547, 0.657519, 0.442856), - b3MakeVector3(-0.723607, 0.447220, -0.525725), - b3MakeVector3(-0.812729, 0.502301, -0.295238), - b3MakeVector3(-0.609547, 0.657519, -0.442856), - b3MakeVector3(0.276388, 0.447220, -0.850649), - b3MakeVector3(0.029639, 0.502302, -0.864184), - b3MakeVector3(0.232822, 0.657519, -0.716563), - b3MakeVector3(0.894426, 0.447216, 0.000000), - b3MakeVector3(0.831051, 0.502299, -0.238853), - b3MakeVector3(0.753442, 0.657515, 0.000000), - b3MakeVector3(-0.232822, -0.657519, 0.716563), - b3MakeVector3(-0.162456, -0.850654, 0.499995), - b3MakeVector3(0.052790, -0.723612, 0.688185), - b3MakeVector3(0.138199, -0.894429, 0.425321), - b3MakeVector3(0.262869, -0.525738, 0.809012), - b3MakeVector3(0.361805, -0.723611, 0.587779), - b3MakeVector3(0.531941, -0.502302, 0.681712), - b3MakeVector3(0.425323, -0.850654, 0.309011), - b3MakeVector3(0.812729, -0.502301, -0.295238), - b3MakeVector3(0.609547, -0.657519, -0.442856), - b3MakeVector3(0.850648, -0.525736, 0.000000), - b3MakeVector3(0.670817, -0.723611, -0.162457), - b3MakeVector3(0.670817, -0.723610, 0.162458), - b3MakeVector3(0.425323, -0.850654, -0.309011), - b3MakeVector3(0.447211, -0.894428, 0.000001), - b3MakeVector3(-0.753442, -0.657515, 0.000000), - b3MakeVector3(-0.525730, -0.850652, 0.000000), - b3MakeVector3(-0.638195, -0.723609, 0.262864), - b3MakeVector3(-0.361801, -0.894428, 0.262864), - b3MakeVector3(-0.688189, -0.525736, 0.499997), - b3MakeVector3(-0.447211, -0.723610, 0.525729), - b3MakeVector3(-0.483971, -0.502302, 0.716565), - b3MakeVector3(-0.232822, -0.657519, -0.716563), - b3MakeVector3(-0.162456, -0.850654, -0.499995), - b3MakeVector3(-0.447211, -0.723611, -0.525727), - b3MakeVector3(-0.361801, -0.894429, -0.262863), - b3MakeVector3(-0.688189, -0.525736, -0.499997), - b3MakeVector3(-0.638195, -0.723609, -0.262863), - b3MakeVector3(-0.831051, -0.502299, -0.238853), - b3MakeVector3(0.361804, -0.723612, -0.587779), - b3MakeVector3(0.138197, -0.894429, -0.425321), - b3MakeVector3(0.262869, -0.525738, -0.809012), - b3MakeVector3(0.052789, -0.723611, -0.688186), - b3MakeVector3(-0.029639, -0.502302, -0.864184), - b3MakeVector3(0.956626, 0.251149, 0.147618), - b3MakeVector3(0.956626, 0.251149, -0.147618), - b3MakeVector3(0.951058, -0.000000, 0.309013), - b3MakeVector3(1.000000, 0.000000, 0.000000), - b3MakeVector3(0.947213, -0.276396, 0.162458), - b3MakeVector3(0.951058, 0.000000, -0.309013), - b3MakeVector3(0.947213, -0.276396, -0.162458), - b3MakeVector3(0.155215, 0.251152, 0.955422), - b3MakeVector3(0.436007, 0.251152, 0.864188), - b3MakeVector3(-0.000000, -0.000000, 1.000000), - b3MakeVector3(0.309017, 0.000000, 0.951056), - b3MakeVector3(0.138199, -0.276398, 0.951055), - b3MakeVector3(0.587786, 0.000000, 0.809017), - b3MakeVector3(0.447216, -0.276398, 0.850648), - b3MakeVector3(-0.860698, 0.251151, 0.442858), - b3MakeVector3(-0.687159, 0.251152, 0.681715), - b3MakeVector3(-0.951058, -0.000000, 0.309013), - b3MakeVector3(-0.809018, 0.000000, 0.587783), - b3MakeVector3(-0.861803, -0.276396, 0.425324), - b3MakeVector3(-0.587786, 0.000000, 0.809017), - b3MakeVector3(-0.670819, -0.276397, 0.688191), - b3MakeVector3(-0.687159, 0.251152, -0.681715), - b3MakeVector3(-0.860698, 0.251151, -0.442858), - b3MakeVector3(-0.587786, -0.000000, -0.809017), - b3MakeVector3(-0.809018, -0.000000, -0.587783), - b3MakeVector3(-0.670819, -0.276397, -0.688191), - b3MakeVector3(-0.951058, 0.000000, -0.309013), - b3MakeVector3(-0.861803, -0.276396, -0.425324), - b3MakeVector3(0.436007, 0.251152, -0.864188), - b3MakeVector3(0.155215, 0.251152, -0.955422), - b3MakeVector3(0.587786, -0.000000, -0.809017), - b3MakeVector3(0.309017, -0.000000, -0.951056), - b3MakeVector3(0.447216, -0.276398, -0.850648), - b3MakeVector3(0.000000, 0.000000, -1.000000), - b3MakeVector3(0.138199, -0.276398, -0.951055), - b3MakeVector3(0.670820, 0.276396, 0.688190), - b3MakeVector3(0.809019, -0.000002, 0.587783), - b3MakeVector3(0.688189, 0.525736, 0.499997), - b3MakeVector3(0.861804, 0.276394, 0.425323), - b3MakeVector3(0.831051, 0.502299, 0.238853), - b3MakeVector3(-0.447216, 0.276397, 0.850649), - b3MakeVector3(-0.309017, -0.000001, 0.951056), - b3MakeVector3(-0.262869, 0.525738, 0.809012), - b3MakeVector3(-0.138199, 0.276397, 0.951055), - b3MakeVector3(0.029639, 0.502302, 0.864184), - b3MakeVector3(-0.947213, 0.276396, -0.162458), - b3MakeVector3(-1.000000, 0.000001, 0.000000), - b3MakeVector3(-0.850648, 0.525736, -0.000000), - b3MakeVector3(-0.947213, 0.276397, 0.162458), - b3MakeVector3(-0.812729, 0.502301, 0.295238), - b3MakeVector3(-0.138199, 0.276397, -0.951055), - b3MakeVector3(-0.309016, -0.000000, -0.951057), - b3MakeVector3(-0.262869, 0.525738, -0.809012), - b3MakeVector3(-0.447215, 0.276397, -0.850649), - b3MakeVector3(-0.531941, 0.502302, -0.681712), - b3MakeVector3(0.861804, 0.276396, -0.425322), - b3MakeVector3(0.809019, 0.000000, -0.587782), - b3MakeVector3(0.688189, 0.525736, -0.499997), - b3MakeVector3(0.670821, 0.276397, -0.688189), - b3MakeVector3(0.483971, 0.502302, -0.716565), - b3MakeVector3(0.077607, 0.967950, 0.238853), - b3MakeVector3(0.251147, 0.967949, 0.000000), - b3MakeVector3(0.000000, 1.000000, 0.000000), - b3MakeVector3(0.162456, 0.850654, 0.499995), - b3MakeVector3(0.361800, 0.894429, 0.262863), - b3MakeVector3(0.447209, 0.723612, 0.525728), - b3MakeVector3(0.525730, 0.850652, 0.000000), - b3MakeVector3(0.638194, 0.723610, 0.262864), - b3MakeVector3(-0.203181, 0.967950, 0.147618), - b3MakeVector3(-0.425323, 0.850654, 0.309011), - b3MakeVector3(-0.138197, 0.894430, 0.425320), - b3MakeVector3(-0.361804, 0.723612, 0.587778), - b3MakeVector3(-0.052790, 0.723612, 0.688185), - b3MakeVector3(-0.203181, 0.967950, -0.147618), - b3MakeVector3(-0.425323, 0.850654, -0.309011), - b3MakeVector3(-0.447210, 0.894429, 0.000000), - b3MakeVector3(-0.670817, 0.723611, -0.162457), - b3MakeVector3(-0.670817, 0.723611, 0.162457), - b3MakeVector3(0.077607, 0.967950, -0.238853), - b3MakeVector3(0.162456, 0.850654, -0.499995), - b3MakeVector3(-0.138197, 0.894430, -0.425320), - b3MakeVector3(-0.052790, 0.723612, -0.688185), - b3MakeVector3(-0.361804, 0.723612, -0.587778), - b3MakeVector3(0.361800, 0.894429, -0.262863), - b3MakeVector3(0.638194, 0.723610, -0.262864), - b3MakeVector3(0.447209, 0.723612, -0.525728)}; - -bool b3FindSeparatingAxisEdgeEdge(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, - b3Float4ConstArg posA1, - b3QuatConstArg ornA, - b3Float4ConstArg posB1, - b3QuatConstArg ornB, - b3Float4ConstArg DeltaC2, - const b3Float4* verticesA, - const b3Float4* uniqueEdgesA, - const b3GpuFace* facesA, - const int* indicesA, - __global const b3Float4* verticesB, - __global const b3Float4* uniqueEdgesB, - __global const b3GpuFace* facesB, - __global const int* indicesB, - b3Float4* sep, - float* dmin, - bool searchAllEdgeEdge) -{ - b3Float4 posA = posA1; - posA.w = 0.f; - b3Float4 posB = posB1; - posB.w = 0.f; - - // int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - static int maxEdgeTests = 0; - int curEdgeTests = hullA->m_numUniqueEdges * hullB->m_numUniqueEdges; - if (curEdgeTests > maxEdgeTests) - { - maxEdgeTests = curEdgeTests; - printf("maxEdgeTests = %d\n", maxEdgeTests); - printf("hullA->m_numUniqueEdges = %d\n", hullA->m_numUniqueEdges); - printf("hullB->m_numUniqueEdges = %d\n", hullB->m_numUniqueEdges); - } - - if (searchAllEdgeEdge) - { - for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++) - { - const b3Float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset + e0]; - b3Float4 edge0World = b3QuatRotate(ornA, edge0); - - for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++) - { - const b3Float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset + e1]; - b3Float4 edge1World = b3QuatRotate(ornB, edge1); - - b3Float4 crossje = b3Cross(edge0World, edge1World); - - curEdgeEdge++; - if (!b3IsAlmostZero(crossje)) - { - crossje = b3Normalized(crossje); - if (b3Dot(DeltaC2, crossje) < 0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0, Max0; - float Min1, Max1; - b3Project(hullA, posA, ornA, &crossje, verticesA, &Min0, &Max0); - b3Project(hullB, posB, ornB, &crossje, verticesB, &Min1, &Max1); - - if (Max0 < Min1 || Max1 < Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0 < d1 ? d0 : d1; - result = true; - } - - if (dist < *dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - } - } - else - { - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - //printf("numDirections =%d\n",numDirections ); - - for (int i = 0; i < numDirections; i++) - { - b3Float4 crossje = unitSphere162[i]; - { - //if (b3Dot(DeltaC2,crossje)>0) - { - float dist; - bool result = true; - { - float Min0, Max0; - float Min1, Max1; - b3Project(hullA, posA, ornA, &crossje, verticesA, &Min0, &Max0); - b3Project(hullB, posB, ornB, &crossje, verticesB, &Min1, &Max1); - - if (Max0 < Min1 || Max1 < Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0 < d1 ? d0 : d1; - result = true; - } - - if (dist < *dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - } - } - - if ((b3Dot(-DeltaC2, *sep)) > 0.0f) - { - *sep = -(*sep); - } - return true; -} - -inline int b3FindClippingFaces(b3Float4ConstArg separatingNormal, - __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - b3Float4ConstArg posA, b3QuatConstArg ornA, b3Float4ConstArg posB, b3QuatConstArg ornB, - __global b3Float4* worldVertsA1, - __global b3Float4* worldNormalsA1, - __global b3Float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - __global const b3Float4* verticesA, - __global const b3GpuFace_t* facesA, - __global const int* indicesA, - __global const b3Float4* verticesB, - __global const b3GpuFace_t* facesB, - __global const int* indicesB, - - __global b3Int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1 = 0; - - int closestFaceB = -1; - float dmax = -FLT_MAX; - - { - for (int face = 0; face < hullB->m_numFaces; face++) - { - const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset + face].m_plane.x, - facesB[hullB->m_faceOffset + face].m_plane.y, facesB[hullB->m_faceOffset + face].m_plane.z, 0.f); - const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal); - float d = b3Dot(WorldNormal, separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = facesB[hullB->m_faceOffset + closestFaceB]; - const int numVertices = polyB.m_numIndices; - for (int e0 = 0; e0 < numVertices; e0++) - { - const b3Float4 b = verticesB[hullB->m_vertexOffset + indicesB[polyB.m_indexOffset + e0]]; - worldVertsB1[pairIndex * capacityWorldVerts + numWorldVertsB1++] = b3TransformPoint(b, posB, ornB); - } - } - - int closestFaceA = -1; - { - float dmin = FLT_MAX; - for (int face = 0; face < hullA->m_numFaces; face++) - { - const b3Float4 Normal = b3MakeFloat4( - facesA[hullA->m_faceOffset + face].m_plane.x, - facesA[hullA->m_faceOffset + face].m_plane.y, - facesA[hullA->m_faceOffset + face].m_plane.z, - 0.f); - const b3Float4 faceANormalWS = b3QuatRotate(ornA, Normal); - - float d = b3Dot(faceANormalWS, separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset + closestFaceA].m_numIndices; - for (int e0 = 0; e0 < numVerticesA; e0++) - { - const b3Float4 a = verticesA[hullA->m_vertexOffset + indicesA[facesA[hullA->m_faceOffset + closestFaceA].m_indexOffset + e0]]; - worldVertsA1[pairIndex * capacityWorldVerts + e0] = b3TransformPoint(a, posA, ornA); - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - return numContactsOut; -} - -__kernel void b3FindConcaveSeparatingAxisKernel(__global b3Int4* concavePairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3Float4* vertices, - __global const b3Float4* uniqueEdges, - __global const b3GpuFace* faces, - __global const int* indices, - __global const b3GpuChildShape* gpuChildShapes, - __global b3Aabb* aabbs, - __global b3Float4* concaveSeparatingNormalsOut, - __global b3Int4* clippingFacesOut, - __global b3Vector3* worldVertsA1Out, - __global b3Vector3* worldNormalsA1Out, - __global b3Vector3* worldVertsB1Out, - __global int* hasSeparatingNormals, - int vertexFaceCapacity, - int numConcavePairs, - int pairIdx) -{ - int i = pairIdx; - /* int i = get_global_id(0); - if (i>=numConcavePairs) - return; - int pairIdx = i; - */ - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType != SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - hasSeparatingNormals[i] = 0; - - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - b3ConvexPolyhedronData convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - b3Float4 localCenter = b3MakeFloat4(0.f, 0.f, 0.f, 0.f); - - b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset + f]; - b3Aabb triAabb; - triAabb.m_minVec = b3MakeFloat4(1e30f, 1e30f, 1e30f, 0.f); - triAabb.m_maxVec = b3MakeFloat4(-1e30f, -1e30f, -1e30f, 0.f); - - b3Float4 verticesA[3]; - for (int i = 0; i < 3; i++) - { - int index = indices[face.m_indexOffset + i]; - b3Float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset + index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_minVec = b3MinFloat4(triAabb.m_minVec, vert); - triAabb.m_maxVec = b3MaxFloat4(triAabb.m_maxVec, vert); - } - - overlap = true; - overlap = (triAabb.m_minVec.x > aabbs[bodyIndexB].m_maxVec.x || triAabb.m_maxVec.x < aabbs[bodyIndexB].m_minVec.x) ? false : overlap; - overlap = (triAabb.m_minVec.z > aabbs[bodyIndexB].m_maxVec.z || triAabb.m_maxVec.z < aabbs[bodyIndexB].m_minVec.z) ? false : overlap; - overlap = (triAabb.m_minVec.y > aabbs[bodyIndexB].m_maxVec.y || triAabb.m_maxVec.y < aabbs[bodyIndexB].m_minVec.y) ? false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis = 5; - b3Float4 sepAxis = b3MakeFloat4(1, 2, 3, 4); - - // int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - b3Float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1] - verticesA[0]); - uniqueEdgesA[1] = (verticesA[2] - verticesA[1]); - uniqueEdgesA[2] = (verticesA[0] - verticesA[2]); - - convexPolyhedronA.m_faceOffset = 0; - - b3Float4 normal = b3MakeFloat4(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f); - - b3GpuFace facesA[B3_TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3 + 3 + 2 + 2 + 2]; - int curUsedIndices = 0; - int fidx = 0; - - //front size of triangle - { - facesA[fidx].m_indexOffset = curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices += 3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices = 3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset = curUsedIndices; - indicesA[3] = 2; - indicesA[4] = 1; - indicesA[5] = 0; - curUsedIndices += 3; - float c = b3Dot(normal, verticesA[0]); - // float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices = 3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices = 3; - int prevVertex = numVertices - 1; - for (int i = 0; i < numVertices; i++) - { - b3Float4 v0 = verticesA[i]; - b3Float4 v1 = verticesA[prevVertex]; - - b3Float4 edgeNormal = b3Normalized(b3Cross(normal, v1 - v0)); - float c = -b3Dot(edgeNormal, v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset = curUsedIndices; - indicesA[curUsedIndices++] = i; - indicesA[curUsedIndices++] = prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = B3_TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter * (1.f / 3.f); - - b3Float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - b3Float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild; //collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - b3Float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quaternion childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - b3Float4 newPosB = b3TransformPoint(childPosB, posB, ornB); - b3Quaternion newOrnB = b3QuatMul(ornB, childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - b3Float4 c0local = convexPolyhedronA.m_localCenter; - b3Float4 c0 = b3TransformPoint(c0local, posA, ornA); - b3Float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Float4 c1 = b3TransformPoint(c1local, posB, ornB); - const b3Float4 DeltaC2 = c0 - c1; - - bool sepA = b3FindSeparatingAxis(&convexPolyhedronA, &convexShapes[shapeIndexB], - posA, ornA, - posB, ornB, - DeltaC2, - verticesA, uniqueEdgesA, facesA, indicesA, - vertices, uniqueEdges, faces, indices, - &sepAxis, &dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } - else - { - bool sepB = b3FindSeparatingAxis(&convexShapes[shapeIndexB], &convexPolyhedronA, - posB, ornB, - posA, ornA, - DeltaC2, - vertices, uniqueEdges, faces, indices, - verticesA, uniqueEdgesA, facesA, indicesA, - &sepAxis, &dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } - else - { - bool sepEE = b3FindSeparatingAxisEdgeEdge(&convexPolyhedronA, &convexShapes[shapeIndexB], - posA, ornA, - posB, ornB, - DeltaC2, - verticesA, uniqueEdgesA, facesA, indicesA, - vertices, uniqueEdges, faces, indices, - &sepAxis, &dmin, true); - - if (!sepEE) - { - hasSeparatingAxis = 0; - } - else - { - hasSeparatingAxis = 1; - } - } - } - - if (hasSeparatingAxis) - { - hasSeparatingNormals[i] = 1; - sepAxis.w = dmin; - concaveSeparatingNormalsOut[pairIdx] = sepAxis; - - //now compute clipping faces A and B, and world-space clipping vertices A and B... - - float minDist = -1e30f; - float maxDist = 0.02f; - - b3FindClippingFaces(sepAxis, - &convexPolyhedronA, - &convexShapes[shapeIndexB], - posA, ornA, - posB, ornB, - worldVertsA1Out, - worldNormalsA1Out, - worldVertsB1Out, - vertexFaceCapacity, - minDist, maxDist, - verticesA, - facesA, - indicesA, - - vertices, - faces, - indices, - clippingFacesOut, pairIdx); - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } -} - -#endif //B3_FIND_CONCAVE_SEPARATING_AXIS_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h deleted file mode 100644 index b4981ae6540..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h +++ /dev/null @@ -1,197 +0,0 @@ -#ifndef B3_FIND_SEPARATING_AXIS_H -#define B3_FIND_SEPARATING_AXIS_H - -inline void b3ProjectAxis(const b3ConvexPolyhedronData& hull, const b3Float4& pos, const b3Quaternion& orn, const b3Float4& dir, const b3AlignedObjectArray& vertices, b3Scalar& min, b3Scalar& max) -{ - min = FLT_MAX; - max = -FLT_MAX; - int numVerts = hull.m_numVertices; - - const b3Float4 localDir = b3QuatRotate(orn.inverse(), dir); - - b3Scalar offset = b3Dot3F4(pos, dir); - - for (int i = 0; i < numVerts; i++) - { - //b3Vector3 pt = trans * vertices[m_vertexOffset+i]; - //b3Scalar dp = pt.dot(dir); - //b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; - b3Scalar dp = b3Dot3F4((b3Float4&)vertices[hull.m_vertexOffset + i], localDir); - //b3Assert(dp==dpL); - if (dp < min) min = dp; - if (dp > max) max = dp; - } - if (min > max) - { - b3Scalar tmp = min; - min = max; - max = tmp; - } - min += offset; - max += offset; -} - -inline bool b3TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const b3Float4& posA, const b3Quaternion& ornA, - const b3Float4& posB, const b3Quaternion& ornB, - const b3Float4& sep_axis, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& verticesB, b3Scalar& depth) -{ - b3Scalar Min0, Max0; - b3Scalar Min1, Max1; - b3ProjectAxis(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0); - b3ProjectAxis(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1); - - if (Max0 < Min1 || Max1 < Min0) - return false; - - b3Scalar d0 = Max0 - Min1; - b3Assert(d0 >= 0.0f); - b3Scalar d1 = Max1 - Min0; - b3Assert(d1 >= 0.0f); - depth = d0 < d1 ? d0 : d1; - return true; -} - -inline bool b3FindSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const b3Float4& posA1, - const b3Quaternion& ornA, - const b3Float4& posB1, - const b3Quaternion& ornB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& uniqueEdgesA, - const b3AlignedObjectArray& facesA, - const b3AlignedObjectArray& indicesA, - const b3AlignedObjectArray& verticesB, - const b3AlignedObjectArray& uniqueEdgesB, - const b3AlignedObjectArray& facesB, - const b3AlignedObjectArray& indicesB, - - b3Vector3& sep) -{ - B3_PROFILE("findSeparatingAxis"); - - b3Float4 posA = posA1; - posA.w = 0.f; - b3Float4 posB = posB1; - posB.w = 0.f; - //#ifdef TEST_INTERNAL_OBJECTS - b3Float4 c0local = (b3Float4&)hullA.m_localCenter; - - b3Float4 c0 = b3TransformPoint(c0local, posA, ornA); - b3Float4 c1local = (b3Float4&)hullB.m_localCenter; - b3Float4 c1 = b3TransformPoint(c1local, posB, ornB); - const b3Float4 deltaC2 = c0 - c1; - //#endif - - b3Scalar dmin = FLT_MAX; - int curPlaneTests = 0; - - int numFacesA = hullA.m_numFaces; - // Test normals from hullA - for (int i = 0; i < numFacesA; i++) - { - const b3Float4& normal = (b3Float4&)facesA[hullA.m_faceOffset + i].m_plane; - b3Float4 faceANormalWS = b3QuatRotate(ornA, normal); - - if (b3Dot3F4(deltaC2, faceANormalWS) < 0) - faceANormalWS *= -1.f; - - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d)) - return false; - - if (d < dmin) - { - dmin = d; - sep = (b3Vector3&)faceANormalWS; - } - } - - int numFacesB = hullB.m_numFaces; - // Test normals from hullB - for (int i = 0; i < numFacesB; i++) - { - b3Float4 normal = (b3Float4&)facesB[hullB.m_faceOffset + i].m_plane; - b3Float4 WorldNormal = b3QuatRotate(ornB, normal); - - if (b3Dot3F4(deltaC2, WorldNormal) < 0) - { - WorldNormal *= -1.f; - } - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d)) - return false; - - if (d < dmin) - { - dmin = d; - sep = (b3Vector3&)WorldNormal; - } - } - - // b3Vector3 edgeAstart,edgeAend,edgeBstart,edgeBend; - - int curEdgeEdge = 0; - // Test edges - for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++) - { - const b3Float4& edge0 = (b3Float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0]; - b3Float4 edge0World = b3QuatRotate(ornA, (b3Float4&)edge0); - - for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++) - { - const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1]; - b3Float4 edge1World = b3QuatRotate(ornB, (b3Float4&)edge1); - - b3Float4 crossje = b3Cross3(edge0World, edge1World); - - curEdgeEdge++; - if (!b3IsAlmostZero((b3Vector3&)crossje)) - { - crossje = b3FastNormalized3(crossje); - if (b3Dot3F4(deltaC2, crossje) < 0) - crossje *= -1.f; - -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar dist; - if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist)) - return false; - - if (dist < dmin) - { - dmin = dist; - sep = (b3Vector3&)crossje; - } - } - } - } - - if ((b3Dot3F4(-deltaC2, (b3Float4&)sep)) > 0.0f) - sep = -sep; - - return true; -} - -#endif //B3_FIND_SEPARATING_AXIS_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h deleted file mode 100644 index a3bfbf2995a..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h +++ /dev/null @@ -1,888 +0,0 @@ - -/*** - * --------------------------------- - * Copyright (c)2012 Daniel Fiser - * - * This file was ported from mpr.c file, part of libccd. - * The Minkoski Portal Refinement implementation was ported - * to OpenCL by Erwin Coumans for the Bullet 3 Physics library. - * at http://github.com/erwincoumans/bullet3 - * - * Distributed under the OSI-approved BSD License (the "License"); - * see . - * This software is distributed WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the License for more information. - */ - -#ifndef B3_MPR_PENETRATION_H -#define B3_MPR_PENETRATION_H - -#include "Bullet3Common/shared/b3PlatformDefinitions.h" -#include "Bullet3Common/shared/b3Float4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - -#ifdef __cplusplus -#define B3_MPR_SQRT sqrtf -#else -#define B3_MPR_SQRT sqrt -#endif -#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y)) -#define B3_MPR_FABS fabs - -#define B3_MPR_TOLERANCE 1E-6f -#define B3_MPR_MAX_ITERATIONS 1000 - -struct _b3MprSupport_t -{ - b3Float4 v; //!< Support point in minkowski sum - b3Float4 v1; //!< Support point in obj1 - b3Float4 v2; //!< Support point in obj2 -}; -typedef struct _b3MprSupport_t b3MprSupport_t; - -struct _b3MprSimplex_t -{ - b3MprSupport_t ps[4]; - int last; //!< index of last added point -}; -typedef struct _b3MprSimplex_t b3MprSimplex_t; - -inline b3MprSupport_t *b3MprSimplexPointW(b3MprSimplex_t *s, int idx) -{ - return &s->ps[idx]; -} - -inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size) -{ - s->last = size - 1; -} - -inline int b3MprSimplexSize(const b3MprSimplex_t *s) -{ - return s->last + 1; -} - -inline const b3MprSupport_t *b3MprSimplexPoint(const b3MprSimplex_t *s, int idx) -{ - // here is no check on boundaries - return &s->ps[idx]; -} - -inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s) -{ - *d = *s; -} - -inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a) -{ - b3MprSupportCopy(s->ps + pos, a); -} - -inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2) -{ - b3MprSupport_t supp; - - b3MprSupportCopy(&supp, &s->ps[pos1]); - b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]); - b3MprSupportCopy(&s->ps[pos2], &supp); -} - -inline int b3MprIsZero(float val) -{ - return B3_MPR_FABS(val) < FLT_EPSILON; -} - -inline int b3MprEq(float _a, float _b) -{ - float ab; - float a, b; - - ab = B3_MPR_FABS(_a - _b); - if (B3_MPR_FABS(ab) < FLT_EPSILON) - return 1; - - a = B3_MPR_FABS(_a); - b = B3_MPR_FABS(_b); - if (b > a) - { - return ab < FLT_EPSILON * b; - } - else - { - return ab < FLT_EPSILON * a; - } -} - -inline int b3MprVec3Eq(const b3Float4 *a, const b3Float4 *b) -{ - return b3MprEq((*a).x, (*b).x) && b3MprEq((*a).y, (*b).y) && b3MprEq((*a).z, (*b).z); -} - -inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec, __global const b3ConvexPolyhedronData_t *hull, b3ConstArray(b3Float4) verticesA) -{ - b3Float4 supVec = b3MakeFloat4(0, 0, 0, 0); - float maxDot = -B3_LARGE_FLOAT; - - if (0 < hull->m_numVertices) - { - const b3Float4 scaled = supportVec; - int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot); - return verticesA[hull->m_vertexOffset + index]; - } - - return supVec; -} - -B3_STATIC void b3MprConvexSupport(int pairIndex, int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, - b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, - b3ConstArray(b3Collidable_t) cpuCollidables, - b3ConstArray(b3Float4) cpuVertices, - __global b3Float4 *sepAxis, - const b3Float4 *_dir, b3Float4 *outp, int logme) -{ - //dir is in worldspace, move to local space - - b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos; - b3Quat orn = cpuBodyBuf[bodyIndex].m_quat; - - b3Float4 dir = b3MakeFloat4((*_dir).x, (*_dir).y, (*_dir).z, 0.f); - - const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn), dir); - - //find local support vertex - int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx; - - b3Assert(cpuCollidables[colIndex].m_shapeType == SHAPE_CONVEX_HULL); - __global const b3ConvexPolyhedronData_t *hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex]; - - b3Float4 pInA; - if (logme) - { - // b3Float4 supVec = b3MakeFloat4(0,0,0,0); - float maxDot = -B3_LARGE_FLOAT; - - if (0 < hull->m_numVertices) - { - const b3Float4 scaled = localDir; - int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot); - pInA = cpuVertices[hull->m_vertexOffset + index]; - } - } - else - { - pInA = b3LocalGetSupportVertex(localDir, hull, cpuVertices); - } - - //move vertex to world space - *outp = b3TransformPoint(pInA, pos, orn); -} - -inline void b3MprSupport(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, - b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, - b3ConstArray(b3Collidable_t) cpuCollidables, - b3ConstArray(b3Float4) cpuVertices, - __global b3Float4 *sepAxis, - const b3Float4 *_dir, b3MprSupport_t *supp) -{ - b3Float4 dir; - dir = *_dir; - b3MprConvexSupport(pairIndex, bodyIndexA, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, &supp->v1, 0); - dir = *_dir * -1.f; - b3MprConvexSupport(pairIndex, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, &supp->v2, 0); - supp->v = supp->v1 - supp->v2; -} - -inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center) -{ - center->v1 = cpuBodyBuf[bodyIndexA].m_pos; - center->v2 = cpuBodyBuf[bodyIndexB].m_pos; - center->v = center->v1 - center->v2; -} - -inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z) -{ - (*v).x = x; - (*v).y = y; - (*v).z = z; - (*v).w = 0.f; -} - -inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w) -{ - (*v).x += (*w).x; - (*v).y += (*w).y; - (*v).z += (*w).z; -} - -inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w) -{ - *v = *w; -} - -inline void b3MprVec3Scale(b3Float4 *d, float k) -{ - *d *= k; -} - -inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b) -{ - float dot; - - dot = b3Dot3F4(*a, *b); - return dot; -} - -inline float b3MprVec3Len2(const b3Float4 *v) -{ - return b3MprVec3Dot(v, v); -} - -inline void b3MprVec3Normalize(b3Float4 *d) -{ - float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d)); - b3MprVec3Scale(d, k); -} - -inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b) -{ - *d = b3Cross3(*a, *b); -} - -inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w) -{ - *d = *v - *w; -} - -inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir) -{ - b3Float4 v2v1, v3v1; - - b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v, - &b3MprSimplexPoint(portal, 1)->v); - b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v, - &b3MprSimplexPoint(portal, 1)->v); - b3MprVec3Cross(dir, &v2v1, &v3v1); - b3MprVec3Normalize(dir); -} - -inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal, - const b3Float4 *dir) -{ - float dot; - dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v); - return b3MprIsZero(dot) || dot > 0.f; -} - -inline int portalReachTolerance(const b3MprSimplex_t *portal, - const b3MprSupport_t *v4, - const b3Float4 *dir) -{ - float dv1, dv2, dv3, dv4; - float dot1, dot2, dot3; - - // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4} - - dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir); - dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir); - dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir); - dv4 = b3MprVec3Dot(&v4->v, dir); - - dot1 = dv4 - dv1; - dot2 = dv4 - dv2; - dot3 = dv4 - dv3; - - dot1 = B3_MPR_FMIN(dot1, dot2); - dot1 = B3_MPR_FMIN(dot1, dot3); - - return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE; -} - -inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal, - const b3MprSupport_t *v4, - const b3Float4 *dir) -{ - float dot; - dot = b3MprVec3Dot(&v4->v, dir); - return b3MprIsZero(dot) || dot > 0.f; -} - -inline void b3ExpandPortal(b3MprSimplex_t *portal, - const b3MprSupport_t *v4) -{ - float dot; - b3Float4 v4v0; - - b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v); - dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0); - if (dot > 0.f) - { - dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0); - if (dot > 0.f) - { - b3MprSimplexSet(portal, 1, v4); - } - else - { - b3MprSimplexSet(portal, 3, v4); - } - } - else - { - dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0); - if (dot > 0.f) - { - b3MprSimplexSet(portal, 2, v4); - } - else - { - b3MprSimplexSet(portal, 1, v4); - } - } -} - -B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, - b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, - b3ConstArray(b3Collidable_t) cpuCollidables, - b3ConstArray(b3Float4) cpuVertices, - __global b3Float4 *sepAxis, - __global int *hasSepAxis, - b3MprSimplex_t *portal) -{ - b3Float4 dir, va, vb; - float dot; - int cont; - - // vertex 0 is center of portal - b3FindOrigin(bodyIndexA, bodyIndexB, cpuBodyBuf, b3MprSimplexPointW(portal, 0)); - // vertex 0 is center of portal - b3MprSimplexSetSize(portal, 1); - - b3Float4 zero = b3MakeFloat4(0, 0, 0, 0); - b3Float4 *b3mpr_vec3_origin = &zero; - - if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)) - { - // Portal's center lies on origin (0,0,0) => we know that objects - // intersect but we would need to know penetration info. - // So move center little bit... - b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f); - b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va); - } - - // vertex 1 = support in direction of origin - b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v); - b3MprVec3Scale(&dir, -1.f); - b3MprVec3Normalize(&dir); - - b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, b3MprSimplexPointW(portal, 1)); - - b3MprSimplexSetSize(portal, 2); - - // test if origin isn't outside of v1 - dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir); - - if (b3MprIsZero(dot) || dot < 0.f) - return -1; - - // vertex 2 - b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v, - &b3MprSimplexPoint(portal, 1)->v); - if (b3MprIsZero(b3MprVec3Len2(&dir))) - { - if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)) - { - // origin lies on v1 - return 1; - } - else - { - // origin lies on v0-v1 segment - return 2; - } - } - - b3MprVec3Normalize(&dir); - b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, b3MprSimplexPointW(portal, 2)); - - dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir); - if (b3MprIsZero(dot) || dot < 0.f) - return -1; - - b3MprSimplexSetSize(portal, 3); - - // vertex 3 direction - b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v, - &b3MprSimplexPoint(portal, 0)->v); - b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v, - &b3MprSimplexPoint(portal, 0)->v); - b3MprVec3Cross(&dir, &va, &vb); - b3MprVec3Normalize(&dir); - - // it is better to form portal faces to be oriented "outside" origin - dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v); - if (dot > 0.f) - { - b3MprSimplexSwap(portal, 1, 2); - b3MprVec3Scale(&dir, -1.f); - } - - while (b3MprSimplexSize(portal) < 4) - { - b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, b3MprSimplexPointW(portal, 3)); - - dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir); - if (b3MprIsZero(dot) || dot < 0.f) - return -1; - - cont = 0; - - // test if origin is outside (v1, v0, v3) - set v2 as v3 and - // continue - b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v, - &b3MprSimplexPoint(portal, 3)->v); - dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v); - if (dot < 0.f && !b3MprIsZero(dot)) - { - b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3)); - cont = 1; - } - - if (!cont) - { - // test if origin is outside (v3, v0, v2) - set v1 as v3 and - // continue - b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v, - &b3MprSimplexPoint(portal, 2)->v); - dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v); - if (dot < 0.f && !b3MprIsZero(dot)) - { - b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3)); - cont = 1; - } - } - - if (cont) - { - b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v, - &b3MprSimplexPoint(portal, 0)->v); - b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v, - &b3MprSimplexPoint(portal, 0)->v); - b3MprVec3Cross(&dir, &va, &vb); - b3MprVec3Normalize(&dir); - } - else - { - b3MprSimplexSetSize(portal, 4); - } - } - - return 0; -} - -B3_STATIC int b3RefinePortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, - b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, - b3ConstArray(b3Collidable_t) cpuCollidables, - b3ConstArray(b3Float4) cpuVertices, - __global b3Float4 *sepAxis, - b3MprSimplex_t *portal) -{ - b3Float4 dir; - b3MprSupport_t v4; - - for (int i = 0; i < B3_MPR_MAX_ITERATIONS; i++) - //while (1) - { - // compute direction outside the portal (from v0 throught v1,v2,v3 - // face) - b3PortalDir(portal, &dir); - - // test if origin is inside the portal - if (portalEncapsulesOrigin(portal, &dir)) - return 0; - - // get next support point - - b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, &v4); - - // test if v4 can expand portal to contain origin and if portal - // expanding doesn't reach given tolerance - if (!portalCanEncapsuleOrigin(portal, &v4, &dir) || portalReachTolerance(portal, &v4, &dir)) - { - return -1; - } - - // v1-v2-v3 triangle must be rearranged to face outside Minkowski - // difference (direction from v0). - b3ExpandPortal(portal, &v4); - } - - return -1; -} - -B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos) -{ - b3Float4 zero = b3MakeFloat4(0, 0, 0, 0); - b3Float4 *b3mpr_vec3_origin = &zero; - - b3Float4 dir; - size_t i; - float b[4], sum, inv; - b3Float4 vec, p1, p2; - - b3PortalDir(portal, &dir); - - // use barycentric coordinates of tetrahedron to find origin - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v, - &b3MprSimplexPoint(portal, 2)->v); - b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v); - - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v, - &b3MprSimplexPoint(portal, 2)->v); - b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v); - - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v, - &b3MprSimplexPoint(portal, 1)->v); - b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v); - - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v, - &b3MprSimplexPoint(portal, 1)->v); - b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v); - - sum = b[0] + b[1] + b[2] + b[3]; - - if (b3MprIsZero(sum) || sum < 0.f) - { - b[0] = 0.f; - - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v, - &b3MprSimplexPoint(portal, 3)->v); - b[1] = b3MprVec3Dot(&vec, &dir); - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v, - &b3MprSimplexPoint(portal, 1)->v); - b[2] = b3MprVec3Dot(&vec, &dir); - b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v, - &b3MprSimplexPoint(portal, 2)->v); - b[3] = b3MprVec3Dot(&vec, &dir); - - sum = b[1] + b[2] + b[3]; - } - - inv = 1.f / sum; - - b3MprVec3Copy(&p1, b3mpr_vec3_origin); - b3MprVec3Copy(&p2, b3mpr_vec3_origin); - for (i = 0; i < 4; i++) - { - b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1); - b3MprVec3Scale(&vec, b[i]); - b3MprVec3Add(&p1, &vec); - - b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2); - b3MprVec3Scale(&vec, b[i]); - b3MprVec3Add(&p2, &vec); - } - b3MprVec3Scale(&p1, inv); - b3MprVec3Scale(&p2, inv); - - b3MprVec3Copy(pos, &p1); - b3MprVec3Add(pos, &p2); - b3MprVec3Scale(pos, 0.5); -} - -inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b) -{ - b3Float4 ab; - b3MprVec3Sub2(&ab, a, b); - return b3MprVec3Len2(&ab); -} - -inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P, - const b3Float4 *x0, - const b3Float4 *b, - b3Float4 *witness) -{ - // The computation comes from solving equation of segment: - // S(t) = x0 + t.d - // where - x0 is initial point of segment - // - d is direction of segment from x0 (|d| > 0) - // - t belongs to <0, 1> interval - // - // Than, distance from a segment to some point P can be expressed: - // D(t) = |x0 + t.d - P|^2 - // which is distance from any point on segment. Minimization - // of this function brings distance from P to segment. - // Minimization of D(t) leads to simple quadratic equation that's - // solving is straightforward. - // - // Bonus of this method is witness point for free. - - float dist, t; - b3Float4 d, a; - - // direction of segment - b3MprVec3Sub2(&d, b, x0); - - // precompute vector from P to x0 - b3MprVec3Sub2(&a, x0, P); - - t = -1.f * b3MprVec3Dot(&a, &d); - t /= b3MprVec3Len2(&d); - - if (t < 0.f || b3MprIsZero(t)) - { - dist = b3MprVec3Dist2(x0, P); - if (witness) - b3MprVec3Copy(witness, x0); - } - else if (t > 1.f || b3MprEq(t, 1.f)) - { - dist = b3MprVec3Dist2(b, P); - if (witness) - b3MprVec3Copy(witness, b); - } - else - { - if (witness) - { - b3MprVec3Copy(witness, &d); - b3MprVec3Scale(witness, t); - b3MprVec3Add(witness, x0); - dist = b3MprVec3Dist2(witness, P); - } - else - { - // recycling variables - b3MprVec3Scale(&d, t); - b3MprVec3Add(&d, &a); - dist = b3MprVec3Len2(&d); - } - } - - return dist; -} - -inline float b3MprVec3PointTriDist2(const b3Float4 *P, - const b3Float4 *x0, const b3Float4 *B, - const b3Float4 *C, - b3Float4 *witness) -{ - // Computation comes from analytic expression for triangle (x0, B, C) - // T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and - // Then equation for distance is: - // D(s, t) = | T(s, t) - P |^2 - // This leads to minimization of quadratic function of two variables. - // The solution from is taken only if s is between 0 and 1, t is - // between 0 and 1 and t + s < 1, otherwise distance from segment is - // computed. - - b3Float4 d1, d2, a; - float u, v, w, p, q, r; - float s, t, dist, dist2; - b3Float4 witness2; - - b3MprVec3Sub2(&d1, B, x0); - b3MprVec3Sub2(&d2, C, x0); - b3MprVec3Sub2(&a, x0, P); - - u = b3MprVec3Dot(&a, &a); - v = b3MprVec3Dot(&d1, &d1); - w = b3MprVec3Dot(&d2, &d2); - p = b3MprVec3Dot(&a, &d1); - q = b3MprVec3Dot(&a, &d2); - r = b3MprVec3Dot(&d1, &d2); - - s = (q * r - w * p) / (w * v - r * r); - t = (-s * r - q) / w; - - if ((b3MprIsZero(s) || s > 0.f) && (b3MprEq(s, 1.f) || s < 1.f) && (b3MprIsZero(t) || t > 0.f) && (b3MprEq(t, 1.f) || t < 1.f) && (b3MprEq(t + s, 1.f) || t + s < 1.f)) - { - if (witness) - { - b3MprVec3Scale(&d1, s); - b3MprVec3Scale(&d2, t); - b3MprVec3Copy(witness, x0); - b3MprVec3Add(witness, &d1); - b3MprVec3Add(witness, &d2); - - dist = b3MprVec3Dist2(witness, P); - } - else - { - dist = s * s * v; - dist += t * t * w; - dist += 2.f * s * t * r; - dist += 2.f * s * p; - dist += 2.f * t * q; - dist += u; - } - } - else - { - dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness); - - dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2); - if (dist2 < dist) - { - dist = dist2; - if (witness) - b3MprVec3Copy(witness, &witness2); - } - - dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2); - if (dist2 < dist) - { - dist = dist2; - if (witness) - b3MprVec3Copy(witness, &witness2); - } - } - - return dist; -} - -B3_STATIC void b3FindPenetr(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, - b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, - b3ConstArray(b3Collidable_t) cpuCollidables, - b3ConstArray(b3Float4) cpuVertices, - __global b3Float4 *sepAxis, - b3MprSimplex_t *portal, - float *depth, b3Float4 *pdir, b3Float4 *pos) -{ - b3Float4 dir; - b3MprSupport_t v4; - unsigned long iterations; - - b3Float4 zero = b3MakeFloat4(0, 0, 0, 0); - b3Float4 *b3mpr_vec3_origin = &zero; - - iterations = 1UL; - for (int i = 0; i < B3_MPR_MAX_ITERATIONS; i++) - //while (1) - { - // compute portal direction and obtain next support point - b3PortalDir(portal, &dir); - - b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, &v4); - - // reached tolerance -> find penetration info - if (portalReachTolerance(portal, &v4, &dir) || iterations == B3_MPR_MAX_ITERATIONS) - { - *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin, &b3MprSimplexPoint(portal, 1)->v, &b3MprSimplexPoint(portal, 2)->v, &b3MprSimplexPoint(portal, 3)->v, pdir); - *depth = B3_MPR_SQRT(*depth); - - if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z)) - { - *pdir = dir; - } - b3MprVec3Normalize(pdir); - - // barycentric coordinates: - b3FindPos(portal, pos); - - return; - } - - b3ExpandPortal(portal, &v4); - - iterations++; - } -} - -B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal, float *depth, b3Float4 *dir, b3Float4 *pos) -{ - // Touching contact on portal's v1 - so depth is zero and direction - // is unimportant and pos can be guessed - *depth = 0.f; - b3Float4 zero = b3MakeFloat4(0, 0, 0, 0); - b3Float4 *b3mpr_vec3_origin = &zero; - - b3MprVec3Copy(dir, b3mpr_vec3_origin); - - b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1); - b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2); - b3MprVec3Scale(pos, 0.5); -} - -B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal, - float *depth, b3Float4 *dir, b3Float4 *pos) -{ - // Origin lies on v0-v1 segment. - // Depth is distance to v1, direction also and position must be - // computed - - b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1); - b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2); - b3MprVec3Scale(pos, 0.5f); - - b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v); - *depth = B3_MPR_SQRT(b3MprVec3Len2(dir)); - b3MprVec3Normalize(dir); -} - -inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB, - b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, - b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, - b3ConstArray(b3Collidable_t) cpuCollidables, - b3ConstArray(b3Float4) cpuVertices, - __global b3Float4 *sepAxis, - __global int *hasSepAxis, - float *depthOut, b3Float4 *dirOut, b3Float4 *posOut) -{ - b3MprSimplex_t portal; - - // if (!hasSepAxis[pairIndex]) - // return -1; - - hasSepAxis[pairIndex] = 0; - int res; - - // Phase 1: Portal discovery - res = b3DiscoverPortal(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, hasSepAxis, &portal); - - //sepAxis[pairIndex] = *pdir;//or -dir? - - switch (res) - { - case 0: - { - // Phase 2: Portal refinement - - res = b3RefinePortal(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &portal); - if (res < 0) - return -1; - - // Phase 3. Penetration info - b3FindPenetr(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &portal, depthOut, dirOut, posOut); - hasSepAxis[pairIndex] = 1; - sepAxis[pairIndex] = -*dirOut; - break; - } - case 1: - { - // Touching contact on portal's v1. - b3FindPenetrTouch(&portal, depthOut, dirOut, posOut); - break; - } - case 2: - { - b3FindPenetrSegment(&portal, depthOut, dirOut, posOut); - break; - } - default: - { - hasSepAxis[pairIndex] = 0; - //if (res < 0) - //{ - // Origin isn't inside portal - no collision. - return -1; - //} - } - }; - - return 0; -}; - -#endif //B3_MPR_PENETRATION_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h deleted file mode 100644 index 6e991e14b05..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h +++ /dev/null @@ -1,175 +0,0 @@ - -#ifndef B3_NEW_CONTACT_REDUCTION_H -#define B3_NEW_CONTACT_REDUCTION_H - -#include "Bullet3Common/shared/b3Float4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#define GET_NPOINTS(x) (x).m_worldNormalOnB.w - -int b3ExtractManifoldSequentialGlobal(__global const b3Float4* p, int nPoints, b3Float4ConstArg nearNormal, b3Int4* contactIdx) -{ - if (nPoints == 0) - return 0; - - if (nPoints <= 4) - return nPoints; - - if (nPoints > 64) - nPoints = 64; - - b3Float4 center = b3MakeFloat4(0, 0, 0, 0); - { - for (int i = 0; i < nPoints; i++) - center += p[i]; - center /= (float)nPoints; - } - - // sample 4 directions - - b3Float4 aVector = p[0] - center; - b3Float4 u = b3Cross(nearNormal, aVector); - b3Float4 v = b3Cross(nearNormal, u); - u = b3Normalized(u); - v = b3Normalized(v); - - //keep point with deepest penetration - float minW = FLT_MAX; - - int minIndex = -1; - - b3Float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for (int ie = 0; ie < nPoints; ie++) - { - if (p[ie].w < minW) - { - minW = p[ie].w; - minIndex = ie; - } - float f; - b3Float4 r = p[ie] - center; - f = b3Dot(u, r); - if (f < maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = b3Dot(-u, r); - if (f < maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - f = b3Dot(v, r); - if (f < maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = b3Dot(-v, r); - if (f < maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; -} - -__kernel void b3NewContactReductionKernel(__global b3Int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* globalContactsOut, - __global b3Int4* clippingFaces, - __global b3Float4* worldVertsB2, - volatile __global int* nGlobalContactsOut, - int vertexFaceCapacity, - int contactCapacity, - int numPairs, - int pairIndex) -{ - // int i = get_global_id(0); - //int pairIndex = i; - int i = pairIndex; - - b3Int4 contactIdx; - contactIdx = b3MakeInt4(0, 1, 2, 3); - - if (i < numPairs) - { - if (hasSeparatingAxis[i]) - { - int nPoints = clippingFaces[pairIndex].w; - - if (nPoints > 0) - { - __global b3Float4* pointsIn = &worldVertsB2[pairIndex * vertexFaceCapacity]; - b3Float4 normal = -separatingNormals[i]; - - int nReducedContacts = b3ExtractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int dstIdx; - dstIdx = b3AtomicInc(nGlobalContactsOut); - - //#if 0 - b3Assert(dstIdx < contactCapacity); - if (dstIdx < contactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f * 0xffff); - c->m_frictionCoeffCmp = (0.7f * 0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - - pairs[pairIndex].w = dstIdx; - - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - switch (nReducedContacts) - { - case 4: - c->m_worldPosB[3] = pointsIn[contactIdx.w]; - case 3: - c->m_worldPosB[2] = pointsIn[contactIdx.z]; - case 2: - c->m_worldPosB[1] = pointsIn[contactIdx.y]; - case 1: - c->m_worldPosB[0] = pointsIn[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = nReducedContacts; - } - - //#endif - - } // if (numContactsOut>0) - } // if (hasSeparatingAxis[i]) - } // if (im_escapeIndexOrTriangleIndex & ~(y)); -} - -inline int b3IsLeaf(const b3QuantizedBvhNodeData* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0) ? 1 : 0; -} - -inline int b3GetEscapeIndex(const b3QuantizedBvhNodeData* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -inline void b3QuantizeWithClamp(unsigned short* out, b3Float4ConstArg point2, int isMax, b3Float4ConstArg bvhAabbMin, b3Float4ConstArg bvhAabbMax, b3Float4ConstArg bvhQuantization) -{ - b3Float4 clampedPoint = b3MaxFloat4(point2, bvhAabbMin); - clampedPoint = b3MinFloat4(clampedPoint, bvhAabbMax); - - b3Float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization; - if (isMax) - { - out[0] = (unsigned short)(((unsigned short)(v.x + 1.f) | 1)); - out[1] = (unsigned short)(((unsigned short)(v.y + 1.f) | 1)); - out[2] = (unsigned short)(((unsigned short)(v.z + 1.f) | 1)); - } - else - { - out[0] = (unsigned short)(((unsigned short)(v.x) & 0xfffe)); - out[1] = (unsigned short)(((unsigned short)(v.y) & 0xfffe)); - out[2] = (unsigned short)(((unsigned short)(v.z) & 0xfffe)); - } -} - -inline int b3TestQuantizedAabbAgainstQuantizedAabbSlow( - const unsigned short int* aabbMin1, - const unsigned short int* aabbMax1, - const unsigned short int* aabbMin2, - const unsigned short int* aabbMax2) -{ - //int overlap = 1; - if (aabbMin1[0] > aabbMax2[0]) - return 0; - if (aabbMax1[0] < aabbMin2[0]) - return 0; - if (aabbMin1[1] > aabbMax2[1]) - return 0; - if (aabbMax1[1] < aabbMin2[1]) - return 0; - if (aabbMin1[2] > aabbMax2[2]) - return 0; - if (aabbMax1[2] < aabbMin2[2]) - return 0; - return 1; - //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap; - //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap; - //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap; - //return overlap; -} - -#endif //B3_QUANTIZED_BVH_NODE_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h deleted file mode 100644 index c108255b9f4..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef B3_REDUCE_CONTACTS_H -#define B3_REDUCE_CONTACTS_H - -inline int b3ReduceContacts(const b3Float4* p, int nPoints, const b3Float4& nearNormal, b3Int4* contactIdx) -{ - if (nPoints == 0) - return 0; - - if (nPoints <= 4) - return nPoints; - - if (nPoints > 64) - nPoints = 64; - - b3Float4 center = b3MakeFloat4(0, 0, 0, 0); - { - for (int i = 0; i < nPoints; i++) - center += p[i]; - center /= (float)nPoints; - } - - // sample 4 directions - - b3Float4 aVector = p[0] - center; - b3Float4 u = b3Cross3(nearNormal, aVector); - b3Float4 v = b3Cross3(nearNormal, u); - u = b3FastNormalized3(u); - v = b3FastNormalized3(v); - - //keep point with deepest penetration - float minW = FLT_MAX; - - int minIndex = -1; - - b3Float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for (int ie = 0; ie < nPoints; ie++) - { - if (p[ie].w < minW) - { - minW = p[ie].w; - minIndex = ie; - } - float f; - b3Float4 r = p[ie] - center; - f = b3Dot3F4(u, r); - if (f < maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = b3Dot3F4(-u, r); - if (f < maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - f = b3Dot3F4(v, r); - if (f < maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = b3Dot3F4(-v, r); - if (f < maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; -} - -#endif //B3_REDUCE_CONTACTS_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h deleted file mode 100644 index 663e946fc1e..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef B3_RIGIDBODY_DATA_H -#define B3_RIGIDBODY_DATA_H - -#include "Bullet3Common/shared/b3Float4.h" -#include "Bullet3Common/shared/b3Quat.h" -#include "Bullet3Common/shared/b3Mat3x3.h" - -typedef struct b3RigidBodyData b3RigidBodyData_t; - -struct b3RigidBodyData -{ - b3Float4 m_pos; - b3Quat m_quat; - b3Float4 m_linVel; - b3Float4 m_angVel; - - int m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -}; - -typedef struct b3InertiaData b3InertiaData_t; - -struct b3InertiaData -{ - b3Mat3x3 m_invInertiaWorld; - b3Mat3x3 m_initInvInertia; -}; - -#endif //B3_RIGIDBODY_DATA_H diff --git a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h b/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h deleted file mode 100644 index e0c3a5cf978..00000000000 --- a/thirdparty/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef B3_UPDATE_AABBS_H -#define B3_UPDATE_AABBS_H - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -void b3ComputeWorldAabb(int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs) -{ - __global const b3RigidBodyData_t* body = &bodies[bodyId]; - - b3Float4 position = body->m_pos; - b3Quat orientation = body->m_quat; - - int collidableIndex = body->m_collidableIdx; - int shapeIndex = collidables[collidableIndex].m_shapeIndex; - - if (shapeIndex >= 0) - { - b3Aabb_t localAabb = localShapeAABB[collidableIndex]; - b3Aabb_t worldAabb; - - b3Float4 aabbAMinOut, aabbAMaxOut; - float margin = 0.f; - b3TransformAabb2(localAabb.m_minVec, localAabb.m_maxVec, margin, position, orientation, &aabbAMinOut, &aabbAMaxOut); - - worldAabb.m_minVec = aabbAMinOut; - worldAabb.m_minIndices[3] = bodyId; - worldAabb.m_maxVec = aabbAMaxOut; - worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass == 0.f ? 0 : 1; - worldAabbs[bodyId] = worldAabb; - } -} - -#endif //B3_UPDATE_AABBS_H diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp b/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp deleted file mode 100644 index d546d5e0669..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3AlignedAllocator.h" - -#ifdef B3_ALLOCATOR_STATISTICS -int b3g_numAlignedAllocs = 0; -int b3g_numAlignedFree = 0; -int b3g_totalBytesAlignedAllocs = 0; //detect memory leaks -#endif - -static void *b3AllocDefault(size_t size) -{ - return malloc(size); -} - -static void b3FreeDefault(void *ptr) -{ - free(ptr); -} - -static b3AllocFunc *b3s_allocFunc = b3AllocDefault; -static b3FreeFunc *b3s_freeFunc = b3FreeDefault; - -#if defined(B3_HAS_ALIGNED_ALLOCATOR) -#include -static void *b3AlignedAllocDefault(size_t size, int alignment) -{ - return _aligned_malloc(size, (size_t)alignment); -} - -static void b3AlignedFreeDefault(void *ptr) -{ - _aligned_free(ptr); -} -#elif defined(__CELLOS_LV2__) -#include - -static inline void *b3AlignedAllocDefault(size_t size, int alignment) -{ - return memalign(alignment, size); -} - -static inline void b3AlignedFreeDefault(void *ptr) -{ - free(ptr); -} -#else - -static inline void *b3AlignedAllocDefault(size_t size, int alignment) -{ - void *ret; - char *real; - real = (char *)b3s_allocFunc(size + sizeof(void *) + (alignment - 1)); - if (real) - { - ret = b3AlignPointer(real + sizeof(void *), alignment); - *((void **)(ret)-1) = (void *)(real); - } - else - { - ret = (void *)(real); - } - return (ret); -} - -static inline void b3AlignedFreeDefault(void *ptr) -{ - void *real; - - if (ptr) - { - real = *((void **)(ptr)-1); - b3s_freeFunc(real); - } -} -#endif - -static b3AlignedAllocFunc *b3s_alignedAllocFunc = b3AlignedAllocDefault; -static b3AlignedFreeFunc *b3s_alignedFreeFunc = b3AlignedFreeDefault; - -void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc) -{ - b3s_alignedAllocFunc = allocFunc ? allocFunc : b3AlignedAllocDefault; - b3s_alignedFreeFunc = freeFunc ? freeFunc : b3AlignedFreeDefault; -} - -void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc) -{ - b3s_allocFunc = allocFunc ? allocFunc : b3AllocDefault; - b3s_freeFunc = freeFunc ? freeFunc : b3FreeDefault; -} - -#ifdef B3_DEBUG_MEMORY_ALLOCATIONS -//this generic allocator provides the total allocated number of bytes -#include - -void *b3AlignedAllocInternal(size_t size, int alignment, int line, char *filename) -{ - void *ret; - char *real; -#ifdef B3_ALLOCATOR_STATISTICS - b3g_totalBytesAlignedAllocs += size; - b3g_numAlignedAllocs++; -#endif - real = (char *)b3s_allocFunc(size + 2 * sizeof(void *) + (alignment - 1)); - if (real) - { - ret = (void *)b3AlignPointer(real + 2 * sizeof(void *), alignment); - *((void **)(ret)-1) = (void *)(real); - *((int *)(ret)-2) = size; - } - else - { - ret = (void *)(real); //?? - } - - b3Printf("allocation#%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedAllocs, real, filename, line, size); - - int *ptr = (int *)ret; - *ptr = 12; - return (ret); -} - -void b3AlignedFreeInternal(void *ptr, int line, char *filename) -{ - void *real; -#ifdef B3_ALLOCATOR_STATISTICS - b3g_numAlignedFree++; -#endif - if (ptr) - { - real = *((void **)(ptr)-1); - int size = *((int *)(ptr)-2); -#ifdef B3_ALLOCATOR_STATISTICS - b3g_totalBytesAlignedAllocs -= size; -#endif - b3Printf("free #%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedFree, real, filename, line, size); - - b3s_freeFunc(real); - } - else - { - b3Printf("NULL ptr\n"); - } -} - -#else //B3_DEBUG_MEMORY_ALLOCATIONS - -void *b3AlignedAllocInternal(size_t size, int alignment) -{ -#ifdef B3_ALLOCATOR_STATISTICS - b3g_numAlignedAllocs++; -#endif - void *ptr; - ptr = b3s_alignedAllocFunc(size, alignment); - // b3Printf("b3AlignedAllocInternal %d, %x\n",size,ptr); - return ptr; -} - -void b3AlignedFreeInternal(void *ptr) -{ - if (!ptr) - { - return; - } -#ifdef B3_ALLOCATOR_STATISTICS - b3g_numAlignedFree++; -#endif - // b3Printf("b3AlignedFreeInternal %x\n",ptr); - b3s_alignedFreeFunc(ptr); -} - -#endif //B3_DEBUG_MEMORY_ALLOCATIONS diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h b/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h deleted file mode 100644 index bcff9f128e7..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3AlignedAllocator.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_ALIGNED_ALLOCATOR -#define B3_ALIGNED_ALLOCATOR - -///we probably replace this with our own aligned memory allocator -///so we replace _aligned_malloc and _aligned_free with our own -///that is better portable and more predictable - -#include "b3Scalar.h" -//#define B3_DEBUG_MEMORY_ALLOCATIONS 1 -#ifdef B3_DEBUG_MEMORY_ALLOCATIONS - -#define b3AlignedAlloc(a, b) \ - b3AlignedAllocInternal(a, b, __LINE__, __FILE__) - -#define b3AlignedFree(ptr) \ - b3AlignedFreeInternal(ptr, __LINE__, __FILE__) - -void* b3AlignedAllocInternal(size_t size, int alignment, int line, char* filename); - -void b3AlignedFreeInternal(void* ptr, int line, char* filename); - -#else -void* b3AlignedAllocInternal(size_t size, int alignment); -void b3AlignedFreeInternal(void* ptr); - -#define b3AlignedAlloc(size, alignment) b3AlignedAllocInternal(size, alignment) -#define b3AlignedFree(ptr) b3AlignedFreeInternal(ptr) - -#endif -typedef int btSizeType; - -typedef void*(b3AlignedAllocFunc)(size_t size, int alignment); -typedef void(b3AlignedFreeFunc)(void* memblock); -typedef void*(b3AllocFunc)(size_t size); -typedef void(b3FreeFunc)(void* memblock); - -///The developer can let all Bullet memory allocations go through a custom memory allocator, using b3AlignedAllocSetCustom -void b3AlignedAllocSetCustom(b3AllocFunc* allocFunc, b3FreeFunc* freeFunc); -///If the developer has already an custom aligned allocator, then b3AlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it. -void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc* allocFunc, b3AlignedFreeFunc* freeFunc); - -///The b3AlignedAllocator is a portable class for aligned memory allocations. -///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using b3AlignedAllocSetCustom and b3AlignedAllocSetCustomAligned. -template -class b3AlignedAllocator -{ - typedef b3AlignedAllocator self_type; - -public: - //just going down a list: - b3AlignedAllocator() {} - /* - b3AlignedAllocator( const self_type & ) {} - */ - - template - b3AlignedAllocator(const b3AlignedAllocator&) - { - } - - typedef const T* const_pointer; - typedef const T& const_reference; - typedef T* pointer; - typedef T& reference; - typedef T value_type; - - pointer address(reference ref) const { return &ref; } - const_pointer address(const_reference ref) const { return &ref; } - pointer allocate(btSizeType n, const_pointer* hint = 0) - { - (void)hint; - return reinterpret_cast(b3AlignedAlloc(sizeof(value_type) * n, Alignment)); - } - void construct(pointer ptr, const value_type& value) { new (ptr) value_type(value); } - void deallocate(pointer ptr) - { - b3AlignedFree(reinterpret_cast(ptr)); - } - void destroy(pointer ptr) { ptr->~value_type(); } - - template - struct rebind - { - typedef b3AlignedAllocator other; - }; - template - self_type& operator=(const b3AlignedAllocator&) - { - return *this; - } - - friend bool operator==(const self_type&, const self_type&) { return true; } -}; - -#endif //B3_ALIGNED_ALLOCATOR diff --git a/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h b/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h deleted file mode 100644 index 8ef3331f77a..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3AlignedObjectArray.h +++ /dev/null @@ -1,530 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_OBJECT_ARRAY__ -#define B3_OBJECT_ARRAY__ - -#include "b3Scalar.h" // has definitions like B3_FORCE_INLINE -#include "b3AlignedAllocator.h" - -///If the platform doesn't support placement new, you can disable B3_USE_PLACEMENT_NEW -///then the b3AlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors -///You can enable B3_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator= -///see discussion here: https://bulletphysics.orgphpBB2/viewtopic.php?t=1231 and -///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240 - -#define B3_USE_PLACEMENT_NEW 1 -//#define B3_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in or or otherwise... -#define B3_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidently perform deep copies of data if you are not careful - -#ifdef B3_USE_MEMCPY -#include -#include -#endif //B3_USE_MEMCPY - -#ifdef B3_USE_PLACEMENT_NEW -#include //for placement new -#endif //B3_USE_PLACEMENT_NEW - -///The b3AlignedObjectArray template class uses a subset of the stl::vector interface for its methods -///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data -template -//template -class b3AlignedObjectArray -{ - b3AlignedAllocator m_allocator; - - int m_size; - int m_capacity; - T* m_data; - //PCK: added this line - bool m_ownsMemory; - -#ifdef B3_ALLOW_ARRAY_COPY_OPERATOR -public: - B3_FORCE_INLINE b3AlignedObjectArray& operator=(const b3AlignedObjectArray& other) - { - copyFromArray(other); - return *this; - } -#else //B3_ALLOW_ARRAY_COPY_OPERATOR -private: - B3_FORCE_INLINE b3AlignedObjectArray& operator=(const b3AlignedObjectArray& other); -#endif //B3_ALLOW_ARRAY_COPY_OPERATOR - -protected: - B3_FORCE_INLINE int allocSize(int size) - { - return (size ? size * 2 : 1); - } - B3_FORCE_INLINE void copy(int start, int end, T* dest) const - { - int i; - for (i = start; i < end; ++i) -#ifdef B3_USE_PLACEMENT_NEW - new (&dest[i]) T(m_data[i]); -#else - dest[i] = m_data[i]; -#endif //B3_USE_PLACEMENT_NEW - } - - B3_FORCE_INLINE void init() - { - //PCK: added this line - m_ownsMemory = true; - m_data = 0; - m_size = 0; - m_capacity = 0; - } - B3_FORCE_INLINE void destroy(int first, int last) - { - int i; - for (i = first; i < last; i++) - { - m_data[i].~T(); - } - } - - B3_FORCE_INLINE void* allocate(int size) - { - if (size) - return m_allocator.allocate(size); - return 0; - } - - B3_FORCE_INLINE void deallocate() - { - if (m_data) - { - //PCK: enclosed the deallocation in this block - if (m_ownsMemory) - { - m_allocator.deallocate(m_data); - } - m_data = 0; - } - } - -public: - b3AlignedObjectArray() - { - init(); - } - - ~b3AlignedObjectArray() - { - clear(); - } - - ///Generally it is best to avoid using the copy constructor of an b3AlignedObjectArray, and use a (const) reference to the array instead. - b3AlignedObjectArray(const b3AlignedObjectArray& otherArray) - { - init(); - - int otherSize = otherArray.size(); - resize(otherSize); - //don't use otherArray.copy, it can leak memory - for (int i = 0; i < otherSize; i++) - { - m_data[i] = otherArray[i]; - } - } - - /// return the number of elements in the array - B3_FORCE_INLINE int size() const - { - return m_size; - } - - B3_FORCE_INLINE const T& at(int n) const - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - B3_FORCE_INLINE T& at(int n) - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - B3_FORCE_INLINE const T& operator[](int n) const - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - B3_FORCE_INLINE T& operator[](int n) - { - b3Assert(n >= 0); - b3Assert(n < size()); - return m_data[n]; - } - - ///clear the array, deallocated memory. Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations. - B3_FORCE_INLINE void clear() - { - destroy(0, size()); - - deallocate(); - - init(); - } - - B3_FORCE_INLINE void pop_back() - { - b3Assert(m_size > 0); - m_size--; - m_data[m_size].~T(); - } - - ///resize changes the number of elements in the array. If the new size is larger, the new elements will be constructed using the optional second argument. - ///when the new number of elements is smaller, the destructor will be called, but memory will not be freed, to reduce performance overhead of run-time memory (de)allocations. - B3_FORCE_INLINE void resizeNoInitialize(int newsize) - { - int curSize = size(); - - if (newsize < curSize) - { - } - else - { - if (newsize > size()) - { - reserve(newsize); - } - //leave this uninitialized - } - m_size = newsize; - } - - B3_FORCE_INLINE void resize(int newsize, const T& fillData = T()) - { - int curSize = size(); - - if (newsize < curSize) - { - for (int i = newsize; i < curSize; i++) - { - m_data[i].~T(); - } - } - else - { - if (newsize > size()) - { - reserve(newsize); - } -#ifdef B3_USE_PLACEMENT_NEW - for (int i = curSize; i < newsize; i++) - { - new (&m_data[i]) T(fillData); - } -#endif //B3_USE_PLACEMENT_NEW - } - - m_size = newsize; - } - B3_FORCE_INLINE T& expandNonInitializing() - { - int sz = size(); - if (sz == capacity()) - { - reserve(allocSize(size())); - } - m_size++; - - return m_data[sz]; - } - - B3_FORCE_INLINE T& expand(const T& fillValue = T()) - { - int sz = size(); - if (sz == capacity()) - { - reserve(allocSize(size())); - } - m_size++; -#ifdef B3_USE_PLACEMENT_NEW - new (&m_data[sz]) T(fillValue); //use the in-place new (not really allocating heap memory) -#endif - - return m_data[sz]; - } - - B3_FORCE_INLINE void push_back(const T& _Val) - { - int sz = size(); - if (sz == capacity()) - { - reserve(allocSize(size())); - } - -#ifdef B3_USE_PLACEMENT_NEW - new (&m_data[m_size]) T(_Val); -#else - m_data[size()] = _Val; -#endif //B3_USE_PLACEMENT_NEW - - m_size++; - } - - /// return the pre-allocated (reserved) elements, this is at least as large as the total number of elements,see size() and reserve() - B3_FORCE_INLINE int capacity() const - { - return m_capacity; - } - - B3_FORCE_INLINE void reserve(int _Count) - { // determine new minimum length of allocated storage - if (capacity() < _Count) - { // not enough room, reallocate - T* s = (T*)allocate(_Count); - b3Assert(s); - if (s == 0) - { - b3Error("b3AlignedObjectArray reserve out-of-memory\n"); - _Count = 0; - m_size = 0; - } - copy(0, size(), s); - - destroy(0, size()); - - deallocate(); - - //PCK: added this line - m_ownsMemory = true; - - m_data = s; - - m_capacity = _Count; - } - } - - class less - { - public: - bool operator()(const T& a, const T& b) - { - return (a < b); - } - }; - - template - void quickSortInternal(const L& CompareFunc, int lo, int hi) - { - // lo is the lower index, hi is the upper index - // of the region of array a that is to be sorted - int i = lo, j = hi; - T x = m_data[(lo + hi) / 2]; - - // partition - do - { - while (CompareFunc(m_data[i], x)) - i++; - while (CompareFunc(x, m_data[j])) - j--; - if (i <= j) - { - swap(i, j); - i++; - j--; - } - } while (i <= j); - - // recursion - if (lo < j) - quickSortInternal(CompareFunc, lo, j); - if (i < hi) - quickSortInternal(CompareFunc, i, hi); - } - - template - void quickSort(const L& CompareFunc) - { - //don't sort 0 or 1 elements - if (size() > 1) - { - quickSortInternal(CompareFunc, 0, size() - 1); - } - } - - ///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/ - template - void downHeap(T* pArr, int k, int n, const L& CompareFunc) - { - /* PRE: a[k+1..N] is a heap */ - /* POST: a[k..N] is a heap */ - - T temp = pArr[k - 1]; - /* k has child(s) */ - while (k <= n / 2) - { - int child = 2 * k; - - if ((child < n) && CompareFunc(pArr[child - 1], pArr[child])) - { - child++; - } - /* pick larger child */ - if (CompareFunc(temp, pArr[child - 1])) - { - /* move child up */ - pArr[k - 1] = pArr[child - 1]; - k = child; - } - else - { - break; - } - } - pArr[k - 1] = temp; - } /*downHeap*/ - - void swap(int index0, int index1) - { -#ifdef B3_USE_MEMCPY - char temp[sizeof(T)]; - memcpy(temp, &m_data[index0], sizeof(T)); - memcpy(&m_data[index0], &m_data[index1], sizeof(T)); - memcpy(&m_data[index1], temp, sizeof(T)); -#else - T temp = m_data[index0]; - m_data[index0] = m_data[index1]; - m_data[index1] = temp; -#endif //B3_USE_PLACEMENT_NEW - } - - template - void heapSort(const L& CompareFunc) - { - /* sort a[0..N-1], N.B. 0 to N-1 */ - int k; - int n = m_size; - for (k = n / 2; k > 0; k--) - { - downHeap(m_data, k, n, CompareFunc); - } - - /* a[1..N] is now a heap */ - while (n >= 1) - { - swap(0, n - 1); /* largest of a[0..n-1] */ - - n = n - 1; - /* restore a[1..i-1] heap */ - downHeap(m_data, 1, n, CompareFunc); - } - } - - ///non-recursive binary search, assumes sorted array - int findBinarySearch(const T& key) const - { - int first = 0; - int last = size() - 1; - - //assume sorted array - while (first <= last) - { - int mid = (first + last) / 2; // compute mid point. - if (key > m_data[mid]) - first = mid + 1; // repeat search in top half. - else if (key < m_data[mid]) - last = mid - 1; // repeat search in bottom half. - else - return mid; // found it. return position ///// - } - return size(); // failed to find key - } - - int findLinearSearch(const T& key) const - { - int index = size(); - int i; - - for (i = 0; i < size(); i++) - { - if (m_data[i] == key) - { - index = i; - break; - } - } - return index; - } - - int findLinearSearch2(const T& key) const - { - int index = -1; - int i; - - for (i = 0; i < size(); i++) - { - if (m_data[i] == key) - { - index = i; - break; - } - } - return index; - } - - void remove(const T& key) - { - int findIndex = findLinearSearch(key); - if (findIndex < size()) - { - swap(findIndex, size() - 1); - pop_back(); - } - } - - //PCK: whole function - void initializeFromBuffer(void* buffer, int size, int capacity) - { - clear(); - m_ownsMemory = false; - m_data = (T*)buffer; - m_size = size; - m_capacity = capacity; - } - - void copyFromArray(const b3AlignedObjectArray& otherArray) - { - int otherSize = otherArray.size(); - resize(otherSize); - //don't use otherArray.copy, it can leak memory - for (int i = 0; i < otherSize; i++) - { - m_data[i] = otherArray[i]; - } - } - - void removeAtIndex(int index) - { - if (index < size()) - { - swap(index, size() - 1); - pop_back(); - } - } -}; - -#endif //B3_OBJECT_ARRAY__ diff --git a/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h b/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h deleted file mode 100644 index 5fe4f25f8de..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3CommandLineArgs.h +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef COMMAND_LINE_ARGS_H -#define COMMAND_LINE_ARGS_H - -/****************************************************************************** - * Command-line parsing - ******************************************************************************/ -#include -#include -#include -#include -#include -class b3CommandLineArgs -{ -protected: - std::map pairs; - -public: - // Constructor - b3CommandLineArgs(int argc, char **argv) - { - addArgs(argc, argv); - } - - void addArgs(int argc, char **argv) - { - for (int i = 1; i < argc; i++) - { - std::string arg = argv[i]; - - if ((arg.length() < 2) || (arg[0] != '-') || (arg[1] != '-')) - { - continue; - } - - std::string::size_type pos; - std::string key, val; - if ((pos = arg.find('=')) == std::string::npos) - { - key = std::string(arg, 2, arg.length() - 2); - val = ""; - } - else - { - key = std::string(arg, 2, pos - 2); - val = std::string(arg, pos + 1, arg.length() - 1); - } - - //only add new keys, don't replace existing - if (pairs.find(key) == pairs.end()) - { - pairs[key] = val; - } - } - } - - bool CheckCmdLineFlag(const char *arg_name) - { - std::map::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) - { - return true; - } - return false; - } - - template - bool GetCmdLineArgument(const char *arg_name, T &val); - - int ParsedArgc() - { - return pairs.size(); - } -}; - -template -inline bool b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val) -{ - std::map::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) - { - std::istringstream strstream(itr->second); - strstream >> val; - return true; - } - return false; -} - -template <> -inline bool b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, char *&val) -{ - std::map::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) - { - std::string s = itr->second; - val = (char *)malloc(sizeof(char) * (s.length() + 1)); - std::strcpy(val, s.c_str()); - return true; - } - else - { - val = NULL; - } - return false; -} - -#endif //COMMAND_LINE_ARGS_H diff --git a/thirdparty/bullet/Bullet3Common/b3FileUtils.h b/thirdparty/bullet/Bullet3Common/b3FileUtils.h deleted file mode 100644 index 9ded17eaafa..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3FileUtils.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef B3_FILE_UTILS_H -#define B3_FILE_UTILS_H - -#include -#include "b3Scalar.h" -#include //ptrdiff_h -#include - -struct b3FileUtils -{ - b3FileUtils() - { - } - virtual ~b3FileUtils() - { - } - - static bool findFile(const char* orgFileName, char* relativeFileName, int maxRelativeFileNameMaxLen) - { - FILE* f = 0; - f = fopen(orgFileName, "rb"); - if (f) - { - //printf("original file found: [%s]\n", orgFileName); - sprintf(relativeFileName, "%s", orgFileName); - fclose(f); - return true; - } - - //printf("Trying various directories, relative to current working directory\n"); - const char* prefix[] = {"./", "./data/", "../data/", "../../data/", "../../../data/", "../../../../data/"}; - int numPrefixes = sizeof(prefix) / sizeof(const char*); - - f = 0; - bool fileFound = false; - - for (int i = 0; !f && i < numPrefixes; i++) - { -#ifdef _MSC_VER - sprintf_s(relativeFileName, maxRelativeFileNameMaxLen, "%s%s", prefix[i], orgFileName); -#else - sprintf(relativeFileName, "%s%s", prefix[i], orgFileName); -#endif - f = fopen(relativeFileName, "rb"); - if (f) - { - fileFound = true; - break; - } - } - if (f) - { - fclose(f); - } - - return fileFound; - } - - static const char* strip2(const char* name, const char* pattern) - { - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char* oriptr; - const char* patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; (patloc = strstr(oriptr, pattern)); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; - } - - static int extractPath(const char* fileName, char* path, int maxPathLength) - { - const char* stripped = strip2(fileName, "/"); - stripped = strip2(stripped, "\\"); - - ptrdiff_t len = stripped - fileName; - b3Assert((len + 1) < maxPathLength); - - if (len && ((len + 1) < maxPathLength)) - { - for (int i = 0; i < len; i++) - { - path[i] = fileName[i]; - } - path[len] = 0; - } - else - { - len = 0; - b3Assert(maxPathLength > 0); - if (maxPathLength > 0) - { - path[len] = 0; - } - } - return len; - } - - static char toLowerChar(const char t) - { - if (t >= (char)'A' && t <= (char)'Z') - return t + ((char)'a' - (char)'A'); - else - return t; - } - - static void toLower(char* str) - { - int len = strlen(str); - for (int i = 0; i < len; i++) - { - str[i] = toLowerChar(str[i]); - } - } - - /*static const char* strip2(const char* name, const char* pattern) - { - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char * oriptr; - const char * patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; - } - */ -}; -#endif //B3_FILE_UTILS_H diff --git a/thirdparty/bullet/Bullet3Common/b3HashMap.h b/thirdparty/bullet/Bullet3Common/b3HashMap.h deleted file mode 100644 index 3009e2cf2fd..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3HashMap.h +++ /dev/null @@ -1,462 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_HASH_MAP_H -#define B3_HASH_MAP_H - -#include "b3AlignedObjectArray.h" - -#include - -///very basic hashable string implementation, compatible with b3HashMap -struct b3HashString -{ - std::string m_string; - unsigned int m_hash; - - B3_FORCE_INLINE unsigned int getHash() const - { - return m_hash; - } - - b3HashString(const char* name) - : m_string(name) - { - /* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */ - static const unsigned int InitialFNV = 2166136261u; - static const unsigned int FNVMultiple = 16777619u; - - /* Fowler / Noll / Vo (FNV) Hash */ - unsigned int hash = InitialFNV; - int len = m_string.length(); - for (int i = 0; i < len; i++) - { - hash = hash ^ (m_string[i]); /* xor the low 8 bits */ - hash = hash * FNVMultiple; /* multiply by the magic number */ - } - m_hash = hash; - } - - int portableStringCompare(const char* src, const char* dst) const - { - int ret = 0; - - while (!(ret = *(unsigned char*)src - *(unsigned char*)dst) && *dst) - ++src, ++dst; - - if (ret < 0) - ret = -1; - else if (ret > 0) - ret = 1; - - return (ret); - } - - bool equals(const b3HashString& other) const - { - return (m_string == other.m_string); - } -}; - -const int B3_HASH_NULL = 0xffffffff; - -class b3HashInt -{ - int m_uid; - -public: - b3HashInt(int uid) : m_uid(uid) - { - } - - int getUid1() const - { - return m_uid; - } - - void setUid1(int uid) - { - m_uid = uid; - } - - bool equals(const b3HashInt& other) const - { - return getUid1() == other.getUid1(); - } - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - int key = m_uid; - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -class b3HashPtr -{ - union { - const void* m_pointer; - int m_hashValues[2]; - }; - -public: - b3HashPtr(const void* ptr) - : m_pointer(ptr) - { - } - - const void* getPointer() const - { - return m_pointer; - } - - bool equals(const b3HashPtr& other) const - { - return getPointer() == other.getPointer(); - } - - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - const bool VOID_IS_8 = ((sizeof(void*) == 8)); - - int key = VOID_IS_8 ? m_hashValues[0] + m_hashValues[1] : m_hashValues[0]; - - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -template -class b3HashKeyPtr -{ - int m_uid; - -public: - b3HashKeyPtr(int uid) : m_uid(uid) - { - } - - int getUid1() const - { - return m_uid; - } - - bool equals(const b3HashKeyPtr& other) const - { - return getUid1() == other.getUid1(); - } - - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - int key = m_uid; - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -template -class b3HashKey -{ - int m_uid; - -public: - b3HashKey(int uid) : m_uid(uid) - { - } - - int getUid1() const - { - return m_uid; - } - - bool equals(const b3HashKey& other) const - { - return getUid1() == other.getUid1(); - } - //to our success - B3_FORCE_INLINE unsigned int getHash() const - { - int key = m_uid; - // Thomas Wang's hash - key += ~(key << 15); - key ^= (key >> 10); - key += (key << 3); - key ^= (key >> 6); - key += ~(key << 11); - key ^= (key >> 16); - return key; - } -}; - -///The b3HashMap template class implements a generic and lightweight hashmap. -///A basic sample of how to use b3HashMap is located in Demos\BasicDemo\main.cpp -template -class b3HashMap -{ -protected: - b3AlignedObjectArray m_hashTable; - b3AlignedObjectArray m_next; - - b3AlignedObjectArray m_valueArray; - b3AlignedObjectArray m_keyArray; - - void growTables(const Key& /*key*/) - { - int newCapacity = m_valueArray.capacity(); - - if (m_hashTable.size() < newCapacity) - { - //grow hashtable and next table - int curHashtableSize = m_hashTable.size(); - - m_hashTable.resize(newCapacity); - m_next.resize(newCapacity); - - int i; - - for (i = 0; i < newCapacity; ++i) - { - m_hashTable[i] = B3_HASH_NULL; - } - for (i = 0; i < newCapacity; ++i) - { - m_next[i] = B3_HASH_NULL; - } - - for (i = 0; i < curHashtableSize; i++) - { - //const Value& value = m_valueArray[i]; - //const Key& key = m_keyArray[i]; - - int hashValue = m_keyArray[i].getHash() & (m_valueArray.capacity() - 1); // New hash value with new mask - m_next[i] = m_hashTable[hashValue]; - m_hashTable[hashValue] = i; - } - } - } - -public: - void insert(const Key& key, const Value& value) - { - int hash = key.getHash() & (m_valueArray.capacity() - 1); - - //replace value if the key is already there - int index = findIndex(key); - if (index != B3_HASH_NULL) - { - m_valueArray[index] = value; - return; - } - - int count = m_valueArray.size(); - int oldCapacity = m_valueArray.capacity(); - m_valueArray.push_back(value); - m_keyArray.push_back(key); - - int newCapacity = m_valueArray.capacity(); - if (oldCapacity < newCapacity) - { - growTables(key); - //hash with new capacity - hash = key.getHash() & (m_valueArray.capacity() - 1); - } - m_next[count] = m_hashTable[hash]; - m_hashTable[hash] = count; - } - - void remove(const Key& key) - { - int hash = key.getHash() & (m_valueArray.capacity() - 1); - - int pairIndex = findIndex(key); - - if (pairIndex == B3_HASH_NULL) - { - return; - } - - // Remove the pair from the hash table. - int index = m_hashTable[hash]; - b3Assert(index != B3_HASH_NULL); - - int previous = B3_HASH_NULL; - while (index != pairIndex) - { - previous = index; - index = m_next[index]; - } - - if (previous != B3_HASH_NULL) - { - b3Assert(m_next[previous] == pairIndex); - m_next[previous] = m_next[pairIndex]; - } - else - { - m_hashTable[hash] = m_next[pairIndex]; - } - - // We now move the last pair into spot of the - // pair being removed. We need to fix the hash - // table indices to support the move. - - int lastPairIndex = m_valueArray.size() - 1; - - // If the removed pair is the last pair, we are done. - if (lastPairIndex == pairIndex) - { - m_valueArray.pop_back(); - m_keyArray.pop_back(); - return; - } - - // Remove the last pair from the hash table. - int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity() - 1); - - index = m_hashTable[lastHash]; - b3Assert(index != B3_HASH_NULL); - - previous = B3_HASH_NULL; - while (index != lastPairIndex) - { - previous = index; - index = m_next[index]; - } - - if (previous != B3_HASH_NULL) - { - b3Assert(m_next[previous] == lastPairIndex); - m_next[previous] = m_next[lastPairIndex]; - } - else - { - m_hashTable[lastHash] = m_next[lastPairIndex]; - } - - // Copy the last pair into the remove pair's spot. - m_valueArray[pairIndex] = m_valueArray[lastPairIndex]; - m_keyArray[pairIndex] = m_keyArray[lastPairIndex]; - - // Insert the last pair into the hash table - m_next[pairIndex] = m_hashTable[lastHash]; - m_hashTable[lastHash] = pairIndex; - - m_valueArray.pop_back(); - m_keyArray.pop_back(); - } - - int size() const - { - return m_valueArray.size(); - } - - const Value* getAtIndex(int index) const - { - b3Assert(index < m_valueArray.size()); - - return &m_valueArray[index]; - } - - Value* getAtIndex(int index) - { - b3Assert(index < m_valueArray.size()); - - return &m_valueArray[index]; - } - - Key getKeyAtIndex(int index) - { - b3Assert(index < m_keyArray.size()); - return m_keyArray[index]; - } - - const Key getKeyAtIndex(int index) const - { - b3Assert(index < m_keyArray.size()); - return m_keyArray[index]; - } - - Value* operator[](const Key& key) - { - return find(key); - } - - const Value* find(const Key& key) const - { - int index = findIndex(key); - if (index == B3_HASH_NULL) - { - return NULL; - } - return &m_valueArray[index]; - } - - Value* find(const Key& key) - { - int index = findIndex(key); - if (index == B3_HASH_NULL) - { - return NULL; - } - return &m_valueArray[index]; - } - - int findIndex(const Key& key) const - { - unsigned int hash = key.getHash() & (m_valueArray.capacity() - 1); - - if (hash >= (unsigned int)m_hashTable.size()) - { - return B3_HASH_NULL; - } - - int index = m_hashTable[hash]; - while ((index != B3_HASH_NULL) && key.equals(m_keyArray[index]) == false) - { - index = m_next[index]; - } - return index; - } - - void clear() - { - m_hashTable.clear(); - m_next.clear(); - m_valueArray.clear(); - m_keyArray.clear(); - } -}; - -#endif //B3_HASH_MAP_H diff --git a/thirdparty/bullet/Bullet3Common/b3Logging.cpp b/thirdparty/bullet/Bullet3Common/b3Logging.cpp deleted file mode 100644 index 9c9f7c09ea9..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Logging.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3Logging.h" - -#include -#include - -#ifdef _WIN32 -#include -#endif //_WIN32 - -void b3PrintfFuncDefault(const char* msg) -{ -#ifdef _WIN32 - OutputDebugStringA(msg); -#endif - printf("%s", msg); - //is this portable? - fflush(stdout); -} - -void b3WarningMessageFuncDefault(const char* msg) -{ -#ifdef _WIN32 - OutputDebugStringA(msg); -#endif - printf("%s", msg); - //is this portable? - fflush(stdout); -} - -void b3ErrorMessageFuncDefault(const char* msg) -{ -#ifdef _WIN32 - OutputDebugStringA(msg); -#endif - printf("%s", msg); - - //is this portable? - fflush(stdout); -} - -static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault; -static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault; -static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault; - -///The developer can route b3Printf output using their own implementation -void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc) -{ - b3s_printfFunc = printfFunc; -} -void b3SetCustomWarningMessageFunc(b3PrintfFunc* warningMessageFunc) -{ - b3s_warningMessageFunc = warningMessageFunc; -} -void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc) -{ - b3s_errorMessageFunc = errorMessageFunc; -} - -//#define B3_MAX_DEBUG_STRING_LENGTH 2048 -#define B3_MAX_DEBUG_STRING_LENGTH 32768 - -void b3OutputPrintfVarArgsInternal(const char* str, ...) -{ - char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0}; - va_list argList; - va_start(argList, str); -#ifdef _MSC_VER - vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#else - vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#endif - (b3s_printfFunc)(strDebug); - va_end(argList); -} -void b3OutputWarningMessageVarArgsInternal(const char* str, ...) -{ - char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0}; - va_list argList; - va_start(argList, str); -#ifdef _MSC_VER - vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#else - vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#endif - (b3s_warningMessageFunc)(strDebug); - va_end(argList); -} -void b3OutputErrorMessageVarArgsInternal(const char* str, ...) -{ - char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0}; - va_list argList; - va_start(argList, str); -#ifdef _MSC_VER - vsprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#else - vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList); -#endif - (b3s_errorMessageFunc)(strDebug); - va_end(argList); -} - -void b3EnterProfileZoneDefault(const char* name) -{ -} -void b3LeaveProfileZoneDefault() -{ -} -static b3EnterProfileZoneFunc* b3s_enterFunc = b3EnterProfileZoneDefault; -static b3LeaveProfileZoneFunc* b3s_leaveFunc = b3LeaveProfileZoneDefault; -void b3EnterProfileZone(const char* name) -{ - (b3s_enterFunc)(name); -} -void b3LeaveProfileZone() -{ - (b3s_leaveFunc)(); -} - -void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc) -{ - b3s_enterFunc = enterFunc; -} -void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc) -{ - b3s_leaveFunc = leaveFunc; -} - -#ifndef _MSC_VER -#undef vsprintf_s -#endif diff --git a/thirdparty/bullet/Bullet3Common/b3Logging.h b/thirdparty/bullet/Bullet3Common/b3Logging.h deleted file mode 100644 index f61149de772..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Logging.h +++ /dev/null @@ -1,74 +0,0 @@ - -#ifndef B3_LOGGING_H -#define B3_LOGGING_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -///We add the do/while so that the statement "if (condition) b3Printf("test"); else {...}" would fail -///You can also customize the message by uncommenting out a different line below -#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__) - //#define b3Printf(...) do {b3OutputPrintfVarArgsInternal("b3Printf[%s,%d]:",__FILE__,__LINE__);b3OutputPrintfVarArgsInternal(__VA_ARGS__); } while(0) - //#define b3Printf b3OutputPrintfVarArgsInternal - //#define b3Printf(...) printf(__VA_ARGS__) - //#define b3Printf(...) -#define b3Warning(...) do{ b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n", __FILE__, __LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__);} while (0) -#define b3Error(...)do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n", __FILE__, __LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__);} while (0) -#ifndef B3_NO_PROFILE - - void b3EnterProfileZone(const char* name); - void b3LeaveProfileZone(); -#ifdef __cplusplus - - class b3ProfileZone - { - public: - b3ProfileZone(const char* name) - { - b3EnterProfileZone(name); - } - - ~b3ProfileZone() - { - b3LeaveProfileZone(); - } - }; - -#define B3_PROFILE(name) b3ProfileZone __profile(name) -#endif - -#else //B3_NO_PROFILE - -#define B3_PROFILE(name) -#define b3StartProfile(a) -#define b3StopProfile - -#endif //#ifndef B3_NO_PROFILE - - typedef void(b3PrintfFunc)(const char* msg); - typedef void(b3WarningMessageFunc)(const char* msg); - typedef void(b3ErrorMessageFunc)(const char* msg); - typedef void(b3EnterProfileZoneFunc)(const char* msg); - typedef void(b3LeaveProfileZoneFunc)(); - - ///The developer can route b3Printf output using their own implementation - void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc); - void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc); - void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc); - - ///Set custom profile zone functions (zones can be nested) - void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc); - void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc); - - ///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version) - void b3OutputPrintfVarArgsInternal(const char* str, ...); - void b3OutputWarningMessageVarArgsInternal(const char* str, ...); - void b3OutputErrorMessageVarArgsInternal(const char* str, ...); - -#ifdef __cplusplus -} -#endif - -#endif //B3_LOGGING_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h b/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h deleted file mode 100644 index 6c46536a811..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Matrix3x3.h +++ /dev/null @@ -1,1354 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_MATRIX3x3_H -#define B3_MATRIX3x3_H - -#include "b3Vector3.h" -#include "b3Quaternion.h" -#include - -#ifdef B3_USE_SSE -//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f}; -#endif - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f}; -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f}; -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f}; -#endif - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3Matrix3x3Data b3Matrix3x3DoubleData -#else -#define b3Matrix3x3Data b3Matrix3x3FloatData -#endif //B3_USE_DOUBLE_PRECISION - -/**@brief The b3Matrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with b3Quaternion, b3Transform and b3Vector3. -* Make sure to only include a pure orthogonal matrix without scaling. */ -B3_ATTRIBUTE_ALIGNED16(class) -b3Matrix3x3 -{ - ///Data storage for the matrix, each vector is a row of the matrix - b3Vector3 m_el[3]; - -public: - /** @brief No initializaion constructor */ - b3Matrix3x3() {} - - // explicit b3Matrix3x3(const b3Scalar *m) { setFromOpenGLSubMatrix(m); } - - /**@brief Constructor from Quaternion */ - explicit b3Matrix3x3(const b3Quaternion& q) { setRotation(q); } - /* - template - Matrix3x3(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { - setEulerYPR(yaw, pitch, roll); - } - */ - /** @brief Constructor with row major formatting */ - b3Matrix3x3(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz, - const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz, - const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz) - { - setValue(xx, xy, xz, - yx, yy, yz, - zx, zy, zz); - } - -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - B3_FORCE_INLINE b3Matrix3x3(const b3SimdFloat4 v0, const b3SimdFloat4 v1, const b3SimdFloat4 v2) - { - m_el[0].mVec128 = v0; - m_el[1].mVec128 = v1; - m_el[2].mVec128 = v2; - } - - B3_FORCE_INLINE b3Matrix3x3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) - { - m_el[0] = v0; - m_el[1] = v1; - m_el[2] = v2; - } - - // Copy constructor - B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& rhs) - { - m_el[0].mVec128 = rhs.m_el[0].mVec128; - m_el[1].mVec128 = rhs.m_el[1].mVec128; - m_el[2].mVec128 = rhs.m_el[2].mVec128; - } - - // Assignment Operator - B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& m) - { - m_el[0].mVec128 = m.m_el[0].mVec128; - m_el[1].mVec128 = m.m_el[1].mVec128; - m_el[2].mVec128 = m.m_el[2].mVec128; - - return *this; - } - -#else - - /** @brief Copy constructor */ - B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& other) - { - m_el[0] = other.m_el[0]; - m_el[1] = other.m_el[1]; - m_el[2] = other.m_el[2]; - } - - /** @brief Assignment Operator */ - B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& other) - { - m_el[0] = other.m_el[0]; - m_el[1] = other.m_el[1]; - m_el[2] = other.m_el[2]; - return *this; - } - -#endif - - /** @brief Get a column of the matrix as a vector - * @param i Column number 0 indexed */ - B3_FORCE_INLINE b3Vector3 getColumn(int i) const - { - return b3MakeVector3(m_el[0][i], m_el[1][i], m_el[2][i]); - } - - /** @brief Get a row of the matrix as a vector - * @param i Row number 0 indexed */ - B3_FORCE_INLINE const b3Vector3& getRow(int i) const - { - b3FullAssert(0 <= i && i < 3); - return m_el[i]; - } - - /** @brief Get a mutable reference to a row of the matrix as a vector - * @param i Row number 0 indexed */ - B3_FORCE_INLINE b3Vector3& operator[](int i) - { - b3FullAssert(0 <= i && i < 3); - return m_el[i]; - } - - /** @brief Get a const reference to a row of the matrix as a vector - * @param i Row number 0 indexed */ - B3_FORCE_INLINE const b3Vector3& operator[](int i) const - { - b3FullAssert(0 <= i && i < 3); - return m_el[i]; - } - - /** @brief Multiply by the target matrix on the right - * @param m Rotation matrix to be applied - * Equivilant to this = this * m */ - b3Matrix3x3& operator*=(const b3Matrix3x3& m); - - /** @brief Adds by the target matrix on the right - * @param m matrix to be applied - * Equivilant to this = this + m */ - b3Matrix3x3& operator+=(const b3Matrix3x3& m); - - /** @brief Substractss by the target matrix on the right - * @param m matrix to be applied - * Equivilant to this = this - m */ - b3Matrix3x3& operator-=(const b3Matrix3x3& m); - - /** @brief Set from the rotational part of a 4x4 OpenGL matrix - * @param m A pointer to the beginning of the array of scalars*/ - void setFromOpenGLSubMatrix(const b3Scalar* m) - { - m_el[0].setValue(m[0], m[4], m[8]); - m_el[1].setValue(m[1], m[5], m[9]); - m_el[2].setValue(m[2], m[6], m[10]); - } - /** @brief Set the values of the matrix explicitly (row major) - * @param xx Top left - * @param xy Top Middle - * @param xz Top Right - * @param yx Middle Left - * @param yy Middle Middle - * @param yz Middle Right - * @param zx Bottom Left - * @param zy Bottom Middle - * @param zz Bottom Right*/ - void setValue(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz, - const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz, - const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz) - { - m_el[0].setValue(xx, xy, xz); - m_el[1].setValue(yx, yy, yz); - m_el[2].setValue(zx, zy, zz); - } - - /** @brief Set the matrix from a quaternion - * @param q The Quaternion to match */ - void setRotation(const b3Quaternion& q) - { - b3Scalar d = q.length2(); - b3FullAssert(d != b3Scalar(0.0)); - b3Scalar s = b3Scalar(2.0) / d; - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs, Q = q.get128(); - __m128i Qi = b3CastfTo128i(Q); - __m128 Y, Z; - __m128 V1, V2, V3; - __m128 V11, V21, V31; - __m128 NQ = _mm_xor_ps(Q, b3vMzeroMask); - __m128i NQi = b3CastfTo128i(NQ); - - V1 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 2, 3))); // Y X Z W - V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0, 0, 1, 3)); // -X -X Y W - V3 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(2, 1, 0, 3))); // Z Y X W - V1 = _mm_xor_ps(V1, b3vMPPP); // change the sign of the first element - - V11 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 1, 0, 3))); // Y Y X W - V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W - V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(0, 2, 0, 3)); // X Z -X -W - - V2 = V2 * V1; // - V1 = V1 * V11; // - V3 = V3 * V31; // - - V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2, 3, 1, 3)); // -Z -W Y W - V11 = V11 * V21; // - V21 = _mm_xor_ps(V21, b3vMPPP); // change the sign of the first element - V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3, 3, 1, 3)); // W W -Y -W - V31 = _mm_xor_ps(V31, b3vMPPP); // change the sign of the first element - Y = b3CastiTo128f(_mm_shuffle_epi32(NQi, B3_SHUFFLE(3, 2, 0, 3))); // -W -Z -X -W - Z = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 1, 3))); // Y X Y W - - vs = _mm_load_ss(&s); - V21 = V21 * Y; - V31 = V31 * Z; - - V1 = V1 + V11; - V2 = V2 + V21; - V3 = V3 + V31; - - vs = b3_splat3_ps(vs, 0); - // s ready - V1 = V1 * vs; - V2 = V2 * vs; - V3 = V3 * vs; - - V1 = V1 + b3v1000; - V2 = V2 + b3v0100; - V3 = V3 + b3v0010; - - m_el[0] = b3MakeVector3(V1); - m_el[1] = b3MakeVector3(V2); - m_el[2] = b3MakeVector3(V3); -#else - b3Scalar xs = q.getX() * s, ys = q.getY() * s, zs = q.getZ() * s; - b3Scalar wx = q.getW() * xs, wy = q.getW() * ys, wz = q.getW() * zs; - b3Scalar xx = q.getX() * xs, xy = q.getX() * ys, xz = q.getX() * zs; - b3Scalar yy = q.getY() * ys, yz = q.getY() * zs, zz = q.getZ() * zs; - setValue( - b3Scalar(1.0) - (yy + zz), xy - wz, xz + wy, - xy + wz, b3Scalar(1.0) - (xx + zz), yz - wx, - xz - wy, yz + wx, b3Scalar(1.0) - (xx + yy)); -#endif - } - - /** @brief Set the matrix from euler angles using YPR around YXZ respectively - * @param yaw Yaw about Y axis - * @param pitch Pitch about X axis - * @param roll Roll about Z axis - */ - void setEulerYPR(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { - setEulerZYX(roll, pitch, yaw); - } - - /** @brief Set the matrix from euler angles YPR around ZYX axes - * @param eulerX Roll about X axis - * @param eulerY Pitch around Y axis - * @param eulerZ Yaw aboud Z axis - * - * These angles are used to produce a rotation matrix. The euler - * angles are applied in ZYX order. I.e a vector is first rotated - * about X then Y and then Z - **/ - void setEulerZYX(b3Scalar eulerX, b3Scalar eulerY, b3Scalar eulerZ) - { - ///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code - b3Scalar ci(b3Cos(eulerX)); - b3Scalar cj(b3Cos(eulerY)); - b3Scalar ch(b3Cos(eulerZ)); - b3Scalar si(b3Sin(eulerX)); - b3Scalar sj(b3Sin(eulerY)); - b3Scalar sh(b3Sin(eulerZ)); - b3Scalar cc = ci * ch; - b3Scalar cs = ci * sh; - b3Scalar sc = si * ch; - b3Scalar ss = si * sh; - - setValue(cj * ch, sj * sc - cs, sj * cc + ss, - cj * sh, sj * ss + cc, sj * cs - sc, - -sj, cj * si, cj * ci); - } - - /**@brief Set the matrix to the identity */ - void setIdentity() - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - m_el[0] = b3MakeVector3(b3v1000); - m_el[1] = b3MakeVector3(b3v0100); - m_el[2] = b3MakeVector3(b3v0010); -#else - setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0)); -#endif - } - - static const b3Matrix3x3& getIdentity() - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - static const b3Matrix3x3 - identityMatrix(b3v1000, b3v0100, b3v0010); -#else - static const b3Matrix3x3 - identityMatrix( - b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0), - b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0)); -#endif - return identityMatrix; - } - - /**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective - * @param m The array to be filled */ - void getOpenGLSubMatrix(b3Scalar * m) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 v0 = m_el[0].mVec128; - __m128 v1 = m_el[1].mVec128; - __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2 - __m128* vm = (__m128*)m; - __m128 vT; - - v2 = _mm_and_ps(v2, b3vFFF0fMask); // x2 y2 z2 0 - - vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * * - v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1 - - v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0 - v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0 - v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT))); // z0 z1 z2 0 - - vm[0] = v0; - vm[1] = v1; - vm[2] = v2; -#elif defined(B3_USE_NEON) - // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions. - static const uint32x2_t zMask = (const uint32x2_t){-1, 0}; - float32x4_t* vm = (float32x4_t*)m; - float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1} - float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0} - float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]); - float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]); - float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask); - float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0 - - vm[0] = v0; - vm[1] = v1; - vm[2] = v2; -#else - m[0] = b3Scalar(m_el[0].getX()); - m[1] = b3Scalar(m_el[1].getX()); - m[2] = b3Scalar(m_el[2].getX()); - m[3] = b3Scalar(0.0); - m[4] = b3Scalar(m_el[0].getY()); - m[5] = b3Scalar(m_el[1].getY()); - m[6] = b3Scalar(m_el[2].getY()); - m[7] = b3Scalar(0.0); - m[8] = b3Scalar(m_el[0].getZ()); - m[9] = b3Scalar(m_el[1].getZ()); - m[10] = b3Scalar(m_el[2].getZ()); - m[11] = b3Scalar(0.0); -#endif - } - - /**@brief Get the matrix represented as a quaternion - * @param q The quaternion which will be set */ - void getRotation(b3Quaternion & q) const - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ(); - b3Scalar s, x; - - union { - b3SimdFloat4 vec; - b3Scalar f[4]; - } temp; - - if (trace > b3Scalar(0.0)) - { - x = trace + b3Scalar(1.0); - - temp.f[0] = m_el[2].getY() - m_el[1].getZ(); - temp.f[1] = m_el[0].getZ() - m_el[2].getX(); - temp.f[2] = m_el[1].getX() - m_el[0].getY(); - temp.f[3] = x; - //temp.f[3]= s * b3Scalar(0.5); - } - else - { - int i, j, k; - if (m_el[0].getX() < m_el[1].getY()) - { - if (m_el[1].getY() < m_el[2].getZ()) - { - i = 2; - j = 0; - k = 1; - } - else - { - i = 1; - j = 2; - k = 0; - } - } - else - { - if (m_el[0].getX() < m_el[2].getZ()) - { - i = 2; - j = 0; - k = 1; - } - else - { - i = 0; - j = 1; - k = 2; - } - } - - x = m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0); - - temp.f[3] = (m_el[k][j] - m_el[j][k]); - temp.f[j] = (m_el[j][i] + m_el[i][j]); - temp.f[k] = (m_el[k][i] + m_el[i][k]); - temp.f[i] = x; - //temp.f[i] = s * b3Scalar(0.5); - } - - s = b3Sqrt(x); - q.set128(temp.vec); - s = b3Scalar(0.5) / s; - - q *= s; -#else - b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ(); - - b3Scalar temp[4]; - - if (trace > b3Scalar(0.0)) - { - b3Scalar s = b3Sqrt(trace + b3Scalar(1.0)); - temp[3] = (s * b3Scalar(0.5)); - s = b3Scalar(0.5) / s; - - temp[0] = ((m_el[2].getY() - m_el[1].getZ()) * s); - temp[1] = ((m_el[0].getZ() - m_el[2].getX()) * s); - temp[2] = ((m_el[1].getX() - m_el[0].getY()) * s); - } - else - { - int i = m_el[0].getX() < m_el[1].getY() ? (m_el[1].getY() < m_el[2].getZ() ? 2 : 1) : (m_el[0].getX() < m_el[2].getZ() ? 2 : 0); - int j = (i + 1) % 3; - int k = (i + 2) % 3; - - b3Scalar s = b3Sqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0)); - temp[i] = s * b3Scalar(0.5); - s = b3Scalar(0.5) / s; - - temp[3] = (m_el[k][j] - m_el[j][k]) * s; - temp[j] = (m_el[j][i] + m_el[i][j]) * s; - temp[k] = (m_el[k][i] + m_el[i][k]) * s; - } - q.setValue(temp[0], temp[1], temp[2], temp[3]); -#endif - } - - /**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR - * @param yaw Yaw around Y axis - * @param pitch Pitch around X axis - * @param roll around Z axis */ - void getEulerYPR(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll) const - { - // first use the normal calculus - yaw = b3Scalar(b3Atan2(m_el[1].getX(), m_el[0].getX())); - pitch = b3Scalar(b3Asin(-m_el[2].getX())); - roll = b3Scalar(b3Atan2(m_el[2].getY(), m_el[2].getZ())); - - // on pitch = +/-HalfPI - if (b3Fabs(pitch) == B3_HALF_PI) - { - if (yaw > 0) - yaw -= B3_PI; - else - yaw += B3_PI; - - if (roll > 0) - roll -= B3_PI; - else - roll += B3_PI; - } - }; - - /**@brief Get the matrix represented as euler angles around ZYX - * @param yaw Yaw around X axis - * @param pitch Pitch around Y axis - * @param roll around X axis - * @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values*/ - void getEulerZYX(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll, unsigned int solution_number = 1) const - { - struct Euler - { - b3Scalar yaw; - b3Scalar pitch; - b3Scalar roll; - }; - - Euler euler_out; - Euler euler_out2; //second solution - //get the pointer to the raw data - - // Check that pitch is not at a singularity - if (b3Fabs(m_el[2].getX()) >= 1) - { - euler_out.yaw = 0; - euler_out2.yaw = 0; - - // From difference of angles formula - b3Scalar delta = b3Atan2(m_el[0].getX(), m_el[0].getZ()); - if (m_el[2].getX() > 0) //gimbal locked up - { - euler_out.pitch = B3_PI / b3Scalar(2.0); - euler_out2.pitch = B3_PI / b3Scalar(2.0); - euler_out.roll = euler_out.pitch + delta; - euler_out2.roll = euler_out.pitch + delta; - } - else // gimbal locked down - { - euler_out.pitch = -B3_PI / b3Scalar(2.0); - euler_out2.pitch = -B3_PI / b3Scalar(2.0); - euler_out.roll = -euler_out.pitch + delta; - euler_out2.roll = -euler_out.pitch + delta; - } - } - else - { - euler_out.pitch = -b3Asin(m_el[2].getX()); - euler_out2.pitch = B3_PI - euler_out.pitch; - - euler_out.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out.pitch), - m_el[2].getZ() / b3Cos(euler_out.pitch)); - euler_out2.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out2.pitch), - m_el[2].getZ() / b3Cos(euler_out2.pitch)); - - euler_out.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out.pitch), - m_el[0].getX() / b3Cos(euler_out.pitch)); - euler_out2.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out2.pitch), - m_el[0].getX() / b3Cos(euler_out2.pitch)); - } - - if (solution_number == 1) - { - yaw = euler_out.yaw; - pitch = euler_out.pitch; - roll = euler_out.roll; - } - else - { - yaw = euler_out2.yaw; - pitch = euler_out2.pitch; - roll = euler_out2.roll; - } - } - - /**@brief Create a scaled copy of the matrix - * @param s Scaling vector The elements of the vector will scale each column */ - - b3Matrix3x3 scaled(const b3Vector3& s) const - { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return b3Matrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s); -#else - return b3Matrix3x3( - m_el[0].getX() * s.getX(), m_el[0].getY() * s.getY(), m_el[0].getZ() * s.getZ(), - m_el[1].getX() * s.getX(), m_el[1].getY() * s.getY(), m_el[1].getZ() * s.getZ(), - m_el[2].getX() * s.getX(), m_el[2].getY() * s.getY(), m_el[2].getZ() * s.getZ()); -#endif - } - - /**@brief Return the determinant of the matrix */ - b3Scalar determinant() const; - /**@brief Return the adjoint of the matrix */ - b3Matrix3x3 adjoint() const; - /**@brief Return the matrix with all values non negative */ - b3Matrix3x3 absolute() const; - /**@brief Return the transpose of the matrix */ - b3Matrix3x3 transpose() const; - /**@brief Return the inverse of the matrix */ - b3Matrix3x3 inverse() const; - - b3Matrix3x3 transposeTimes(const b3Matrix3x3& m) const; - b3Matrix3x3 timesTranspose(const b3Matrix3x3& m) const; - - B3_FORCE_INLINE b3Scalar tdotx(const b3Vector3& v) const - { - return m_el[0].getX() * v.getX() + m_el[1].getX() * v.getY() + m_el[2].getX() * v.getZ(); - } - B3_FORCE_INLINE b3Scalar tdoty(const b3Vector3& v) const - { - return m_el[0].getY() * v.getX() + m_el[1].getY() * v.getY() + m_el[2].getY() * v.getZ(); - } - B3_FORCE_INLINE b3Scalar tdotz(const b3Vector3& v) const - { - return m_el[0].getZ() * v.getX() + m_el[1].getZ() * v.getY() + m_el[2].getZ() * v.getZ(); - } - - /**@brief diagonalizes this matrix by the Jacobi method. - * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original - * coordinate system, i.e., old_this = rot * new_this * rot^T. - * @param threshold See iteration - * @param iteration The iteration stops when all off-diagonal elements are less than the threshold multiplied - * by the sum of the absolute values of the diagonal, or when maxSteps have been executed. - * - * Note that this matrix is assumed to be symmetric. - */ - void diagonalize(b3Matrix3x3 & rot, b3Scalar threshold, int maxSteps) - { - rot.setIdentity(); - for (int step = maxSteps; step > 0; step--) - { - // find off-diagonal element [p][q] with largest magnitude - int p = 0; - int q = 1; - int r = 2; - b3Scalar max = b3Fabs(m_el[0][1]); - b3Scalar v = b3Fabs(m_el[0][2]); - if (v > max) - { - q = 2; - r = 1; - max = v; - } - v = b3Fabs(m_el[1][2]); - if (v > max) - { - p = 1; - q = 2; - r = 0; - max = v; - } - - b3Scalar t = threshold * (b3Fabs(m_el[0][0]) + b3Fabs(m_el[1][1]) + b3Fabs(m_el[2][2])); - if (max <= t) - { - if (max <= B3_EPSILON * t) - { - return; - } - step = 1; - } - - // compute Jacobi rotation J which leads to a zero for element [p][q] - b3Scalar mpq = m_el[p][q]; - b3Scalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq); - b3Scalar theta2 = theta * theta; - b3Scalar cos; - b3Scalar sin; - if (theta2 * theta2 < b3Scalar(10 / B3_EPSILON)) - { - t = (theta >= 0) ? 1 / (theta + b3Sqrt(1 + theta2)) - : 1 / (theta - b3Sqrt(1 + theta2)); - cos = 1 / b3Sqrt(1 + t * t); - sin = cos * t; - } - else - { - // approximation for large theta-value, i.e., a nearly diagonal matrix - t = 1 / (theta * (2 + b3Scalar(0.5) / theta2)); - cos = 1 - b3Scalar(0.5) * t * t; - sin = cos * t; - } - - // apply rotation to matrix (this = J^T * this * J) - m_el[p][q] = m_el[q][p] = 0; - m_el[p][p] -= t * mpq; - m_el[q][q] += t * mpq; - b3Scalar mrp = m_el[r][p]; - b3Scalar mrq = m_el[r][q]; - m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq; - m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp; - - // apply rotation to rot (rot = rot * J) - for (int i = 0; i < 3; i++) - { - b3Vector3& row = rot[i]; - mrp = row[p]; - mrq = row[q]; - row[p] = cos * mrp - sin * mrq; - row[q] = cos * mrq + sin * mrp; - } - } - } - - /**@brief Calculate the matrix cofactor - * @param r1 The first row to use for calculating the cofactor - * @param c1 The first column to use for calculating the cofactor - * @param r1 The second row to use for calculating the cofactor - * @param c1 The second column to use for calculating the cofactor - * See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details - */ - b3Scalar cofac(int r1, int c1, int r2, int c2) const - { - return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1]; - } - - void serialize(struct b3Matrix3x3Data & dataOut) const; - - void serializeFloat(struct b3Matrix3x3FloatData & dataOut) const; - - void deSerialize(const struct b3Matrix3x3Data& dataIn); - - void deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn); - - void deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn); -}; - -B3_FORCE_INLINE b3Matrix3x3& -b3Matrix3x3::operator*=(const b3Matrix3x3& m) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 rv00, rv01, rv02; - __m128 rv10, rv11, rv12; - __m128 rv20, rv21, rv22; - __m128 mv0, mv1, mv2; - - rv02 = m_el[0].mVec128; - rv12 = m_el[1].mVec128; - rv22 = m_el[2].mVec128; - - mv0 = _mm_and_ps(m[0].mVec128, b3vFFF0fMask); - mv1 = _mm_and_ps(m[1].mVec128, b3vFFF0fMask); - mv2 = _mm_and_ps(m[2].mVec128, b3vFFF0fMask); - - // rv0 - rv00 = b3_splat_ps(rv02, 0); - rv01 = b3_splat_ps(rv02, 1); - rv02 = b3_splat_ps(rv02, 2); - - rv00 = _mm_mul_ps(rv00, mv0); - rv01 = _mm_mul_ps(rv01, mv1); - rv02 = _mm_mul_ps(rv02, mv2); - - // rv1 - rv10 = b3_splat_ps(rv12, 0); - rv11 = b3_splat_ps(rv12, 1); - rv12 = b3_splat_ps(rv12, 2); - - rv10 = _mm_mul_ps(rv10, mv0); - rv11 = _mm_mul_ps(rv11, mv1); - rv12 = _mm_mul_ps(rv12, mv2); - - // rv2 - rv20 = b3_splat_ps(rv22, 0); - rv21 = b3_splat_ps(rv22, 1); - rv22 = b3_splat_ps(rv22, 2); - - rv20 = _mm_mul_ps(rv20, mv0); - rv21 = _mm_mul_ps(rv21, mv1); - rv22 = _mm_mul_ps(rv22, mv2); - - rv00 = _mm_add_ps(rv00, rv01); - rv10 = _mm_add_ps(rv10, rv11); - rv20 = _mm_add_ps(rv20, rv21); - - m_el[0].mVec128 = _mm_add_ps(rv00, rv02); - m_el[1].mVec128 = _mm_add_ps(rv10, rv12); - m_el[2].mVec128 = _mm_add_ps(rv20, rv22); - -#elif defined(B3_USE_NEON) - - float32x4_t rv0, rv1, rv2; - float32x4_t v0, v1, v2; - float32x4_t mv0, mv1, mv2; - - v0 = m_el[0].mVec128; - v1 = m_el[1].mVec128; - v2 = m_el[2].mVec128; - - mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask); - mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask); - mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask); - - rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0); - rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0); - rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0); - - rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1); - rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1); - rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1); - - rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0); - rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0); - rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0); - - m_el[0].mVec128 = rv0; - m_el[1].mVec128 = rv1; - m_el[2].mVec128 = rv2; -#else - setValue( - m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]), - m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]), - m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2])); -#endif - return *this; -} - -B3_FORCE_INLINE b3Matrix3x3& -b3Matrix3x3::operator+=(const b3Matrix3x3& m) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128; - m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128; - m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128; -#else - setValue( - m_el[0][0] + m.m_el[0][0], - m_el[0][1] + m.m_el[0][1], - m_el[0][2] + m.m_el[0][2], - m_el[1][0] + m.m_el[1][0], - m_el[1][1] + m.m_el[1][1], - m_el[1][2] + m.m_el[1][2], - m_el[2][0] + m.m_el[2][0], - m_el[2][1] + m.m_el[2][1], - m_el[2][2] + m.m_el[2][2]); -#endif - return *this; -} - -B3_FORCE_INLINE b3Matrix3x3 -operator*(const b3Matrix3x3& m, const b3Scalar& k) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 vk = b3_splat_ps(_mm_load_ss((float*)&k), 0x80); - return b3Matrix3x3( - _mm_mul_ps(m[0].mVec128, vk), - _mm_mul_ps(m[1].mVec128, vk), - _mm_mul_ps(m[2].mVec128, vk)); -#elif defined(B3_USE_NEON) - return b3Matrix3x3( - vmulq_n_f32(m[0].mVec128, k), - vmulq_n_f32(m[1].mVec128, k), - vmulq_n_f32(m[2].mVec128, k)); -#else - return b3Matrix3x3( - m[0].getX() * k, m[0].getY() * k, m[0].getZ() * k, - m[1].getX() * k, m[1].getY() * k, m[1].getZ() * k, - m[2].getX() * k, m[2].getY() * k, m[2].getZ() * k); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -operator+(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return b3Matrix3x3( - m1[0].mVec128 + m2[0].mVec128, - m1[1].mVec128 + m2[1].mVec128, - m1[2].mVec128 + m2[2].mVec128); -#else - return b3Matrix3x3( - m1[0][0] + m2[0][0], - m1[0][1] + m2[0][1], - m1[0][2] + m2[0][2], - - m1[1][0] + m2[1][0], - m1[1][1] + m2[1][1], - m1[1][2] + m2[1][2], - - m1[2][0] + m2[2][0], - m1[2][1] + m2[2][1], - m1[2][2] + m2[2][2]); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -operator-(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return b3Matrix3x3( - m1[0].mVec128 - m2[0].mVec128, - m1[1].mVec128 - m2[1].mVec128, - m1[2].mVec128 - m2[2].mVec128); -#else - return b3Matrix3x3( - m1[0][0] - m2[0][0], - m1[0][1] - m2[0][1], - m1[0][2] - m2[0][2], - - m1[1][0] - m2[1][0], - m1[1][1] - m2[1][1], - m1[1][2] - m2[1][2], - - m1[2][0] - m2[2][0], - m1[2][1] - m2[2][1], - m1[2][2] - m2[2][2]); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3& -b3Matrix3x3::operator-=(const b3Matrix3x3& m) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128; - m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128; - m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128; -#else - setValue( - m_el[0][0] - m.m_el[0][0], - m_el[0][1] - m.m_el[0][1], - m_el[0][2] - m.m_el[0][2], - m_el[1][0] - m.m_el[1][0], - m_el[1][1] - m.m_el[1][1], - m_el[1][2] - m.m_el[1][2], - m_el[2][0] - m.m_el[2][0], - m_el[2][1] - m.m_el[2][1], - m_el[2][2] - m.m_el[2][2]); -#endif - return *this; -} - -B3_FORCE_INLINE b3Scalar -b3Matrix3x3::determinant() const -{ - return b3Triple((*this)[0], (*this)[1], (*this)[2]); -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::absolute() const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - return b3Matrix3x3( - _mm_and_ps(m_el[0].mVec128, b3vAbsfMask), - _mm_and_ps(m_el[1].mVec128, b3vAbsfMask), - _mm_and_ps(m_el[2].mVec128, b3vAbsfMask)); -#elif defined(B3_USE_NEON) - return b3Matrix3x3( - (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, b3v3AbsMask), - (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, b3v3AbsMask), - (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, b3v3AbsMask)); -#else - return b3Matrix3x3( - b3Fabs(m_el[0].getX()), b3Fabs(m_el[0].getY()), b3Fabs(m_el[0].getZ()), - b3Fabs(m_el[1].getX()), b3Fabs(m_el[1].getY()), b3Fabs(m_el[1].getZ()), - b3Fabs(m_el[2].getX()), b3Fabs(m_el[2].getY()), b3Fabs(m_el[2].getZ())); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::transpose() const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 v0 = m_el[0].mVec128; - __m128 v1 = m_el[1].mVec128; - __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2 - __m128 vT; - - v2 = _mm_and_ps(v2, b3vFFF0fMask); // x2 y2 z2 0 - - vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * * - v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1 - - v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0 - v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0 - v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT))); // z0 z1 z2 0 - - return b3Matrix3x3(v0, v1, v2); -#elif defined(B3_USE_NEON) - // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions. - static const uint32x2_t zMask = (const uint32x2_t){-1, 0}; - float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1} - float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0} - float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]); - float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]); - float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask); - float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0 - return b3Matrix3x3(v0, v1, v2); -#else - return b3Matrix3x3(m_el[0].getX(), m_el[1].getX(), m_el[2].getX(), - m_el[0].getY(), m_el[1].getY(), m_el[2].getY(), - m_el[0].getZ(), m_el[1].getZ(), m_el[2].getZ()); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::adjoint() const -{ - return b3Matrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2), - cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0), - cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1)); -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::inverse() const -{ - b3Vector3 co = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1)); - b3Scalar det = (*this)[0].dot(co); - b3FullAssert(det != b3Scalar(0.0)); - b3Scalar s = b3Scalar(1.0) / det; - return b3Matrix3x3(co.getX() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s, - co.getY() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s, - co.getZ() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s); -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::transposeTimes(const b3Matrix3x3& m) const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - // zeros w - // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL }; - __m128 row = m_el[0].mVec128; - __m128 m0 = _mm_and_ps(m.getRow(0).mVec128, b3vFFF0fMask); - __m128 m1 = _mm_and_ps(m.getRow(1).mVec128, b3vFFF0fMask); - __m128 m2 = _mm_and_ps(m.getRow(2).mVec128, b3vFFF0fMask); - __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0)); - __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55)); - __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa)); - row = m_el[1].mVec128; - r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0))); - r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55))); - r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa))); - row = m_el[2].mVec128; - r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0))); - r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55))); - r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa))); - return b3Matrix3x3(r0, r1, r2); - -#elif defined B3_USE_NEON - // zeros w - static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0}; - float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask); - float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask); - float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask); - float32x4_t row = m_el[0].mVec128; - float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0); - float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1); - float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0); - row = m_el[1].mVec128; - r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0); - r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1); - r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0); - row = m_el[2].mVec128; - r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0); - r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1); - r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0); - return b3Matrix3x3(r0, r1, r2); -#else - return b3Matrix3x3( - m_el[0].getX() * m[0].getX() + m_el[1].getX() * m[1].getX() + m_el[2].getX() * m[2].getX(), - m_el[0].getX() * m[0].getY() + m_el[1].getX() * m[1].getY() + m_el[2].getX() * m[2].getY(), - m_el[0].getX() * m[0].getZ() + m_el[1].getX() * m[1].getZ() + m_el[2].getX() * m[2].getZ(), - m_el[0].getY() * m[0].getX() + m_el[1].getY() * m[1].getX() + m_el[2].getY() * m[2].getX(), - m_el[0].getY() * m[0].getY() + m_el[1].getY() * m[1].getY() + m_el[2].getY() * m[2].getY(), - m_el[0].getY() * m[0].getZ() + m_el[1].getY() * m[1].getZ() + m_el[2].getY() * m[2].getZ(), - m_el[0].getZ() * m[0].getX() + m_el[1].getZ() * m[1].getX() + m_el[2].getZ() * m[2].getX(), - m_el[0].getZ() * m[0].getY() + m_el[1].getZ() * m[1].getY() + m_el[2].getZ() * m[2].getY(), - m_el[0].getZ() * m[0].getZ() + m_el[1].getZ() * m[1].getZ() + m_el[2].getZ() * m[2].getZ()); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -b3Matrix3x3::timesTranspose(const b3Matrix3x3& m) const -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 a0 = m_el[0].mVec128; - __m128 a1 = m_el[1].mVec128; - __m128 a2 = m_el[2].mVec128; - - b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here - __m128 mx = mT[0].mVec128; - __m128 my = mT[1].mVec128; - __m128 mz = mT[2].mVec128; - - __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00)); - __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00)); - __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00)); - r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55))); - r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55))); - r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55))); - r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa))); - r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa))); - r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa))); - return b3Matrix3x3(r0, r1, r2); - -#elif defined B3_USE_NEON - float32x4_t a0 = m_el[0].mVec128; - float32x4_t a1 = m_el[1].mVec128; - float32x4_t a2 = m_el[2].mVec128; - - b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here - float32x4_t mx = mT[0].mVec128; - float32x4_t my = mT[1].mVec128; - float32x4_t mz = mT[2].mVec128; - - float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0); - float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0); - float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0); - r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1); - r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1); - r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1); - r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0); - r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0); - r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0); - return b3Matrix3x3(r0, r1, r2); - -#else - return b3Matrix3x3( - m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]), - m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]), - m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2])); -#endif -} - -B3_FORCE_INLINE b3Vector3 -operator*(const b3Matrix3x3& m, const b3Vector3& v) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - return v.dot3(m[0], m[1], m[2]); -#else - return b3MakeVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v)); -#endif -} - -B3_FORCE_INLINE b3Vector3 -operator*(const b3Vector3& v, const b3Matrix3x3& m) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - const __m128 vv = v.mVec128; - - __m128 c0 = b3_splat_ps(vv, 0); - __m128 c1 = b3_splat_ps(vv, 1); - __m128 c2 = b3_splat_ps(vv, 2); - - c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, b3vFFF0fMask)); - c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, b3vFFF0fMask)); - c0 = _mm_add_ps(c0, c1); - c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, b3vFFF0fMask)); - - return b3MakeVector3(_mm_add_ps(c0, c2)); -#elif defined(B3_USE_NEON) - const float32x4_t vv = v.mVec128; - const float32x2_t vlo = vget_low_f32(vv); - const float32x2_t vhi = vget_high_f32(vv); - - float32x4_t c0, c1, c2; - - c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask); - c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask); - c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask); - - c0 = vmulq_lane_f32(c0, vlo, 0); - c1 = vmulq_lane_f32(c1, vlo, 1); - c2 = vmulq_lane_f32(c2, vhi, 0); - c0 = vaddq_f32(c0, c1); - c0 = vaddq_f32(c0, c2); - - return b3MakeVector3(c0); -#else - return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v)); -#endif -} - -B3_FORCE_INLINE b3Matrix3x3 -operator*(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - __m128 m10 = m1[0].mVec128; - __m128 m11 = m1[1].mVec128; - __m128 m12 = m1[2].mVec128; - - __m128 m2v = _mm_and_ps(m2[0].mVec128, b3vFFF0fMask); - - __m128 c0 = b3_splat_ps(m10, 0); - __m128 c1 = b3_splat_ps(m11, 0); - __m128 c2 = b3_splat_ps(m12, 0); - - c0 = _mm_mul_ps(c0, m2v); - c1 = _mm_mul_ps(c1, m2v); - c2 = _mm_mul_ps(c2, m2v); - - m2v = _mm_and_ps(m2[1].mVec128, b3vFFF0fMask); - - __m128 c0_1 = b3_splat_ps(m10, 1); - __m128 c1_1 = b3_splat_ps(m11, 1); - __m128 c2_1 = b3_splat_ps(m12, 1); - - c0_1 = _mm_mul_ps(c0_1, m2v); - c1_1 = _mm_mul_ps(c1_1, m2v); - c2_1 = _mm_mul_ps(c2_1, m2v); - - m2v = _mm_and_ps(m2[2].mVec128, b3vFFF0fMask); - - c0 = _mm_add_ps(c0, c0_1); - c1 = _mm_add_ps(c1, c1_1); - c2 = _mm_add_ps(c2, c2_1); - - m10 = b3_splat_ps(m10, 2); - m11 = b3_splat_ps(m11, 2); - m12 = b3_splat_ps(m12, 2); - - m10 = _mm_mul_ps(m10, m2v); - m11 = _mm_mul_ps(m11, m2v); - m12 = _mm_mul_ps(m12, m2v); - - c0 = _mm_add_ps(c0, m10); - c1 = _mm_add_ps(c1, m11); - c2 = _mm_add_ps(c2, m12); - - return b3Matrix3x3(c0, c1, c2); - -#elif defined(B3_USE_NEON) - - float32x4_t rv0, rv1, rv2; - float32x4_t v0, v1, v2; - float32x4_t mv0, mv1, mv2; - - v0 = m1[0].mVec128; - v1 = m1[1].mVec128; - v2 = m1[2].mVec128; - - mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, b3vFFF0Mask); - mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, b3vFFF0Mask); - mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, b3vFFF0Mask); - - rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0); - rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0); - rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0); - - rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1); - rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1); - rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1); - - rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0); - rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0); - rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0); - - return b3Matrix3x3(rv0, rv1, rv2); - -#else - return b3Matrix3x3( - m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]), - m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]), - m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2])); -#endif -} - -/* -B3_FORCE_INLINE b3Matrix3x3 b3MultTransposeLeft(const b3Matrix3x3& m1, const b3Matrix3x3& m2) { -return b3Matrix3x3( -m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0], -m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1], -m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2], -m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0], -m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1], -m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2], -m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0], -m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1], -m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]); -} -*/ - -/**@brief Equality operator between two matrices -* It will test all elements are equal. */ -B3_FORCE_INLINE bool operator==(const b3Matrix3x3& m1, const b3Matrix3x3& m2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - __m128 c0, c1, c2; - - c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128); - c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128); - c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128); - - c0 = _mm_and_ps(c0, c1); - c0 = _mm_and_ps(c0, c2); - - return (0x7 == _mm_movemask_ps((__m128)c0)); -#else - return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] && - m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] && - m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]); -#endif -} - -///for serialization -struct b3Matrix3x3FloatData -{ - b3Vector3FloatData m_el[3]; -}; - -///for serialization -struct b3Matrix3x3DoubleData -{ - b3Vector3DoubleData m_el[3]; -}; - -B3_FORCE_INLINE void b3Matrix3x3::serialize(struct b3Matrix3x3Data& dataOut) const -{ - for (int i = 0; i < 3; i++) - m_el[i].serialize(dataOut.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::serializeFloat(struct b3Matrix3x3FloatData& dataOut) const -{ - for (int i = 0; i < 3; i++) - m_el[i].serializeFloat(dataOut.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::deSerialize(const struct b3Matrix3x3Data& dataIn) -{ - for (int i = 0; i < 3; i++) - m_el[i].deSerialize(dataIn.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn) -{ - for (int i = 0; i < 3; i++) - m_el[i].deSerializeFloat(dataIn.m_el[i]); -} - -B3_FORCE_INLINE void b3Matrix3x3::deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn) -{ - for (int i = 0; i < 3; i++) - m_el[i].deSerializeDouble(dataIn.m_el[i]); -} - -#endif //B3_MATRIX3x3_H diff --git a/thirdparty/bullet/Bullet3Common/b3MinMax.h b/thirdparty/bullet/Bullet3Common/b3MinMax.h deleted file mode 100644 index c09c3db3f5a..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3MinMax.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GEN_MINMAX_H -#define B3_GEN_MINMAX_H - -#include "b3Scalar.h" - -template -B3_FORCE_INLINE const T& b3Min(const T& a, const T& b) -{ - return a < b ? a : b; -} - -template -B3_FORCE_INLINE const T& b3Max(const T& a, const T& b) -{ - return a > b ? a : b; -} - -template -B3_FORCE_INLINE const T& b3Clamped(const T& a, const T& lb, const T& ub) -{ - return a < lb ? lb : (ub < a ? ub : a); -} - -template -B3_FORCE_INLINE void b3SetMin(T& a, const T& b) -{ - if (b < a) - { - a = b; - } -} - -template -B3_FORCE_INLINE void b3SetMax(T& a, const T& b) -{ - if (a < b) - { - a = b; - } -} - -template -B3_FORCE_INLINE void b3Clamp(T& a, const T& lb, const T& ub) -{ - if (a < lb) - { - a = lb; - } - else if (ub < a) - { - a = ub; - } -} - -#endif //B3_GEN_MINMAX_H diff --git a/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h b/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h deleted file mode 100644 index ed56bc627dc..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3PoolAllocator.h +++ /dev/null @@ -1,121 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef _BT_POOL_ALLOCATOR_H -#define _BT_POOL_ALLOCATOR_H - -#include "b3Scalar.h" -#include "b3AlignedAllocator.h" - -///The b3PoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately. -class b3PoolAllocator -{ - int m_elemSize; - int m_maxElements; - int m_freeCount; - void* m_firstFree; - unsigned char* m_pool; - -public: - b3PoolAllocator(int elemSize, int maxElements) - : m_elemSize(elemSize), - m_maxElements(maxElements) - { - m_pool = (unsigned char*)b3AlignedAlloc(static_cast(m_elemSize * m_maxElements), 16); - - unsigned char* p = m_pool; - m_firstFree = p; - m_freeCount = m_maxElements; - int count = m_maxElements; - while (--count) - { - *(void**)p = (p + m_elemSize); - p += m_elemSize; - } - *(void**)p = 0; - } - - ~b3PoolAllocator() - { - b3AlignedFree(m_pool); - } - - int getFreeCount() const - { - return m_freeCount; - } - - int getUsedCount() const - { - return m_maxElements - m_freeCount; - } - - int getMaxCount() const - { - return m_maxElements; - } - - void* allocate(int size) - { - // release mode fix - (void)size; - b3Assert(!size || size <= m_elemSize); - b3Assert(m_freeCount > 0); - void* result = m_firstFree; - m_firstFree = *(void**)m_firstFree; - --m_freeCount; - return result; - } - - bool validPtr(void* ptr) - { - if (ptr) - { - if (((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize)) - { - return true; - } - } - return false; - } - - void freeMemory(void* ptr) - { - if (ptr) - { - b3Assert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize); - - *(void**)ptr = m_firstFree; - m_firstFree = ptr; - ++m_freeCount; - } - } - - int getElementSize() const - { - return m_elemSize; - } - - unsigned char* getPoolAddress() - { - return m_pool; - } - - const unsigned char* getPoolAddress() const - { - return m_pool; - } -}; - -#endif //_BT_POOL_ALLOCATOR_H diff --git a/thirdparty/bullet/Bullet3Common/b3QuadWord.h b/thirdparty/bullet/Bullet3Common/b3QuadWord.h deleted file mode 100644 index 0def305fac8..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3QuadWord.h +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SIMD_QUADWORD_H -#define B3_SIMD_QUADWORD_H - -#include "b3Scalar.h" -#include "b3MinMax.h" - -#if defined(__CELLOS_LV2) && defined(__SPU__) -#include -#endif - -/**@brief The b3QuadWord class is base class for b3Vector3 and b3Quaternion. - * Some issues under PS3 Linux with IBM 2.1 SDK, gcc compiler prevent from using aligned quadword. - */ -#ifndef USE_LIBSPE2 -B3_ATTRIBUTE_ALIGNED16(class) -b3QuadWord -#else -class b3QuadWord -#endif -{ -protected: -#if defined(__SPU__) && defined(__CELLOS_LV2__) - union { - vec_float4 mVec128; - b3Scalar m_floats[4]; - }; - -public: - vec_float4 get128() const - { - return mVec128; - } - -#else //__CELLOS_LV2__ __SPU__ - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -public: - union { - b3SimdFloat4 mVec128; - b3Scalar m_floats[4]; - struct - { - b3Scalar x, y, z, w; - }; - }; - -public: - B3_FORCE_INLINE b3SimdFloat4 get128() const - { - return mVec128; - } - B3_FORCE_INLINE void set128(b3SimdFloat4 v128) - { - mVec128 = v128; - } -#else -public: - union { - b3Scalar m_floats[4]; - struct - { - b3Scalar x, y, z, w; - }; - }; -#endif // B3_USE_SSE - -#endif //__CELLOS_LV2__ __SPU__ - -public: -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - - // Set Vector - B3_FORCE_INLINE b3QuadWord(const b3SimdFloat4 vec) - { - mVec128 = vec; - } - - // Copy constructor - B3_FORCE_INLINE b3QuadWord(const b3QuadWord& rhs) - { - mVec128 = rhs.mVec128; - } - - // Assignment Operator - B3_FORCE_INLINE b3QuadWord& - operator=(const b3QuadWord& v) - { - mVec128 = v.mVec128; - - return *this; - } - -#endif - - /**@brief Return the x value */ - B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; } - /**@brief Return the y value */ - B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; } - /**@brief Return the z value */ - B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; } - /**@brief Set the x value */ - B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; }; - /**@brief Set the y value */ - B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; }; - /**@brief Set the z value */ - B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; }; - /**@brief Set the w value */ - B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; }; - /**@brief Return the x value */ - - //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; } - //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; } - ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons. - B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; } - B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; } - - B3_FORCE_INLINE bool operator==(const b3QuadWord& other) const - { -#ifdef B3_USE_SSE - return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128))); -#else - return ((m_floats[3] == other.m_floats[3]) && - (m_floats[2] == other.m_floats[2]) && - (m_floats[1] == other.m_floats[1]) && - (m_floats[0] == other.m_floats[0])); -#endif - } - - B3_FORCE_INLINE bool operator!=(const b3QuadWord& other) const - { - return !(*this == other); - } - - /**@brief Set x,y,z and zero w - * @param x Value of x - * @param y Value of y - * @param z Value of z - */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = 0.f; - } - - /* void getValue(b3Scalar *m) const - { - m[0] = m_floats[0]; - m[1] = m_floats[1]; - m[2] = m_floats[2]; - } -*/ - /**@brief Set the values - * @param x Value of x - * @param y Value of y - * @param z Value of z - * @param w Value of w - */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = _w; - } - /**@brief No initialization constructor */ - B3_FORCE_INLINE b3QuadWord() - // :m_floats[0](b3Scalar(0.)),m_floats[1](b3Scalar(0.)),m_floats[2](b3Scalar(0.)),m_floats[3](b3Scalar(0.)) - { - } - - /**@brief Three argument constructor (zeros w) - * @param x Value of x - * @param y Value of y - * @param z Value of z - */ - B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) - { - m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = 0.0f; - } - - /**@brief Initializing constructor - * @param x Value of x - * @param y Value of y - * @param z Value of z - * @param w Value of w - */ - B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - { - m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = _w; - } - - /**@brief Set each element to the max of the current values and the values of another b3QuadWord - * @param other The other b3QuadWord to compare with - */ - B3_FORCE_INLINE void setMax(const b3QuadWord& other) - { -#ifdef B3_USE_SSE - mVec128 = _mm_max_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vmaxq_f32(mVec128, other.mVec128); -#else - b3SetMax(m_floats[0], other.m_floats[0]); - b3SetMax(m_floats[1], other.m_floats[1]); - b3SetMax(m_floats[2], other.m_floats[2]); - b3SetMax(m_floats[3], other.m_floats[3]); -#endif - } - /**@brief Set each element to the min of the current values and the values of another b3QuadWord - * @param other The other b3QuadWord to compare with - */ - B3_FORCE_INLINE void setMin(const b3QuadWord& other) - { -#ifdef B3_USE_SSE - mVec128 = _mm_min_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vminq_f32(mVec128, other.mVec128); -#else - b3SetMin(m_floats[0], other.m_floats[0]); - b3SetMin(m_floats[1], other.m_floats[1]); - b3SetMin(m_floats[2], other.m_floats[2]); - b3SetMin(m_floats[3], other.m_floats[3]); -#endif - } -}; - -#endif //B3_SIMD_QUADWORD_H diff --git a/thirdparty/bullet/Bullet3Common/b3Quaternion.h b/thirdparty/bullet/Bullet3Common/b3Quaternion.h deleted file mode 100644 index 4fdd72dcc4b..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Quaternion.h +++ /dev/null @@ -1,908 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SIMD__QUATERNION_H_ -#define B3_SIMD__QUATERNION_H_ - -#include "b3Vector3.h" -#include "b3QuadWord.h" - -#ifdef B3_USE_SSE - -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f}; - -#endif - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f}; -const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f}; - -#endif - -/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */ -class b3Quaternion : public b3QuadWord -{ -public: - /**@brief No initialization constructor */ - b3Quaternion() {} - -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON) - // Set Vector - B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec) - { - mVec128 = vec; - } - - // Copy constructor - B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs) - { - mVec128 = rhs.mVec128; - } - - // Assignment Operator - B3_FORCE_INLINE b3Quaternion& - operator=(const b3Quaternion& v) - { - mVec128 = v.mVec128; - - return *this; - } - -#endif - - // template - // explicit Quaternion(const b3Scalar *v) : Tuple4(v) {} - /**@brief Constructor from scalars */ - b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - : b3QuadWord(_x, _y, _z, _w) - { - //b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f))); - } - /**@brief Axis angle Constructor - * @param axis The axis which the rotation is around - * @param angle The magnitude of the rotation around the angle (Radians) */ - b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle) - { - setRotation(_axis, _angle); - } - /**@brief Constructor from Euler angles - * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z - * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y - * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */ - b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { -#ifndef B3_EULER_DEFAULT_ZYX - setEuler(yaw, pitch, roll); -#else - setEulerZYX(yaw, pitch, roll); -#endif - } - /**@brief Set the rotation using axis angle notation - * @param axis The axis around which to rotate - * @param angle The magnitude of the rotation in Radians */ - void setRotation(const b3Vector3& axis1, const b3Scalar& _angle) - { - b3Vector3 axis = axis1; - axis.safeNormalize(); - - b3Scalar d = axis.length(); - b3Assert(d != b3Scalar(0.0)); - if (d < B3_EPSILON) - { - setValue(0, 0, 0, 1); - } - else - { - b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d; - setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s, - b3Cos(_angle * b3Scalar(0.5))); - } - } - /**@brief Set the quaternion using Euler angles - * @param yaw Angle around Y - * @param pitch Angle around X - * @param roll Angle around Z */ - void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) - { - b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5); - b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5); - b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5); - b3Scalar cosYaw = b3Cos(halfYaw); - b3Scalar sinYaw = b3Sin(halfYaw); - b3Scalar cosPitch = b3Cos(halfPitch); - b3Scalar sinPitch = b3Sin(halfPitch); - b3Scalar cosRoll = b3Cos(halfRoll); - b3Scalar sinRoll = b3Sin(halfRoll); - setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, - cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, - sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, - cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); - } - - /**@brief Set the quaternion using euler angles - * @param yaw Angle around Z - * @param pitch Angle around Y - * @param roll Angle around X */ - void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX) - { - b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5); - b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5); - b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5); - b3Scalar cosYaw = b3Cos(halfYaw); - b3Scalar sinYaw = b3Sin(halfYaw); - b3Scalar cosPitch = b3Cos(halfPitch); - b3Scalar sinPitch = b3Sin(halfPitch); - b3Scalar cosRoll = b3Cos(halfRoll); - b3Scalar sinRoll = b3Sin(halfRoll); - setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x - cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y - cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z - cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx - normalize(); - } - - /**@brief Get the euler angles from this quaternion - * @param yaw Angle around Z - * @param pitch Angle around Y - * @param roll Angle around X */ - void getEulerZYX(b3Scalar& yawZ, b3Scalar& pitchY, b3Scalar& rollX) const - { - b3Scalar squ; - b3Scalar sqx; - b3Scalar sqy; - b3Scalar sqz; - b3Scalar sarg; - sqx = m_floats[0] * m_floats[0]; - sqy = m_floats[1] * m_floats[1]; - sqz = m_floats[2] * m_floats[2]; - squ = m_floats[3] * m_floats[3]; - rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz); - sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]); - pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg)); - yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz); - } - - /**@brief Add two quaternions - * @param q The quaternion to add to this one */ - B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_add_ps(mVec128, q.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vaddq_f32(mVec128, q.mVec128); -#else - m_floats[0] += q.getX(); - m_floats[1] += q.getY(); - m_floats[2] += q.getZ(); - m_floats[3] += q.m_floats[3]; -#endif - return *this; - } - - /**@brief Subtract out a quaternion - * @param q The quaternion to subtract from this one */ - b3Quaternion& operator-=(const b3Quaternion& q) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_sub_ps(mVec128, q.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vsubq_f32(mVec128, q.mVec128); -#else - m_floats[0] -= q.getX(); - m_floats[1] -= q.getY(); - m_floats[2] -= q.getZ(); - m_floats[3] -= q.m_floats[3]; -#endif - return *this; - } - - /**@brief Scale this quaternion - * @param s The scalar to scale by */ - b3Quaternion& operator*=(const b3Scalar& s) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0); // (S S S S) - mVec128 = _mm_mul_ps(mVec128, vs); -#elif defined(B3_USE_NEON) - mVec128 = vmulq_n_f32(mVec128, s); -#else - m_floats[0] *= s; - m_floats[1] *= s; - m_floats[2] *= s; - m_floats[3] *= s; -#endif - return *this; - } - - /**@brief Multiply this quaternion by q on the right - * @param q The other quaternion - * Equivilant to this = this * q */ - b3Quaternion& operator*=(const b3Quaternion& q) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ2 = q.get128(); - - __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0)); - __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); - - A1 = A1 * B1; - - __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1)); - __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); - - A2 = A2 * B2; - - B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2)); - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); - - B1 = B1 * B2; // A3 *= B3 - - mVec128 = b3_splat_ps(mVec128, 3); // A0 - mVec128 = mVec128 * vQ2; // A0 * B0 - - A1 = A1 + A2; // AB12 - mVec128 = mVec128 - B1; // AB03 = AB0 - AB3 - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - mVec128 = mVec128 + A1; // AB03 + AB12 - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = mVec128; - float32x4_t vQ2 = q.get128(); - float32x4_t A0, A1, B1, A2, B2, A3, B3; - float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; - - { - float32x2x2_t tmp; - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - } - vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x - B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - A0 = vaddq_f32(A0, A1); // AB03 + AB12 - - mVec128 = A0; -#else - setValue( - m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(), - m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(), - m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(), - m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ()); -#endif - return *this; - } - /**@brief Return the dot product between this quaternion and another - * @param q The other quaternion */ - b3Scalar dot(const b3Quaternion& q) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vd; - - vd = _mm_mul_ps(mVec128, q.mVec128); - - __m128 t = _mm_movehl_ps(vd, vd); - vd = _mm_add_ps(vd, t); - t = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, t); - - return _mm_cvtss_f32(vd); -#elif defined(B3_USE_NEON) - float32x4_t vd = vmulq_f32(mVec128, q.mVec128); - float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd)); - x = vpadd_f32(x, x); - return vget_lane_f32(x, 0); -#else - return m_floats[0] * q.getX() + - m_floats[1] * q.getY() + - m_floats[2] * q.getZ() + - m_floats[3] * q.m_floats[3]; -#endif - } - - /**@brief Return the length squared of the quaternion */ - b3Scalar length2() const - { - return dot(*this); - } - - /**@brief Return the length of the quaternion */ - b3Scalar length() const - { - return b3Sqrt(length2()); - } - - /**@brief Normalize the quaternion - * Such that x^2 + y^2 + z^2 +w^2 = 1 */ - b3Quaternion& normalize() - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vd; - - vd = _mm_mul_ps(mVec128, mVec128); - - __m128 t = _mm_movehl_ps(vd, vd); - vd = _mm_add_ps(vd, t); - t = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, t); - - vd = _mm_sqrt_ss(vd); - vd = _mm_div_ss(b3vOnes, vd); - vd = b3_pshufd_ps(vd, 0); // splat - mVec128 = _mm_mul_ps(mVec128, vd); - - return *this; -#else - return *this /= length(); -#endif - } - - /**@brief Return a scaled version of this quaternion - * @param s The scale factor */ - B3_FORCE_INLINE b3Quaternion - operator*(const b3Scalar& s) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x00); // (S S S S) - - return b3Quaternion(_mm_mul_ps(mVec128, vs)); -#elif defined(B3_USE_NEON) - return b3Quaternion(vmulq_n_f32(mVec128, s)); -#else - return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s); -#endif - } - - /**@brief Return an inversely scaled versionof this quaternion - * @param s The inverse scale factor */ - b3Quaternion operator/(const b3Scalar& s) const - { - b3Assert(s != b3Scalar(0.0)); - return *this * (b3Scalar(1.0) / s); - } - - /**@brief Inversely scale this quaternion - * @param s The scale factor */ - b3Quaternion& operator/=(const b3Scalar& s) - { - b3Assert(s != b3Scalar(0.0)); - return *this *= b3Scalar(1.0) / s; - } - - /**@brief Return a normalized version of this quaternion */ - b3Quaternion normalized() const - { - return *this / length(); - } - /**@brief Return the angle between this quaternion and the other - * @param q The other quaternion */ - b3Scalar angle(const b3Quaternion& q) const - { - b3Scalar s = b3Sqrt(length2() * q.length2()); - b3Assert(s != b3Scalar(0.0)); - return b3Acos(dot(q) / s); - } - /**@brief Return the angle of rotation represented by this quaternion */ - b3Scalar getAngle() const - { - b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]); - return s; - } - - /**@brief Return the axis of the rotation represented by this quaternion */ - b3Vector3 getAxis() const - { - b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3]; - - if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero - return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary - b3Scalar s = 1.f / b3Sqrt(s_squared); - return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s); - } - - /**@brief Return the inverse of this quaternion */ - b3Quaternion inverse() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv)); -#elif defined(B3_USE_NEON) - return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv)); -#else - return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]); -#endif - } - - /**@brief Return the sum of this quaternion and the other - * @param q2 The other quaternion */ - B3_FORCE_INLINE b3Quaternion - operator+(const b3Quaternion& q2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128)); -#elif defined(B3_USE_NEON) - return b3Quaternion(vaddq_f32(mVec128, q2.mVec128)); -#else - const b3Quaternion& q1 = *this; - return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]); -#endif - } - - /**@brief Return the difference between this quaternion and the other - * @param q2 The other quaternion */ - B3_FORCE_INLINE b3Quaternion - operator-(const b3Quaternion& q2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128)); -#elif defined(B3_USE_NEON) - return b3Quaternion(vsubq_f32(mVec128, q2.mVec128)); -#else - const b3Quaternion& q1 = *this; - return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]); -#endif - } - - /**@brief Return the negative of this quaternion - * This simply negates each element */ - B3_FORCE_INLINE b3Quaternion operator-() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask)); -#elif defined(B3_USE_NEON) - return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask)); -#else - const b3Quaternion& q2 = *this; - return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]); -#endif - } - /**@todo document this and it's use */ - B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const - { - b3Quaternion diff, sum; - diff = *this - qd; - sum = *this + qd; - if (diff.dot(diff) > sum.dot(sum)) - return qd; - return (-qd); - } - - /**@todo document this and it's use */ - B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const - { - b3Quaternion diff, sum; - diff = *this - qd; - sum = *this + qd; - if (diff.dot(diff) < sum.dot(sum)) - return qd; - return (-qd); - } - - /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion - * @param q The other quaternion to interpolate with - * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q. - * Slerp interpolates assuming constant velocity. */ - b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const - { - b3Scalar magnitude = b3Sqrt(length2() * q.length2()); - b3Assert(magnitude > b3Scalar(0)); - - b3Scalar product = dot(q) / magnitude; - if (b3Fabs(product) < b3Scalar(1)) - { - // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp - const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1); - - const b3Scalar theta = b3Acos(sign * product); - const b3Scalar s1 = b3Sin(sign * t * theta); - const b3Scalar d = b3Scalar(1.0) / b3Sin(theta); - const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta); - - return b3Quaternion( - (m_floats[0] * s0 + q.getX() * s1) * d, - (m_floats[1] * s0 + q.getY() * s1) * d, - (m_floats[2] * s0 + q.getZ() * s1) * d, - (m_floats[3] * s0 + q.m_floats[3] * s1) * d); - } - else - { - return *this; - } - } - - static const b3Quaternion& getIdentity() - { - static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.)); - return identityQuat; - } - - B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; } -}; - -/**@brief Return the product of two quaternions */ -B3_FORCE_INLINE b3Quaternion -operator*(const b3Quaternion& q1, const b3Quaternion& q2) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ1 = q1.get128(); - __m128 vQ2 = q2.get128(); - __m128 A0, A1, B1, A2, B2; - - A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn - B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext - - A1 = A1 * B1; - - A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup - - A2 = A2 * B2; - - B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn - - B1 = B1 * B2; // A3 *= B3 - - A0 = b3_splat_ps(vQ1, 3); // A0 - A0 = A0 * vQ2; // A0 * B0 - - A1 = A1 + A2; // AB12 - A0 = A0 - B1; // AB03 = AB0 - AB3 - - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - A0 = A0 + A1; // AB03 + AB12 - - return b3Quaternion(A0); - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = q1.get128(); - float32x4_t vQ2 = q2.get128(); - float32x4_t A0, A1, B1, A2, B2, A3, B3; - float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; - - { - float32x2x2_t tmp; - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - } - vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x - B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - A0 = vaddq_f32(A0, A1); // AB03 + AB12 - - return b3Quaternion(A0); - -#else - return b3Quaternion( - q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(), - q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(), - q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(), - q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ()); -#endif -} - -B3_FORCE_INLINE b3Quaternion -operator*(const b3Quaternion& q, const b3Vector3& w) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ1 = q.get128(); - __m128 vQ2 = w.get128(); - __m128 A1, B1, A2, B2, A3, B3; - - A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0)); - B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0)); - - A1 = A1 * B1; - - A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); - - A2 = A2 * B2; - - A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); - B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); - - A3 = A3 * B3; // A3 *= B3 - - A1 = A1 + A2; // AB12 - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - A1 = A1 - A3; // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = q.get128(); - float32x4_t vQ2 = w.get128(); - float32x4_t A1, B1, A2, B2, A3, B3; - float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz; - - vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); - { - float32x2x2_t tmp; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - } - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X - B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - - A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#else - return b3Quaternion( - q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(), - q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(), - q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(), - -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ()); -#endif -} - -B3_FORCE_INLINE b3Quaternion -operator*(const b3Vector3& w, const b3Quaternion& q) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vQ1 = w.get128(); - __m128 vQ2 = q.get128(); - __m128 A1, B1, A2, B2, A3, B3; - - A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x - B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X - - A1 = A1 * B1; - - A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); - B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); - - A2 = A2 * B2; - - A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); - B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); - - A3 = A3 * B3; // A3 *= B3 - - A1 = A1 + A2; // AB12 - A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element - A1 = A1 - A3; // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#elif defined(B3_USE_NEON) - - float32x4_t vQ1 = w.get128(); - float32x4_t vQ2 = q.get128(); - float32x4_t A1, B1, A2, B2, A3, B3; - float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; - - { - float32x2x2_t tmp; - - tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y} - vQ1zx = tmp.val[0]; - - tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y} - vQ2zx = tmp.val[0]; - } - vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); - - vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); - - vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); - vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); - - A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x - B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X - - A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); - B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); - - A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z - B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z - - A1 = vmulq_f32(A1, B1); - A2 = vmulq_f32(A2, B2); - A3 = vmulq_f32(A3, B3); // A3 *= B3 - - A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 - - // change the sign of the last element - A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM); - - A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3 - - return b3Quaternion(A1); - -#else - return b3Quaternion( - +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(), - +w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(), - +w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(), - -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ()); -#endif -} - -/**@brief Calculate the dot product between two quaternions */ -B3_FORCE_INLINE b3Scalar -b3Dot(const b3Quaternion& q1, const b3Quaternion& q2) -{ - return q1.dot(q2); -} - -/**@brief Return the length of a quaternion */ -B3_FORCE_INLINE b3Scalar -b3Length(const b3Quaternion& q) -{ - return q.length(); -} - -/**@brief Return the angle between two quaternions*/ -B3_FORCE_INLINE b3Scalar -b3Angle(const b3Quaternion& q1, const b3Quaternion& q2) -{ - return q1.angle(q2); -} - -/**@brief Return the inverse of a quaternion*/ -B3_FORCE_INLINE b3Quaternion -b3Inverse(const b3Quaternion& q) -{ - return q.inverse(); -} - -/**@brief Return the result of spherical linear interpolation betwen two quaternions - * @param q1 The first quaternion - * @param q2 The second quaternion - * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2 - * Slerp assumes constant velocity between positions. */ -B3_FORCE_INLINE b3Quaternion -b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t) -{ - return q1.slerp(q2, t); -} - -B3_FORCE_INLINE b3Quaternion -b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1) -{ - return rot0 * rot1; -} - -B3_FORCE_INLINE b3Quaternion -b3QuatNormalized(const b3Quaternion& orn) -{ - return orn.normalized(); -} - -B3_FORCE_INLINE b3Vector3 -b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v) -{ - b3Quaternion q = rotation * v; - q *= rotation.inverse(); -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask)); -#elif defined(B3_USE_NEON) - return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask)); -#else - return b3MakeVector3(q.getX(), q.getY(), q.getZ()); -#endif -} - -B3_FORCE_INLINE b3Quaternion -b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized -{ - b3Vector3 c = v0.cross(v1); - b3Scalar d = v0.dot(v1); - - if (d < -1.0 + B3_EPSILON) - { - b3Vector3 n, unused; - b3PlaneSpace1(v0, n, unused); - return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f); // just pick any vector that is orthogonal to v0 - } - - b3Scalar s = b3Sqrt((1.0f + d) * 2.0f); - b3Scalar rs = 1.0f / s; - - return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f); -} - -B3_FORCE_INLINE b3Quaternion -b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1) -{ - v0.normalize(); - v1.normalize(); - return b3ShortestArcQuat(v0, v1); -} - -#endif //B3_SIMD__QUATERNION_H_ diff --git a/thirdparty/bullet/Bullet3Common/b3Random.h b/thirdparty/bullet/Bullet3Common/b3Random.h deleted file mode 100644 index c2e21496c76..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Random.h +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GEN_RANDOM_H -#define B3_GEN_RANDOM_H - -#include "b3Scalar.h" - -#ifdef MT19937 - -#include -#include - -#define B3_RAND_MAX UINT_MAX - -B3_FORCE_INLINE void b3Srand(unsigned int seed) { init_genrand(seed); } -B3_FORCE_INLINE unsigned int b3rand() { return genrand_int32(); } - -#else - -#include - -#define B3_RAND_MAX RAND_MAX - -B3_FORCE_INLINE void b3Srand(unsigned int seed) { srand(seed); } -B3_FORCE_INLINE unsigned int b3rand() { return rand(); } - -#endif - -inline b3Scalar b3RandRange(b3Scalar minRange, b3Scalar maxRange) -{ - return (b3rand() / (b3Scalar(B3_RAND_MAX) + b3Scalar(1.0))) * (maxRange - minRange) + minRange; -} - -#endif //B3_GEN_RANDOM_H diff --git a/thirdparty/bullet/Bullet3Common/b3ResizablePool.h b/thirdparty/bullet/Bullet3Common/b3ResizablePool.h deleted file mode 100644 index cafe3ff3964..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3ResizablePool.h +++ /dev/null @@ -1,171 +0,0 @@ - -#ifndef B3_RESIZABLE_POOL_H -#define B3_RESIZABLE_POOL_H - -#include "Bullet3Common/b3AlignedObjectArray.h" - -enum -{ - B3_POOL_HANDLE_TERMINAL_FREE = -1, - B3_POOL_HANDLE_TERMINAL_USED = -2 -}; - -template -struct b3PoolBodyHandle : public U -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_nextFreeHandle; - void setNextFree(int next) - { - m_nextFreeHandle = next; - } - int getNextFree() const - { - return m_nextFreeHandle; - } -}; - -template -class b3ResizablePool -{ -protected: - b3AlignedObjectArray m_bodyHandles; - int m_numUsedHandles; // number of active handles - int m_firstFreeHandle; // free handles list - - T* getHandleInternal(int handle) - { - return &m_bodyHandles[handle]; - } - const T* getHandleInternal(int handle) const - { - return &m_bodyHandles[handle]; - } - -public: - b3ResizablePool() - { - initHandles(); - } - - virtual ~b3ResizablePool() - { - exitHandles(); - } - ///handle management - - int getNumHandles() const - { - return m_bodyHandles.size(); - } - - void getUsedHandles(b3AlignedObjectArray& usedHandles) const - { - for (int i = 0; i < m_bodyHandles.size(); i++) - { - if (m_bodyHandles[i].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - usedHandles.push_back(i); - } - } - } - - T* getHandle(int handle) - { - b3Assert(handle >= 0); - b3Assert(handle < m_bodyHandles.size()); - if ((handle < 0) || (handle >= m_bodyHandles.size())) - { - return 0; - } - - if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - return &m_bodyHandles[handle]; - } - return 0; - } - const T* getHandle(int handle) const - { - b3Assert(handle >= 0); - b3Assert(handle < m_bodyHandles.size()); - if ((handle < 0) || (handle >= m_bodyHandles.size())) - { - return 0; - } - - if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - return &m_bodyHandles[handle]; - } - return 0; - } - - void increaseHandleCapacity(int extraCapacity) - { - int curCapacity = m_bodyHandles.size(); - //b3Assert(curCapacity == m_numUsedHandles); - int newCapacity = curCapacity + extraCapacity; - m_bodyHandles.resize(newCapacity); - - { - for (int i = curCapacity; i < newCapacity; i++) - m_bodyHandles[i].setNextFree(i + 1); - - m_bodyHandles[newCapacity - 1].setNextFree(-1); - } - m_firstFreeHandle = curCapacity; - } - void initHandles() - { - m_numUsedHandles = 0; - m_firstFreeHandle = -1; - - increaseHandleCapacity(1); - } - - void exitHandles() - { - m_bodyHandles.resize(0); - m_firstFreeHandle = -1; - m_numUsedHandles = 0; - } - - int allocHandle() - { - b3Assert(m_firstFreeHandle >= 0); - - int handle = m_firstFreeHandle; - m_firstFreeHandle = getHandleInternal(handle)->getNextFree(); - m_numUsedHandles++; - - if (m_firstFreeHandle < 0) - { - //int curCapacity = m_bodyHandles.size(); - int additionalCapacity = m_bodyHandles.size(); - increaseHandleCapacity(additionalCapacity); - - getHandleInternal(handle)->setNextFree(m_firstFreeHandle); - } - getHandleInternal(handle)->setNextFree(B3_POOL_HANDLE_TERMINAL_USED); - getHandleInternal(handle)->clear(); - return handle; - } - - void freeHandle(int handle) - { - b3Assert(handle >= 0); - - if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED) - { - getHandleInternal(handle)->clear(); - getHandleInternal(handle)->setNextFree(m_firstFreeHandle); - m_firstFreeHandle = handle; - m_numUsedHandles--; - } - } -}; -///end handle management - -#endif //B3_RESIZABLE_POOL_H diff --git a/thirdparty/bullet/Bullet3Common/b3Scalar.h b/thirdparty/bullet/Bullet3Common/b3Scalar.h deleted file mode 100644 index eeb70ed6323..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Scalar.h +++ /dev/null @@ -1,689 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SCALAR_H -#define B3_SCALAR_H - -#ifdef B3_MANAGED_CODE -//Aligned data types not supported in managed code -#pragma unmanaged -#endif - -#include -#include //size_t for MSVC 6.0 -#include - -//Original repository is at http://github.com/erwincoumans/bullet3 -#define B3_BULLET_VERSION 300 - -inline int b3GetVersion() -{ - return B3_BULLET_VERSION; -} - -#if defined(DEBUG) || defined(_DEBUG) -#define B3_DEBUG -#endif - -#include "b3Logging.h" //for b3Error - -#ifdef _WIN32 - -#if defined(__GNUC__) // it should handle both MINGW and CYGWIN -#define B3_FORCE_INLINE __inline__ __attribute__((always_inline)) -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#elif ( defined(_MSC_VER) && _MSC_VER < 1300 ) -#define B3_FORCE_INLINE inline -#define B3_ATTRIBUTE_ALIGNED16(a) a -#define B3_ATTRIBUTE_ALIGNED64(a) a -#define B3_ATTRIBUTE_ALIGNED128(a) a -#else -//#define B3_HAS_ALIGNED_ALLOCATOR -#pragma warning(disable : 4324) // disable padding warning -// #pragma warning(disable:4530) // Disable the exception disable but used in MSCV Stl warning. -#pragma warning(disable : 4996) //Turn off warnings about deprecated C routines -// #pragma warning(disable:4786) // Disable the "debug name too long" warning - -#define B3_FORCE_INLINE __forceinline -#define B3_ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a -#define B3_ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a -#define B3_ATTRIBUTE_ALIGNED128(a) __declspec(align(128)) a -#ifdef _XBOX -#define B3_USE_VMX128 - -#include -#define B3_HAVE_NATIVE_FSEL -#define b3Fsel(a, b, c) __fsel((a), (b), (c)) -#else - -#if (defined(_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined(B3_USE_DOUBLE_PRECISION)) -#if (defined(_M_IX86) || defined(_M_X64)) - - -#ifdef __clang__ -//#define B3_NO_SIMD_OPERATOR_OVERLOADS -#define B3_DISABLE_SSE -#endif //__clang__ - -#ifndef B3_DISABLE_SSE -#define B3_USE_SSE -#endif //B3_DISABLE_SSE - -#ifdef B3_USE_SSE -//B3_USE_SSE_IN_API is disabled under Windows by default, because -//it makes it harder to integrate Bullet into your application under Windows -//(structured embedding Bullet structs/classes need to be 16-byte aligned) -//with relatively little performance gain -//If you are not embedded Bullet data in your classes, or make sure that you align those classes on 16-byte boundaries -//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage) -//#define B3_USE_SSE_IN_API -#endif //B3_USE_SSE -#include -#endif -#endif - -#endif //_XBOX - -#endif //__MINGW32__ - -#ifdef B3_DEBUG -#ifdef _MSC_VER -#include -#define b3Assert(x) { if(!(x)){b3Error("Assert " __FILE__ ":%u (%s)\n", __LINE__, #x);__debugbreak(); }} -#else //_MSC_VER -#include -#define b3Assert assert -#endif //_MSC_VER -#else -#define b3Assert(x) -#endif -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) - -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c - -#else - -#if defined(__CELLOS_LV2__) -#define B3_FORCE_INLINE inline __attribute__((always_inline)) -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#ifndef assert -#include -#endif -#ifdef B3_DEBUG -#ifdef __SPU__ -#include -#define printf spu_printf -#define b3Assert(x) \ - { \ - if (!(x)) \ - { \ - b3Error( \ - "Assert "__FILE__ \ - ":%u (" #x ")\n", \ - __LINE__); \ - spu_hcmpeq(0, 0); \ - } \ - } -#else -#define b3Assert assert -#endif - -#else -#define b3Assert(x) -#endif -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) - -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c - -#else - -#ifdef USE_LIBSPE2 - -#define B3_FORCE_INLINE __inline -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#ifndef assert -#include -#endif -#ifdef B3_DEBUG -#define b3Assert assert -#else -#define b3Assert(x) -#endif -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) - -#define b3Likely(_c) __builtin_expect((_c), 1) -#define b3Unlikely(_c) __builtin_expect((_c), 0) - -#else -//non-windows systems - -#if (defined(__APPLE__) && (!defined(B3_USE_DOUBLE_PRECISION))) -#if defined(__i386__) || defined(__x86_64__) -#define B3_USE_SSE -//B3_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries -//if apps run into issues, we will disable the next line -#define B3_USE_SSE_IN_API -#ifdef B3_USE_SSE -// include appropriate SSE level -#if defined(__SSE4_1__) -#include -#elif defined(__SSSE3__) -#include -#elif defined(__SSE3__) -#include -#else -#include -#endif -#endif //B3_USE_SSE -#elif defined(__armv7__) -#ifdef __clang__ -#define B3_USE_NEON 1 - -#if defined B3_USE_NEON && defined(__clang__) -#include -#endif //B3_USE_NEON -#endif //__clang__ -#endif //__arm__ - -#define B3_FORCE_INLINE inline __attribute__((always_inline)) -///@todo: check out alignment methods for other platforms/compilers -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -#ifndef assert -#include -#endif - -#if defined(DEBUG) || defined(_DEBUG) -#if defined(__i386__) || defined(__x86_64__) -#include -#define b3Assert(x) \ - { \ - if (!(x)) \ - { \ - b3Error("Assert %s in line %d, file %s\n", #x, __LINE__, __FILE__); \ - asm volatile("int3"); \ - } \ - } -#else //defined (__i386__) || defined (__x86_64__) -#define b3Assert assert -#endif //defined (__i386__) || defined (__x86_64__) -#else //defined(DEBUG) || defined (_DEBUG) -#define b3Assert(x) -#endif //defined(DEBUG) || defined (_DEBUG) - -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c - -#else - -#define B3_FORCE_INLINE inline -///@todo: check out alignment methods for other platforms/compilers -#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16))) -#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64))) -#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128))) -///#define B3_ATTRIBUTE_ALIGNED16(a) a -///#define B3_ATTRIBUTE_ALIGNED64(a) a -///#define B3_ATTRIBUTE_ALIGNED128(a) a -#ifndef assert -#include -#endif - -#if defined(DEBUG) || defined(_DEBUG) -#define b3Assert assert -#else -#define b3Assert(x) -#endif - -//b3FullAssert is optional, slows down a lot -#define b3FullAssert(x) -#define b3Likely(_c) _c -#define b3Unlikely(_c) _c -#endif //__APPLE__ - -#endif // LIBSPE2 - -#endif //__CELLOS_LV2__ -#endif - -///The b3Scalar type abstracts floating point numbers, to easily switch between double and single floating point precision. -#if defined(B3_USE_DOUBLE_PRECISION) -typedef double b3Scalar; -//this number could be bigger in double precision -#define B3_LARGE_FLOAT 1e30 -#else -typedef float b3Scalar; -//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX -#define B3_LARGE_FLOAT 1e18f -#endif - -#ifdef B3_USE_SSE -typedef __m128 b3SimdFloat4; -#endif //B3_USE_SSE - -#if defined B3_USE_SSE_IN_API && defined(B3_USE_SSE) -#ifdef _WIN32 - -#ifndef B3_NAN -static int b3NanMask = 0x7F800001; -#define B3_NAN (*(float *)&b3NanMask) -#endif - -#ifndef B3_INFINITY_MASK -static int b3InfinityMask = 0x7F800000; -#define B3_INFINITY_MASK (*(float *)&b3InfinityMask) -#endif -#ifndef B3_NO_SIMD_OPERATOR_OVERLOADS -inline __m128 operator+(const __m128 A, const __m128 B) -{ - return _mm_add_ps(A, B); -} - -inline __m128 operator-(const __m128 A, const __m128 B) -{ - return _mm_sub_ps(A, B); -} - -inline __m128 operator*(const __m128 A, const __m128 B) -{ - return _mm_mul_ps(A, B); -} -#endif //B3_NO_SIMD_OPERATOR_OVERLOADS -#define b3CastfTo128i(a) (_mm_castps_si128(a)) -#define b3CastfTo128d(a) (_mm_castps_pd(a)) -#define b3CastiTo128f(a) (_mm_castsi128_ps(a)) -#define b3CastdTo128f(a) (_mm_castpd_ps(a)) -#define b3CastdTo128i(a) (_mm_castpd_si128(a)) -#define b3Assign128(r0, r1, r2, r3) _mm_setr_ps(r0, r1, r2, r3) - -#else //_WIN32 - -#define b3CastfTo128i(a) ((__m128i)(a)) -#define b3CastfTo128d(a) ((__m128d)(a)) -#define b3CastiTo128f(a) ((__m128)(a)) -#define b3CastdTo128f(a) ((__m128)(a)) -#define b3CastdTo128i(a) ((__m128i)(a)) -#define b3Assign128(r0, r1, r2, r3) \ - (__m128) { r0, r1, r2, r3 } -#endif //_WIN32 -#endif //B3_USE_SSE_IN_API - -#ifdef B3_USE_NEON -#include - -typedef float32x4_t b3SimdFloat4; -#define B3_INFINITY INFINITY -#define B3_NAN NAN -#define b3Assign128(r0, r1, r2, r3) \ - (float32x4_t) { r0, r1, r2, r3 } -#endif - -#define B3_DECLARE_ALIGNED_ALLOCATOR() \ - B3_FORCE_INLINE void *operator new(size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); } \ - B3_FORCE_INLINE void operator delete(void *ptr) { b3AlignedFree(ptr); } \ - B3_FORCE_INLINE void *operator new(size_t, void *ptr) { return ptr; } \ - B3_FORCE_INLINE void operator delete(void *, void *) {} \ - B3_FORCE_INLINE void *operator new[](size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); } \ - B3_FORCE_INLINE void operator delete[](void *ptr) { b3AlignedFree(ptr); } \ - B3_FORCE_INLINE void *operator new[](size_t, void *ptr) { return ptr; } \ - B3_FORCE_INLINE void operator delete[](void *, void *) {} - -#if defined(B3_USE_DOUBLE_PRECISION) || defined(B3_FORCE_DOUBLE_FUNCTIONS) - -B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar x) -{ - return sqrt(x); -} -B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabs(x); } -B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cos(x); } -B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sin(x); } -B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tan(x); } -B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) -{ - if (x < b3Scalar(-1)) x = b3Scalar(-1); - if (x > b3Scalar(1)) x = b3Scalar(1); - return acos(x); -} -B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) -{ - if (x < b3Scalar(-1)) x = b3Scalar(-1); - if (x > b3Scalar(1)) x = b3Scalar(1); - return asin(x); -} -B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atan(x); } -B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2(x, y); } -B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return exp(x); } -B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return log(x); } -B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return pow(x, y); } -B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmod(x, y); } - -#else - -B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar y) -{ -#ifdef USE_APPROXIMATION - double x, z, tempf; - unsigned long *tfptr = ((unsigned long *)&tempf) + 1; - - tempf = y; - *tfptr = (0xbfcdd90a - *tfptr) >> 1; /* estimate of 1/sqrt(y) */ - x = tempf; - z = y * b3Scalar(0.5); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); /* iteration formula */ - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - x = (b3Scalar(1.5) * x) - (x * x) * (x * z); - return x * y; -#else - return sqrtf(y); -#endif -} -B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabsf(x); } -B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cosf(x); } -B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sinf(x); } -B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tanf(x); } -B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) -{ - if (x < b3Scalar(-1)) - x = b3Scalar(-1); - if (x > b3Scalar(1)) - x = b3Scalar(1); - return acosf(x); -} -B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) -{ - if (x < b3Scalar(-1)) - x = b3Scalar(-1); - if (x > b3Scalar(1)) - x = b3Scalar(1); - return asinf(x); -} -B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atanf(x); } -B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2f(x, y); } -B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return expf(x); } -B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return logf(x); } -B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return powf(x, y); } -B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmodf(x, y); } - -#endif - -#define B3_2_PI b3Scalar(6.283185307179586232) -#define B3_PI (B3_2_PI * b3Scalar(0.5)) -#define B3_HALF_PI (B3_2_PI * b3Scalar(0.25)) -#define B3_RADS_PER_DEG (B3_2_PI / b3Scalar(360.0)) -#define B3_DEGS_PER_RAD (b3Scalar(360.0) / B3_2_PI) -#define B3_SQRT12 b3Scalar(0.7071067811865475244008443621048490) - -#define b3RecipSqrt(x) ((b3Scalar)(b3Scalar(1.0) / b3Sqrt(b3Scalar(x)))) /* reciprocal square root */ - -#ifdef B3_USE_DOUBLE_PRECISION -#define B3_EPSILON DBL_EPSILON -#define B3_INFINITY DBL_MAX -#else -#define B3_EPSILON FLT_EPSILON -#define B3_INFINITY FLT_MAX -#endif - -B3_FORCE_INLINE b3Scalar b3Atan2Fast(b3Scalar y, b3Scalar x) -{ - b3Scalar coeff_1 = B3_PI / 4.0f; - b3Scalar coeff_2 = 3.0f * coeff_1; - b3Scalar abs_y = b3Fabs(y); - b3Scalar angle; - if (x >= 0.0f) - { - b3Scalar r = (x - abs_y) / (x + abs_y); - angle = coeff_1 - coeff_1 * r; - } - else - { - b3Scalar r = (x + abs_y) / (abs_y - x); - angle = coeff_2 - coeff_1 * r; - } - return (y < 0.0f) ? -angle : angle; -} - -B3_FORCE_INLINE bool b3FuzzyZero(b3Scalar x) { return b3Fabs(x) < B3_EPSILON; } - -B3_FORCE_INLINE bool b3Equal(b3Scalar a, b3Scalar eps) -{ - return (((a) <= eps) && !((a) < -eps)); -} -B3_FORCE_INLINE bool b3GreaterEqual(b3Scalar a, b3Scalar eps) -{ - return (!((a) <= eps)); -} - -B3_FORCE_INLINE int b3IsNegative(b3Scalar x) -{ - return x < b3Scalar(0.0) ? 1 : 0; -} - -B3_FORCE_INLINE b3Scalar b3Radians(b3Scalar x) { return x * B3_RADS_PER_DEG; } -B3_FORCE_INLINE b3Scalar b3Degrees(b3Scalar x) { return x * B3_DEGS_PER_RAD; } - -#define B3_DECLARE_HANDLE(name) \ - typedef struct name##__ \ - { \ - int unused; \ - } * name - -#ifndef b3Fsel -B3_FORCE_INLINE b3Scalar b3Fsel(b3Scalar a, b3Scalar b, b3Scalar c) -{ - return a >= 0 ? b : c; -} -#endif -#define b3Fsels(a, b, c) (b3Scalar) b3Fsel(a, b, c) - -B3_FORCE_INLINE bool b3MachineIsLittleEndian() -{ - long int i = 1; - const char *p = (const char *)&i; - if (p[0] == 1) // Lowest address contains the least significant byte - return true; - else - return false; -} - -///b3Select avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360 -///Thanks Phil Knight. See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html -B3_FORCE_INLINE unsigned b3Select(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero) -{ - // Set testNz to 0xFFFFFFFF if condition is nonzero, 0x00000000 if condition is zero - // Rely on positive value or'ed with its negative having sign bit on - // and zero value or'ed with its negative (which is still zero) having sign bit off - // Use arithmetic shift right, shifting the sign bit through all 32 bits - unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31); - unsigned testEqz = ~testNz; - return ((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz)); -} -B3_FORCE_INLINE int b3Select(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero) -{ - unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31); - unsigned testEqz = ~testNz; - return static_cast((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz)); -} -B3_FORCE_INLINE float b3Select(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero) -{ -#ifdef B3_HAVE_NATIVE_FSEL - return (float)b3Fsel((b3Scalar)condition - b3Scalar(1.0f), valueIfConditionNonZero, valueIfConditionZero); -#else - return (condition != 0) ? valueIfConditionNonZero : valueIfConditionZero; -#endif -} - -template -B3_FORCE_INLINE void b3Swap(T &a, T &b) -{ - T tmp = a; - a = b; - b = tmp; -} - -//PCK: endian swapping functions -B3_FORCE_INLINE unsigned b3SwapEndian(unsigned val) -{ - return (((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); -} - -B3_FORCE_INLINE unsigned short b3SwapEndian(unsigned short val) -{ - return static_cast(((val & 0xff00) >> 8) | ((val & 0x00ff) << 8)); -} - -B3_FORCE_INLINE unsigned b3SwapEndian(int val) -{ - return b3SwapEndian((unsigned)val); -} - -B3_FORCE_INLINE unsigned short b3SwapEndian(short val) -{ - return b3SwapEndian((unsigned short)val); -} - -///b3SwapFloat uses using char pointers to swap the endianness -////b3SwapFloat/b3SwapDouble will NOT return a float, because the machine might 'correct' invalid floating point values -///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754. -///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception. -///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you. -///so instead of returning a float/double, we return integer/long long integer -B3_FORCE_INLINE unsigned int b3SwapEndianFloat(float d) -{ - unsigned int a = 0; - unsigned char *dst = (unsigned char *)&a; - unsigned char *src = (unsigned char *)&d; - - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; - return a; -} - -// unswap using char pointers -B3_FORCE_INLINE float b3UnswapEndianFloat(unsigned int a) -{ - float d = 0.0f; - unsigned char *src = (unsigned char *)&a; - unsigned char *dst = (unsigned char *)&d; - - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; - - return d; -} - -// swap using char pointers -B3_FORCE_INLINE void b3SwapEndianDouble(double d, unsigned char *dst) -{ - unsigned char *src = (unsigned char *)&d; - - dst[0] = src[7]; - dst[1] = src[6]; - dst[2] = src[5]; - dst[3] = src[4]; - dst[4] = src[3]; - dst[5] = src[2]; - dst[6] = src[1]; - dst[7] = src[0]; -} - -// unswap using char pointers -B3_FORCE_INLINE double b3UnswapEndianDouble(const unsigned char *src) -{ - double d = 0.0; - unsigned char *dst = (unsigned char *)&d; - - dst[0] = src[7]; - dst[1] = src[6]; - dst[2] = src[5]; - dst[3] = src[4]; - dst[4] = src[3]; - dst[5] = src[2]; - dst[6] = src[1]; - dst[7] = src[0]; - - return d; -} - -// returns normalized value in range [-B3_PI, B3_PI] -B3_FORCE_INLINE b3Scalar b3NormalizeAngle(b3Scalar angleInRadians) -{ - angleInRadians = b3Fmod(angleInRadians, B3_2_PI); - if (angleInRadians < -B3_PI) - { - return angleInRadians + B3_2_PI; - } - else if (angleInRadians > B3_PI) - { - return angleInRadians - B3_2_PI; - } - else - { - return angleInRadians; - } -} - -///rudimentary class to provide type info -struct b3TypedObject -{ - b3TypedObject(int objectType) - : m_objectType(objectType) - { - } - int m_objectType; - inline int getObjectType() const - { - return m_objectType; - } -}; - -///align a pointer to the provided alignment, upwards -template -T *b3AlignPointer(T *unalignedPtr, size_t alignment) -{ - struct b3ConvertPointerSizeT - { - union { - T *ptr; - size_t integer; - }; - }; - b3ConvertPointerSizeT converter; - - const size_t bit_mask = ~(alignment - 1); - converter.ptr = unalignedPtr; - converter.integer += alignment - 1; - converter.integer &= bit_mask; - return converter.ptr; -} - -#endif //B3_SCALAR_H diff --git a/thirdparty/bullet/Bullet3Common/b3StackAlloc.h b/thirdparty/bullet/Bullet3Common/b3StackAlloc.h deleted file mode 100644 index 4972236ac7c..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3StackAlloc.h +++ /dev/null @@ -1,118 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* -StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson -Nov.2006 -*/ - -#ifndef B3_STACK_ALLOC -#define B3_STACK_ALLOC - -#include "b3Scalar.h" //for b3Assert -#include "b3AlignedAllocator.h" - -///The b3Block class is an internal structure for the b3StackAlloc memory allocator. -struct b3Block -{ - b3Block* previous; - unsigned char* address; -}; - -///The StackAlloc class provides some fast stack-based memory allocator (LIFO last-in first-out) -class b3StackAlloc -{ -public: - b3StackAlloc(unsigned int size) - { - ctor(); - create(size); - } - ~b3StackAlloc() { destroy(); } - - inline void create(unsigned int size) - { - destroy(); - data = (unsigned char*)b3AlignedAlloc(size, 16); - totalsize = size; - } - inline void destroy() - { - b3Assert(usedsize == 0); - //Raise(L"StackAlloc is still in use"); - - if (usedsize == 0) - { - if (!ischild && data) - b3AlignedFree(data); - - data = 0; - usedsize = 0; - } - } - - int getAvailableMemory() const - { - return static_cast(totalsize - usedsize); - } - - unsigned char* allocate(unsigned int size) - { - const unsigned int nus(usedsize + size); - if (nus < totalsize) - { - usedsize = nus; - return (data + (usedsize - size)); - } - b3Assert(0); - //&& (L"Not enough memory")); - - return (0); - } - B3_FORCE_INLINE b3Block* beginBlock() - { - b3Block* pb = (b3Block*)allocate(sizeof(b3Block)); - pb->previous = current; - pb->address = data + usedsize; - current = pb; - return (pb); - } - B3_FORCE_INLINE void endBlock(b3Block* block) - { - b3Assert(block == current); - //Raise(L"Unmatched blocks"); - if (block == current) - { - current = block->previous; - usedsize = (unsigned int)((block->address - data) - sizeof(b3Block)); - } - } - -private: - void ctor() - { - data = 0; - totalsize = 0; - usedsize = 0; - current = 0; - ischild = false; - } - unsigned char* data; - unsigned int totalsize; - unsigned int usedsize; - b3Block* current; - bool ischild; -}; - -#endif //B3_STACK_ALLOC diff --git a/thirdparty/bullet/Bullet3Common/b3Transform.h b/thirdparty/bullet/Bullet3Common/b3Transform.h deleted file mode 100644 index 149da9d1484..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Transform.h +++ /dev/null @@ -1,286 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRANSFORM_H -#define B3_TRANSFORM_H - -#include "b3Matrix3x3.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3TransformData b3TransformDoubleData -#else -#define b3TransformData b3TransformFloatData -#endif - -/**@brief The b3Transform class supports rigid transforms with only translation and rotation and no scaling/shear. - *It can be used in combination with b3Vector3, b3Quaternion and b3Matrix3x3 linear algebra classes. */ -B3_ATTRIBUTE_ALIGNED16(class) -b3Transform -{ - ///Storage for the rotation - b3Matrix3x3 m_basis; - ///Storage for the translation - b3Vector3 m_origin; - -public: - /**@brief No initialization constructor */ - b3Transform() {} - /**@brief Constructor from b3Quaternion (optional b3Vector3 ) - * @param q Rotation from quaternion - * @param c Translation from Vector (default 0,0,0) */ - explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q, - const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) - : m_basis(q), - m_origin(c) - { - } - - /**@brief Constructor from b3Matrix3x3 (optional b3Vector3) - * @param b Rotation from Matrix - * @param c Translation from Vector default (0,0,0)*/ - explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b, - const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) - : m_basis(b), - m_origin(c) - { - } - /**@brief Copy constructor */ - B3_FORCE_INLINE b3Transform(const b3Transform& other) - : m_basis(other.m_basis), - m_origin(other.m_origin) - { - } - /**@brief Assignment Operator */ - B3_FORCE_INLINE b3Transform& operator=(const b3Transform& other) - { - m_basis = other.m_basis; - m_origin = other.m_origin; - return *this; - } - - /**@brief Set the current transform as the value of the product of two transforms - * @param t1 Transform 1 - * @param t2 Transform 2 - * This = Transform1 * Transform2 */ - B3_FORCE_INLINE void mult(const b3Transform& t1, const b3Transform& t2) - { - m_basis = t1.m_basis * t2.m_basis; - m_origin = t1(t2.m_origin); - } - - /* void multInverseLeft(const b3Transform& t1, const b3Transform& t2) { - b3Vector3 v = t2.m_origin - t1.m_origin; - m_basis = b3MultTransposeLeft(t1.m_basis, t2.m_basis); - m_origin = v * t1.m_basis; - } - */ - - /**@brief Return the transform of the vector */ - B3_FORCE_INLINE b3Vector3 operator()(const b3Vector3& x) const - { - return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin; - } - - /**@brief Return the transform of the vector */ - B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& x) const - { - return (*this)(x); - } - - /**@brief Return the transform of the b3Quaternion */ - B3_FORCE_INLINE b3Quaternion operator*(const b3Quaternion& q) const - { - return getRotation() * q; - } - - /**@brief Return the basis matrix for the rotation */ - B3_FORCE_INLINE b3Matrix3x3& getBasis() { return m_basis; } - /**@brief Return the basis matrix for the rotation */ - B3_FORCE_INLINE const b3Matrix3x3& getBasis() const { return m_basis; } - - /**@brief Return the origin vector translation */ - B3_FORCE_INLINE b3Vector3& getOrigin() { return m_origin; } - /**@brief Return the origin vector translation */ - B3_FORCE_INLINE const b3Vector3& getOrigin() const { return m_origin; } - - /**@brief Return a quaternion representing the rotation */ - b3Quaternion getRotation() const - { - b3Quaternion q; - m_basis.getRotation(q); - return q; - } - - /**@brief Set from an array - * @param m A pointer to a 15 element array (12 rotation(row major padded on the right by 1), and 3 translation */ - void setFromOpenGLMatrix(const b3Scalar* m) - { - m_basis.setFromOpenGLSubMatrix(m); - m_origin.setValue(m[12], m[13], m[14]); - } - - /**@brief Fill an array representation - * @param m A pointer to a 15 element array (12 rotation(row major padded on the right by 1), and 3 translation */ - void getOpenGLMatrix(b3Scalar * m) const - { - m_basis.getOpenGLSubMatrix(m); - m[12] = m_origin.getX(); - m[13] = m_origin.getY(); - m[14] = m_origin.getZ(); - m[15] = b3Scalar(1.0); - } - - /**@brief Set the translational element - * @param origin The vector to set the translation to */ - B3_FORCE_INLINE void setOrigin(const b3Vector3& origin) - { - m_origin = origin; - } - - B3_FORCE_INLINE b3Vector3 invXform(const b3Vector3& inVec) const; - - /**@brief Set the rotational element by b3Matrix3x3 */ - B3_FORCE_INLINE void setBasis(const b3Matrix3x3& basis) - { - m_basis = basis; - } - - /**@brief Set the rotational element by b3Quaternion */ - B3_FORCE_INLINE void setRotation(const b3Quaternion& q) - { - m_basis.setRotation(q); - } - - /**@brief Set this transformation to the identity */ - void setIdentity() - { - m_basis.setIdentity(); - m_origin.setValue(b3Scalar(0.0), b3Scalar(0.0), b3Scalar(0.0)); - } - - /**@brief Multiply this Transform by another(this = this * another) - * @param t The other transform */ - b3Transform& operator*=(const b3Transform& t) - { - m_origin += m_basis * t.m_origin; - m_basis *= t.m_basis; - return *this; - } - - /**@brief Return the inverse of this transform */ - b3Transform inverse() const - { - b3Matrix3x3 inv = m_basis.transpose(); - return b3Transform(inv, inv * -m_origin); - } - - /**@brief Return the inverse of this transform times the other transform - * @param t The other transform - * return this.inverse() * the other */ - b3Transform inverseTimes(const b3Transform& t) const; - - /**@brief Return the product of this transform and the other */ - b3Transform operator*(const b3Transform& t) const; - - /**@brief Return an identity transform */ - static const b3Transform& getIdentity() - { - static const b3Transform identityTransform(b3Matrix3x3::getIdentity()); - return identityTransform; - } - - void serialize(struct b3TransformData & dataOut) const; - - void serializeFloat(struct b3TransformFloatData & dataOut) const; - - void deSerialize(const struct b3TransformData& dataIn); - - void deSerializeDouble(const struct b3TransformDoubleData& dataIn); - - void deSerializeFloat(const struct b3TransformFloatData& dataIn); -}; - -B3_FORCE_INLINE b3Vector3 -b3Transform::invXform(const b3Vector3& inVec) const -{ - b3Vector3 v = inVec - m_origin; - return (m_basis.transpose() * v); -} - -B3_FORCE_INLINE b3Transform -b3Transform::inverseTimes(const b3Transform& t) const -{ - b3Vector3 v = t.getOrigin() - m_origin; - return b3Transform(m_basis.transposeTimes(t.m_basis), - v * m_basis); -} - -B3_FORCE_INLINE b3Transform - b3Transform::operator*(const b3Transform& t) const -{ - return b3Transform(m_basis * t.m_basis, - (*this)(t.m_origin)); -} - -/**@brief Test if two transforms have all elements equal */ -B3_FORCE_INLINE bool operator==(const b3Transform& t1, const b3Transform& t2) -{ - return (t1.getBasis() == t2.getBasis() && - t1.getOrigin() == t2.getOrigin()); -} - -///for serialization -struct b3TransformFloatData -{ - b3Matrix3x3FloatData m_basis; - b3Vector3FloatData m_origin; -}; - -struct b3TransformDoubleData -{ - b3Matrix3x3DoubleData m_basis; - b3Vector3DoubleData m_origin; -}; - -B3_FORCE_INLINE void b3Transform::serialize(b3TransformData& dataOut) const -{ - m_basis.serialize(dataOut.m_basis); - m_origin.serialize(dataOut.m_origin); -} - -B3_FORCE_INLINE void b3Transform::serializeFloat(b3TransformFloatData& dataOut) const -{ - m_basis.serializeFloat(dataOut.m_basis); - m_origin.serializeFloat(dataOut.m_origin); -} - -B3_FORCE_INLINE void b3Transform::deSerialize(const b3TransformData& dataIn) -{ - m_basis.deSerialize(dataIn.m_basis); - m_origin.deSerialize(dataIn.m_origin); -} - -B3_FORCE_INLINE void b3Transform::deSerializeFloat(const b3TransformFloatData& dataIn) -{ - m_basis.deSerializeFloat(dataIn.m_basis); - m_origin.deSerializeFloat(dataIn.m_origin); -} - -B3_FORCE_INLINE void b3Transform::deSerializeDouble(const b3TransformDoubleData& dataIn) -{ - m_basis.deSerializeDouble(dataIn.m_basis); - m_origin.deSerializeDouble(dataIn.m_origin); -} - -#endif //B3_TRANSFORM_H diff --git a/thirdparty/bullet/Bullet3Common/b3TransformUtil.h b/thirdparty/bullet/Bullet3Common/b3TransformUtil.h deleted file mode 100644 index 1850a9be5f2..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3TransformUtil.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRANSFORM_UTIL_H -#define B3_TRANSFORM_UTIL_H - -#include "b3Transform.h" -#define B3_ANGULAR_MOTION_THRESHOLD b3Scalar(0.5) * B3_HALF_PI - -B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents, const b3Vector3& supportDir) -{ - return b3MakeVector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(), - supportDir.getY() < b3Scalar(0.0) ? -halfExtents.getY() : halfExtents.getY(), - supportDir.getZ() < b3Scalar(0.0) ? -halfExtents.getZ() : halfExtents.getZ()); -} - -/// Utils related to temporal transforms -class b3TransformUtil -{ -public: - static void integrateTransform(const b3Transform& curTrans, const b3Vector3& linvel, const b3Vector3& angvel, b3Scalar timeStep, b3Transform& predictedTransform) - { - predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep); - // #define QUATERNION_DERIVATIVE -#ifdef QUATERNION_DERIVATIVE - b3Quaternion predictedOrn = curTrans.getRotation(); - predictedOrn += (angvel * predictedOrn) * (timeStep * b3Scalar(0.5)); - predictedOrn.normalize(); -#else - //Exponential map - //google for "Practical Parameterization of Rotations Using the Exponential Map", F. Sebastian Grassia - - b3Vector3 axis; - b3Scalar fAngle = angvel.length(); - //limit the angular motion - if (fAngle * timeStep > B3_ANGULAR_MOTION_THRESHOLD) - { - fAngle = B3_ANGULAR_MOTION_THRESHOLD / timeStep; - } - - if (fAngle < b3Scalar(0.001)) - { - // use Taylor's expansions of sync function - axis = angvel * (b3Scalar(0.5) * timeStep - (timeStep * timeStep * timeStep) * (b3Scalar(0.020833333333)) * fAngle * fAngle); - } - else - { - // sync(fAngle) = sin(c*fAngle)/t - axis = angvel * (b3Sin(b3Scalar(0.5) * fAngle * timeStep) / fAngle); - } - b3Quaternion dorn(axis.getX(), axis.getY(), axis.getZ(), b3Cos(fAngle * timeStep * b3Scalar(0.5))); - b3Quaternion orn0 = curTrans.getRotation(); - - b3Quaternion predictedOrn = dorn * orn0; - predictedOrn.normalize(); -#endif - predictedTransform.setRotation(predictedOrn); - } - - static void calculateVelocityQuaternion(const b3Vector3& pos0, const b3Vector3& pos1, const b3Quaternion& orn0, const b3Quaternion& orn1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel) - { - linVel = (pos1 - pos0) / timeStep; - b3Vector3 axis; - b3Scalar angle; - if (orn0 != orn1) - { - calculateDiffAxisAngleQuaternion(orn0, orn1, axis, angle); - angVel = axis * angle / timeStep; - } - else - { - angVel.setValue(0, 0, 0); - } - } - - static void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0, const b3Quaternion& orn1a, b3Vector3& axis, b3Scalar& angle) - { - b3Quaternion orn1 = orn0.nearest(orn1a); - b3Quaternion dorn = orn1 * orn0.inverse(); - angle = dorn.getAngle(); - axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ()); - axis[3] = b3Scalar(0.); - //check for axis length - b3Scalar len = axis.length2(); - if (len < B3_EPSILON * B3_EPSILON) - axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.)); - else - axis /= b3Sqrt(len); - } - - static void calculateVelocity(const b3Transform& transform0, const b3Transform& transform1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel) - { - linVel = (transform1.getOrigin() - transform0.getOrigin()) / timeStep; - b3Vector3 axis; - b3Scalar angle; - calculateDiffAxisAngle(transform0, transform1, axis, angle); - angVel = axis * angle / timeStep; - } - - static void calculateDiffAxisAngle(const b3Transform& transform0, const b3Transform& transform1, b3Vector3& axis, b3Scalar& angle) - { - b3Matrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse(); - b3Quaternion dorn; - dmat.getRotation(dorn); - - ///floating point inaccuracy can lead to w component > 1..., which breaks - dorn.normalize(); - - angle = dorn.getAngle(); - axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ()); - axis[3] = b3Scalar(0.); - //check for axis length - b3Scalar len = axis.length2(); - if (len < B3_EPSILON * B3_EPSILON) - axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.)); - else - axis /= b3Sqrt(len); - } -}; - -///The b3ConvexSeparatingDistanceUtil can help speed up convex collision detection -///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance -class b3ConvexSeparatingDistanceUtil -{ - b3Quaternion m_ornA; - b3Quaternion m_ornB; - b3Vector3 m_posA; - b3Vector3 m_posB; - - b3Vector3 m_separatingNormal; - - b3Scalar m_boundingRadiusA; - b3Scalar m_boundingRadiusB; - b3Scalar m_separatingDistance; - -public: - b3ConvexSeparatingDistanceUtil(b3Scalar boundingRadiusA, b3Scalar boundingRadiusB) - : m_boundingRadiusA(boundingRadiusA), - m_boundingRadiusB(boundingRadiusB), - m_separatingDistance(0.f) - { - } - - b3Scalar getConservativeSeparatingDistance() - { - return m_separatingDistance; - } - - void updateSeparatingDistance(const b3Transform& transA, const b3Transform& transB) - { - const b3Vector3& toPosA = transA.getOrigin(); - const b3Vector3& toPosB = transB.getOrigin(); - b3Quaternion toOrnA = transA.getRotation(); - b3Quaternion toOrnB = transB.getRotation(); - - if (m_separatingDistance > 0.f) - { - b3Vector3 linVelA, angVelA, linVelB, angVelB; - b3TransformUtil::calculateVelocityQuaternion(m_posA, toPosA, m_ornA, toOrnA, b3Scalar(1.), linVelA, angVelA); - b3TransformUtil::calculateVelocityQuaternion(m_posB, toPosB, m_ornB, toOrnB, b3Scalar(1.), linVelB, angVelB); - b3Scalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB; - b3Vector3 relLinVel = (linVelB - linVelA); - b3Scalar relLinVelocLength = relLinVel.dot(m_separatingNormal); - if (relLinVelocLength < 0.f) - { - relLinVelocLength = 0.f; - } - - b3Scalar projectedMotion = maxAngularProjectedVelocity + relLinVelocLength; - m_separatingDistance -= projectedMotion; - } - - m_posA = toPosA; - m_posB = toPosB; - m_ornA = toOrnA; - m_ornB = toOrnB; - } - - void initSeparatingDistance(const b3Vector3& separatingVector, b3Scalar separatingDistance, const b3Transform& transA, const b3Transform& transB) - { - m_separatingDistance = separatingDistance; - - if (m_separatingDistance > 0.f) - { - m_separatingNormal = separatingVector; - - const b3Vector3& toPosA = transA.getOrigin(); - const b3Vector3& toPosB = transB.getOrigin(); - b3Quaternion toOrnA = transA.getRotation(); - b3Quaternion toOrnB = transB.getRotation(); - m_posA = toPosA; - m_posB = toPosB; - m_ornA = toOrnA; - m_ornB = toOrnB; - } - } -}; - -#endif //B3_TRANSFORM_UTIL_H diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.cpp b/thirdparty/bullet/Bullet3Common/b3Vector3.cpp deleted file mode 100644 index 100fb774c1b..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Vector3.cpp +++ /dev/null @@ -1,1637 +0,0 @@ -/* - Copyright (c) 2011-213 Apple Inc. http://bulletphysics.org - - This software is provided 'as-is', without any express or implied warranty. - In no event will the authors be held liable for any damages arising from the use of this software. - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it freely, - subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - This source version has been altered. - */ - -#if defined(_WIN32) || defined(__i386__) -#define B3_USE_SSE_IN_API -#endif - -#include "b3Vector3.h" - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - -#ifdef __APPLE__ -#include -typedef float float4 __attribute__((vector_size(16))); -#else -#define float4 __m128 -#endif -//typedef uint32_t uint4 __attribute__ ((vector_size(16))); - -#if defined B3_USE_SSE || defined _WIN32 - -#define LOG2_ARRAY_SIZE 6 -#define STACK_ARRAY_COUNT (1UL << LOG2_ARRAY_SIZE) - -#include - -long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult); -long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - const float4 *vertices = (const float4 *)vv; - static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; - float4 dotMax = b3Assign128(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY); - float4 vvec = _mm_loadu_ps(vec); - float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa)); /// zzzz - float4 vLo = _mm_movelh_ps(vvec, vvec); /// xyxy - - long maxIndex = -1L; - - size_t segment = 0; - float4 stack_array[STACK_ARRAY_COUNT]; - -#if DEBUG - // memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) ); -#endif - - size_t index; - float4 max; - // Faster loop without cleanup code for full tiles - for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4) - { - max = dotMax; - - for (index = 0; index < STACK_ARRAY_COUNT; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - // It is too costly to keep the index of the max here. We will look for it again later. We save a lot of work this way. - } - - // If we found a new max - if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax))) - { - // copy the new max across all lanes of our max accumulator - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e)); - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1)); - - dotMax = max; - - // find first occurrence of that max - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++) // local_count must be a multiple of 4 - { - } - // record where it is. - maxIndex = 4 * index + segment + indexTable[test]; - } - } - - // account for work we've already done - count -= segment; - - // Deal with the last < STACK_ARRAY_COUNT vectors - max = dotMax; - index = 0; - - if (b3Unlikely(count > 16)) - { - for (; index + 4 <= count / 4; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - - // It is too costly to keep the index of the max here. We will look for it again later. We save a lot of work this way. - } - } - - size_t localCount = (count & -4L) - 4 * index; - if (localCount) - { -#ifdef __APPLE__ - float4 t0, t1, t2, t3, t4; - float4 *sap = &stack_array[index + localCount / 4]; - vertices += localCount; // counter the offset - size_t byteIndex = -(localCount) * sizeof(float); - //AT&T Code style assembly - asm volatile( - ".align 4 \n\ - 0: movaps %[max], %[t2] // move max out of the way to avoid propagating NaNs in max \n\ - movaps (%[vertices], %[byteIndex], 4), %[t0] // vertices[0] \n\ - movaps 16(%[vertices], %[byteIndex], 4), %[t1] // vertices[1] \n\ - movaps %[t0], %[max] // vertices[0] \n\ - movlhps %[t1], %[max] // x0y0x1y1 \n\ - movaps 32(%[vertices], %[byteIndex], 4), %[t3] // vertices[2] \n\ - movaps 48(%[vertices], %[byteIndex], 4), %[t4] // vertices[3] \n\ - mulps %[vLo], %[max] // x0y0x1y1 * vLo \n\ - movhlps %[t0], %[t1] // z0w0z1w1 \n\ - movaps %[t3], %[t0] // vertices[2] \n\ - movlhps %[t4], %[t0] // x2y2x3y3 \n\ - mulps %[vLo], %[t0] // x2y2x3y3 * vLo \n\ - movhlps %[t3], %[t4] // z2w2z3w3 \n\ - shufps $0x88, %[t4], %[t1] // z0z1z2z3 \n\ - mulps %[vHi], %[t1] // z0z1z2z3 * vHi \n\ - movaps %[max], %[t3] // x0y0x1y1 * vLo \n\ - shufps $0x88, %[t0], %[max] // x0x1x2x3 * vLo.x \n\ - shufps $0xdd, %[t0], %[t3] // y0y1y2y3 * vLo.y \n\ - addps %[t3], %[max] // x + y \n\ - addps %[t1], %[max] // x + y + z \n\ - movaps %[max], (%[sap], %[byteIndex]) // record result for later scrutiny \n\ - maxps %[t2], %[max] // record max, restore max \n\ - add $16, %[byteIndex] // advance loop counter\n\ - jnz 0b \n\ - " - : [max] "+x"(max), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex) - : [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap) - : "memory", "cc"); - index += localCount / 4; -#else - { - for (unsigned int i = 0; i < localCount / 4; i++, index++) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - } - } -#endif //__APPLE__ - } - - // process the last few points - if (count & 3) - { - float4 v0, v1, v2, x, y, z; - switch (count & 3) - { - case 3: - { - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - - // Calculate 3 dot products, transpose, duplicate v2 - float4 lo0 = _mm_movelh_ps(v0, v1); // xyxy.lo - float4 hi0 = _mm_movehl_ps(v1, v0); // z?z?.lo - lo0 = lo0 * vLo; - z = _mm_shuffle_ps(hi0, v2, 0xa8); // z0z1z2z2 - z = z * vHi; - float4 lo1 = _mm_movelh_ps(v2, v2); // xyxy - lo1 = lo1 * vLo; - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - } - break; - case 2: - { - v0 = vertices[0]; - v1 = vertices[1]; - float4 xy = _mm_movelh_ps(v0, v1); - z = _mm_movehl_ps(v1, v0); - xy = xy * vLo; - z = _mm_shuffle_ps(z, z, 0xa8); - x = _mm_shuffle_ps(xy, xy, 0xa8); - y = _mm_shuffle_ps(xy, xy, 0xfd); - z = z * vHi; - } - break; - case 1: - { - float4 xy = vertices[0]; - z = _mm_shuffle_ps(xy, xy, 0xaa); - xy = xy * vLo; - z = z * vHi; - x = _mm_shuffle_ps(xy, xy, 0); - y = _mm_shuffle_ps(xy, xy, 0x55); - } - break; - } - x = x + y; - x = x + z; - stack_array[index] = x; - max = _mm_max_ps(x, max); // control the order here so that max is never NaN even if x is nan - index++; - } - - // if we found a new max. - if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax))) - { // we found a new max. Search for it - // find max across the max vector, place in all elements of max -- big latency hit here - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e)); - max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1)); - - // It is slightly faster to do this part in scalar code when count < 8. However, the common case for - // this where it actually makes a difference is handled in the early out at the top of the function, - // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced - // complexity, and removed it. - - dotMax = max; - - // scan for the first occurence of max in the array - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++) // local_count must be a multiple of 4 - { - } - maxIndex = 4 * index + segment + indexTable[test]; - } - - _mm_store_ss(dotResult, dotMax); - return maxIndex; -} - -long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult); - -long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - const float4 *vertices = (const float4 *)vv; - static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; - - float4 dotmin = b3Assign128(B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY); - float4 vvec = _mm_loadu_ps(vec); - float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa)); /// zzzz - float4 vLo = _mm_movelh_ps(vvec, vvec); /// xyxy - - long minIndex = -1L; - - size_t segment = 0; - float4 stack_array[STACK_ARRAY_COUNT]; - -#if DEBUG - // memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) ); -#endif - - size_t index; - float4 min; - // Faster loop without cleanup code for full tiles - for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4) - { - min = dotmin; - - for (index = 0; index < STACK_ARRAY_COUNT; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - // It is too costly to keep the index of the min here. We will look for it again later. We save a lot of work this way. - } - - // If we found a new min - if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin))) - { - // copy the new min across all lanes of our min accumulator - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e)); - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1)); - - dotmin = min; - - // find first occurrence of that min - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++) // local_count must be a multiple of 4 - { - } - // record where it is. - minIndex = 4 * index + segment + indexTable[test]; - } - } - - // account for work we've already done - count -= segment; - - // Deal with the last < STACK_ARRAY_COUNT vectors - min = dotmin; - index = 0; - - if (b3Unlikely(count > 16)) - { - for (; index + 4 <= count / 4; index += 4) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 1] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 2] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - v3 = vertices[3]; - vertices += 4; - - lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - z = _mm_shuffle_ps(hi0, hi1, 0x88); - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index + 3] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - - // It is too costly to keep the index of the min here. We will look for it again later. We save a lot of work this way. - } - } - - size_t localCount = (count & -4L) - 4 * index; - if (localCount) - { -#ifdef __APPLE__ - vertices += localCount; // counter the offset - float4 t0, t1, t2, t3, t4; - size_t byteIndex = -(localCount) * sizeof(float); - float4 *sap = &stack_array[index + localCount / 4]; - - asm volatile( - ".align 4 \n\ - 0: movaps %[min], %[t2] // move min out of the way to avoid propagating NaNs in min \n\ - movaps (%[vertices], %[byteIndex], 4), %[t0] // vertices[0] \n\ - movaps 16(%[vertices], %[byteIndex], 4), %[t1] // vertices[1] \n\ - movaps %[t0], %[min] // vertices[0] \n\ - movlhps %[t1], %[min] // x0y0x1y1 \n\ - movaps 32(%[vertices], %[byteIndex], 4), %[t3] // vertices[2] \n\ - movaps 48(%[vertices], %[byteIndex], 4), %[t4] // vertices[3] \n\ - mulps %[vLo], %[min] // x0y0x1y1 * vLo \n\ - movhlps %[t0], %[t1] // z0w0z1w1 \n\ - movaps %[t3], %[t0] // vertices[2] \n\ - movlhps %[t4], %[t0] // x2y2x3y3 \n\ - movhlps %[t3], %[t4] // z2w2z3w3 \n\ - mulps %[vLo], %[t0] // x2y2x3y3 * vLo \n\ - shufps $0x88, %[t4], %[t1] // z0z1z2z3 \n\ - mulps %[vHi], %[t1] // z0z1z2z3 * vHi \n\ - movaps %[min], %[t3] // x0y0x1y1 * vLo \n\ - shufps $0x88, %[t0], %[min] // x0x1x2x3 * vLo.x \n\ - shufps $0xdd, %[t0], %[t3] // y0y1y2y3 * vLo.y \n\ - addps %[t3], %[min] // x + y \n\ - addps %[t1], %[min] // x + y + z \n\ - movaps %[min], (%[sap], %[byteIndex]) // record result for later scrutiny \n\ - minps %[t2], %[min] // record min, restore min \n\ - add $16, %[byteIndex] // advance loop counter\n\ - jnz 0b \n\ - " - : [min] "+x"(min), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex) - : [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap) - : "memory", "cc"); - index += localCount / 4; -#else - { - for (unsigned int i = 0; i < localCount / 4; i++, index++) - { // do four dot products at a time. Carefully avoid touching the w element. - float4 v0 = vertices[0]; - float4 v1 = vertices[1]; - float4 v2 = vertices[2]; - float4 v3 = vertices[3]; - vertices += 4; - - float4 lo0 = _mm_movelh_ps(v0, v1); // x0y0x1y1 - float4 hi0 = _mm_movehl_ps(v1, v0); // z0?0z1?1 - float4 lo1 = _mm_movelh_ps(v2, v3); // x2y2x3y3 - float4 hi1 = _mm_movehl_ps(v3, v2); // z2?2z3?3 - - lo0 = lo0 * vLo; - lo1 = lo1 * vLo; - float4 z = _mm_shuffle_ps(hi0, hi1, 0x88); - float4 x = _mm_shuffle_ps(lo0, lo1, 0x88); - float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd); - z = z * vHi; - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that max is never NaN even if x is nan - } - } - -#endif - } - - // process the last few points - if (count & 3) - { - float4 v0, v1, v2, x, y, z; - switch (count & 3) - { - case 3: - { - v0 = vertices[0]; - v1 = vertices[1]; - v2 = vertices[2]; - - // Calculate 3 dot products, transpose, duplicate v2 - float4 lo0 = _mm_movelh_ps(v0, v1); // xyxy.lo - float4 hi0 = _mm_movehl_ps(v1, v0); // z?z?.lo - lo0 = lo0 * vLo; - z = _mm_shuffle_ps(hi0, v2, 0xa8); // z0z1z2z2 - z = z * vHi; - float4 lo1 = _mm_movelh_ps(v2, v2); // xyxy - lo1 = lo1 * vLo; - x = _mm_shuffle_ps(lo0, lo1, 0x88); - y = _mm_shuffle_ps(lo0, lo1, 0xdd); - } - break; - case 2: - { - v0 = vertices[0]; - v1 = vertices[1]; - float4 xy = _mm_movelh_ps(v0, v1); - z = _mm_movehl_ps(v1, v0); - xy = xy * vLo; - z = _mm_shuffle_ps(z, z, 0xa8); - x = _mm_shuffle_ps(xy, xy, 0xa8); - y = _mm_shuffle_ps(xy, xy, 0xfd); - z = z * vHi; - } - break; - case 1: - { - float4 xy = vertices[0]; - z = _mm_shuffle_ps(xy, xy, 0xaa); - xy = xy * vLo; - z = z * vHi; - x = _mm_shuffle_ps(xy, xy, 0); - y = _mm_shuffle_ps(xy, xy, 0x55); - } - break; - } - x = x + y; - x = x + z; - stack_array[index] = x; - min = _mm_min_ps(x, min); // control the order here so that min is never NaN even if x is nan - index++; - } - - // if we found a new min. - if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin))) - { // we found a new min. Search for it - // find min across the min vector, place in all elements of min -- big latency hit here - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e)); - min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1)); - - // It is slightly faster to do this part in scalar code when count < 8. However, the common case for - // this where it actually makes a difference is handled in the early out at the top of the function, - // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced - // complexity, and removed it. - - dotmin = min; - - // scan for the first occurence of min in the array - size_t test; - for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++) // local_count must be a multiple of 4 - { - } - minIndex = 4 * index + segment + indexTable[test]; - } - - _mm_store_ss(dotResult, dotmin); - return minIndex; -} - -#elif defined B3_USE_NEON -#define ARM_NEON_GCC_COMPATIBILITY 1 -#include - -static long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult); -static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult); - -long (*b3_maxdot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_maxdot_large_sel; -long (*b3_mindot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_mindot_large_sel; - -extern "C" -{ - int _get_cpu_capabilities(void); -} - -static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - if (_get_cpu_capabilities() & 0x2000) - b3_maxdot_large = _maxdot_large_v1; - else - b3_maxdot_large = _maxdot_large_v0; - - return b3_maxdot_large(vv, vec, count, dotResult); -} - -static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - if (_get_cpu_capabilities() & 0x2000) - b3_mindot_large = _mindot_large_v1; - else - b3_mindot_large = _mindot_large_v0; - - return b3_mindot_large(vv, vec, count, dotResult); -} - -#define vld1q_f32_aligned_postincrement(_ptr) ({ float32x4_t _r; asm( "vld1.f32 {%0}, [%1, :128]!\n" : "=w" (_r), "+r" (_ptr) ); /*return*/ _r; }) - -long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - unsigned long i = 0; - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x2_t vLo = vget_low_f32(vvec); - float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0); - float32x2_t dotMaxLo = (float32x2_t){-B3_INFINITY, -B3_INFINITY}; - float32x2_t dotMaxHi = (float32x2_t){-B3_INFINITY, -B3_INFINITY}; - uint32x2_t indexLo = (uint32x2_t){0, 1}; - uint32x2_t indexHi = (uint32x2_t){2, 3}; - uint32x2_t iLo = (uint32x2_t){-1, -1}; - uint32x2_t iHi = (uint32x2_t){-1, -1}; - const uint32x2_t four = (uint32x2_t){4, 4}; - - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - xy0 = vmul_f32(vget_low_f32(v0), vLo); - xy1 = vmul_f32(vget_low_f32(v1), vLo); - xy2 = vmul_f32(vget_low_f32(v2), vLo); - xy3 = vmul_f32(vget_low_f32(v3), vLo); - - z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - zLo = vmul_f32(z0.val[0], vHi); - zHi = vmul_f32(z1.val[0], vHi); - - rLo = vpadd_f32(xy0, xy1); - rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - maskLo = vcgt_f32(rLo, dotMaxLo); - maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy2); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - } - break; - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - rLo = vadd_f32(rLo, zLo); - - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0); - float32x2_t zLo = vmul_f32(z0, vHi); - float32x2_t rLo = vpadd_f32(xy0, xy0); - rLo = vadd_f32(rLo, zLo); - uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo); - dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vcgt_f32(dotMaxHi, dotMaxLo); - dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo); - iLo = vbsl_u32(mask, iHi, iLo); - - // select best answer between even and odd results - dotMaxHi = vdup_lane_f32(dotMaxLo, 1); - iHi = vdup_lane_u32(iLo, 1); - mask = vcgt_f32(dotMaxHi, dotMaxLo); - dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo); - iLo = vbsl_u32(mask, iHi, iLo); - - *dotResult = vget_lane_f32(dotMaxLo, 0); - return vget_lane_u32(iLo, 0); -} - -long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec)); - float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0); - const uint32x4_t four = (uint32x4_t){4, 4, 4, 4}; - uint32x4_t local_index = (uint32x4_t){0, 1, 2, 3}; - uint32x4_t index = (uint32x4_t){-1, -1, -1, -1}; - float32x4_t maxDot = (float32x4_t){-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY}; - - unsigned long i = 0; - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - zb = vuzpq_f32(z0, z1); - z = vmulq_f32(zb.val[0], vHi); - xy = vuzpq_f32(xy0, xy1); - x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v2)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v2)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - - xy0 = vmulq_f32(xy0, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z0); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v0)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0); - - xy0 = vmulq_f32(xy0, vLo); - - z = vmulq_f32(z, vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcgtq_f32(x, maxDot); - maxDot = vbslq_f32(mask, x, maxDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vcgt_f32(vget_high_f32(maxDot), vget_low_f32(maxDot)); - float32x2_t maxDot2 = vbsl_f32(mask, vget_high_f32(maxDot), vget_low_f32(maxDot)); - uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index)); - - // select best answer between even and odd results - float32x2_t maxDotO = vdup_lane_f32(maxDot2, 1); - uint32x2_t indexHi = vdup_lane_u32(index2, 1); - mask = vcgt_f32(maxDotO, maxDot2); - maxDot2 = vbsl_f32(mask, maxDotO, maxDot2); - index2 = vbsl_u32(mask, indexHi, index2); - - *dotResult = vget_lane_f32(maxDot2, 0); - return vget_lane_u32(index2, 0); -} - -long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - unsigned long i = 0; - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x2_t vLo = vget_low_f32(vvec); - float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0); - float32x2_t dotMinLo = (float32x2_t){B3_INFINITY, B3_INFINITY}; - float32x2_t dotMinHi = (float32x2_t){B3_INFINITY, B3_INFINITY}; - uint32x2_t indexLo = (uint32x2_t){0, 1}; - uint32x2_t indexHi = (uint32x2_t){2, 3}; - uint32x2_t iLo = (uint32x2_t){-1, -1}; - uint32x2_t iHi = (uint32x2_t){-1, -1}; - const uint32x2_t four = (uint32x2_t){4, 4}; - - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - uint32x2_t maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - xy0 = vmul_f32(vget_low_f32(v0), vLo); - xy1 = vmul_f32(vget_low_f32(v1), vLo); - xy2 = vmul_f32(vget_low_f32(v2), vLo); - xy3 = vmul_f32(vget_low_f32(v3), vLo); - - z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - zLo = vmul_f32(z0.val[0], vHi); - zHi = vmul_f32(z1.val[0], vHi); - - rLo = vpadd_f32(xy0, xy1); - rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - maskLo = vclt_f32(rLo, dotMinLo); - maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(z1.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy3); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - uint32x2_t maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - indexLo = vadd_u32(indexLo, four); - indexHi = vadd_u32(indexHi, four); - } - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - float32x2_t rHi = vpadd_f32(xy2, xy2); - rLo = vadd_f32(rLo, zLo); - rHi = vadd_f32(rHi, zHi); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - uint32x2_t maskHi = vclt_f32(rHi, dotMinHi); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - dotMinHi = vbsl_f32(maskHi, rHi, dotMinHi); - iLo = vbsl_u32(maskLo, indexLo, iLo); - iHi = vbsl_u32(maskHi, indexHi, iHi); - } - break; - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo); - - float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x2_t zLo = vmul_f32(z0.val[0], vHi); - - float32x2_t rLo = vpadd_f32(xy0, xy1); - rLo = vadd_f32(rLo, zLo); - - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo); - float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0); - float32x2_t zLo = vmul_f32(z0, vHi); - float32x2_t rLo = vpadd_f32(xy0, xy0); - rLo = vadd_f32(rLo, zLo); - uint32x2_t maskLo = vclt_f32(rLo, dotMinLo); - dotMinLo = vbsl_f32(maskLo, rLo, dotMinLo); - iLo = vbsl_u32(maskLo, indexLo, iLo); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vclt_f32(dotMinHi, dotMinLo); - dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo); - iLo = vbsl_u32(mask, iHi, iLo); - - // select best answer between even and odd results - dotMinHi = vdup_lane_f32(dotMinLo, 1); - iHi = vdup_lane_u32(iLo, 1); - mask = vclt_f32(dotMinHi, dotMinLo); - dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo); - iLo = vbsl_u32(mask, iHi, iLo); - - *dotResult = vget_lane_f32(dotMinLo, 0); - return vget_lane_u32(iLo, 0); -} - -long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult) -{ - float32x4_t vvec = vld1q_f32_aligned_postincrement(vec); - float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec)); - float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0); - const uint32x4_t four = (uint32x4_t){4, 4, 4, 4}; - uint32x4_t local_index = (uint32x4_t){0, 1, 2, 3}; - uint32x4_t index = (uint32x4_t){-1, -1, -1, -1}; - float32x4_t minDot = (float32x4_t){B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY}; - - unsigned long i = 0; - for (; i + 8 <= count; i += 8) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - - v0 = vld1q_f32_aligned_postincrement(vv); - v1 = vld1q_f32_aligned_postincrement(vv); - v2 = vld1q_f32_aligned_postincrement(vv); - v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - zb = vuzpq_f32(z0, z1); - z = vmulq_f32(zb.val[0], vHi); - xy = vuzpq_f32(xy0, xy1); - x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - for (; i + 4 <= count; i += 4) - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v3 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v3)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v3)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - - switch (count & 3) - { - case 3: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v2 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - float32x4_t xy1 = vcombine_f32(vget_low_f32(v2), vget_low_f32(v2)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - float32x4_t z1 = vcombine_f32(vget_high_f32(v2), vget_high_f32(v2)); - - xy0 = vmulq_f32(xy0, vLo); - xy1 = vmulq_f32(xy1, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z1); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy1); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 2: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - float32x4_t v1 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v1)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z0 = vcombine_f32(vget_high_f32(v0), vget_high_f32(v1)); - - xy0 = vmulq_f32(xy0, vLo); - - float32x4x2_t zb = vuzpq_f32(z0, z0); - float32x4_t z = vmulq_f32(zb.val[0], vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - case 1: - { - float32x4_t v0 = vld1q_f32_aligned_postincrement(vv); - - // the next two lines should resolve to a single vswp d, d - float32x4_t xy0 = vcombine_f32(vget_low_f32(v0), vget_low_f32(v0)); - // the next two lines should resolve to a single vswp d, d - float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0); - - xy0 = vmulq_f32(xy0, vLo); - - z = vmulq_f32(z, vHi); - float32x4x2_t xy = vuzpq_f32(xy0, xy0); - float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]); - x = vaddq_f32(x, z); - - uint32x4_t mask = vcltq_f32(x, minDot); - minDot = vbslq_f32(mask, x, minDot); - index = vbslq_u32(mask, local_index, index); - local_index = vaddq_u32(local_index, four); - } - break; - - default: - break; - } - - // select best answer between hi and lo results - uint32x2_t mask = vclt_f32(vget_high_f32(minDot), vget_low_f32(minDot)); - float32x2_t minDot2 = vbsl_f32(mask, vget_high_f32(minDot), vget_low_f32(minDot)); - uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index)); - - // select best answer between even and odd results - float32x2_t minDotO = vdup_lane_f32(minDot2, 1); - uint32x2_t indexHi = vdup_lane_u32(index2, 1); - mask = vclt_f32(minDotO, minDot2); - minDot2 = vbsl_f32(mask, minDotO, minDot2); - index2 = vbsl_u32(mask, indexHi, index2); - - *dotResult = vget_lane_f32(minDot2, 0); - return vget_lane_u32(index2, 0); -} - -#else -#error Unhandled __APPLE__ arch -#endif - -#endif /* __APPLE__ */ diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.h b/thirdparty/bullet/Bullet3Common/b3Vector3.h deleted file mode 100644 index a70d68d6e14..00000000000 --- a/thirdparty/bullet/Bullet3Common/b3Vector3.h +++ /dev/null @@ -1,1303 +0,0 @@ -/* -Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_VECTOR3_H -#define B3_VECTOR3_H - -//#include -#include "b3Scalar.h" -#include "b3MinMax.h" -#include "b3AlignedAllocator.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3Vector3Data b3Vector3DoubleData -#define b3Vector3DataName "b3Vector3DoubleData" -#else -#define b3Vector3Data b3Vector3FloatData -#define b3Vector3DataName "b3Vector3FloatData" -#endif //B3_USE_DOUBLE_PRECISION - -#if defined B3_USE_SSE - -//typedef uint32_t __m128i __attribute__ ((vector_size(16))); - -#ifdef _MSC_VER -#pragma warning(disable : 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255' -#endif - -#define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff) -//#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) ) -#define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask)) -#define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3)) -#define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i)) - -#define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) -#define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) -#define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) -#define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask) -#define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask) -#define b3vxyzMaskf b3vFFF0fMask -#define b3vAbsfMask b3CastiTo128f(b3vAbsMask) - -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f}; - -#endif - -#ifdef B3_USE_NEON - -const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f}; -const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}; -const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; -const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0}; - -#endif - -class b3Vector3; -class b3Vector4; - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) -//#if defined (B3_USE_SSE) || defined (B3_USE_NEON) -inline b3Vector3 b3MakeVector3(b3SimdFloat4 v); -inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec); -#endif - -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z); -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w); -inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w); - -/**@brief b3Vector3 can be used to represent 3D points and vectors. - * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user - * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers - */ -B3_ATTRIBUTE_ALIGNED16(class) -b3Vector3 -{ -public: -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM - union { - b3SimdFloat4 mVec128; - float m_floats[4]; - struct - { - float x, y, z, w; - }; - }; -#else - union { - float m_floats[4]; - struct - { - float x, y, z, w; - }; - }; -#endif - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM - - /*B3_FORCE_INLINE b3Vector3() - { - } - */ - - B3_FORCE_INLINE b3SimdFloat4 get128() const - { - return mVec128; - } - B3_FORCE_INLINE void set128(b3SimdFloat4 v128) - { - mVec128 = v128; - } -#endif - -public: - /**@brief Add a vector to this one - * @param The vector to add to this one */ - B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_add_ps(mVec128, v.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vaddq_f32(mVec128, v.mVec128); -#else - m_floats[0] += v.m_floats[0]; - m_floats[1] += v.m_floats[1]; - m_floats[2] += v.m_floats[2]; -#endif - return *this; - } - - /**@brief Subtract a vector from this one - * @param The vector to subtract */ - B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_sub_ps(mVec128, v.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vsubq_f32(mVec128, v.mVec128); -#else - m_floats[0] -= v.m_floats[0]; - m_floats[1] -= v.m_floats[1]; - m_floats[2] -= v.m_floats[2]; -#endif - return *this; - } - - /**@brief Scale the vector - * @param s Scale factor */ - B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - mVec128 = _mm_mul_ps(mVec128, vs); -#elif defined(B3_USE_NEON) - mVec128 = vmulq_n_f32(mVec128, s); -#else - m_floats[0] *= s; - m_floats[1] *= s; - m_floats[2] *= s; -#endif - return *this; - } - - /**@brief Inversely scale the vector - * @param s Scale factor to divide by */ - B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s) - { - b3FullAssert(s != b3Scalar(0.0)); - -#if 0 //defined(B3_USE_SSE_IN_API) -// this code is not faster ! - __m128 vs = _mm_load_ss(&s); - vs = _mm_div_ss(b3v1110, vs); - vs = b3_pshufd_ps(vs, 0x00); // (S S S S) - - mVec128 = _mm_mul_ps(mVec128, vs); - - return *this; -#else - return *this *= b3Scalar(1.0) / s; -#endif - } - - /**@brief Return the dot product - * @param v The other vector in the dot product */ - B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vd = _mm_mul_ps(mVec128, v.mVec128); - __m128 z = _mm_movehl_ps(vd, vd); - __m128 y = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, y); - vd = _mm_add_ss(vd, z); - return _mm_cvtss_f32(vd); -#elif defined(B3_USE_NEON) - float32x4_t vd = vmulq_f32(mVec128, v.mVec128); - float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd)); - x = vadd_f32(x, vget_high_f32(vd)); - return vget_lane_f32(x, 0); -#else - return m_floats[0] * v.m_floats[0] + - m_floats[1] * v.m_floats[1] + - m_floats[2] * v.m_floats[2]; -#endif - } - - /**@brief Return the length of the vector squared */ - B3_FORCE_INLINE b3Scalar length2() const - { - return dot(*this); - } - - /**@brief Return the length of the vector */ - B3_FORCE_INLINE b3Scalar length() const - { - return b3Sqrt(length2()); - } - - /**@brief Return the distance squared between the ends of this and another vector - * This is symantically treating the vector like a point */ - B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const; - - /**@brief Return the distance between the ends of this and another vector - * This is symantically treating the vector like a point */ - B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const; - - B3_FORCE_INLINE b3Vector3& safeNormalize() - { - b3Scalar l2 = length2(); - //triNormal.normalize(); - if (l2 >= B3_EPSILON * B3_EPSILON) - { - (*this) /= b3Sqrt(l2); - } - else - { - setValue(1, 0, 0); - } - return *this; - } - - /**@brief Normalize this vector - * x^2 + y^2 + z^2 = 1 */ - B3_FORCE_INLINE b3Vector3& normalize() - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - // dot product first - __m128 vd = _mm_mul_ps(mVec128, mVec128); - __m128 z = _mm_movehl_ps(vd, vd); - __m128 y = _mm_shuffle_ps(vd, vd, 0x55); - vd = _mm_add_ss(vd, y); - vd = _mm_add_ss(vd, z); - -#if 0 - vd = _mm_sqrt_ss(vd); - vd = _mm_div_ss(b3v1110, vd); - vd = b3_splat_ps(vd, 0x80); - mVec128 = _mm_mul_ps(mVec128, vd); -#else - - // NR step 1/sqrt(x) - vd is x, y is output - y = _mm_rsqrt_ss(vd); // estimate - - // one step NR - z = b3v1_5; - vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5 - //x2 = vd; - vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 - vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0 - z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0 - - y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0) - - y = b3_splat_ps(y, 0x80); - mVec128 = _mm_mul_ps(mVec128, y); - -#endif - - return *this; -#else - return *this /= length(); -#endif - } - - /**@brief Return a normalized version of this vector */ - B3_FORCE_INLINE b3Vector3 normalized() const; - - /**@brief Return a rotated version of this vector - * @param wAxis The axis to rotate about - * @param angle The angle to rotate by */ - B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const; - - /**@brief Return the angle between this and another vector - * @param v The other vector */ - B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const - { - b3Scalar s = b3Sqrt(length2() * v.length2()); - b3FullAssert(s != b3Scalar(0.0)); - return b3Acos(dot(v) / s); - } - - /**@brief Return a vector will the absolute values of each element */ - B3_FORCE_INLINE b3Vector3 absolute() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask)); -#elif defined(B3_USE_NEON) - return b3Vector3(vabsq_f32(mVec128)); -#else - return b3MakeVector3( - b3Fabs(m_floats[0]), - b3Fabs(m_floats[1]), - b3Fabs(m_floats[2])); -#endif - } - - /**@brief Return the cross product between this and another vector - * @param v The other vector */ - B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 T, V; - - T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - - V = _mm_mul_ps(V, mVec128); - T = _mm_mul_ps(T, v.mVec128); - V = _mm_sub_ps(V, T); - - V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3)); - return b3MakeVector3(V); -#elif defined(B3_USE_NEON) - float32x4_t T, V; - // form (Y, Z, X, _) of mVec128 and v.mVec128 - float32x2_t Tlow = vget_low_f32(mVec128); - float32x2_t Vlow = vget_low_f32(v.mVec128); - T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow); - - V = vmulq_f32(V, mVec128); - T = vmulq_f32(T, v.mVec128); - V = vsubq_f32(V, T); - Vlow = vget_low_f32(V); - // form (Y, Z, X, _); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow); - V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask); - - return b3Vector3(V); -#else - return b3MakeVector3( - m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1], - m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2], - m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]); -#endif - } - - B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - // cross: - __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - - V = _mm_mul_ps(V, v1.mVec128); - T = _mm_mul_ps(T, v2.mVec128); - V = _mm_sub_ps(V, T); - - V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3)); - - // dot: - V = _mm_mul_ps(V, mVec128); - __m128 z = _mm_movehl_ps(V, V); - __m128 y = _mm_shuffle_ps(V, V, 0x55); - V = _mm_add_ss(V, y); - V = _mm_add_ss(V, z); - return _mm_cvtss_f32(V); - -#elif defined(B3_USE_NEON) - // cross: - float32x4_t T, V; - // form (Y, Z, X, _) of mVec128 and v.mVec128 - float32x2_t Tlow = vget_low_f32(v1.mVec128); - float32x2_t Vlow = vget_low_f32(v2.mVec128); - T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow); - - V = vmulq_f32(V, v1.mVec128); - T = vmulq_f32(T, v2.mVec128); - V = vsubq_f32(V, T); - Vlow = vget_low_f32(V); - // form (Y, Z, X, _); - V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow); - - // dot: - V = vmulq_f32(mVec128, V); - float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V)); - x = vadd_f32(x, vget_high_f32(V)); - return vget_lane_f32(x, 0); -#else - return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + - m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + - m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]); -#endif - } - - /**@brief Return the axis with the smallest value - * Note return values are 0,1,2 for x, y, or z */ - B3_FORCE_INLINE int minAxis() const - { - return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2); - } - - /**@brief Return the axis with the largest value - * Note return values are 0,1,2 for x, y, or z */ - B3_FORCE_INLINE int maxAxis() const - { - return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0); - } - - B3_FORCE_INLINE int furthestAxis() const - { - return absolute().minAxis(); - } - - B3_FORCE_INLINE int closestAxis() const - { - return absolute().maxAxis(); - } - - B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0) - b3Scalar s = b3Scalar(1.0) - rt; - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - __m128 r0 = _mm_mul_ps(v0.mVec128, vs); - vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0) - __m128 r1 = _mm_mul_ps(v1.mVec128, vrt); - __m128 tmp3 = _mm_add_ps(r0, r1); - mVec128 = tmp3; -#elif defined(B3_USE_NEON) - float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128); - vl = vmulq_n_f32(vl, rt); - mVec128 = vaddq_f32(vl, v0.mVec128); -#else - b3Scalar s = b3Scalar(1.0) - rt; - m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0]; - m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1]; - m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2]; - //don't do the unused w component - // m_co[3] = s * v0[3] + rt * v1[3]; -#endif - } - - /**@brief Return the linear interpolation between this and another vector - * @param v The other vector - * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */ - B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vt = _mm_load_ss(&t); // (t 0 0 0) - vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0) - __m128 vl = _mm_sub_ps(v.mVec128, mVec128); - vl = _mm_mul_ps(vl, vt); - vl = _mm_add_ps(vl, mVec128); - - return b3MakeVector3(vl); -#elif defined(B3_USE_NEON) - float32x4_t vl = vsubq_f32(v.mVec128, mVec128); - vl = vmulq_n_f32(vl, t); - vl = vaddq_f32(vl, mVec128); - - return b3Vector3(vl); -#else - return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t, - m_floats[1] + (v.m_floats[1] - m_floats[1]) * t, - m_floats[2] + (v.m_floats[2] - m_floats[2]) * t); -#endif - } - - /**@brief Elementwise multiply this vector by the other - * @param v The other vector */ - B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_mul_ps(mVec128, v.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vmulq_f32(mVec128, v.mVec128); -#else - m_floats[0] *= v.m_floats[0]; - m_floats[1] *= v.m_floats[1]; - m_floats[2] *= v.m_floats[2]; -#endif - return *this; - } - - /**@brief Return the x value */ - B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; } - /**@brief Return the y value */ - B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; } - /**@brief Return the z value */ - B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; } - /**@brief Return the w value */ - B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; } - - /**@brief Set the x value */ - B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; }; - /**@brief Set the y value */ - B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; }; - /**@brief Set the z value */ - B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; }; - /**@brief Set the w value */ - B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; }; - - //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; } - //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; } - ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons. - B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; } - B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; } - - B3_FORCE_INLINE bool operator==(const b3Vector3& other) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128))); -#else - return ((m_floats[3] == other.m_floats[3]) && - (m_floats[2] == other.m_floats[2]) && - (m_floats[1] == other.m_floats[1]) && - (m_floats[0] == other.m_floats[0])); -#endif - } - - B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const - { - return !(*this == other); - } - - /**@brief Set each element to the max of the current values and the values of another b3Vector3 - * @param other The other b3Vector3 to compare with - */ - B3_FORCE_INLINE void setMax(const b3Vector3& other) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_max_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vmaxq_f32(mVec128, other.mVec128); -#else - b3SetMax(m_floats[0], other.m_floats[0]); - b3SetMax(m_floats[1], other.m_floats[1]); - b3SetMax(m_floats[2], other.m_floats[2]); - b3SetMax(m_floats[3], other.m_floats[3]); -#endif - } - - /**@brief Set each element to the min of the current values and the values of another b3Vector3 - * @param other The other b3Vector3 to compare with - */ - B3_FORCE_INLINE void setMin(const b3Vector3& other) - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = _mm_min_ps(mVec128, other.mVec128); -#elif defined(B3_USE_NEON) - mVec128 = vminq_f32(mVec128, other.mVec128); -#else - b3SetMin(m_floats[0], other.m_floats[0]); - b3SetMin(m_floats[1], other.m_floats[1]); - b3SetMin(m_floats[2], other.m_floats[2]); - b3SetMin(m_floats[3], other.m_floats[3]); -#endif - } - - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = b3Scalar(0.f); - } - - void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - - __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask); - __m128 V0 = _mm_xor_ps(b3vMzeroMask, V); - __m128 V2 = _mm_movelh_ps(V0, V); - - __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE); - - V0 = _mm_shuffle_ps(V0, V, 0xDB); - V2 = _mm_shuffle_ps(V2, V, 0xF9); - - v0->mVec128 = V0; - v1->mVec128 = V1; - v2->mVec128 = V2; -#else - v0->setValue(0., -getZ(), getY()); - v1->setValue(getZ(), 0., -getX()); - v2->setValue(-getY(), getX(), 0.); -#endif - } - - void setZero() - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128); -#elif defined(B3_USE_NEON) - int32x4_t vi = vdupq_n_s32(0); - mVec128 = vreinterpretq_f32_s32(vi); -#else - setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); -#endif - } - - B3_FORCE_INLINE bool isZero() const - { - return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0); - } - - B3_FORCE_INLINE bool fuzzyZero() const - { - return length2() < B3_EPSILON; - } - - B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const; - - B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn); - - B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const; - - B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn); - - B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const; - - B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn); - - /**@brief returns index of maximum dot product between this and vectors in array[] - * @param array The other vectors - * @param array_count The number of other vectors - * @param dotOut The maximum dot product */ - B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const; - - /**@brief returns index of minimum dot product between this and vectors in array[] - * @param array The other vectors - * @param array_count The number of other vectors - * @param dotOut The minimum dot product */ - B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const; - - /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */ - B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - - __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128); - __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128); - __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128); - __m128 b0 = _mm_unpacklo_ps(a0, a1); - __m128 b1 = _mm_unpackhi_ps(a0, a1); - __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps()); - __m128 r = _mm_movelh_ps(b0, b2); - r = _mm_add_ps(r, _mm_movehl_ps(b2, b0)); - a2 = _mm_and_ps(a2, b3vxyzMaskf); - r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1)))); - return b3MakeVector3(r); - -#elif defined(B3_USE_NEON) - static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0}; - float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128); - float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128); - float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128); - float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1)); - a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask); - float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]); - float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f)); - return b3Vector3(vcombine_f32(b0, b1)); -#else - return b3MakeVector3(dot(v0), dot(v1), dot(v2)); -#endif - } -}; - -/**@brief Return the sum of two vectors (Point symantics)*/ -B3_FORCE_INLINE b3Vector3 -operator+(const b3Vector3& v1, const b3Vector3& v2) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128)); -#elif defined(B3_USE_NEON) - return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128)); -#else - return b3MakeVector3( - v1.m_floats[0] + v2.m_floats[0], - v1.m_floats[1] + v2.m_floats[1], - v1.m_floats[2] + v2.m_floats[2]); -#endif -} - -/**@brief Return the elementwise product of two vectors */ -B3_FORCE_INLINE b3Vector3 -operator*(const b3Vector3& v1, const b3Vector3& v2) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128)); -#elif defined(B3_USE_NEON) - return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128)); -#else - return b3MakeVector3( - v1.m_floats[0] * v2.m_floats[0], - v1.m_floats[1] * v2.m_floats[1], - v1.m_floats[2] * v2.m_floats[2]); -#endif -} - -/**@brief Return the difference between two vectors */ -B3_FORCE_INLINE b3Vector3 -operator-(const b3Vector3& v1, const b3Vector3& v2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - - // without _mm_and_ps this code causes slowdown in Concave moving - __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128); - return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); -#elif defined(B3_USE_NEON) - float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128); - return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); -#else - return b3MakeVector3( - v1.m_floats[0] - v2.m_floats[0], - v1.m_floats[1] - v2.m_floats[1], - v1.m_floats[2] - v2.m_floats[2]); -#endif -} - -/**@brief Return the negative of the vector */ -B3_FORCE_INLINE b3Vector3 -operator-(const b3Vector3& v) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask); - return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); -#elif defined(B3_USE_NEON) - return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask)); -#else - return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]); -#endif -} - -/**@brief Return the vector scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator*(const b3Vector3& v, const b3Scalar& s) -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) - return b3MakeVector3(_mm_mul_ps(v.mVec128, vs)); -#elif defined(B3_USE_NEON) - float32x4_t r = vmulq_n_f32(v.mVec128, s); - return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); -#else - return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s); -#endif -} - -/**@brief Return the vector scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator*(const b3Scalar& s, const b3Vector3& v) -{ - return v * s; -} - -/**@brief Return the vector inversely scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator/(const b3Vector3& v, const b3Scalar& s) -{ - b3FullAssert(s != b3Scalar(0.0)); -#if 0 //defined(B3_USE_SSE_IN_API) -// this code is not faster ! - __m128 vs = _mm_load_ss(&s); - vs = _mm_div_ss(b3v1110, vs); - vs = b3_pshufd_ps(vs, 0x00); // (S S S S) - - return b3Vector3(_mm_mul_ps(v.mVec128, vs)); -#else - return v * (b3Scalar(1.0) / s); -#endif -} - -/**@brief Return the vector inversely scaled by s */ -B3_FORCE_INLINE b3Vector3 -operator/(const b3Vector3& v1, const b3Vector3& v2) -{ -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) - __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128); - vec = _mm_and_ps(vec, b3vFFF0fMask); - return b3MakeVector3(vec); -#elif defined(B3_USE_NEON) - float32x4_t x, y, v, m; - - x = v1.mVec128; - y = v2.mVec128; - - v = vrecpeq_f32(y); // v ~ 1/y - m = vrecpsq_f32(y, v); // m = (2-v*y) - v = vmulq_f32(v, m); // vv = v*m ~~ 1/y - m = vrecpsq_f32(y, v); // mm = (2-vv*y) - v = vmulq_f32(v, x); // x*vv - v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y - - return b3Vector3(v); -#else - return b3MakeVector3( - v1.m_floats[0] / v2.m_floats[0], - v1.m_floats[1] / v2.m_floats[1], - v1.m_floats[2] / v2.m_floats[2]); -#endif -} - -/**@brief Return the dot product between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Dot(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.dot(v2); -} - -/**@brief Return the distance squared between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Distance2(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.distance2(v2); -} - -/**@brief Return the distance between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Distance(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.distance(v2); -} - -/**@brief Return the angle between two vectors */ -B3_FORCE_INLINE b3Scalar -b3Angle(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.angle(v2); -} - -/**@brief Return the cross product of two vectors */ -B3_FORCE_INLINE b3Vector3 -b3Cross(const b3Vector3& v1, const b3Vector3& v2) -{ - return v1.cross(v2); -} - -B3_FORCE_INLINE b3Scalar -b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3) -{ - return v1.triple(v2, v3); -} - -/**@brief Return the linear interpolation between two vectors - * @param v1 One vector - * @param v2 The other vector - * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */ -B3_FORCE_INLINE b3Vector3 -b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t) -{ - return v1.lerp(v2, t); -} - -B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const -{ - return (v - *this).length2(); -} - -B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const -{ - return (v - *this).length(); -} - -B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const -{ -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - b3Vector3 norm = *this; - - return norm.normalize(); -#else - return *this / length(); -#endif -} - -B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const -{ - // wAxis must be a unit lenght vector - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - - __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128); - b3Scalar ssin = b3Sin(_angle); - __m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128; - O = _mm_and_ps(O, b3vFFF0fMask); - b3Scalar scos = b3Cos(_angle); - - __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0) - __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0) - - __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0) - __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0) - O = _mm_add_ps(O, Y); - vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0) - O = _mm_add_ps(O, Z); - vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0) - - vsin = vsin * C; - O = O * wAxis.mVec128; - __m128 X = mVec128 - O; - - O = O + vsin; - vcos = vcos * X; - O = O + vcos; - - return b3MakeVector3(O); -#else - b3Vector3 o = wAxis * wAxis.dot(*this); - b3Vector3 _x = *this - o; - b3Vector3 _y; - - _y = wAxis.cross(*this); - - return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle)); -#endif -} - -B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const -{ -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -#if defined _WIN32 || defined(B3_USE_SSE) - const long scalar_cutoff = 10; - long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#elif defined B3_USE_NEON - const long scalar_cutoff = 4; - extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#endif - if (array_count < scalar_cutoff) -#else - -#endif //B3_USE_SSE || B3_USE_NEON - { - b3Scalar maxDot = -B3_INFINITY; - int i = 0; - int ptIndex = -1; - for (i = 0; i < array_count; i++) - { - b3Scalar dot = array[i].dot(*this); - - if (dot > maxDot) - { - maxDot = dot; - ptIndex = i; - } - } - - b3Assert(ptIndex >= 0); - if (ptIndex < 0) - { - ptIndex = 0; - } - dotOut = maxDot; - return ptIndex; - } -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut); -#endif -} - -B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const -{ -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) -#if defined B3_USE_SSE - const long scalar_cutoff = 10; - long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#elif defined B3_USE_NEON - const long scalar_cutoff = 4; - extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut); -#else -#error unhandled arch! -#endif - - if (array_count < scalar_cutoff) -#endif //B3_USE_SSE || B3_USE_NEON - { - b3Scalar minDot = B3_INFINITY; - int i = 0; - int ptIndex = -1; - - for (i = 0; i < array_count; i++) - { - b3Scalar dot = array[i].dot(*this); - - if (dot < minDot) - { - minDot = dot; - ptIndex = i; - } - } - - dotOut = minDot; - - return ptIndex; - } -#if defined(B3_USE_SSE) || defined(B3_USE_NEON) - return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut); -#endif -} - -class b3Vector4 : public b3Vector3 -{ -public: - B3_FORCE_INLINE b3Vector4 absolute4() const - { -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask)); -#elif defined(B3_USE_NEON) - return b3Vector4(vabsq_f32(mVec128)); -#else - return b3MakeVector4( - b3Fabs(m_floats[0]), - b3Fabs(m_floats[1]), - b3Fabs(m_floats[2]), - b3Fabs(m_floats[3])); -#endif - } - - b3Scalar getW() const { return m_floats[3]; } - - B3_FORCE_INLINE int maxAxis4() const - { - int maxIndex = -1; - b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT); - if (m_floats[0] > maxVal) - { - maxIndex = 0; - maxVal = m_floats[0]; - } - if (m_floats[1] > maxVal) - { - maxIndex = 1; - maxVal = m_floats[1]; - } - if (m_floats[2] > maxVal) - { - maxIndex = 2; - maxVal = m_floats[2]; - } - if (m_floats[3] > maxVal) - { - maxIndex = 3; - } - - return maxIndex; - } - - B3_FORCE_INLINE int minAxis4() const - { - int minIndex = -1; - b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT); - if (m_floats[0] < minVal) - { - minIndex = 0; - minVal = m_floats[0]; - } - if (m_floats[1] < minVal) - { - minIndex = 1; - minVal = m_floats[1]; - } - if (m_floats[2] < minVal) - { - minIndex = 2; - minVal = m_floats[2]; - } - if (m_floats[3] < minVal) - { - minIndex = 3; - minVal = m_floats[3]; - } - - return minIndex; - } - - B3_FORCE_INLINE int closestAxis4() const - { - return absolute4().maxAxis4(); - } - - /**@brief Set x,y,z and zero w - * @param x Value of x - * @param y Value of y - * @param z Value of z - */ - - /* void getValue(b3Scalar *m) const - { - m[0] = m_floats[0]; - m[1] = m_floats[1]; - m[2] =m_floats[2]; - } -*/ - /**@brief Set the values - * @param x Value of x - * @param y Value of y - * @param z Value of z - * @param w Value of w - */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) - { - m_floats[0] = _x; - m_floats[1] = _y; - m_floats[2] = _z; - m_floats[3] = _w; - } -}; - -///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal) -{ -#ifdef B3_USE_DOUBLE_PRECISION - unsigned char* dest = (unsigned char*)&destVal; - unsigned char* src = (unsigned char*)&sourceVal; - dest[0] = src[7]; - dest[1] = src[6]; - dest[2] = src[5]; - dest[3] = src[4]; - dest[4] = src[3]; - dest[5] = src[2]; - dest[6] = src[1]; - dest[7] = src[0]; -#else - unsigned char* dest = (unsigned char*)&destVal; - unsigned char* src = (unsigned char*)&sourceVal; - dest[0] = src[3]; - dest[1] = src[2]; - dest[2] = src[1]; - dest[3] = src[0]; -#endif //B3_USE_DOUBLE_PRECISION -} -///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec) -{ - for (int i = 0; i < 4; i++) - { - b3SwapScalarEndian(sourceVec[i], destVec[i]); - } -} - -///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector) -{ - b3Vector3 swappedVec; - for (int i = 0; i < 4; i++) - { - b3SwapScalarEndian(vector[i], swappedVec[i]); - } - vector = swappedVec; -} - -template -B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q) -{ - if (b3Fabs(n[2]) > B3_SQRT12) - { - // choose p in y-z plane - b3Scalar a = n[1] * n[1] + n[2] * n[2]; - b3Scalar k = b3RecipSqrt(a); - p[0] = 0; - p[1] = -n[2] * k; - p[2] = n[1] * k; - // set q = n x p - q[0] = a * k; - q[1] = -n[0] * p[2]; - q[2] = n[0] * p[1]; - } - else - { - // choose p in x-y plane - b3Scalar a = n[0] * n[0] + n[1] * n[1]; - b3Scalar k = b3RecipSqrt(a); - p[0] = -n[1] * k; - p[1] = n[0] * k; - p[2] = 0; - // set q = n x p - q[0] = -n[2] * p[1]; - q[1] = n[2] * p[0]; - q[2] = a * k; - } -} - -struct b3Vector3FloatData -{ - float m_floats[4]; -}; - -struct b3Vector3DoubleData -{ - double m_floats[4]; -}; - -B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const -{ - ///could also do a memcpy, check if it is worth it - for (int i = 0; i < 4; i++) - dataOut.m_floats[i] = float(m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn) -{ - for (int i = 0; i < 4; i++) - m_floats[i] = b3Scalar(dataIn.m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const -{ - ///could also do a memcpy, check if it is worth it - for (int i = 0; i < 4; i++) - dataOut.m_floats[i] = double(m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn) -{ - for (int i = 0; i < 4; i++) - m_floats[i] = b3Scalar(dataIn.m_floats[i]); -} - -B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const -{ - ///could also do a memcpy, check if it is worth it - for (int i = 0; i < 4; i++) - dataOut.m_floats[i] = m_floats[i]; -} - -B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn) -{ - for (int i = 0; i < 4; i++) - m_floats[i] = dataIn.m_floats[i]; -} - -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z) -{ - b3Vector3 tmp; - tmp.setValue(x, y, z); - return tmp; -} - -inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w) -{ - b3Vector3 tmp; - tmp.setValue(x, y, z); - tmp.w = w; - return tmp; -} - -inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w) -{ - b3Vector4 tmp; - tmp.setValue(x, y, z, w); - return tmp; -} - -#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - -inline b3Vector3 b3MakeVector3(b3SimdFloat4 v) -{ - b3Vector3 tmp; - tmp.set128(v); - return tmp; -} - -inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec) -{ - b3Vector4 tmp; - tmp.set128(vec); - return tmp; -} - -#endif - -#endif //B3_VECTOR3_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Float4.h b/thirdparty/bullet/Bullet3Common/shared/b3Float4.h deleted file mode 100644 index d8a9f474113..00000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Float4.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef B3_FLOAT4_H -#define B3_FLOAT4_H - -#include "Bullet3Common/shared/b3PlatformDefinitions.h" - -#ifdef __cplusplus -#include "Bullet3Common/b3Vector3.h" -#define b3Float4 b3Vector3 -#define b3Float4ConstArg const b3Vector3& -#define b3Dot3F4 b3Dot -#define b3Cross3 b3Cross -#define b3MakeFloat4 b3MakeVector3 -inline b3Vector3 b3Normalized(const b3Vector3& vec) -{ - return vec.normalized(); -} - -inline b3Float4 b3FastNormalized3(b3Float4ConstArg v) -{ - return v.normalized(); -} - -inline b3Float4 b3MaxFloat4(const b3Float4& a, const b3Float4& b) -{ - b3Float4 tmp = a; - tmp.setMax(b); - return tmp; -} -inline b3Float4 b3MinFloat4(const b3Float4& a, const b3Float4& b) -{ - b3Float4 tmp = a; - tmp.setMin(b); - return tmp; -} - -#else -typedef float4 b3Float4; -#define b3Float4ConstArg const b3Float4 -#define b3MakeFloat4 (float4) -float b3Dot3F4(b3Float4ConstArg v0, b3Float4ConstArg v1) -{ - float4 a1 = b3MakeFloat4(v0.xyz, 0.f); - float4 b1 = b3MakeFloat4(v1.xyz, 0.f); - return dot(a1, b1); -} -b3Float4 b3Cross3(b3Float4ConstArg v0, b3Float4ConstArg v1) -{ - float4 a1 = b3MakeFloat4(v0.xyz, 0.f); - float4 b1 = b3MakeFloat4(v1.xyz, 0.f); - return cross(a1, b1); -} -#define b3MinFloat4 min -#define b3MaxFloat4 max - -#define b3Normalized(a) normalize(a) - -#endif - -inline bool b3IsAlmostZero(b3Float4ConstArg v) -{ - if (b3Fabs(v.x) > 1e-6 || b3Fabs(v.y) > 1e-6 || b3Fabs(v.z) > 1e-6) - return false; - return true; -} - -inline int b3MaxDot(b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut) -{ - float maxDot = -B3_INFINITY; - int i = 0; - int ptIndex = -1; - for (i = 0; i < vecLen; i++) - { - float dot = b3Dot3F4(vecArray[i], vec); - - if (dot > maxDot) - { - maxDot = dot; - ptIndex = i; - } - } - b3Assert(ptIndex >= 0); - if (ptIndex < 0) - { - ptIndex = 0; - } - *dotOut = maxDot; - return ptIndex; -} - -#endif //B3_FLOAT4_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Int2.h b/thirdparty/bullet/Bullet3Common/shared/b3Int2.h deleted file mode 100644 index 7b84de4436d..00000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Int2.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_INT2_H -#define B3_INT2_H - -#ifdef __cplusplus - -struct b3UnsignedInt2 -{ - union { - struct - { - unsigned int x, y; - }; - struct - { - unsigned int s[2]; - }; - }; -}; - -struct b3Int2 -{ - union { - struct - { - int x, y; - }; - struct - { - int s[2]; - }; - }; -}; - -inline b3Int2 b3MakeInt2(int x, int y) -{ - b3Int2 v; - v.s[0] = x; - v.s[1] = y; - return v; -} -#else - -#define b3UnsignedInt2 uint2 -#define b3Int2 int2 -#define b3MakeInt2 (int2) - -#endif //__cplusplus -#endif \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Int4.h b/thirdparty/bullet/Bullet3Common/shared/b3Int4.h deleted file mode 100644 index f6a1754245c..00000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Int4.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef B3_INT4_H -#define B3_INT4_H - -#ifdef __cplusplus - -#include "Bullet3Common/b3Scalar.h" - -B3_ATTRIBUTE_ALIGNED16(struct) -b3UnsignedInt4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - union { - struct - { - unsigned int x, y, z, w; - }; - struct - { - unsigned int s[4]; - }; - }; -}; - -B3_ATTRIBUTE_ALIGNED16(struct) -b3Int4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - union { - struct - { - int x, y, z, w; - }; - struct - { - int s[4]; - }; - }; -}; - -B3_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0) -{ - b3Int4 v; - v.s[0] = x; - v.s[1] = y; - v.s[2] = z; - v.s[3] = w; - return v; -} - -B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0) -{ - b3UnsignedInt4 v; - v.s[0] = x; - v.s[1] = y; - v.s[2] = z; - v.s[3] = w; - return v; -} - -#else - -#define b3UnsignedInt4 uint4 -#define b3Int4 int4 -#define b3MakeInt4 (int4) -#define b3MakeUnsignedInt4 (uint4) - -#endif //__cplusplus - -#endif //B3_INT4_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h b/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h deleted file mode 100644 index ce6482b5a6c..00000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Mat3x3.h +++ /dev/null @@ -1,157 +0,0 @@ - -#ifndef B3_MAT3x3_H -#define B3_MAT3x3_H - -#include "Bullet3Common/shared/b3Quat.h" - -#ifdef __cplusplus - -#include "Bullet3Common/b3Matrix3x3.h" - -#define b3Mat3x3 b3Matrix3x3 -#define b3Mat3x3ConstArg const b3Matrix3x3& - -inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat) -{ - return b3Mat3x3(quat); -} - -inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat) -{ - return mat.absolute(); -} - -#define b3GetRow(m, row) m.getRow(row) - -__inline b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b) -{ - return b * a; -} - -#else - -typedef struct -{ - b3Float4 m_row[3]; -} b3Mat3x3; - -#define b3Mat3x3ConstArg const b3Mat3x3 -#define b3GetRow(m, row) (m.m_row[row]) - -inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat) -{ - b3Float4 quat2 = (b3Float4)(quat.x * quat.x, quat.y * quat.y, quat.z * quat.z, 0.f); - b3Mat3x3 out; - - out.m_row[0].x = 1 - 2 * quat2.y - 2 * quat2.z; - out.m_row[0].y = 2 * quat.x * quat.y - 2 * quat.w * quat.z; - out.m_row[0].z = 2 * quat.x * quat.z + 2 * quat.w * quat.y; - out.m_row[0].w = 0.f; - - out.m_row[1].x = 2 * quat.x * quat.y + 2 * quat.w * quat.z; - out.m_row[1].y = 1 - 2 * quat2.x - 2 * quat2.z; - out.m_row[1].z = 2 * quat.y * quat.z - 2 * quat.w * quat.x; - out.m_row[1].w = 0.f; - - out.m_row[2].x = 2 * quat.x * quat.z - 2 * quat.w * quat.y; - out.m_row[2].y = 2 * quat.y * quat.z + 2 * quat.w * quat.x; - out.m_row[2].z = 1 - 2 * quat2.x - 2 * quat2.y; - out.m_row[2].w = 0.f; - - return out; -} - -inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn) -{ - b3Mat3x3 out; - out.m_row[0] = fabs(matIn.m_row[0]); - out.m_row[1] = fabs(matIn.m_row[1]); - out.m_row[2] = fabs(matIn.m_row[2]); - return out; -} - -__inline b3Mat3x3 mtZero(); - -__inline b3Mat3x3 mtIdentity(); - -__inline b3Mat3x3 mtTranspose(b3Mat3x3 m); - -__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b); - -__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b); - -__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b); - -__inline b3Mat3x3 mtZero() -{ - b3Mat3x3 m; - m.m_row[0] = (b3Float4)(0.f); - m.m_row[1] = (b3Float4)(0.f); - m.m_row[2] = (b3Float4)(0.f); - return m; -} - -__inline b3Mat3x3 mtIdentity() -{ - b3Mat3x3 m; - m.m_row[0] = (b3Float4)(1, 0, 0, 0); - m.m_row[1] = (b3Float4)(0, 1, 0, 0); - m.m_row[2] = (b3Float4)(0, 0, 1, 0); - return m; -} - -__inline b3Mat3x3 mtTranspose(b3Mat3x3 m) -{ - b3Mat3x3 out; - out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b) -{ - b3Mat3x3 transB; - transB = mtTranspose(b); - b3Mat3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for (int i = 0; i < 3; i++) - { - // a.m_row[i].w = 0.f; - ans.m_row[i].x = b3Dot3F4(a.m_row[i], transB.m_row[0]); - ans.m_row[i].y = b3Dot3F4(a.m_row[i], transB.m_row[1]); - ans.m_row[i].z = b3Dot3F4(a.m_row[i], transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b) -{ - b3Float4 ans; - ans.x = b3Dot3F4(a.m_row[0], b); - ans.y = b3Dot3F4(a.m_row[1], b); - ans.z = b3Dot3F4(a.m_row[2], b); - ans.w = 0.f; - return ans; -} - -__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b) -{ - b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - b3Float4 ans; - ans.x = b3Dot3F4(a, colx); - ans.y = b3Dot3F4(a, coly); - ans.z = b3Dot3F4(a, colz); - return ans; -} - -#endif - -#endif //B3_MAT3x3_H diff --git a/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h b/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h deleted file mode 100644 index b72bee93106..00000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3PlatformDefinitions.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef B3_PLATFORM_DEFINITIONS_H -#define B3_PLATFORM_DEFINITIONS_H - -struct MyTest -{ - int bla; -}; - -#ifdef __cplusplus -//#define b3ConstArray(a) const b3AlignedObjectArray& -#define b3ConstArray(a) const a * -#define b3AtomicInc(a) ((*a)++) - -inline int b3AtomicAdd(volatile int *p, int val) -{ - int oldValue = *p; - int newValue = oldValue + val; - *p = newValue; - return oldValue; -} - -#define __global - -#define B3_STATIC static -#else -//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX -#define B3_LARGE_FLOAT 1e18f -#define B3_INFINITY 1e18f -#define b3Assert(a) -#define b3ConstArray(a) __global const a * -#define b3AtomicInc atomic_inc -#define b3AtomicAdd atomic_add -#define b3Fabs fabs -#define b3Sqrt native_sqrt -#define b3Sin native_sin -#define b3Cos native_cos - -#define B3_STATIC -#endif - -#endif diff --git a/thirdparty/bullet/Bullet3Common/shared/b3Quat.h b/thirdparty/bullet/Bullet3Common/shared/b3Quat.h deleted file mode 100644 index 940610c77b9..00000000000 --- a/thirdparty/bullet/Bullet3Common/shared/b3Quat.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef B3_QUAT_H -#define B3_QUAT_H - -#include "Bullet3Common/shared/b3PlatformDefinitions.h" -#include "Bullet3Common/shared/b3Float4.h" - -#ifdef __cplusplus -#include "Bullet3Common/b3Quaternion.h" -#include "Bullet3Common/b3Transform.h" - -#define b3Quat b3Quaternion -#define b3QuatConstArg const b3Quaternion& -inline b3Quat b3QuatInverse(b3QuatConstArg orn) -{ - return orn.inverse(); -} - -inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) -{ - b3Transform tr; - tr.setOrigin(translation); - tr.setRotation(orientation); - return tr(point); -} - -#else -typedef float4 b3Quat; -#define b3QuatConstArg const b3Quat - -inline float4 b3FastNormalize4(float4 v) -{ - v = (float4)(v.xyz, 0.f); - return fast_normalize(v); -} - -inline b3Quat b3QuatMul(b3Quat a, b3Quat b); -inline b3Quat b3QuatNormalized(b3QuatConstArg in); -inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec); -inline b3Quat b3QuatInvert(b3QuatConstArg q); -inline b3Quat b3QuatInverse(b3QuatConstArg q); - -inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b) -{ - b3Quat ans; - ans = b3Cross3(a, b); - ans += a.w * b + b.w * a; - // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w * b.w - b3Dot3F4(a, b); - return ans; -} - -inline b3Quat b3QuatNormalized(b3QuatConstArg in) -{ - b3Quat q; - q = in; - //return b3FastNormalize4(in); - float len = native_sqrt(dot(q, q)); - if (len > 0.f) - { - q *= 1.f / len; - } - else - { - q.x = q.y = q.z = 0.f; - q.w = 1.f; - } - return q; -} -inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec) -{ - b3Quat qInv = b3QuatInvert(q); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = b3QuatMul(b3QuatMul(q, vcpy), qInv); - return out; -} - -inline b3Quat b3QuatInverse(b3QuatConstArg q) -{ - return (b3Quat)(-q.xyz, q.w); -} - -inline b3Quat b3QuatInvert(b3QuatConstArg q) -{ - return (b3Quat)(-q.xyz, q.w); -} - -inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec) -{ - return b3QuatRotate(b3QuatInvert(q), vec); -} - -inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation) -{ - return b3QuatRotate(orientation, point) + (translation); -} - -#endif - -#endif //B3_QUAT_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h deleted file mode 100644 index e946c2ae507..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h +++ /dev/null @@ -1,149 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_CONTACT_SOLVER_INFO -#define B3_CONTACT_SOLVER_INFO - -#include "Bullet3Common/b3Scalar.h" - -enum b3SolverMode -{ - B3_SOLVER_RANDMIZE_ORDER = 1, - B3_SOLVER_FRICTION_SEPARATE = 2, - B3_SOLVER_USE_WARMSTARTING = 4, - B3_SOLVER_USE_2_FRICTION_DIRECTIONS = 16, - B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32, - B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64, - B3_SOLVER_CACHE_FRIENDLY = 128, - B3_SOLVER_SIMD = 256, - B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 512, - B3_SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1024 -}; - -struct b3ContactSolverInfoData -{ - b3Scalar m_tau; - b3Scalar m_damping; //global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'. - b3Scalar m_friction; - b3Scalar m_timeStep; - b3Scalar m_restitution; - int m_numIterations; - b3Scalar m_maxErrorReduction; - b3Scalar m_sor; - b3Scalar m_erp; //used as Baumgarte factor - b3Scalar m_erp2; //used in Split Impulse - b3Scalar m_globalCfm; //constraint force mixing - int m_splitImpulse; - b3Scalar m_splitImpulsePenetrationThreshold; - b3Scalar m_splitImpulseTurnErp; - b3Scalar m_linearSlop; - b3Scalar m_warmstartingFactor; - - int m_solverMode; - int m_restingContactRestitutionThreshold; - int m_minimumSolverBatchSize; - b3Scalar m_maxGyroscopicForce; - b3Scalar m_singleAxisRollingFrictionThreshold; -}; - -struct b3ContactSolverInfo : public b3ContactSolverInfoData -{ - inline b3ContactSolverInfo() - { - m_tau = b3Scalar(0.6); - m_damping = b3Scalar(1.0); - m_friction = b3Scalar(0.3); - m_timeStep = b3Scalar(1.f / 60.f); - m_restitution = b3Scalar(0.); - m_maxErrorReduction = b3Scalar(20.); - m_numIterations = 10; - m_erp = b3Scalar(0.2); - m_erp2 = b3Scalar(0.8); - m_globalCfm = b3Scalar(0.); - m_sor = b3Scalar(1.); - m_splitImpulse = true; - m_splitImpulsePenetrationThreshold = -.04f; - m_splitImpulseTurnErp = 0.1f; - m_linearSlop = b3Scalar(0.0); - m_warmstartingFactor = b3Scalar(0.85); - //m_solverMode = B3_SOLVER_USE_WARMSTARTING | B3_SOLVER_SIMD | B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION|B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING;// | B3_SOLVER_RANDMIZE_ORDER; - m_solverMode = B3_SOLVER_USE_WARMSTARTING | B3_SOLVER_SIMD; // | B3_SOLVER_RANDMIZE_ORDER; - m_restingContactRestitutionThreshold = 2; //unused as of 2.81 - m_minimumSolverBatchSize = 128; //try to combine islands until the amount of constraints reaches this limit - m_maxGyroscopicForce = 100.f; ///only used to clamp forces for bodies that have their B3_ENABLE_GYROPSCOPIC_FORCE flag set (using b3RigidBody::setFlag) - m_singleAxisRollingFrictionThreshold = 1e30f; ///if the velocity is above this threshold, it will use a single constraint row (axis), otherwise 3 rows. - } -}; - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3ContactSolverInfoDoubleData -{ - double m_tau; - double m_damping; //global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'. - double m_friction; - double m_timeStep; - double m_restitution; - double m_maxErrorReduction; - double m_sor; - double m_erp; //used as Baumgarte factor - double m_erp2; //used in Split Impulse - double m_globalCfm; //constraint force mixing - double m_splitImpulsePenetrationThreshold; - double m_splitImpulseTurnErp; - double m_linearSlop; - double m_warmstartingFactor; - double m_maxGyroscopicForce; - double m_singleAxisRollingFrictionThreshold; - - int m_numIterations; - int m_solverMode; - int m_restingContactRestitutionThreshold; - int m_minimumSolverBatchSize; - int m_splitImpulse; - char m_padding[4]; -}; -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3ContactSolverInfoFloatData -{ - float m_tau; - float m_damping; //global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'. - float m_friction; - float m_timeStep; - - float m_restitution; - float m_maxErrorReduction; - float m_sor; - float m_erp; //used as Baumgarte factor - - float m_erp2; //used in Split Impulse - float m_globalCfm; //constraint force mixing - float m_splitImpulsePenetrationThreshold; - float m_splitImpulseTurnErp; - - float m_linearSlop; - float m_warmstartingFactor; - float m_maxGyroscopicForce; - float m_singleAxisRollingFrictionThreshold; - - int m_numIterations; - int m_solverMode; - int m_restingContactRestitutionThreshold; - int m_minimumSolverBatchSize; - - int m_splitImpulse; - char m_padding[4]; -}; - -#endif //B3_CONTACT_SOLVER_INFO diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp deleted file mode 100644 index ace4b183884..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp +++ /dev/null @@ -1,103 +0,0 @@ - -#include "b3FixedConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Common/b3TransformUtil.h" -#include - -b3FixedConstraint::b3FixedConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB) - : b3TypedConstraint(B3_FIXED_CONSTRAINT_TYPE, rbA, rbB) -{ - m_pivotInA = frameInA.getOrigin(); - m_pivotInB = frameInB.getOrigin(); - m_relTargetAB = frameInA.getRotation() * frameInB.getRotation().inverse(); -} - -b3FixedConstraint::~b3FixedConstraint() -{ -} - -void b3FixedConstraint::getInfo1(b3ConstraintInfo1* info, const b3RigidBodyData* bodies) -{ - info->m_numConstraintRows = 6; - info->nub = 6; -} - -void b3FixedConstraint::getInfo2(b3ConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - //fix the 3 linear degrees of freedom - - const b3Vector3& worldPosA = bodies[m_rbA].m_pos; - const b3Quaternion& worldOrnA = bodies[m_rbA].m_quat; - const b3Vector3& worldPosB = bodies[m_rbB].m_pos; - const b3Quaternion& worldOrnB = bodies[m_rbB].m_quat; - - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip + 1] = 1; - info->m_J1linearAxis[2 * info->rowskip + 2] = 1; - - b3Vector3 a1 = b3QuatRotate(worldOrnA, m_pivotInA); - { - b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis + 2 * info->rowskip); - b3Vector3 a1neg = -a1; - a1neg.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip + 1] = -1; - info->m_J2linearAxis[2 * info->rowskip + 2] = -1; - } - - b3Vector3 a2 = b3QuatRotate(worldOrnB, m_pivotInB); - - { - // b3Vector3 a2n = -a2; - b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis + 2 * info->rowskip); - a2.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - // set right hand side for the linear dofs - b3Scalar k = info->fps * info->erp; - b3Vector3 linearError = k * (a2 + worldPosB - a1 - worldPosA); - int j; - for (j = 0; j < 3; j++) - { - info->m_constraintError[j * info->rowskip] = linearError[j]; - //printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]); - } - - //fix the 3 angular degrees of freedom - - int start_row = 3; - int s = info->rowskip; - int start_index = start_row * s; - - // 3 rows to make body rotations equal - info->m_J1angularAxis[start_index] = 1; - info->m_J1angularAxis[start_index + s + 1] = 1; - info->m_J1angularAxis[start_index + s * 2 + 2] = 1; - if (info->m_J2angularAxis) - { - info->m_J2angularAxis[start_index] = -1; - info->m_J2angularAxis[start_index + s + 1] = -1; - info->m_J2angularAxis[start_index + s * 2 + 2] = -1; - } - - // set right hand side for the angular dofs - - b3Vector3 diff; - b3Scalar angle; - b3Quaternion qrelCur = worldOrnA * worldOrnB.inverse(); - - b3TransformUtil::calculateDiffAxisAngleQuaternion(m_relTargetAB, qrelCur, diff, angle); - diff *= -angle; - for (j = 0; j < 3; j++) - { - info->m_constraintError[(3 + j) * info->rowskip] = k * diff[j]; - } -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h deleted file mode 100644 index 64809666e48..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h +++ /dev/null @@ -1,34 +0,0 @@ - -#ifndef B3_FIXED_CONSTRAINT_H -#define B3_FIXED_CONSTRAINT_H - -#include "b3TypedConstraint.h" - -B3_ATTRIBUTE_ALIGNED16(class) -b3FixedConstraint : public b3TypedConstraint -{ - b3Vector3 m_pivotInA; - b3Vector3 m_pivotInB; - b3Quaternion m_relTargetAB; - -public: - b3FixedConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB); - - virtual ~b3FixedConstraint(); - - virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies); - - virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies); - - virtual void setParam(int num, b3Scalar value, int axis = -1) - { - b3Assert(0); - } - virtual b3Scalar getParam(int num, int axis = -1) const - { - b3Assert(0); - return 0.f; - } -}; - -#endif //B3_FIXED_CONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp deleted file mode 100644 index fd3e5185de1..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp +++ /dev/null @@ -1,737 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -/* -2007-09-09 -Refactored by Francisco Le?n -email: projectileman@yahoo.com -http://gimpact.sf.net -*/ - -#include "b3Generic6DofConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Common/b3TransformUtil.h" -#include "Bullet3Common/b3TransformUtil.h" -#include - -#define D6_USE_OBSOLETE_METHOD false -#define D6_USE_FRAME_OFFSET true - -b3Generic6DofConstraint::b3Generic6DofConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB, bool useLinearReferenceFrameA, const b3RigidBodyData* bodies) - : b3TypedConstraint(B3_D6_CONSTRAINT_TYPE, rbA, rbB), m_frameInA(frameInA), m_frameInB(frameInB), m_useLinearReferenceFrameA(useLinearReferenceFrameA), m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET), m_flags(0) -{ - calculateTransforms(bodies); -} - -#define GENERIC_D6_DISABLE_WARMSTARTING 1 - -b3Scalar btGetMatrixElem(const b3Matrix3x3& mat, int index); -b3Scalar btGetMatrixElem(const b3Matrix3x3& mat, int index) -{ - int i = index % 3; - int j = index / 3; - return mat[i][j]; -} - -///MatrixToEulerXYZ from http://www.geometrictools.com/LibFoundation/Mathematics/Wm4Matrix3.inl.html -bool matrixToEulerXYZ(const b3Matrix3x3& mat, b3Vector3& xyz); -bool matrixToEulerXYZ(const b3Matrix3x3& mat, b3Vector3& xyz) -{ - // // rot = cy*cz -cy*sz sy - // // cz*sx*sy+cx*sz cx*cz-sx*sy*sz -cy*sx - // // -cx*cz*sy+sx*sz cz*sx+cx*sy*sz cx*cy - // - - b3Scalar fi = btGetMatrixElem(mat, 2); - if (fi < b3Scalar(1.0f)) - { - if (fi > b3Scalar(-1.0f)) - { - xyz[0] = b3Atan2(-btGetMatrixElem(mat, 5), btGetMatrixElem(mat, 8)); - xyz[1] = b3Asin(btGetMatrixElem(mat, 2)); - xyz[2] = b3Atan2(-btGetMatrixElem(mat, 1), btGetMatrixElem(mat, 0)); - return true; - } - else - { - // WARNING. Not unique. XA - ZA = -atan2(r10,r11) - xyz[0] = -b3Atan2(btGetMatrixElem(mat, 3), btGetMatrixElem(mat, 4)); - xyz[1] = -B3_HALF_PI; - xyz[2] = b3Scalar(0.0); - return false; - } - } - else - { - // WARNING. Not unique. XAngle + ZAngle = atan2(r10,r11) - xyz[0] = b3Atan2(btGetMatrixElem(mat, 3), btGetMatrixElem(mat, 4)); - xyz[1] = B3_HALF_PI; - xyz[2] = 0.0; - } - return false; -} - -//////////////////////////// b3RotationalLimitMotor //////////////////////////////////// - -int b3RotationalLimitMotor::testLimitValue(b3Scalar test_value) -{ - if (m_loLimit > m_hiLimit) - { - m_currentLimit = 0; //Free from violation - return 0; - } - if (test_value < m_loLimit) - { - m_currentLimit = 1; //low limit violation - m_currentLimitError = test_value - m_loLimit; - if (m_currentLimitError > B3_PI) - m_currentLimitError -= B3_2_PI; - else if (m_currentLimitError < -B3_PI) - m_currentLimitError += B3_2_PI; - return 1; - } - else if (test_value > m_hiLimit) - { - m_currentLimit = 2; //High limit violation - m_currentLimitError = test_value - m_hiLimit; - if (m_currentLimitError > B3_PI) - m_currentLimitError -= B3_2_PI; - else if (m_currentLimitError < -B3_PI) - m_currentLimitError += B3_2_PI; - return 2; - }; - - m_currentLimit = 0; //Free from violation - return 0; -} - -//////////////////////////// End b3RotationalLimitMotor //////////////////////////////////// - -//////////////////////////// b3TranslationalLimitMotor //////////////////////////////////// - -int b3TranslationalLimitMotor::testLimitValue(int limitIndex, b3Scalar test_value) -{ - b3Scalar loLimit = m_lowerLimit[limitIndex]; - b3Scalar hiLimit = m_upperLimit[limitIndex]; - if (loLimit > hiLimit) - { - m_currentLimit[limitIndex] = 0; //Free from violation - m_currentLimitError[limitIndex] = b3Scalar(0.f); - return 0; - } - - if (test_value < loLimit) - { - m_currentLimit[limitIndex] = 2; //low limit violation - m_currentLimitError[limitIndex] = test_value - loLimit; - return 2; - } - else if (test_value > hiLimit) - { - m_currentLimit[limitIndex] = 1; //High limit violation - m_currentLimitError[limitIndex] = test_value - hiLimit; - return 1; - }; - - m_currentLimit[limitIndex] = 0; //Free from violation - m_currentLimitError[limitIndex] = b3Scalar(0.f); - return 0; -} - -//////////////////////////// b3TranslationalLimitMotor //////////////////////////////////// - -void b3Generic6DofConstraint::calculateAngleInfo() -{ - b3Matrix3x3 relative_frame = m_calculatedTransformA.getBasis().inverse() * m_calculatedTransformB.getBasis(); - matrixToEulerXYZ(relative_frame, m_calculatedAxisAngleDiff); - // in euler angle mode we do not actually constrain the angular velocity - // along the axes axis[0] and axis[2] (although we do use axis[1]) : - // - // to get constrain w2-w1 along ...not - // ------ --------------------- ------ - // d(angle[0])/dt = 0 ax[1] x ax[2] ax[0] - // d(angle[1])/dt = 0 ax[1] - // d(angle[2])/dt = 0 ax[0] x ax[1] ax[2] - // - // constraining w2-w1 along an axis 'a' means that a'*(w2-w1)=0. - // to prove the result for angle[0], write the expression for angle[0] from - // GetInfo1 then take the derivative. to prove this for angle[2] it is - // easier to take the euler rate expression for d(angle[2])/dt with respect - // to the components of w and set that to 0. - b3Vector3 axis0 = m_calculatedTransformB.getBasis().getColumn(0); - b3Vector3 axis2 = m_calculatedTransformA.getBasis().getColumn(2); - - m_calculatedAxis[1] = axis2.cross(axis0); - m_calculatedAxis[0] = m_calculatedAxis[1].cross(axis2); - m_calculatedAxis[2] = axis0.cross(m_calculatedAxis[1]); - - m_calculatedAxis[0].normalize(); - m_calculatedAxis[1].normalize(); - m_calculatedAxis[2].normalize(); -} - -static b3Transform getCenterOfMassTransform(const b3RigidBodyData& body) -{ - b3Transform tr(body.m_quat, body.m_pos); - return tr; -} - -void b3Generic6DofConstraint::calculateTransforms(const b3RigidBodyData* bodies) -{ - b3Transform transA; - b3Transform transB; - transA = getCenterOfMassTransform(bodies[m_rbA]); - transB = getCenterOfMassTransform(bodies[m_rbB]); - calculateTransforms(transA, transB, bodies); -} - -void b3Generic6DofConstraint::calculateTransforms(const b3Transform& transA, const b3Transform& transB, const b3RigidBodyData* bodies) -{ - m_calculatedTransformA = transA * m_frameInA; - m_calculatedTransformB = transB * m_frameInB; - calculateLinearInfo(); - calculateAngleInfo(); - if (m_useOffsetForConstraintFrame) - { // get weight factors depending on masses - b3Scalar miA = bodies[m_rbA].m_invMass; - b3Scalar miB = bodies[m_rbB].m_invMass; - m_hasStaticBody = (miA < B3_EPSILON) || (miB < B3_EPSILON); - b3Scalar miS = miA + miB; - if (miS > b3Scalar(0.f)) - { - m_factA = miB / miS; - } - else - { - m_factA = b3Scalar(0.5f); - } - m_factB = b3Scalar(1.0f) - m_factA; - } -} - -bool b3Generic6DofConstraint::testAngularLimitMotor(int axis_index) -{ - b3Scalar angle = m_calculatedAxisAngleDiff[axis_index]; - angle = b3AdjustAngleToLimits(angle, m_angularLimits[axis_index].m_loLimit, m_angularLimits[axis_index].m_hiLimit); - m_angularLimits[axis_index].m_currentPosition = angle; - //test limits - m_angularLimits[axis_index].testLimitValue(angle); - return m_angularLimits[axis_index].needApplyTorques(); -} - -void b3Generic6DofConstraint::getInfo1(b3ConstraintInfo1* info, const b3RigidBodyData* bodies) -{ - //prepare constraint - calculateTransforms(getCenterOfMassTransform(bodies[m_rbA]), getCenterOfMassTransform(bodies[m_rbB]), bodies); - info->m_numConstraintRows = 0; - info->nub = 6; - int i; - //test linear limits - for (i = 0; i < 3; i++) - { - if (m_linearLimits.needApplyForce(i)) - { - info->m_numConstraintRows++; - info->nub--; - } - } - //test angular limits - for (i = 0; i < 3; i++) - { - if (testAngularLimitMotor(i)) - { - info->m_numConstraintRows++; - info->nub--; - } - } - // printf("info->m_numConstraintRows=%d\n",info->m_numConstraintRows); -} - -void b3Generic6DofConstraint::getInfo1NonVirtual(b3ConstraintInfo1* info, const b3RigidBodyData* bodies) -{ - //pre-allocate all 6 - info->m_numConstraintRows = 6; - info->nub = 0; -} - -void b3Generic6DofConstraint::getInfo2(b3ConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - b3Transform transA = getCenterOfMassTransform(bodies[m_rbA]); - b3Transform transB = getCenterOfMassTransform(bodies[m_rbB]); - const b3Vector3& linVelA = bodies[m_rbA].m_linVel; - const b3Vector3& linVelB = bodies[m_rbB].m_linVel; - const b3Vector3& angVelA = bodies[m_rbA].m_angVel; - const b3Vector3& angVelB = bodies[m_rbB].m_angVel; - - if (m_useOffsetForConstraintFrame) - { // for stability better to solve angular limits first - int row = setAngularLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB); - setLinearLimits(info, row, transA, transB, linVelA, linVelB, angVelA, angVelB); - } - else - { // leave old version for compatibility - int row = setLinearLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB); - setAngularLimits(info, row, transA, transB, linVelA, linVelB, angVelA, angVelB); - } -} - -void b3Generic6DofConstraint::getInfo2NonVirtual(b3ConstraintInfo2* info, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB, const b3RigidBodyData* bodies) -{ - //prepare constraint - calculateTransforms(transA, transB, bodies); - - int i; - for (i = 0; i < 3; i++) - { - testAngularLimitMotor(i); - } - - if (m_useOffsetForConstraintFrame) - { // for stability better to solve angular limits first - int row = setAngularLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB); - setLinearLimits(info, row, transA, transB, linVelA, linVelB, angVelA, angVelB); - } - else - { // leave old version for compatibility - int row = setLinearLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB); - setAngularLimits(info, row, transA, transB, linVelA, linVelB, angVelA, angVelB); - } -} - -int b3Generic6DofConstraint::setLinearLimits(b3ConstraintInfo2* info, int row, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB) -{ - // int row = 0; - //solve linear limits - b3RotationalLimitMotor limot; - for (int i = 0; i < 3; i++) - { - if (m_linearLimits.needApplyForce(i)) - { // re-use rotational motor code - limot.m_bounce = b3Scalar(0.f); - limot.m_currentLimit = m_linearLimits.m_currentLimit[i]; - limot.m_currentPosition = m_linearLimits.m_currentLinearDiff[i]; - limot.m_currentLimitError = m_linearLimits.m_currentLimitError[i]; - limot.m_damping = m_linearLimits.m_damping; - limot.m_enableMotor = m_linearLimits.m_enableMotor[i]; - limot.m_hiLimit = m_linearLimits.m_upperLimit[i]; - limot.m_limitSoftness = m_linearLimits.m_limitSoftness; - limot.m_loLimit = m_linearLimits.m_lowerLimit[i]; - limot.m_maxLimitForce = b3Scalar(0.f); - limot.m_maxMotorForce = m_linearLimits.m_maxMotorForce[i]; - limot.m_targetVelocity = m_linearLimits.m_targetVelocity[i]; - b3Vector3 axis = m_calculatedTransformA.getBasis().getColumn(i); - int flags = m_flags >> (i * B3_6DOF_FLAGS_AXIS_SHIFT); - limot.m_normalCFM = (flags & B3_6DOF_FLAGS_CFM_NORM) ? m_linearLimits.m_normalCFM[i] : info->cfm[0]; - limot.m_stopCFM = (flags & B3_6DOF_FLAGS_CFM_STOP) ? m_linearLimits.m_stopCFM[i] : info->cfm[0]; - limot.m_stopERP = (flags & B3_6DOF_FLAGS_ERP_STOP) ? m_linearLimits.m_stopERP[i] : info->erp; - if (m_useOffsetForConstraintFrame) - { - int indx1 = (i + 1) % 3; - int indx2 = (i + 2) % 3; - int rotAllowed = 1; // rotations around orthos to current axis - if (m_angularLimits[indx1].m_currentLimit && m_angularLimits[indx2].m_currentLimit) - { - rotAllowed = 0; - } - row += get_limit_motor_info2(&limot, transA, transB, linVelA, linVelB, angVelA, angVelB, info, row, axis, 0, rotAllowed); - } - else - { - row += get_limit_motor_info2(&limot, transA, transB, linVelA, linVelB, angVelA, angVelB, info, row, axis, 0); - } - } - } - return row; -} - -int b3Generic6DofConstraint::setAngularLimits(b3ConstraintInfo2* info, int row_offset, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB) -{ - b3Generic6DofConstraint* d6constraint = this; - int row = row_offset; - //solve angular limits - for (int i = 0; i < 3; i++) - { - if (d6constraint->getRotationalLimitMotor(i)->needApplyTorques()) - { - b3Vector3 axis = d6constraint->getAxis(i); - int flags = m_flags >> ((i + 3) * B3_6DOF_FLAGS_AXIS_SHIFT); - if (!(flags & B3_6DOF_FLAGS_CFM_NORM)) - { - m_angularLimits[i].m_normalCFM = info->cfm[0]; - } - if (!(flags & B3_6DOF_FLAGS_CFM_STOP)) - { - m_angularLimits[i].m_stopCFM = info->cfm[0]; - } - if (!(flags & B3_6DOF_FLAGS_ERP_STOP)) - { - m_angularLimits[i].m_stopERP = info->erp; - } - row += get_limit_motor_info2(d6constraint->getRotationalLimitMotor(i), - transA, transB, linVelA, linVelB, angVelA, angVelB, info, row, axis, 1); - } - } - - return row; -} - -void b3Generic6DofConstraint::updateRHS(b3Scalar timeStep) -{ - (void)timeStep; -} - -void b3Generic6DofConstraint::setFrames(const b3Transform& frameA, const b3Transform& frameB, const b3RigidBodyData* bodies) -{ - m_frameInA = frameA; - m_frameInB = frameB; - - calculateTransforms(bodies); -} - -b3Vector3 b3Generic6DofConstraint::getAxis(int axis_index) const -{ - return m_calculatedAxis[axis_index]; -} - -b3Scalar b3Generic6DofConstraint::getRelativePivotPosition(int axisIndex) const -{ - return m_calculatedLinearDiff[axisIndex]; -} - -b3Scalar b3Generic6DofConstraint::getAngle(int axisIndex) const -{ - return m_calculatedAxisAngleDiff[axisIndex]; -} - -void b3Generic6DofConstraint::calcAnchorPos(const b3RigidBodyData* bodies) -{ - b3Scalar imA = bodies[m_rbA].m_invMass; - b3Scalar imB = bodies[m_rbB].m_invMass; - b3Scalar weight; - if (imB == b3Scalar(0.0)) - { - weight = b3Scalar(1.0); - } - else - { - weight = imA / (imA + imB); - } - const b3Vector3& pA = m_calculatedTransformA.getOrigin(); - const b3Vector3& pB = m_calculatedTransformB.getOrigin(); - m_AnchorPos = pA * weight + pB * (b3Scalar(1.0) - weight); - return; -} - -void b3Generic6DofConstraint::calculateLinearInfo() -{ - m_calculatedLinearDiff = m_calculatedTransformB.getOrigin() - m_calculatedTransformA.getOrigin(); - m_calculatedLinearDiff = m_calculatedTransformA.getBasis().inverse() * m_calculatedLinearDiff; - for (int i = 0; i < 3; i++) - { - m_linearLimits.m_currentLinearDiff[i] = m_calculatedLinearDiff[i]; - m_linearLimits.testLimitValue(i, m_calculatedLinearDiff[i]); - } -} - -int b3Generic6DofConstraint::get_limit_motor_info2( - b3RotationalLimitMotor* limot, - const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB, - b3ConstraintInfo2* info, int row, b3Vector3& ax1, int rotational, int rotAllowed) -{ - int srow = row * info->rowskip; - bool powered = limot->m_enableMotor; - int limit = limot->m_currentLimit; - if (powered || limit) - { // if the joint is powered, or has joint limits, add in the extra row - b3Scalar* J1 = rotational ? info->m_J1angularAxis : info->m_J1linearAxis; - b3Scalar* J2 = rotational ? info->m_J2angularAxis : info->m_J2linearAxis; - if (J1) - { - J1[srow + 0] = ax1[0]; - J1[srow + 1] = ax1[1]; - J1[srow + 2] = ax1[2]; - } - if (J2) - { - J2[srow + 0] = -ax1[0]; - J2[srow + 1] = -ax1[1]; - J2[srow + 2] = -ax1[2]; - } - if ((!rotational)) - { - if (m_useOffsetForConstraintFrame) - { - b3Vector3 tmpA, tmpB, relA, relB; - // get vector from bodyB to frameB in WCS - relB = m_calculatedTransformB.getOrigin() - transB.getOrigin(); - // get its projection to constraint axis - b3Vector3 projB = ax1 * relB.dot(ax1); - // get vector directed from bodyB to constraint axis (and orthogonal to it) - b3Vector3 orthoB = relB - projB; - // same for bodyA - relA = m_calculatedTransformA.getOrigin() - transA.getOrigin(); - b3Vector3 projA = ax1 * relA.dot(ax1); - b3Vector3 orthoA = relA - projA; - // get desired offset between frames A and B along constraint axis - b3Scalar desiredOffs = limot->m_currentPosition - limot->m_currentLimitError; - // desired vector from projection of center of bodyA to projection of center of bodyB to constraint axis - b3Vector3 totalDist = projA + ax1 * desiredOffs - projB; - // get offset vectors relA and relB - relA = orthoA + totalDist * m_factA; - relB = orthoB - totalDist * m_factB; - tmpA = relA.cross(ax1); - tmpB = relB.cross(ax1); - if (m_hasStaticBody && (!rotAllowed)) - { - tmpA *= m_factA; - tmpB *= m_factB; - } - int i; - for (i = 0; i < 3; i++) info->m_J1angularAxis[srow + i] = tmpA[i]; - for (i = 0; i < 3; i++) info->m_J2angularAxis[srow + i] = -tmpB[i]; - } - else - { - b3Vector3 ltd; // Linear Torque Decoupling vector - b3Vector3 c = m_calculatedTransformB.getOrigin() - transA.getOrigin(); - ltd = c.cross(ax1); - info->m_J1angularAxis[srow + 0] = ltd[0]; - info->m_J1angularAxis[srow + 1] = ltd[1]; - info->m_J1angularAxis[srow + 2] = ltd[2]; - - c = m_calculatedTransformB.getOrigin() - transB.getOrigin(); - ltd = -c.cross(ax1); - info->m_J2angularAxis[srow + 0] = ltd[0]; - info->m_J2angularAxis[srow + 1] = ltd[1]; - info->m_J2angularAxis[srow + 2] = ltd[2]; - } - } - // if we're limited low and high simultaneously, the joint motor is - // ineffective - if (limit && (limot->m_loLimit == limot->m_hiLimit)) powered = false; - info->m_constraintError[srow] = b3Scalar(0.f); - if (powered) - { - info->cfm[srow] = limot->m_normalCFM; - if (!limit) - { - b3Scalar tag_vel = rotational ? limot->m_targetVelocity : -limot->m_targetVelocity; - - b3Scalar mot_fact = getMotorFactor(limot->m_currentPosition, - limot->m_loLimit, - limot->m_hiLimit, - tag_vel, - info->fps * limot->m_stopERP); - info->m_constraintError[srow] += mot_fact * limot->m_targetVelocity; - info->m_lowerLimit[srow] = -limot->m_maxMotorForce / info->fps; - info->m_upperLimit[srow] = limot->m_maxMotorForce / info->fps; - } - } - if (limit) - { - b3Scalar k = info->fps * limot->m_stopERP; - if (!rotational) - { - info->m_constraintError[srow] += k * limot->m_currentLimitError; - } - else - { - info->m_constraintError[srow] += -k * limot->m_currentLimitError; - } - info->cfm[srow] = limot->m_stopCFM; - if (limot->m_loLimit == limot->m_hiLimit) - { // limited low and high simultaneously - info->m_lowerLimit[srow] = -B3_INFINITY; - info->m_upperLimit[srow] = B3_INFINITY; - } - else - { - if (limit == 1) - { - info->m_lowerLimit[srow] = 0; - info->m_upperLimit[srow] = B3_INFINITY; - } - else - { - info->m_lowerLimit[srow] = -B3_INFINITY; - info->m_upperLimit[srow] = 0; - } - // deal with bounce - if (limot->m_bounce > 0) - { - // calculate joint velocity - b3Scalar vel; - if (rotational) - { - vel = angVelA.dot(ax1); - //make sure that if no body -> angVelB == zero vec - // if (body1) - vel -= angVelB.dot(ax1); - } - else - { - vel = linVelA.dot(ax1); - //make sure that if no body -> angVelB == zero vec - // if (body1) - vel -= linVelB.dot(ax1); - } - // only apply bounce if the velocity is incoming, and if the - // resulting c[] exceeds what we already have. - if (limit == 1) - { - if (vel < 0) - { - b3Scalar newc = -limot->m_bounce * vel; - if (newc > info->m_constraintError[srow]) - info->m_constraintError[srow] = newc; - } - } - else - { - if (vel > 0) - { - b3Scalar newc = -limot->m_bounce * vel; - if (newc < info->m_constraintError[srow]) - info->m_constraintError[srow] = newc; - } - } - } - } - } - return 1; - } - else - return 0; -} - -///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). -///If no axis is provided, it uses the default axis for this constraint. -void b3Generic6DofConstraint::setParam(int num, b3Scalar value, int axis) -{ - if ((axis >= 0) && (axis < 3)) - { - switch (num) - { - case B3_CONSTRAINT_STOP_ERP: - m_linearLimits.m_stopERP[axis] = value; - m_flags |= B3_6DOF_FLAGS_ERP_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT); - break; - case B3_CONSTRAINT_STOP_CFM: - m_linearLimits.m_stopCFM[axis] = value; - m_flags |= B3_6DOF_FLAGS_CFM_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT); - break; - case B3_CONSTRAINT_CFM: - m_linearLimits.m_normalCFM[axis] = value; - m_flags |= B3_6DOF_FLAGS_CFM_NORM << (axis * B3_6DOF_FLAGS_AXIS_SHIFT); - break; - default: - b3AssertConstrParams(0); - } - } - else if ((axis >= 3) && (axis < 6)) - { - switch (num) - { - case B3_CONSTRAINT_STOP_ERP: - m_angularLimits[axis - 3].m_stopERP = value; - m_flags |= B3_6DOF_FLAGS_ERP_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT); - break; - case B3_CONSTRAINT_STOP_CFM: - m_angularLimits[axis - 3].m_stopCFM = value; - m_flags |= B3_6DOF_FLAGS_CFM_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT); - break; - case B3_CONSTRAINT_CFM: - m_angularLimits[axis - 3].m_normalCFM = value; - m_flags |= B3_6DOF_FLAGS_CFM_NORM << (axis * B3_6DOF_FLAGS_AXIS_SHIFT); - break; - default: - b3AssertConstrParams(0); - } - } - else - { - b3AssertConstrParams(0); - } -} - -///return the local value of parameter -b3Scalar b3Generic6DofConstraint::getParam(int num, int axis) const -{ - b3Scalar retVal = 0; - if ((axis >= 0) && (axis < 3)) - { - switch (num) - { - case B3_CONSTRAINT_STOP_ERP: - b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_ERP_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT))); - retVal = m_linearLimits.m_stopERP[axis]; - break; - case B3_CONSTRAINT_STOP_CFM: - b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT))); - retVal = m_linearLimits.m_stopCFM[axis]; - break; - case B3_CONSTRAINT_CFM: - b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_NORM << (axis * B3_6DOF_FLAGS_AXIS_SHIFT))); - retVal = m_linearLimits.m_normalCFM[axis]; - break; - default: - b3AssertConstrParams(0); - } - } - else if ((axis >= 3) && (axis < 6)) - { - switch (num) - { - case B3_CONSTRAINT_STOP_ERP: - b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_ERP_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT))); - retVal = m_angularLimits[axis - 3].m_stopERP; - break; - case B3_CONSTRAINT_STOP_CFM: - b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT))); - retVal = m_angularLimits[axis - 3].m_stopCFM; - break; - case B3_CONSTRAINT_CFM: - b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_NORM << (axis * B3_6DOF_FLAGS_AXIS_SHIFT))); - retVal = m_angularLimits[axis - 3].m_normalCFM; - break; - default: - b3AssertConstrParams(0); - } - } - else - { - b3AssertConstrParams(0); - } - return retVal; -} - -void b3Generic6DofConstraint::setAxis(const b3Vector3& axis1, const b3Vector3& axis2, const b3RigidBodyData* bodies) -{ - b3Vector3 zAxis = axis1.normalized(); - b3Vector3 yAxis = axis2.normalized(); - b3Vector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system - - b3Transform frameInW; - frameInW.setIdentity(); - frameInW.getBasis().setValue(xAxis[0], yAxis[0], zAxis[0], - xAxis[1], yAxis[1], zAxis[1], - xAxis[2], yAxis[2], zAxis[2]); - - // now get constraint frame in local coordinate systems - m_frameInA = getCenterOfMassTransform(bodies[m_rbA]).inverse() * frameInW; - m_frameInB = getCenterOfMassTransform(bodies[m_rbB]).inverse() * frameInW; - - calculateTransforms(bodies); -} diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h deleted file mode 100644 index d162d58fec2..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h +++ /dev/null @@ -1,517 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/// 2009 March: b3Generic6DofConstraint refactored by Roman Ponomarev -/// Added support for generic constraint solver through getInfo1/getInfo2 methods - -/* -2007-09-09 -b3Generic6DofConstraint Refactored by Francisco Le?n -email: projectileman@yahoo.com -http://gimpact.sf.net -*/ - -#ifndef B3_GENERIC_6DOF_CONSTRAINT_H -#define B3_GENERIC_6DOF_CONSTRAINT_H - -#include "Bullet3Common/b3Vector3.h" -#include "b3JacobianEntry.h" -#include "b3TypedConstraint.h" - -struct b3RigidBodyData; - -//! Rotation Limit structure for generic joints -class b3RotationalLimitMotor -{ -public: - //! limit_parameters - //!@{ - b3Scalar m_loLimit; //!< joint limit - b3Scalar m_hiLimit; //!< joint limit - b3Scalar m_targetVelocity; //!< target motor velocity - b3Scalar m_maxMotorForce; //!< max force on motor - b3Scalar m_maxLimitForce; //!< max force on limit - b3Scalar m_damping; //!< Damping. - b3Scalar m_limitSoftness; //! Relaxation factor - b3Scalar m_normalCFM; //!< Constraint force mixing factor - b3Scalar m_stopERP; //!< Error tolerance factor when joint is at limit - b3Scalar m_stopCFM; //!< Constraint force mixing factor when joint is at limit - b3Scalar m_bounce; //!< restitution factor - bool m_enableMotor; - - //!@} - - //! temp_variables - //!@{ - b3Scalar m_currentLimitError; //! How much is violated this limit - b3Scalar m_currentPosition; //! current value of angle - int m_currentLimit; //!< 0=free, 1=at lo limit, 2=at hi limit - b3Scalar m_accumulatedImpulse; - //!@} - - b3RotationalLimitMotor() - { - m_accumulatedImpulse = 0.f; - m_targetVelocity = 0; - m_maxMotorForce = 6.0f; - m_maxLimitForce = 300.0f; - m_loLimit = 1.0f; - m_hiLimit = -1.0f; - m_normalCFM = 0.f; - m_stopERP = 0.2f; - m_stopCFM = 0.f; - m_bounce = 0.0f; - m_damping = 1.0f; - m_limitSoftness = 0.5f; - m_currentLimit = 0; - m_currentLimitError = 0; - m_enableMotor = false; - } - - b3RotationalLimitMotor(const b3RotationalLimitMotor& limot) - { - m_targetVelocity = limot.m_targetVelocity; - m_maxMotorForce = limot.m_maxMotorForce; - m_limitSoftness = limot.m_limitSoftness; - m_loLimit = limot.m_loLimit; - m_hiLimit = limot.m_hiLimit; - m_normalCFM = limot.m_normalCFM; - m_stopERP = limot.m_stopERP; - m_stopCFM = limot.m_stopCFM; - m_bounce = limot.m_bounce; - m_currentLimit = limot.m_currentLimit; - m_currentLimitError = limot.m_currentLimitError; - m_enableMotor = limot.m_enableMotor; - } - - //! Is limited - bool isLimited() - { - if (m_loLimit > m_hiLimit) return false; - return true; - } - - //! Need apply correction - bool needApplyTorques() - { - if (m_currentLimit == 0 && m_enableMotor == false) return false; - return true; - } - - //! calculates error - /*! - calculates m_currentLimit and m_currentLimitError. - */ - int testLimitValue(b3Scalar test_value); - - //! apply the correction impulses for two bodies - b3Scalar solveAngularLimits(b3Scalar timeStep, b3Vector3& axis, b3Scalar jacDiagABInv, b3RigidBodyData* body0, b3RigidBodyData* body1); -}; - -class b3TranslationalLimitMotor -{ -public: - b3Vector3 m_lowerLimit; //!< the constraint lower limits - b3Vector3 m_upperLimit; //!< the constraint upper limits - b3Vector3 m_accumulatedImpulse; - //! Linear_Limit_parameters - //!@{ - b3Vector3 m_normalCFM; //!< Constraint force mixing factor - b3Vector3 m_stopERP; //!< Error tolerance factor when joint is at limit - b3Vector3 m_stopCFM; //!< Constraint force mixing factor when joint is at limit - b3Vector3 m_targetVelocity; //!< target motor velocity - b3Vector3 m_maxMotorForce; //!< max force on motor - b3Vector3 m_currentLimitError; //! How much is violated this limit - b3Vector3 m_currentLinearDiff; //! Current relative offset of constraint frames - b3Scalar m_limitSoftness; //!< Softness for linear limit - b3Scalar m_damping; //!< Damping for linear limit - b3Scalar m_restitution; //! Bounce parameter for linear limit - //!@} - bool m_enableMotor[3]; - int m_currentLimit[3]; //!< 0=free, 1=at lower limit, 2=at upper limit - - b3TranslationalLimitMotor() - { - m_lowerLimit.setValue(0.f, 0.f, 0.f); - m_upperLimit.setValue(0.f, 0.f, 0.f); - m_accumulatedImpulse.setValue(0.f, 0.f, 0.f); - m_normalCFM.setValue(0.f, 0.f, 0.f); - m_stopERP.setValue(0.2f, 0.2f, 0.2f); - m_stopCFM.setValue(0.f, 0.f, 0.f); - - m_limitSoftness = 0.7f; - m_damping = b3Scalar(1.0f); - m_restitution = b3Scalar(0.5f); - for (int i = 0; i < 3; i++) - { - m_enableMotor[i] = false; - m_targetVelocity[i] = b3Scalar(0.f); - m_maxMotorForce[i] = b3Scalar(0.f); - } - } - - b3TranslationalLimitMotor(const b3TranslationalLimitMotor& other) - { - m_lowerLimit = other.m_lowerLimit; - m_upperLimit = other.m_upperLimit; - m_accumulatedImpulse = other.m_accumulatedImpulse; - - m_limitSoftness = other.m_limitSoftness; - m_damping = other.m_damping; - m_restitution = other.m_restitution; - m_normalCFM = other.m_normalCFM; - m_stopERP = other.m_stopERP; - m_stopCFM = other.m_stopCFM; - - for (int i = 0; i < 3; i++) - { - m_enableMotor[i] = other.m_enableMotor[i]; - m_targetVelocity[i] = other.m_targetVelocity[i]; - m_maxMotorForce[i] = other.m_maxMotorForce[i]; - } - } - - //! Test limit - /*! - - free means upper < lower, - - locked means upper == lower - - limited means upper > lower - - limitIndex: first 3 are linear, next 3 are angular - */ - inline bool isLimited(int limitIndex) - { - return (m_upperLimit[limitIndex] >= m_lowerLimit[limitIndex]); - } - inline bool needApplyForce(int limitIndex) - { - if (m_currentLimit[limitIndex] == 0 && m_enableMotor[limitIndex] == false) return false; - return true; - } - int testLimitValue(int limitIndex, b3Scalar test_value); - - b3Scalar solveLinearAxis( - b3Scalar timeStep, - b3Scalar jacDiagABInv, - b3RigidBodyData& body1, const b3Vector3& pointInA, - b3RigidBodyData& body2, const b3Vector3& pointInB, - int limit_index, - const b3Vector3& axis_normal_on_a, - const b3Vector3& anchorPos); -}; - -enum b36DofFlags -{ - B3_6DOF_FLAGS_CFM_NORM = 1, - B3_6DOF_FLAGS_CFM_STOP = 2, - B3_6DOF_FLAGS_ERP_STOP = 4 -}; -#define B3_6DOF_FLAGS_AXIS_SHIFT 3 // bits per axis - -/// b3Generic6DofConstraint between two rigidbodies each with a pivotpoint that descibes the axis location in local space -/*! -b3Generic6DofConstraint can leave any of the 6 degree of freedom 'free' or 'locked'. -currently this limit supports rotational motors
-
    -
  • For Linear limits, use b3Generic6DofConstraint.setLinearUpperLimit, b3Generic6DofConstraint.setLinearLowerLimit. You can set the parameters with the b3TranslationalLimitMotor structure accsesible through the b3Generic6DofConstraint.getTranslationalLimitMotor method. -At this moment translational motors are not supported. May be in the future.
  • - -
  • For Angular limits, use the b3RotationalLimitMotor structure for configuring the limit. -This is accessible through b3Generic6DofConstraint.getLimitMotor method, -This brings support for limit parameters and motors.
  • - -
  • Angulars limits have these possible ranges: - - - - - - - - - - - - - - - - - - -
    AXISMIN ANGLEMAX ANGLE
    X-PIPI
    Y-PI/2PI/2
    Z-PIPI
    -
  • -
- -*/ -B3_ATTRIBUTE_ALIGNED16(class) -b3Generic6DofConstraint : public b3TypedConstraint -{ -protected: - //! relative_frames - //!@{ - b3Transform m_frameInA; //!< the constraint space w.r.t body A - b3Transform m_frameInB; //!< the constraint space w.r.t body B - //!@} - - //! Jacobians - //!@{ - // b3JacobianEntry m_jacLinear[3];//!< 3 orthogonal linear constraints - // b3JacobianEntry m_jacAng[3];//!< 3 orthogonal angular constraints - //!@} - - //! Linear_Limit_parameters - //!@{ - b3TranslationalLimitMotor m_linearLimits; - //!@} - - //! hinge_parameters - //!@{ - b3RotationalLimitMotor m_angularLimits[3]; - //!@} - -protected: - //! temporal variables - //!@{ - b3Transform m_calculatedTransformA; - b3Transform m_calculatedTransformB; - b3Vector3 m_calculatedAxisAngleDiff; - b3Vector3 m_calculatedAxis[3]; - b3Vector3 m_calculatedLinearDiff; - b3Scalar m_timeStep; - b3Scalar m_factA; - b3Scalar m_factB; - bool m_hasStaticBody; - - b3Vector3 m_AnchorPos; // point betwen pivots of bodies A and B to solve linear axes - - bool m_useLinearReferenceFrameA; - bool m_useOffsetForConstraintFrame; - - int m_flags; - - //!@} - - b3Generic6DofConstraint& operator=(b3Generic6DofConstraint& other) - { - b3Assert(0); - (void)other; - return *this; - } - - int setAngularLimits(b3ConstraintInfo2 * info, int row_offset, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB); - - int setLinearLimits(b3ConstraintInfo2 * info, int row, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB); - - // tests linear limits - void calculateLinearInfo(); - - //! calcs the euler angles between the two bodies. - void calculateAngleInfo(); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3Generic6DofConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB, bool useLinearReferenceFrameA, const b3RigidBodyData* bodies); - - //! Calcs global transform of the offsets - /*! - Calcs the global transform for the joint offset for body A an B, and also calcs the agle differences between the bodies. - \sa b3Generic6DofConstraint.getCalculatedTransformA , b3Generic6DofConstraint.getCalculatedTransformB, b3Generic6DofConstraint.calculateAngleInfo - */ - void calculateTransforms(const b3Transform& transA, const b3Transform& transB, const b3RigidBodyData* bodies); - - void calculateTransforms(const b3RigidBodyData* bodies); - - //! Gets the global transform of the offset for body A - /*! - \sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo. - */ - const b3Transform& getCalculatedTransformA() const - { - return m_calculatedTransformA; - } - - //! Gets the global transform of the offset for body B - /*! - \sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo. - */ - const b3Transform& getCalculatedTransformB() const - { - return m_calculatedTransformB; - } - - const b3Transform& getFrameOffsetA() const - { - return m_frameInA; - } - - const b3Transform& getFrameOffsetB() const - { - return m_frameInB; - } - - b3Transform& getFrameOffsetA() - { - return m_frameInA; - } - - b3Transform& getFrameOffsetB() - { - return m_frameInB; - } - - virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies); - - void getInfo1NonVirtual(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies); - - virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies); - - void getInfo2NonVirtual(b3ConstraintInfo2 * info, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB, const b3RigidBodyData* bodies); - - void updateRHS(b3Scalar timeStep); - - //! Get the rotation axis in global coordinates - b3Vector3 getAxis(int axis_index) const; - - //! Get the relative Euler angle - /*! - \pre b3Generic6DofConstraint::calculateTransforms() must be called previously. - */ - b3Scalar getAngle(int axis_index) const; - - //! Get the relative position of the constraint pivot - /*! - \pre b3Generic6DofConstraint::calculateTransforms() must be called previously. - */ - b3Scalar getRelativePivotPosition(int axis_index) const; - - void setFrames(const b3Transform& frameA, const b3Transform& frameB, const b3RigidBodyData* bodies); - - //! Test angular limit. - /*! - Calculates angular correction and returns true if limit needs to be corrected. - \pre b3Generic6DofConstraint::calculateTransforms() must be called previously. - */ - bool testAngularLimitMotor(int axis_index); - - void setLinearLowerLimit(const b3Vector3& linearLower) - { - m_linearLimits.m_lowerLimit = linearLower; - } - - void getLinearLowerLimit(b3Vector3 & linearLower) - { - linearLower = m_linearLimits.m_lowerLimit; - } - - void setLinearUpperLimit(const b3Vector3& linearUpper) - { - m_linearLimits.m_upperLimit = linearUpper; - } - - void getLinearUpperLimit(b3Vector3 & linearUpper) - { - linearUpper = m_linearLimits.m_upperLimit; - } - - void setAngularLowerLimit(const b3Vector3& angularLower) - { - for (int i = 0; i < 3; i++) - m_angularLimits[i].m_loLimit = b3NormalizeAngle(angularLower[i]); - } - - void getAngularLowerLimit(b3Vector3 & angularLower) - { - for (int i = 0; i < 3; i++) - angularLower[i] = m_angularLimits[i].m_loLimit; - } - - void setAngularUpperLimit(const b3Vector3& angularUpper) - { - for (int i = 0; i < 3; i++) - m_angularLimits[i].m_hiLimit = b3NormalizeAngle(angularUpper[i]); - } - - void getAngularUpperLimit(b3Vector3 & angularUpper) - { - for (int i = 0; i < 3; i++) - angularUpper[i] = m_angularLimits[i].m_hiLimit; - } - - //! Retrieves the angular limit informacion - b3RotationalLimitMotor* getRotationalLimitMotor(int index) - { - return &m_angularLimits[index]; - } - - //! Retrieves the limit informacion - b3TranslationalLimitMotor* getTranslationalLimitMotor() - { - return &m_linearLimits; - } - - //first 3 are linear, next 3 are angular - void setLimit(int axis, b3Scalar lo, b3Scalar hi) - { - if (axis < 3) - { - m_linearLimits.m_lowerLimit[axis] = lo; - m_linearLimits.m_upperLimit[axis] = hi; - } - else - { - lo = b3NormalizeAngle(lo); - hi = b3NormalizeAngle(hi); - m_angularLimits[axis - 3].m_loLimit = lo; - m_angularLimits[axis - 3].m_hiLimit = hi; - } - } - - //! Test limit - /*! - - free means upper < lower, - - locked means upper == lower - - limited means upper > lower - - limitIndex: first 3 are linear, next 3 are angular - */ - bool isLimited(int limitIndex) - { - if (limitIndex < 3) - { - return m_linearLimits.isLimited(limitIndex); - } - return m_angularLimits[limitIndex - 3].isLimited(); - } - - virtual void calcAnchorPos(const b3RigidBodyData* bodies); // overridable - - int get_limit_motor_info2(b3RotationalLimitMotor * limot, - const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB, - b3ConstraintInfo2* info, int row, b3Vector3& ax1, int rotational, int rotAllowed = false); - - // access for UseFrameOffset - bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; } - void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; } - - ///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). - ///If no axis is provided, it uses the default axis for this constraint. - virtual void setParam(int num, b3Scalar value, int axis = -1); - ///return the local value of parameter - virtual b3Scalar getParam(int num, int axis = -1) const; - - void setAxis(const b3Vector3& axis1, const b3Vector3& axis2, const b3RigidBodyData* bodies); -}; - -#endif //B3_GENERIC_6DOF_CONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h deleted file mode 100644 index 3cc4f60262f..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h +++ /dev/null @@ -1,150 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_JACOBIAN_ENTRY_H -#define B3_JACOBIAN_ENTRY_H - -#include "Bullet3Common/b3Matrix3x3.h" - -//notes: -// Another memory optimization would be to store m_1MinvJt in the remaining 3 w components -// which makes the b3JacobianEntry memory layout 16 bytes -// if you only are interested in angular part, just feed massInvA and massInvB zero - -/// Jacobian entry is an abstraction that allows to describe constraints -/// it can be used in combination with a constraint solver -/// Can be used to relate the effect of an impulse to the constraint error -B3_ATTRIBUTE_ALIGNED16(class) -b3JacobianEntry -{ -public: - b3JacobianEntry(){}; - //constraint between two different rigidbodies - b3JacobianEntry( - const b3Matrix3x3& world2A, - const b3Matrix3x3& world2B, - const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, - const b3Vector3& jointAxis, - const b3Vector3& inertiaInvA, - const b3Scalar massInvA, - const b3Vector3& inertiaInvB, - const b3Scalar massInvB) - : m_linearJointAxis(jointAxis) - { - m_aJ = world2A * (rel_pos1.cross(m_linearJointAxis)); - m_bJ = world2B * (rel_pos2.cross(-m_linearJointAxis)); - m_0MinvJt = inertiaInvA * m_aJ; - m_1MinvJt = inertiaInvB * m_bJ; - m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ); - - b3Assert(m_Adiag > b3Scalar(0.0)); - } - - //angular constraint between two different rigidbodies - b3JacobianEntry(const b3Vector3& jointAxis, - const b3Matrix3x3& world2A, - const b3Matrix3x3& world2B, - const b3Vector3& inertiaInvA, - const b3Vector3& inertiaInvB) - : m_linearJointAxis(b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.))) - { - m_aJ = world2A * jointAxis; - m_bJ = world2B * -jointAxis; - m_0MinvJt = inertiaInvA * m_aJ; - m_1MinvJt = inertiaInvB * m_bJ; - m_Adiag = m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ); - - b3Assert(m_Adiag > b3Scalar(0.0)); - } - - //angular constraint between two different rigidbodies - b3JacobianEntry(const b3Vector3& axisInA, - const b3Vector3& axisInB, - const b3Vector3& inertiaInvA, - const b3Vector3& inertiaInvB) - : m_linearJointAxis(b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.))), m_aJ(axisInA), m_bJ(-axisInB) - { - m_0MinvJt = inertiaInvA * m_aJ; - m_1MinvJt = inertiaInvB * m_bJ; - m_Adiag = m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ); - - b3Assert(m_Adiag > b3Scalar(0.0)); - } - - //constraint on one rigidbody - b3JacobianEntry( - const b3Matrix3x3& world2A, - const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, - const b3Vector3& jointAxis, - const b3Vector3& inertiaInvA, - const b3Scalar massInvA) - : m_linearJointAxis(jointAxis) - { - m_aJ = world2A * (rel_pos1.cross(jointAxis)); - m_bJ = world2A * (rel_pos2.cross(-jointAxis)); - m_0MinvJt = inertiaInvA * m_aJ; - m_1MinvJt = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - m_Adiag = massInvA + m_0MinvJt.dot(m_aJ); - - b3Assert(m_Adiag > b3Scalar(0.0)); - } - - b3Scalar getDiagonal() const { return m_Adiag; } - - // for two constraints on the same rigidbody (for example vehicle friction) - b3Scalar getNonDiagonal(const b3JacobianEntry& jacB, const b3Scalar massInvA) const - { - const b3JacobianEntry& jacA = *this; - b3Scalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis); - b3Scalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ); - return lin + ang; - } - - // for two constraints on sharing two same rigidbodies (for example two contact points between two rigidbodies) - b3Scalar getNonDiagonal(const b3JacobianEntry& jacB, const b3Scalar massInvA, const b3Scalar massInvB) const - { - const b3JacobianEntry& jacA = *this; - b3Vector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis; - b3Vector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ; - b3Vector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ; - b3Vector3 lin0 = massInvA * lin; - b3Vector3 lin1 = massInvB * lin; - b3Vector3 sum = ang0 + ang1 + lin0 + lin1; - return sum[0] + sum[1] + sum[2]; - } - - b3Scalar getRelativeVelocity(const b3Vector3& linvelA, const b3Vector3& angvelA, const b3Vector3& linvelB, const b3Vector3& angvelB) - { - b3Vector3 linrel = linvelA - linvelB; - b3Vector3 angvela = angvelA * m_aJ; - b3Vector3 angvelb = angvelB * m_bJ; - linrel *= m_linearJointAxis; - angvela += angvelb; - angvela += linrel; - b3Scalar rel_vel2 = angvela[0] + angvela[1] + angvela[2]; - return rel_vel2 + B3_EPSILON; - } - //private: - - b3Vector3 m_linearJointAxis; - b3Vector3 m_aJ; - b3Vector3 m_bJ; - b3Vector3 m_0MinvJt; - b3Vector3 m_1MinvJt; - //Optimization: can be stored in the w/last component of one of the vectors - b3Scalar m_Adiag; -}; - -#endif //B3_JACOBIAN_ENTRY_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp deleted file mode 100644 index b7050b10705..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp +++ /dev/null @@ -1,1696 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2012 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//enable B3_SOLVER_DEBUG if you experience solver crashes -//#define B3_SOLVER_DEBUG -//#define COMPUTE_IMPULSE_DENOM 1 -//It is not necessary (redundant) to refresh contact manifolds, this refresh has been moved to the collision algorithms. - -//#define DISABLE_JOINTS - -#include "b3PgsJacobiSolver.h" -#include "Bullet3Common/b3MinMax.h" -#include "b3TypedConstraint.h" -#include -#include "Bullet3Common/b3StackAlloc.h" - -//#include "b3SolverBody.h" -//#include "b3SolverConstraint.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include //for memset -//#include "../../dynamics/basic_demo/Stubs/AdlContact4.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -static b3Transform getWorldTransform(b3RigidBodyData* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -} - -static const b3Matrix3x3& getInvInertiaTensorWorld(b3InertiaData* inertia) -{ - return inertia->m_invInertiaWorld; -} - -static const b3Vector3& getLinearVelocity(b3RigidBodyData* rb) -{ - return rb->m_linVel; -} - -static const b3Vector3& getAngularVelocity(b3RigidBodyData* rb) -{ - return rb->m_angVel; -} - -static b3Vector3 getVelocityInLocalPoint(b3RigidBodyData* rb, const b3Vector3& rel_pos) -{ - //we also calculate lin/ang velocity for kinematic objects - return getLinearVelocity(rb) + getAngularVelocity(rb).cross(rel_pos); -} - -struct b3ContactPoint -{ - b3Vector3 m_positionWorldOnA; - b3Vector3 m_positionWorldOnB; - b3Vector3 m_normalWorldOnB; - b3Scalar m_appliedImpulse; - b3Scalar m_distance; - b3Scalar m_combinedRestitution; - - ///information related to friction - b3Scalar m_combinedFriction; - b3Vector3 m_lateralFrictionDir1; - b3Vector3 m_lateralFrictionDir2; - b3Scalar m_appliedImpulseLateral1; - b3Scalar m_appliedImpulseLateral2; - b3Scalar m_combinedRollingFriction; - b3Scalar m_contactMotion1; - b3Scalar m_contactMotion2; - b3Scalar m_contactCFM1; - b3Scalar m_contactCFM2; - - bool m_lateralFrictionInitialized; - - b3Vector3 getPositionWorldOnA() - { - return m_positionWorldOnA; - } - b3Vector3 getPositionWorldOnB() - { - return m_positionWorldOnB; - } - b3Scalar getDistance() - { - return m_distance; - } -}; - -void getContactPoint(b3Contact4* contact, int contactIndex, b3ContactPoint& pointOut) -{ - pointOut.m_appliedImpulse = 0.f; - pointOut.m_appliedImpulseLateral1 = 0.f; - pointOut.m_appliedImpulseLateral2 = 0.f; - pointOut.m_combinedFriction = contact->getFrictionCoeff(); - pointOut.m_combinedRestitution = contact->getRestituitionCoeff(); - pointOut.m_combinedRollingFriction = 0.f; - pointOut.m_contactCFM1 = 0.f; - pointOut.m_contactCFM2 = 0.f; - pointOut.m_contactMotion1 = 0.f; - pointOut.m_contactMotion2 = 0.f; - pointOut.m_distance = contact->getPenetration(contactIndex); //??0.01f - b3Vector3 normalOnB = contact->m_worldNormalOnB; - normalOnB.normalize(); //is this needed? - - b3Vector3 l1, l2; - b3PlaneSpace1(normalOnB, l1, l2); - - pointOut.m_normalWorldOnB = normalOnB; - //printf("normalOnB = %f,%f,%f\n",normalOnB.getX(),normalOnB.getY(),normalOnB.getZ()); - pointOut.m_lateralFrictionDir1 = l1; - pointOut.m_lateralFrictionDir2 = l2; - pointOut.m_lateralFrictionInitialized = true; - - b3Vector3 worldPosB = contact->m_worldPosB[contactIndex]; - pointOut.m_positionWorldOnB = worldPosB; - pointOut.m_positionWorldOnA = worldPosB + normalOnB * pointOut.m_distance; -} - -int getNumContacts(b3Contact4* contact) -{ - return contact->getNPoints(); -} - -b3PgsJacobiSolver::b3PgsJacobiSolver(bool usePgs) - : m_usePgs(usePgs), - m_numSplitImpulseRecoveries(0), - m_btSeed2(0) -{ -} - -b3PgsJacobiSolver::~b3PgsJacobiSolver() -{ -} - -void b3PgsJacobiSolver::solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts, int numConstraints, b3TypedConstraint** constraints) -{ - b3ContactSolverInfo infoGlobal; - infoGlobal.m_splitImpulse = false; - infoGlobal.m_timeStep = 1.f / 60.f; - infoGlobal.m_numIterations = 4; //4; - // infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS|B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION; - //infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS; - infoGlobal.m_solverMode |= B3_SOLVER_USE_2_FRICTION_DIRECTIONS; - - //if (infoGlobal.m_solverMode & B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS) - //if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION)) - - solveGroup(bodies, inertias, numBodies, contacts, numContacts, constraints, numConstraints, infoGlobal); - - if (!numContacts) - return; -} - -/// b3PgsJacobiSolver Sequentially applies impulses -b3Scalar b3PgsJacobiSolver::solveGroup(b3RigidBodyData* bodies, - b3InertiaData* inertias, - int numBodies, - b3Contact4* manifoldPtr, - int numManifolds, - b3TypedConstraint** constraints, - int numConstraints, - const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroup"); - //you need to provide at least some bodies - - solveGroupCacheFriendlySetup(bodies, inertias, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal); - - solveGroupCacheFriendlyIterations(constraints, numConstraints, infoGlobal); - - solveGroupCacheFriendlyFinish(bodies, inertias, numBodies, infoGlobal); - - return 0.f; -} - -#ifdef USE_SIMD -#include -#define b3VecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e)) -static inline __m128 b3SimdDot3(__m128 vec0, __m128 vec1) -{ - __m128 result = _mm_mul_ps(vec0, vec1); - return _mm_add_ps(b3VecSplat(result, 0), _mm_add_ps(b3VecSplat(result, 1), b3VecSplat(result, 2))); -} -#endif //USE_SIMD - -// Project Gauss Seidel or the equivalent Sequential Impulse -void b3PgsJacobiSolver::resolveSingleConstraintRowGenericSIMD(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c) -{ -#ifdef USE_SIMD - __m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse); - __m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit); - __m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit); - __m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse), _mm_set1_ps(c.m_cfm))); - __m128 deltaVel1Dotn = _mm_add_ps(b3SimdDot3(c.m_contactNormal.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), b3SimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128)); - __m128 deltaVel2Dotn = _mm_sub_ps(b3SimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128), b3SimdDot3((c.m_contactNormal).mVec128, body2.internalGetDeltaLinearVelocity().mVec128)); - deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.m_jacDiagABInv))); - deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.m_jacDiagABInv))); - b3SimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse); - b3SimdScalar resultLowerLess, resultUpperLess; - resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1); - resultUpperLess = _mm_cmplt_ps(sum, upperLimit1); - __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp); - deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse)); - c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum)); - __m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp); - deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied)); - c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, c.m_appliedImpulse), _mm_andnot_ps(resultUpperLess, upperLimit1)); - __m128 linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128, body1.internalGetInvMass().mVec128); - __m128 linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128, body2.internalGetInvMass().mVec128); - __m128 impulseMagnitude = deltaImpulse; - body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude)); - body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude)); - body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude)); - body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude)); -#else - resolveSingleConstraintRowGeneric(body1, body2, c); -#endif -} - -// Project Gauss Seidel or the equivalent Sequential Impulse -void b3PgsJacobiSolver::resolveSingleConstraintRowGeneric(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c) -{ - b3Scalar deltaImpulse = c.m_rhs - b3Scalar(c.m_appliedImpulse) * c.m_cfm; - const b3Scalar deltaVel1Dotn = c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) + c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity()); - const b3Scalar deltaVel2Dotn = -c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity()); - - // const b3Scalar delta_rel_vel = deltaVel1Dotn-deltaVel2Dotn; - deltaImpulse -= deltaVel1Dotn * c.m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn * c.m_jacDiagABInv; - - const b3Scalar sum = b3Scalar(c.m_appliedImpulse) + deltaImpulse; - if (sum < c.m_lowerLimit) - { - deltaImpulse = c.m_lowerLimit - c.m_appliedImpulse; - c.m_appliedImpulse = c.m_lowerLimit; - } - else if (sum > c.m_upperLimit) - { - deltaImpulse = c.m_upperLimit - c.m_appliedImpulse; - c.m_appliedImpulse = c.m_upperLimit; - } - else - { - c.m_appliedImpulse = sum; - } - - body1.internalApplyImpulse(c.m_contactNormal * body1.internalGetInvMass(), c.m_angularComponentA, deltaImpulse); - body2.internalApplyImpulse(-c.m_contactNormal * body2.internalGetInvMass(), c.m_angularComponentB, deltaImpulse); -} - -void b3PgsJacobiSolver::resolveSingleConstraintRowLowerLimitSIMD(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c) -{ -#ifdef USE_SIMD - __m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse); - __m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit); - __m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit); - __m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse), _mm_set1_ps(c.m_cfm))); - __m128 deltaVel1Dotn = _mm_add_ps(b3SimdDot3(c.m_contactNormal.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), b3SimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128)); - __m128 deltaVel2Dotn = _mm_sub_ps(b3SimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128), b3SimdDot3((c.m_contactNormal).mVec128, body2.internalGetDeltaLinearVelocity().mVec128)); - deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.m_jacDiagABInv))); - deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.m_jacDiagABInv))); - b3SimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse); - b3SimdScalar resultLowerLess, resultUpperLess; - resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1); - resultUpperLess = _mm_cmplt_ps(sum, upperLimit1); - __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp); - deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse)); - c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum)); - __m128 linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128, body1.internalGetInvMass().mVec128); - __m128 linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128, body2.internalGetInvMass().mVec128); - __m128 impulseMagnitude = deltaImpulse; - body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude)); - body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude)); - body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude)); - body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude)); -#else - resolveSingleConstraintRowLowerLimit(body1, body2, c); -#endif -} - -// Project Gauss Seidel or the equivalent Sequential Impulse -void b3PgsJacobiSolver::resolveSingleConstraintRowLowerLimit(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c) -{ - b3Scalar deltaImpulse = c.m_rhs - b3Scalar(c.m_appliedImpulse) * c.m_cfm; - const b3Scalar deltaVel1Dotn = c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) + c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity()); - const b3Scalar deltaVel2Dotn = -c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity()); - - deltaImpulse -= deltaVel1Dotn * c.m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn * c.m_jacDiagABInv; - const b3Scalar sum = b3Scalar(c.m_appliedImpulse) + deltaImpulse; - if (sum < c.m_lowerLimit) - { - deltaImpulse = c.m_lowerLimit - c.m_appliedImpulse; - c.m_appliedImpulse = c.m_lowerLimit; - } - else - { - c.m_appliedImpulse = sum; - } - body1.internalApplyImpulse(c.m_contactNormal * body1.internalGetInvMass(), c.m_angularComponentA, deltaImpulse); - body2.internalApplyImpulse(-c.m_contactNormal * body2.internalGetInvMass(), c.m_angularComponentB, deltaImpulse); -} - -void b3PgsJacobiSolver::resolveSplitPenetrationImpulseCacheFriendly( - b3SolverBody& body1, - b3SolverBody& body2, - const b3SolverConstraint& c) -{ - if (c.m_rhsPenetration) - { - m_numSplitImpulseRecoveries++; - b3Scalar deltaImpulse = c.m_rhsPenetration - b3Scalar(c.m_appliedPushImpulse) * c.m_cfm; - const b3Scalar deltaVel1Dotn = c.m_contactNormal.dot(body1.internalGetPushVelocity()) + c.m_relpos1CrossNormal.dot(body1.internalGetTurnVelocity()); - const b3Scalar deltaVel2Dotn = -c.m_contactNormal.dot(body2.internalGetPushVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetTurnVelocity()); - - deltaImpulse -= deltaVel1Dotn * c.m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn * c.m_jacDiagABInv; - const b3Scalar sum = b3Scalar(c.m_appliedPushImpulse) + deltaImpulse; - if (sum < c.m_lowerLimit) - { - deltaImpulse = c.m_lowerLimit - c.m_appliedPushImpulse; - c.m_appliedPushImpulse = c.m_lowerLimit; - } - else - { - c.m_appliedPushImpulse = sum; - } - body1.internalApplyPushImpulse(c.m_contactNormal * body1.internalGetInvMass(), c.m_angularComponentA, deltaImpulse); - body2.internalApplyPushImpulse(-c.m_contactNormal * body2.internalGetInvMass(), c.m_angularComponentB, deltaImpulse); - } -} - -void b3PgsJacobiSolver::resolveSplitPenetrationSIMD(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c) -{ -#ifdef USE_SIMD - if (!c.m_rhsPenetration) - return; - - m_numSplitImpulseRecoveries++; - - __m128 cpAppliedImp = _mm_set1_ps(c.m_appliedPushImpulse); - __m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit); - __m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit); - __m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhsPenetration), _mm_mul_ps(_mm_set1_ps(c.m_appliedPushImpulse), _mm_set1_ps(c.m_cfm))); - __m128 deltaVel1Dotn = _mm_add_ps(b3SimdDot3(c.m_contactNormal.mVec128, body1.internalGetPushVelocity().mVec128), b3SimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetTurnVelocity().mVec128)); - __m128 deltaVel2Dotn = _mm_sub_ps(b3SimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetTurnVelocity().mVec128), b3SimdDot3((c.m_contactNormal).mVec128, body2.internalGetPushVelocity().mVec128)); - deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.m_jacDiagABInv))); - deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.m_jacDiagABInv))); - b3SimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse); - b3SimdScalar resultLowerLess, resultUpperLess; - resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1); - resultUpperLess = _mm_cmplt_ps(sum, upperLimit1); - __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp); - deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse)); - c.m_appliedPushImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum)); - __m128 linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128, body1.internalGetInvMass().mVec128); - __m128 linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128, body2.internalGetInvMass().mVec128); - __m128 impulseMagnitude = deltaImpulse; - body1.internalGetPushVelocity().mVec128 = _mm_add_ps(body1.internalGetPushVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude)); - body1.internalGetTurnVelocity().mVec128 = _mm_add_ps(body1.internalGetTurnVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude)); - body2.internalGetPushVelocity().mVec128 = _mm_sub_ps(body2.internalGetPushVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude)); - body2.internalGetTurnVelocity().mVec128 = _mm_add_ps(body2.internalGetTurnVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude)); -#else - resolveSplitPenetrationImpulseCacheFriendly(body1, body2, c); -#endif -} - -unsigned long b3PgsJacobiSolver::b3Rand2() -{ - m_btSeed2 = (1664525L * m_btSeed2 + 1013904223L) & 0xffffffff; - return m_btSeed2; -} - -//See ODE: adam's all-int straightforward(?) dRandInt (0..n-1) -int b3PgsJacobiSolver::b3RandInt2(int n) -{ - // seems good; xor-fold and modulus - const unsigned long un = static_cast(n); - unsigned long r = b3Rand2(); - - // note: probably more aggressive than it needs to be -- might be - // able to get away without one or two of the innermost branches. - if (un <= 0x00010000UL) - { - r ^= (r >> 16); - if (un <= 0x00000100UL) - { - r ^= (r >> 8); - if (un <= 0x00000010UL) - { - r ^= (r >> 4); - if (un <= 0x00000004UL) - { - r ^= (r >> 2); - if (un <= 0x00000002UL) - { - r ^= (r >> 1); - } - } - } - } - } - - return (int)(r % un); -} - -void b3PgsJacobiSolver::initSolverBody(int bodyIndex, b3SolverBody* solverBody, b3RigidBodyData* rb) -{ - solverBody->m_deltaLinearVelocity.setValue(0.f, 0.f, 0.f); - solverBody->m_deltaAngularVelocity.setValue(0.f, 0.f, 0.f); - solverBody->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - solverBody->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - - if (rb) - { - solverBody->m_worldTransform = getWorldTransform(rb); - solverBody->internalSetInvMass(b3MakeVector3(rb->m_invMass, rb->m_invMass, rb->m_invMass)); - solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor = b3MakeVector3(1, 1, 1); - solverBody->m_linearFactor = b3MakeVector3(1, 1, 1); - solverBody->m_linearVelocity = getLinearVelocity(rb); - solverBody->m_angularVelocity = getAngularVelocity(rb); - } - else - { - solverBody->m_worldTransform.setIdentity(); - solverBody->internalSetInvMass(b3MakeVector3(0, 0, 0)); - solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor.setValue(1, 1, 1); - solverBody->m_linearFactor.setValue(1, 1, 1); - solverBody->m_linearVelocity.setValue(0, 0, 0); - solverBody->m_angularVelocity.setValue(0, 0, 0); - } -} - -b3Scalar b3PgsJacobiSolver::restitutionCurve(b3Scalar rel_vel, b3Scalar restitution) -{ - b3Scalar rest = restitution * -rel_vel; - return rest; -} - -void b3PgsJacobiSolver::setupFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity, b3Scalar cfmSlip) -{ - solverConstraint.m_contactNormal = normalAxis; - b3SolverBody& solverBodyA = m_tmpSolverBodyPool[solverBodyIdA]; - b3SolverBody& solverBodyB = m_tmpSolverBodyPool[solverBodyIdB]; - - b3RigidBodyData* body0 = &bodies[solverBodyA.m_originalBodyIndex]; - b3RigidBodyData* body1 = &bodies[solverBodyB.m_originalBodyIndex]; - - solverConstraint.m_solverBodyIdA = solverBodyIdA; - solverConstraint.m_solverBodyIdB = solverBodyIdB; - - solverConstraint.m_friction = cp.m_combinedFriction; - solverConstraint.m_originalContactPoint = 0; - - solverConstraint.m_appliedImpulse = 0.f; - solverConstraint.m_appliedPushImpulse = 0.f; - - { - b3Vector3 ftorqueAxis1 = rel_pos1.cross(solverConstraint.m_contactNormal); - solverConstraint.m_relpos1CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex]) * ftorqueAxis1 : b3MakeVector3(0, 0, 0); - } - { - b3Vector3 ftorqueAxis1 = rel_pos2.cross(-solverConstraint.m_contactNormal); - solverConstraint.m_relpos2CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex]) * ftorqueAxis1 : b3MakeVector3(0, 0, 0); - } - - b3Scalar scaledDenom; - - { - b3Vector3 vec; - b3Scalar denom0 = 0.f; - b3Scalar denom1 = 0.f; - if (body0) - { - vec = (solverConstraint.m_angularComponentA).cross(rel_pos1); - denom0 = body0->m_invMass + normalAxis.dot(vec); - } - if (body1) - { - vec = (-solverConstraint.m_angularComponentB).cross(rel_pos2); - denom1 = body1->m_invMass + normalAxis.dot(vec); - } - - b3Scalar denom; - if (m_usePgs) - { - scaledDenom = denom = relaxation / (denom0 + denom1); - } - else - { - denom = relaxation / (denom0 + denom1); - b3Scalar countA = body0->m_invMass ? b3Scalar(m_bodyCount[solverBodyA.m_originalBodyIndex]) : 1.f; - b3Scalar countB = body1->m_invMass ? b3Scalar(m_bodyCount[solverBodyB.m_originalBodyIndex]) : 1.f; - - scaledDenom = relaxation / (denom0 * countA + denom1 * countB); - } - - solverConstraint.m_jacDiagABInv = denom; - } - - { - b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0 ? solverBodyA.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos1CrossNormal.dot(body0 ? solverBodyA.m_angularVelocity : b3MakeVector3(0, 0, 0)); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1 ? solverBodyB.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos2CrossNormal.dot(body1 ? solverBodyB.m_angularVelocity : b3MakeVector3(0, 0, 0)); - - rel_vel = vel1Dotn + vel2Dotn; - - // b3Scalar positionalError = 0.f; - - b3SimdScalar velocityError = desiredVelocity - rel_vel; - b3SimdScalar velocityImpulse = velocityError * b3SimdScalar(scaledDenom); //solverConstraint.m_jacDiagABInv); - solverConstraint.m_rhs = velocityImpulse; - solverConstraint.m_cfm = cfmSlip; - solverConstraint.m_lowerLimit = 0; - solverConstraint.m_upperLimit = 1e10f; - } -} - -b3SolverConstraint& b3PgsJacobiSolver::addFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity, b3Scalar cfmSlip) -{ - b3SolverConstraint& solverConstraint = m_tmpSolverContactFrictionConstraintPool.expandNonInitializing(); - solverConstraint.m_frictionIndex = frictionIndex; - setupFrictionConstraint(bodies, inertias, solverConstraint, normalAxis, solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2, - colObj0, colObj1, relaxation, desiredVelocity, cfmSlip); - return solverConstraint; -} - -void b3PgsJacobiSolver::setupRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis1, int solverBodyIdA, int solverBodyIdB, - b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, - b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, - b3Scalar desiredVelocity, b3Scalar cfmSlip) - -{ - b3Vector3 normalAxis = b3MakeVector3(0, 0, 0); - - solverConstraint.m_contactNormal = normalAxis; - b3SolverBody& solverBodyA = m_tmpSolverBodyPool[solverBodyIdA]; - b3SolverBody& solverBodyB = m_tmpSolverBodyPool[solverBodyIdB]; - - b3RigidBodyData* body0 = &bodies[m_tmpSolverBodyPool[solverBodyIdA].m_originalBodyIndex]; - b3RigidBodyData* body1 = &bodies[m_tmpSolverBodyPool[solverBodyIdB].m_originalBodyIndex]; - - solverConstraint.m_solverBodyIdA = solverBodyIdA; - solverConstraint.m_solverBodyIdB = solverBodyIdB; - - solverConstraint.m_friction = cp.m_combinedRollingFriction; - solverConstraint.m_originalContactPoint = 0; - - solverConstraint.m_appliedImpulse = 0.f; - solverConstraint.m_appliedPushImpulse = 0.f; - - { - b3Vector3 ftorqueAxis1 = -normalAxis1; - solverConstraint.m_relpos1CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex]) * ftorqueAxis1 : b3MakeVector3(0, 0, 0); - } - { - b3Vector3 ftorqueAxis1 = normalAxis1; - solverConstraint.m_relpos2CrossNormal = ftorqueAxis1; - solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex]) * ftorqueAxis1 : b3MakeVector3(0, 0, 0); - } - - { - b3Vector3 iMJaA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex]) * solverConstraint.m_relpos1CrossNormal : b3MakeVector3(0, 0, 0); - b3Vector3 iMJaB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex]) * solverConstraint.m_relpos2CrossNormal : b3MakeVector3(0, 0, 0); - b3Scalar sum = 0; - sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); - sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); - solverConstraint.m_jacDiagABInv = b3Scalar(1.) / sum; - } - - { - b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0 ? solverBodyA.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos1CrossNormal.dot(body0 ? solverBodyA.m_angularVelocity : b3MakeVector3(0, 0, 0)); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1 ? solverBodyB.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos2CrossNormal.dot(body1 ? solverBodyB.m_angularVelocity : b3MakeVector3(0, 0, 0)); - - rel_vel = vel1Dotn + vel2Dotn; - - // b3Scalar positionalError = 0.f; - - b3SimdScalar velocityError = desiredVelocity - rel_vel; - b3SimdScalar velocityImpulse = velocityError * b3SimdScalar(solverConstraint.m_jacDiagABInv); - solverConstraint.m_rhs = velocityImpulse; - solverConstraint.m_cfm = cfmSlip; - solverConstraint.m_lowerLimit = 0; - solverConstraint.m_upperLimit = 1e10f; - } -} - -b3SolverConstraint& b3PgsJacobiSolver::addRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity, b3Scalar cfmSlip) -{ - b3SolverConstraint& solverConstraint = m_tmpSolverContactRollingFrictionConstraintPool.expandNonInitializing(); - solverConstraint.m_frictionIndex = frictionIndex; - setupRollingFrictionConstraint(bodies, inertias, solverConstraint, normalAxis, solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2, - colObj0, colObj1, relaxation, desiredVelocity, cfmSlip); - return solverConstraint; -} - -int b3PgsJacobiSolver::getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies, b3InertiaData* inertias) -{ - //b3Assert(bodyIndex< m_tmpSolverBodyPool.size()); - - b3RigidBodyData& body = bodies[bodyIndex]; - int curIndex = -1; - if (m_usePgs || body.m_invMass == 0.f) - { - if (m_bodyCount[bodyIndex] < 0) - { - curIndex = m_tmpSolverBodyPool.size(); - b3SolverBody& solverBody = m_tmpSolverBodyPool.expand(); - initSolverBody(bodyIndex, &solverBody, &body); - solverBody.m_originalBodyIndex = bodyIndex; - m_bodyCount[bodyIndex] = curIndex; - } - else - { - curIndex = m_bodyCount[bodyIndex]; - } - } - else - { - b3Assert(m_bodyCount[bodyIndex] > 0); - m_bodyCountCheck[bodyIndex]++; - curIndex = m_tmpSolverBodyPool.size(); - b3SolverBody& solverBody = m_tmpSolverBodyPool.expand(); - initSolverBody(bodyIndex, &solverBody, &body); - solverBody.m_originalBodyIndex = bodyIndex; - } - - b3Assert(curIndex >= 0); - return curIndex; -} -#include - -void b3PgsJacobiSolver::setupContactConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, - int solverBodyIdA, int solverBodyIdB, - b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal, - b3Vector3& vel, b3Scalar& rel_vel, b3Scalar& relaxation, - b3Vector3& rel_pos1, b3Vector3& rel_pos2) -{ - const b3Vector3& pos1 = cp.getPositionWorldOnA(); - const b3Vector3& pos2 = cp.getPositionWorldOnB(); - - b3SolverBody* bodyA = &m_tmpSolverBodyPool[solverBodyIdA]; - b3SolverBody* bodyB = &m_tmpSolverBodyPool[solverBodyIdB]; - - b3RigidBodyData* rb0 = &bodies[bodyA->m_originalBodyIndex]; - b3RigidBodyData* rb1 = &bodies[bodyB->m_originalBodyIndex]; - - // b3Vector3 rel_pos1 = pos1 - colObj0->getWorldTransform().getOrigin(); - // b3Vector3 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin(); - rel_pos1 = pos1 - bodyA->getWorldTransform().getOrigin(); - rel_pos2 = pos2 - bodyB->getWorldTransform().getOrigin(); - - relaxation = 1.f; - - b3Vector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB); - solverConstraint.m_angularComponentA = rb0 ? getInvInertiaTensorWorld(&inertias[bodyA->m_originalBodyIndex]) * torqueAxis0 : b3MakeVector3(0, 0, 0); - b3Vector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB); - solverConstraint.m_angularComponentB = rb1 ? getInvInertiaTensorWorld(&inertias[bodyB->m_originalBodyIndex]) * -torqueAxis1 : b3MakeVector3(0, 0, 0); - - b3Scalar scaledDenom; - { -#ifdef COMPUTE_IMPULSE_DENOM - b3Scalar denom0 = rb0->computeImpulseDenominator(pos1, cp.m_normalWorldOnB); - b3Scalar denom1 = rb1->computeImpulseDenominator(pos2, cp.m_normalWorldOnB); -#else - b3Vector3 vec; - b3Scalar denom0 = 0.f; - b3Scalar denom1 = 0.f; - if (rb0) - { - vec = (solverConstraint.m_angularComponentA).cross(rel_pos1); - denom0 = rb0->m_invMass + cp.m_normalWorldOnB.dot(vec); - } - if (rb1) - { - vec = (-solverConstraint.m_angularComponentB).cross(rel_pos2); - denom1 = rb1->m_invMass + cp.m_normalWorldOnB.dot(vec); - } -#endif //COMPUTE_IMPULSE_DENOM - - b3Scalar denom; - if (m_usePgs) - { - scaledDenom = denom = relaxation / (denom0 + denom1); - } - else - { - denom = relaxation / (denom0 + denom1); - - b3Scalar countA = rb0->m_invMass ? b3Scalar(m_bodyCount[bodyA->m_originalBodyIndex]) : 1.f; - b3Scalar countB = rb1->m_invMass ? b3Scalar(m_bodyCount[bodyB->m_originalBodyIndex]) : 1.f; - scaledDenom = relaxation / (denom0 * countA + denom1 * countB); - } - solverConstraint.m_jacDiagABInv = denom; - } - - solverConstraint.m_contactNormal = cp.m_normalWorldOnB; - solverConstraint.m_relpos1CrossNormal = torqueAxis0; - solverConstraint.m_relpos2CrossNormal = -torqueAxis1; - - b3Scalar restitution = 0.f; - b3Scalar penetration = cp.getDistance() + infoGlobal.m_linearSlop; - - { - b3Vector3 vel1, vel2; - - vel1 = rb0 ? getVelocityInLocalPoint(rb0, rel_pos1) : b3MakeVector3(0, 0, 0); - vel2 = rb1 ? getVelocityInLocalPoint(rb1, rel_pos2) : b3MakeVector3(0, 0, 0); - - // b3Vector3 vel2 = rb1 ? rb1->getVelocityInLocalPoint(rel_pos2) : b3Vector3(0,0,0); - vel = vel1 - vel2; - rel_vel = cp.m_normalWorldOnB.dot(vel); - - solverConstraint.m_friction = cp.m_combinedFriction; - - restitution = restitutionCurve(rel_vel, cp.m_combinedRestitution); - if (restitution <= b3Scalar(0.)) - { - restitution = 0.f; - }; - } - - ///warm starting (or zero if disabled) - if (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING) - { - solverConstraint.m_appliedImpulse = cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor; - if (rb0) - bodyA->internalApplyImpulse(solverConstraint.m_contactNormal * bodyA->internalGetInvMass(), solverConstraint.m_angularComponentA, solverConstraint.m_appliedImpulse); - if (rb1) - bodyB->internalApplyImpulse(solverConstraint.m_contactNormal * bodyB->internalGetInvMass(), -solverConstraint.m_angularComponentB, -(b3Scalar)solverConstraint.m_appliedImpulse); - } - else - { - solverConstraint.m_appliedImpulse = 0.f; - } - - solverConstraint.m_appliedPushImpulse = 0.f; - - { - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0 ? bodyA->m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos1CrossNormal.dot(rb0 ? bodyA->m_angularVelocity : b3MakeVector3(0, 0, 0)); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1 ? bodyB->m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos2CrossNormal.dot(rb1 ? bodyB->m_angularVelocity : b3MakeVector3(0, 0, 0)); - b3Scalar rel_vel = vel1Dotn + vel2Dotn; - - b3Scalar positionalError = 0.f; - b3Scalar velocityError = restitution - rel_vel; // * damping; - - b3Scalar erp = infoGlobal.m_erp2; - if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold)) - { - erp = infoGlobal.m_erp; - } - - if (penetration > 0) - { - positionalError = 0; - - velocityError -= penetration / infoGlobal.m_timeStep; - } - else - { - positionalError = -penetration * erp / infoGlobal.m_timeStep; - } - - b3Scalar penetrationImpulse = positionalError * scaledDenom; //solverConstraint.m_jacDiagABInv; - b3Scalar velocityImpulse = velocityError * scaledDenom; //solverConstraint.m_jacDiagABInv; - - if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold)) - { - //combine position and velocity into rhs - solverConstraint.m_rhs = penetrationImpulse + velocityImpulse; - solverConstraint.m_rhsPenetration = 0.f; - } - else - { - //split position and velocity into rhs and m_rhsPenetration - solverConstraint.m_rhs = velocityImpulse; - solverConstraint.m_rhsPenetration = penetrationImpulse; - } - solverConstraint.m_cfm = 0.f; - solverConstraint.m_lowerLimit = 0; - solverConstraint.m_upperLimit = 1e10f; - } -} - -void b3PgsJacobiSolver::setFrictionConstraintImpulse(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, - int solverBodyIdA, int solverBodyIdB, - b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal) -{ - b3SolverBody* bodyA = &m_tmpSolverBodyPool[solverBodyIdA]; - b3SolverBody* bodyB = &m_tmpSolverBodyPool[solverBodyIdB]; - - { - b3SolverConstraint& frictionConstraint1 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex]; - if (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING) - { - frictionConstraint1.m_appliedImpulse = cp.m_appliedImpulseLateral1 * infoGlobal.m_warmstartingFactor; - if (bodies[bodyA->m_originalBodyIndex].m_invMass) - bodyA->internalApplyImpulse(frictionConstraint1.m_contactNormal * bodies[bodyA->m_originalBodyIndex].m_invMass, frictionConstraint1.m_angularComponentA, frictionConstraint1.m_appliedImpulse); - if (bodies[bodyB->m_originalBodyIndex].m_invMass) - bodyB->internalApplyImpulse(frictionConstraint1.m_contactNormal * bodies[bodyB->m_originalBodyIndex].m_invMass, -frictionConstraint1.m_angularComponentB, -(b3Scalar)frictionConstraint1.m_appliedImpulse); - } - else - { - frictionConstraint1.m_appliedImpulse = 0.f; - } - } - - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)) - { - b3SolverConstraint& frictionConstraint2 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex + 1]; - if (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING) - { - frictionConstraint2.m_appliedImpulse = cp.m_appliedImpulseLateral2 * infoGlobal.m_warmstartingFactor; - if (bodies[bodyA->m_originalBodyIndex].m_invMass) - bodyA->internalApplyImpulse(frictionConstraint2.m_contactNormal * bodies[bodyA->m_originalBodyIndex].m_invMass, frictionConstraint2.m_angularComponentA, frictionConstraint2.m_appliedImpulse); - if (bodies[bodyB->m_originalBodyIndex].m_invMass) - bodyB->internalApplyImpulse(frictionConstraint2.m_contactNormal * bodies[bodyB->m_originalBodyIndex].m_invMass, -frictionConstraint2.m_angularComponentB, -(b3Scalar)frictionConstraint2.m_appliedImpulse); - } - else - { - frictionConstraint2.m_appliedImpulse = 0.f; - } - } -} - -void b3PgsJacobiSolver::convertContact(b3RigidBodyData* bodies, b3InertiaData* inertias, b3Contact4* manifold, const b3ContactSolverInfo& infoGlobal) -{ - b3RigidBodyData *colObj0 = 0, *colObj1 = 0; - - int solverBodyIdA = getOrInitSolverBody(manifold->getBodyA(), bodies, inertias); - int solverBodyIdB = getOrInitSolverBody(manifold->getBodyB(), bodies, inertias); - - // b3RigidBody* bodyA = b3RigidBody::upcast(colObj0); - // b3RigidBody* bodyB = b3RigidBody::upcast(colObj1); - - b3SolverBody* solverBodyA = &m_tmpSolverBodyPool[solverBodyIdA]; - b3SolverBody* solverBodyB = &m_tmpSolverBodyPool[solverBodyIdB]; - - ///avoid collision response between two static objects - if (solverBodyA->m_invMass.isZero() && solverBodyB->m_invMass.isZero()) - return; - - int rollingFriction = 1; - int numContacts = getNumContacts(manifold); - for (int j = 0; j < numContacts; j++) - { - b3ContactPoint cp; - getContactPoint(manifold, j, cp); - - if (cp.getDistance() <= getContactProcessingThreshold(manifold)) - { - b3Vector3 rel_pos1; - b3Vector3 rel_pos2; - b3Scalar relaxation; - b3Scalar rel_vel; - b3Vector3 vel; - - int frictionIndex = m_tmpSolverContactConstraintPool.size(); - b3SolverConstraint& solverConstraint = m_tmpSolverContactConstraintPool.expandNonInitializing(); - // b3RigidBody* rb0 = b3RigidBody::upcast(colObj0); - // b3RigidBody* rb1 = b3RigidBody::upcast(colObj1); - solverConstraint.m_solverBodyIdA = solverBodyIdA; - solverConstraint.m_solverBodyIdB = solverBodyIdB; - - solverConstraint.m_originalContactPoint = &cp; - - setupContactConstraint(bodies, inertias, solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, vel, rel_vel, relaxation, rel_pos1, rel_pos2); - - // const b3Vector3& pos1 = cp.getPositionWorldOnA(); - // const b3Vector3& pos2 = cp.getPositionWorldOnB(); - - /////setup the friction constraints - - solverConstraint.m_frictionIndex = m_tmpSolverContactFrictionConstraintPool.size(); - - b3Vector3 angVelA, angVelB; - solverBodyA->getAngularVelocity(angVelA); - solverBodyB->getAngularVelocity(angVelB); - b3Vector3 relAngVel = angVelB - angVelA; - - if ((cp.m_combinedRollingFriction > 0.f) && (rollingFriction > 0)) - { - //only a single rollingFriction per manifold - rollingFriction--; - if (relAngVel.length() > infoGlobal.m_singleAxisRollingFrictionThreshold) - { - relAngVel.normalize(); - if (relAngVel.length() > 0.001) - addRollingFrictionConstraint(bodies, inertias, relAngVel, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - } - else - { - addRollingFrictionConstraint(bodies, inertias, cp.m_normalWorldOnB, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - b3Vector3 axis0, axis1; - b3PlaneSpace1(cp.m_normalWorldOnB, axis0, axis1); - if (axis0.length() > 0.001) - addRollingFrictionConstraint(bodies, inertias, axis0, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - if (axis1.length() > 0.001) - addRollingFrictionConstraint(bodies, inertias, axis1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - } - } - - ///Bullet has several options to set the friction directions - ///By default, each contact has only a single friction direction that is recomputed automatically very frame - ///based on the relative linear velocity. - ///If the relative velocity it zero, it will automatically compute a friction direction. - - ///You can also enable two friction directions, using the B3_SOLVER_USE_2_FRICTION_DIRECTIONS. - ///In that case, the second friction direction will be orthogonal to both contact normal and first friction direction. - /// - ///If you choose B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION, then the friction will be independent from the relative projected velocity. - /// - ///The user can manually override the friction directions for certain contacts using a contact callback, - ///and set the cp.m_lateralFrictionInitialized to true - ///In that case, you can set the target relative motion in each friction direction (cp.m_contactMotion1 and cp.m_contactMotion2) - ///this will give a conveyor belt effect - /// - if (!(infoGlobal.m_solverMode & B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized) - { - cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel; - b3Scalar lat_rel_vel = cp.m_lateralFrictionDir1.length2(); - if (!(infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > B3_EPSILON) - { - cp.m_lateralFrictionDir1 *= 1.f / b3Sqrt(lat_rel_vel); - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)) - { - cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB); - cp.m_lateralFrictionDir2.normalize(); //?? - addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - } - - addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - } - else - { - b3PlaneSpace1(cp.m_normalWorldOnB, cp.m_lateralFrictionDir1, cp.m_lateralFrictionDir2); - - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)) - { - addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - } - - addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation); - - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION)) - { - cp.m_lateralFrictionInitialized = true; - } - } - } - else - { - addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, cp.m_contactMotion1, cp.m_contactCFM1); - - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)) - addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, cp.m_contactMotion2, cp.m_contactCFM2); - - setFrictionConstraintImpulse(bodies, inertias, solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal); - } - } - } -} - -b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroupCacheFriendlySetup"); - - m_maxOverrideNumSolverIterations = 0; - - m_tmpSolverBodyPool.resize(0); - - m_bodyCount.resize(0); - m_bodyCount.resize(numBodies, 0); - m_bodyCountCheck.resize(0); - m_bodyCountCheck.resize(numBodies, 0); - - m_deltaLinearVelocities.resize(0); - m_deltaLinearVelocities.resize(numBodies, b3MakeVector3(0, 0, 0)); - m_deltaAngularVelocities.resize(0); - m_deltaAngularVelocities.resize(numBodies, b3MakeVector3(0, 0, 0)); - - //int totalBodies = 0; - - for (int i = 0; i < numConstraints; i++) - { - int bodyIndexA = constraints[i]->getRigidBodyA(); - int bodyIndexB = constraints[i]->getRigidBodyB(); - if (m_usePgs) - { - m_bodyCount[bodyIndexA] = -1; - m_bodyCount[bodyIndexB] = -1; - } - else - { - //didn't implement joints with Jacobi version yet - b3Assert(0); - } - } - for (int i = 0; i < numManifolds; i++) - { - int bodyIndexA = manifoldPtr[i].getBodyA(); - int bodyIndexB = manifoldPtr[i].getBodyB(); - if (m_usePgs) - { - m_bodyCount[bodyIndexA] = -1; - m_bodyCount[bodyIndexB] = -1; - } - else - { - if (bodies[bodyIndexA].m_invMass) - { - //m_bodyCount[bodyIndexA]+=manifoldPtr[i].getNPoints(); - m_bodyCount[bodyIndexA]++; - } - else - m_bodyCount[bodyIndexA] = -1; - - if (bodies[bodyIndexB].m_invMass) - // m_bodyCount[bodyIndexB]+=manifoldPtr[i].getNPoints(); - m_bodyCount[bodyIndexB]++; - else - m_bodyCount[bodyIndexB] = -1; - } - } - - if (1) - { - int j; - for (j = 0; j < numConstraints; j++) - { - b3TypedConstraint* constraint = constraints[j]; - - constraint->internalSetAppliedImpulse(0.0f); - } - } - - //b3RigidBody* rb0=0,*rb1=0; - //if (1) - { - { - int totalNumRows = 0; - int i; - - m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); - //calculate the total number of contraint rows - for (i = 0; i < numConstraints; i++) - { - b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i]; - b3JointFeedback* fb = constraints[i]->getJointFeedback(); - if (fb) - { - fb->m_appliedForceBodyA.setZero(); - fb->m_appliedTorqueBodyA.setZero(); - fb->m_appliedForceBodyB.setZero(); - fb->m_appliedTorqueBodyB.setZero(); - } - - if (constraints[i]->isEnabled()) - { - } - if (constraints[i]->isEnabled()) - { - constraints[i]->getInfo1(&info1, bodies); - } - else - { - info1.m_numConstraintRows = 0; - info1.nub = 0; - } - totalNumRows += info1.m_numConstraintRows; - } - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); - -#ifndef DISABLE_JOINTS - ///setup the b3SolverConstraints - int currentRow = 0; - - for (i = 0; i < numConstraints; i++) - { - const b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i]; - - if (info1.m_numConstraintRows) - { - b3Assert(currentRow < totalNumRows); - - b3SolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow]; - b3TypedConstraint* constraint = constraints[i]; - - b3RigidBodyData& rbA = bodies[constraint->getRigidBodyA()]; - //b3RigidBody& rbA = constraint->getRigidBodyA(); - // b3RigidBody& rbB = constraint->getRigidBodyB(); - b3RigidBodyData& rbB = bodies[constraint->getRigidBodyB()]; - - int solverBodyIdA = getOrInitSolverBody(constraint->getRigidBodyA(), bodies, inertias); - int solverBodyIdB = getOrInitSolverBody(constraint->getRigidBodyB(), bodies, inertias); - - b3SolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; - b3SolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; - - int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; - if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations) - m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - int j; - for (j = 0; j < info1.m_numConstraintRows; j++) - { - memset(¤tConstraintRow[j], 0, sizeof(b3SolverConstraint)); - currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; - currentConstraintRow[j].m_upperLimit = B3_INFINITY; - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; - currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; - currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; - } - - bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - - b3TypedConstraint::b3ConstraintInfo2 info2; - info2.fps = 1.f / infoGlobal.m_timeStep; - info2.erp = infoGlobal.m_erp; - info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; - info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxis = 0; - info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3SolverConstraint) / sizeof(b3Scalar); //check this - ///the size of b3SolverConstraint needs be a multiple of b3Scalar - b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3SolverConstraint)); - info2.m_constraintError = ¤tConstraintRow->m_rhs; - currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; - info2.m_damping = infoGlobal.m_damping; - info2.cfm = ¤tConstraintRow->m_cfm; - info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; - info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; - info2.m_numIterations = infoGlobal.m_numIterations; - constraints[i]->getInfo2(&info2, bodies); - - ///finalize the constraint setup - for (j = 0; j < info1.m_numConstraintRows; j++) - { - b3SolverConstraint& solverConstraint = currentConstraintRow[j]; - - if (solverConstraint.m_upperLimit >= constraints[i]->getBreakingImpulseThreshold()) - { - solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold(); - } - - if (solverConstraint.m_lowerLimit <= -constraints[i]->getBreakingImpulseThreshold()) - { - solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold(); - } - - solverConstraint.m_originalContactPoint = constraint; - - b3Matrix3x3& invInertiaWorldA = inertias[constraint->getRigidBodyA()].m_invInertiaWorld; - { - //b3Vector3 angularFactorA(1,1,1); - const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; - solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1; //*angularFactorA; - } - - b3Matrix3x3& invInertiaWorldB = inertias[constraint->getRigidBodyB()].m_invInertiaWorld; - { - const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; - solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2; //*constraint->getRigidBodyB().getAngularFactor(); - } - - { - //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal - //because it gets multiplied iMJlB - b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass; - b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal; - b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass; //sign of normal? - b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal; - - b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); - sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); - sum += iMJlB.dot(solverConstraint.m_contactNormal); - sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); - b3Scalar fsum = b3Fabs(sum); - b3Assert(fsum > B3_EPSILON); - solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f; - } - - ///fix rhs - ///todo: add force/torque accelerators - { - b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); - - rel_vel = vel1Dotn + vel2Dotn; - - b3Scalar restitution = 0.f; - b3Scalar positionalError = solverConstraint.m_rhs; //already filled in by getConstraintInfo2 - b3Scalar velocityError = restitution - rel_vel * info2.m_damping; - b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv; - b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv; - solverConstraint.m_rhs = penetrationImpulse + velocityImpulse; - solverConstraint.m_appliedImpulse = 0.f; - } - } - } - currentRow += m_tmpConstraintSizesPool[i].m_numConstraintRows; - } -#endif //DISABLE_JOINTS - } - - { - int i; - - for (i = 0; i < numManifolds; i++) - { - b3Contact4& manifold = manifoldPtr[i]; - convertContact(bodies, inertias, &manifold, infoGlobal); - } - } - } - - // b3ContactSolverInfo info = infoGlobal; - - int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); - int numConstraintPool = m_tmpSolverContactConstraintPool.size(); - int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); - - ///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints - m_orderNonContactConstraintPool.resizeNoInitialize(numNonContactPool); - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)) - m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool * 2); - else - m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool); - - m_orderFrictionConstraintPool.resizeNoInitialize(numFrictionPool); - { - int i; - for (i = 0; i < numNonContactPool; i++) - { - m_orderNonContactConstraintPool[i] = i; - } - for (i = 0; i < numConstraintPool; i++) - { - m_orderTmpConstraintPool[i] = i; - } - for (i = 0; i < numFrictionPool; i++) - { - m_orderFrictionConstraintPool[i] = i; - } - } - - return 0.f; -} - -b3Scalar b3PgsJacobiSolver::solveSingleIteration(int iteration, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); - int numConstraintPool = m_tmpSolverContactConstraintPool.size(); - int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); - - if (infoGlobal.m_solverMode & B3_SOLVER_RANDMIZE_ORDER) - { - if (1) // uncomment this for a bit less random ((iteration & 7) == 0) - { - for (int j = 0; j < numNonContactPool; ++j) - { - int tmp = m_orderNonContactConstraintPool[j]; - int swapi = b3RandInt2(j + 1); - m_orderNonContactConstraintPool[j] = m_orderNonContactConstraintPool[swapi]; - m_orderNonContactConstraintPool[swapi] = tmp; - } - - //contact/friction constraints are not solved more than - if (iteration < infoGlobal.m_numIterations) - { - for (int j = 0; j < numConstraintPool; ++j) - { - int tmp = m_orderTmpConstraintPool[j]; - int swapi = b3RandInt2(j + 1); - m_orderTmpConstraintPool[j] = m_orderTmpConstraintPool[swapi]; - m_orderTmpConstraintPool[swapi] = tmp; - } - - for (int j = 0; j < numFrictionPool; ++j) - { - int tmp = m_orderFrictionConstraintPool[j]; - int swapi = b3RandInt2(j + 1); - m_orderFrictionConstraintPool[j] = m_orderFrictionConstraintPool[swapi]; - m_orderFrictionConstraintPool[swapi] = tmp; - } - } - } - } - - if (infoGlobal.m_solverMode & B3_SOLVER_SIMD) - { - ///solve all joint constraints, using SIMD, if available - for (int j = 0; j < m_tmpSolverNonContactConstraintPool.size(); j++) - { - b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]]; - if (iteration < constraint.m_overrideNumSolverIterations) - resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA], m_tmpSolverBodyPool[constraint.m_solverBodyIdB], constraint); - } - - if (iteration < infoGlobal.m_numIterations) - { - ///solve all contact constraints using SIMD, if available - if (infoGlobal.m_solverMode & B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS) - { - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - int multiplier = (infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) ? 2 : 1; - - for (int c = 0; c < numPoolConstraints; c++) - { - b3Scalar totalImpulse = 0; - - { - const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[c]]; - resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - totalImpulse = solveManifold.m_appliedImpulse; - } - bool applyFriction = true; - if (applyFriction) - { - { - b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c * multiplier]]; - - if (totalImpulse > b3Scalar(0)) - { - solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse); - solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse; - - resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - } - - if (infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) - { - b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c * multiplier + 1]]; - - if (totalImpulse > b3Scalar(0)) - { - solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse); - solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse; - - resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - } - } - } - } - else //B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS - { - //solve the friction constraints after all contact constraints, don't interleave them - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - int j; - - for (j = 0; j < numPoolConstraints; j++) - { - const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]]; - resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - - if (!m_usePgs) - averageVelocities(); - - ///solve all friction constraints, using SIMD, if available - - int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size(); - for (j = 0; j < numFrictionPoolConstraints; j++) - { - b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]]; - b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse; - - if (totalImpulse > b3Scalar(0)) - { - solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse); - solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse; - - resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - } - - int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size(); - for (j = 0; j < numRollingFrictionPoolConstraints; j++) - { - b3SolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j]; - b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse; - if (totalImpulse > b3Scalar(0)) - { - b3Scalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction * totalImpulse; - if (rollingFrictionMagnitude > rollingFrictionConstraint.m_friction) - rollingFrictionMagnitude = rollingFrictionConstraint.m_friction; - - rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude; - rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude; - - resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA], m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB], rollingFrictionConstraint); - } - } - } - } - } - else - { - //non-SIMD version - ///solve all joint constraints - for (int j = 0; j < m_tmpSolverNonContactConstraintPool.size(); j++) - { - b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]]; - if (iteration < constraint.m_overrideNumSolverIterations) - resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[constraint.m_solverBodyIdA], m_tmpSolverBodyPool[constraint.m_solverBodyIdB], constraint); - } - - if (iteration < infoGlobal.m_numIterations) - { - ///solve all contact constraints - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - for (int j = 0; j < numPoolConstraints; j++) - { - const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]]; - resolveSingleConstraintRowLowerLimit(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - ///solve all friction constraints - int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size(); - for (int j = 0; j < numFrictionPoolConstraints; j++) - { - b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]]; - b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse; - - if (totalImpulse > b3Scalar(0)) - { - solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse); - solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse; - - resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - } - - int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size(); - for (int j = 0; j < numRollingFrictionPoolConstraints; j++) - { - b3SolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j]; - b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse; - if (totalImpulse > b3Scalar(0)) - { - b3Scalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction * totalImpulse; - if (rollingFrictionMagnitude > rollingFrictionConstraint.m_friction) - rollingFrictionMagnitude = rollingFrictionConstraint.m_friction; - - rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude; - rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude; - - resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA], m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB], rollingFrictionConstraint); - } - } - } - } - return 0.f; -} - -void b3PgsJacobiSolver::solveGroupCacheFriendlySplitImpulseIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - int iteration; - if (infoGlobal.m_splitImpulse) - { - if (infoGlobal.m_solverMode & B3_SOLVER_SIMD) - { - for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++) - { - { - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - int j; - for (j = 0; j < numPoolConstraints; j++) - { - const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]]; - - resolveSplitPenetrationSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - } - } - } - else - { - for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++) - { - { - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - int j; - for (j = 0; j < numPoolConstraints; j++) - { - const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]]; - - resolveSplitPenetrationImpulseCacheFriendly(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - } - } - } - } - } -} - -b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroupCacheFriendlyIterations"); - - { - ///this is a special step to resolve penetrations (just for contacts) - solveGroupCacheFriendlySplitImpulseIterations(constraints, numConstraints, infoGlobal); - - int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations ? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations; - - for (int iteration = 0; iteration < maxIterations; iteration++) - //for ( int iteration = maxIterations-1 ; iteration >= 0;iteration--) - { - solveSingleIteration(iteration, constraints, numConstraints, infoGlobal); - - if (!m_usePgs) - { - averageVelocities(); - } - } - } - return 0.f; -} - -void b3PgsJacobiSolver::averageVelocities() -{ - B3_PROFILE("averaging"); - //average the velocities - int numBodies = m_bodyCount.size(); - - m_deltaLinearVelocities.resize(0); - m_deltaLinearVelocities.resize(numBodies, b3MakeVector3(0, 0, 0)); - m_deltaAngularVelocities.resize(0); - m_deltaAngularVelocities.resize(numBodies, b3MakeVector3(0, 0, 0)); - - for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) - { - if (!m_tmpSolverBodyPool[i].m_invMass.isZero()) - { - int orgBodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; - m_deltaLinearVelocities[orgBodyIndex] += m_tmpSolverBodyPool[i].getDeltaLinearVelocity(); - m_deltaAngularVelocities[orgBodyIndex] += m_tmpSolverBodyPool[i].getDeltaAngularVelocity(); - } - } - - for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) - { - int orgBodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; - - if (!m_tmpSolverBodyPool[i].m_invMass.isZero()) - { - b3Assert(m_bodyCount[orgBodyIndex] == m_bodyCountCheck[orgBodyIndex]); - - b3Scalar factor = 1.f / b3Scalar(m_bodyCount[orgBodyIndex]); - - m_tmpSolverBodyPool[i].m_deltaLinearVelocity = m_deltaLinearVelocities[orgBodyIndex] * factor; - m_tmpSolverBodyPool[i].m_deltaAngularVelocity = m_deltaAngularVelocities[orgBodyIndex] * factor; - } - } -} - -b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlyFinish(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroupCacheFriendlyFinish"); - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - int i, j; - - if (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING) - { - for (j = 0; j < numPoolConstraints; j++) - { - const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j]; - b3ContactPoint* pt = (b3ContactPoint*)solveManifold.m_originalContactPoint; - b3Assert(pt); - pt->m_appliedImpulse = solveManifold.m_appliedImpulse; - // float f = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse; - // printf("pt->m_appliedImpulseLateral1 = %f\n", f); - pt->m_appliedImpulseLateral1 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse; - //printf("pt->m_appliedImpulseLateral1 = %f\n", pt->m_appliedImpulseLateral1); - if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)) - { - pt->m_appliedImpulseLateral2 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex + 1].m_appliedImpulse; - } - //do a callback here? - } - } - - numPoolConstraints = m_tmpSolverNonContactConstraintPool.size(); - for (j = 0; j < numPoolConstraints; j++) - { - const b3SolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j]; - b3TypedConstraint* constr = (b3TypedConstraint*)solverConstr.m_originalContactPoint; - b3JointFeedback* fb = constr->getJointFeedback(); - if (fb) - { - b3SolverBody* bodyA = &m_tmpSolverBodyPool[solverConstr.m_solverBodyIdA]; - b3SolverBody* bodyB = &m_tmpSolverBodyPool[solverConstr.m_solverBodyIdB]; - - fb->m_appliedForceBodyA += solverConstr.m_contactNormal * solverConstr.m_appliedImpulse * bodyA->m_linearFactor / infoGlobal.m_timeStep; - fb->m_appliedForceBodyB += -solverConstr.m_contactNormal * solverConstr.m_appliedImpulse * bodyB->m_linearFactor / infoGlobal.m_timeStep; - fb->m_appliedTorqueBodyA += solverConstr.m_relpos1CrossNormal * bodyA->m_angularFactor * solverConstr.m_appliedImpulse / infoGlobal.m_timeStep; - fb->m_appliedTorqueBodyB += -solverConstr.m_relpos1CrossNormal * bodyB->m_angularFactor * solverConstr.m_appliedImpulse / infoGlobal.m_timeStep; - } - - constr->internalSetAppliedImpulse(solverConstr.m_appliedImpulse); - if (b3Fabs(solverConstr.m_appliedImpulse) >= constr->getBreakingImpulseThreshold()) - { - constr->setEnabled(false); - } - } - - { - B3_PROFILE("write back velocities and transforms"); - for (i = 0; i < m_tmpSolverBodyPool.size(); i++) - { - int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; - //b3Assert(i==bodyIndex); - - b3RigidBodyData* body = &bodies[bodyIndex]; - if (body->m_invMass) - { - if (infoGlobal.m_splitImpulse) - m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp); - else - m_tmpSolverBodyPool[i].writebackVelocity(); - - if (m_usePgs) - { - body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity; - body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity; - } - else - { - b3Scalar factor = 1.f / b3Scalar(m_bodyCount[bodyIndex]); - - b3Vector3 deltaLinVel = m_deltaLinearVelocities[bodyIndex] * factor; - b3Vector3 deltaAngVel = m_deltaAngularVelocities[bodyIndex] * factor; - //printf("body %d\n",bodyIndex); - //printf("deltaLinVel = %f,%f,%f\n",deltaLinVel.getX(),deltaLinVel.getY(),deltaLinVel.getZ()); - //printf("deltaAngVel = %f,%f,%f\n",deltaAngVel.getX(),deltaAngVel.getY(),deltaAngVel.getZ()); - - body->m_linVel += deltaLinVel; - body->m_angVel += deltaAngVel; - } - - if (infoGlobal.m_splitImpulse) - { - body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin(); - b3Quaternion orn; - orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation(); - body->m_quat = orn; - } - } - } - } - - m_tmpSolverContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0); - - m_tmpSolverBodyPool.resizeNoInitialize(0); - return 0.f; -} - -void b3PgsJacobiSolver::reset() -{ - m_btSeed2 = 0; -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h deleted file mode 100644 index 5b616541d95..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef B3_PGS_JACOBI_SOLVER -#define B3_PGS_JACOBI_SOLVER - -struct b3Contact4; -struct b3ContactPoint; - -class b3Dispatcher; - -#include "b3TypedConstraint.h" -#include "b3ContactSolverInfo.h" -#include "b3SolverBody.h" -#include "b3SolverConstraint.h" - -struct b3RigidBodyData; -struct b3InertiaData; - -class b3PgsJacobiSolver -{ -protected: - b3AlignedObjectArray m_tmpSolverBodyPool; - b3ConstraintArray m_tmpSolverContactConstraintPool; - b3ConstraintArray m_tmpSolverNonContactConstraintPool; - b3ConstraintArray m_tmpSolverContactFrictionConstraintPool; - b3ConstraintArray m_tmpSolverContactRollingFrictionConstraintPool; - - b3AlignedObjectArray m_orderTmpConstraintPool; - b3AlignedObjectArray m_orderNonContactConstraintPool; - b3AlignedObjectArray m_orderFrictionConstraintPool; - b3AlignedObjectArray m_tmpConstraintSizesPool; - - b3AlignedObjectArray m_bodyCount; - b3AlignedObjectArray m_bodyCountCheck; - - b3AlignedObjectArray m_deltaLinearVelocities; - b3AlignedObjectArray m_deltaAngularVelocities; - - bool m_usePgs; - void averageVelocities(); - - int m_maxOverrideNumSolverIterations; - - int m_numSplitImpulseRecoveries; - - b3Scalar getContactProcessingThreshold(b3Contact4* contact) - { - return 0.02f; - } - void setupFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, - b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, - b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, - b3Scalar desiredVelocity = 0., b3Scalar cfmSlip = 0.); - - void setupRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, - b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, - b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, - b3Scalar desiredVelocity = 0., b3Scalar cfmSlip = 0.); - - b3SolverConstraint& addFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity = 0., b3Scalar cfmSlip = 0.); - b3SolverConstraint& addRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity = 0, b3Scalar cfmSlip = 0.f); - - void setupContactConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, - b3SolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, b3ContactPoint& cp, - const b3ContactSolverInfo& infoGlobal, b3Vector3& vel, b3Scalar& rel_vel, b3Scalar& relaxation, - b3Vector3& rel_pos1, b3Vector3& rel_pos2); - - void setFrictionConstraintImpulse(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, - b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal); - - ///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction - unsigned long m_btSeed2; - - b3Scalar restitutionCurve(b3Scalar rel_vel, b3Scalar restitution); - - void convertContact(b3RigidBodyData* bodies, b3InertiaData* inertias, b3Contact4* manifold, const b3ContactSolverInfo& infoGlobal); - - void resolveSplitPenetrationSIMD( - b3SolverBody& bodyA, b3SolverBody& bodyB, - const b3SolverConstraint& contactConstraint); - - void resolveSplitPenetrationImpulseCacheFriendly( - b3SolverBody& bodyA, b3SolverBody& bodyB, - const b3SolverConstraint& contactConstraint); - - //internal method - int getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies, b3InertiaData* inertias); - void initSolverBody(int bodyIndex, b3SolverBody* solverBody, b3RigidBodyData* collisionObject); - - void resolveSingleConstraintRowGeneric(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint); - - void resolveSingleConstraintRowGenericSIMD(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint); - - void resolveSingleConstraintRowLowerLimit(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint); - - void resolveSingleConstraintRowLowerLimitSIMD(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint); - -protected: - virtual b3Scalar solveGroupCacheFriendlySetup(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - - virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - virtual void solveGroupCacheFriendlySplitImpulseIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - b3Scalar solveSingleIteration(int iteration, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - - virtual b3Scalar solveGroupCacheFriendlyFinish(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, const b3ContactSolverInfo& infoGlobal); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3PgsJacobiSolver(bool usePgs); - virtual ~b3PgsJacobiSolver(); - - // void solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts); - void solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts, int numConstraints, b3TypedConstraint** constraints); - - b3Scalar solveGroup(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - - ///clear internal cached data and reset random seed - virtual void reset(); - - unsigned long b3Rand2(); - - int b3RandInt2(int n); - - void setRandSeed(unsigned long seed) - { - m_btSeed2 = seed; - } - unsigned long getRandSeed() const - { - return m_btSeed2; - } -}; - -#endif //B3_PGS_JACOBI_SOLVER diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp deleted file mode 100644 index f9b103e34a1..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp +++ /dev/null @@ -1,190 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3Point2PointConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include - -b3Point2PointConstraint::b3Point2PointConstraint(int rbA, int rbB, const b3Vector3& pivotInA, const b3Vector3& pivotInB) - : b3TypedConstraint(B3_POINT2POINT_CONSTRAINT_TYPE, rbA, rbB), m_pivotInA(pivotInA), m_pivotInB(pivotInB), m_flags(0) -{ -} - -/* -b3Point2PointConstraint::b3Point2PointConstraint(int rbA,const b3Vector3& pivotInA) -:b3TypedConstraint(B3_POINT2POINT_CONSTRAINT_TYPE,rbA),m_pivotInA(pivotInA),m_pivotInB(rbA.getCenterOfMassTransform()(pivotInA)), -m_flags(0), -m_useSolveConstraintObsolete(false) -{ - -} -*/ - -void b3Point2PointConstraint::getInfo1(b3ConstraintInfo1* info, const b3RigidBodyData* bodies) -{ - getInfo1NonVirtual(info, bodies); -} - -void b3Point2PointConstraint::getInfo1NonVirtual(b3ConstraintInfo1* info, const b3RigidBodyData* bodies) -{ - info->m_numConstraintRows = 3; - info->nub = 3; -} - -void b3Point2PointConstraint::getInfo2(b3ConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - b3Transform trA; - trA.setIdentity(); - trA.setOrigin(bodies[m_rbA].m_pos); - trA.setRotation(bodies[m_rbA].m_quat); - - b3Transform trB; - trB.setIdentity(); - trB.setOrigin(bodies[m_rbB].m_pos); - trB.setRotation(bodies[m_rbB].m_quat); - - getInfo2NonVirtual(info, trA, trB); -} - -void b3Point2PointConstraint::getInfo2NonVirtual(b3ConstraintInfo2* info, const b3Transform& body0_trans, const b3Transform& body1_trans) -{ - //retrieve matrices - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip + 1] = 1; - info->m_J1linearAxis[2 * info->rowskip + 2] = 1; - - b3Vector3 a1 = body0_trans.getBasis() * getPivotInA(); - //b3Vector3 a1a = b3QuatRotate(body0_trans.getRotation(),getPivotInA()); - - { - b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis + 2 * info->rowskip); - b3Vector3 a1neg = -a1; - a1neg.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip + 1] = -1; - info->m_J2linearAxis[2 * info->rowskip + 2] = -1; - } - - b3Vector3 a2 = body1_trans.getBasis() * getPivotInB(); - - { - // b3Vector3 a2n = -a2; - b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis + 2 * info->rowskip); - a2.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - // set right hand side - b3Scalar currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp; - b3Scalar k = info->fps * currERP; - int j; - for (j = 0; j < 3; j++) - { - info->m_constraintError[j * info->rowskip] = k * (a2[j] + body1_trans.getOrigin()[j] - a1[j] - body0_trans.getOrigin()[j]); - //printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]); - } - if (m_flags & B3_P2P_FLAGS_CFM) - { - for (j = 0; j < 3; j++) - { - info->cfm[j * info->rowskip] = m_cfm; - } - } - - b3Scalar impulseClamp = m_setting.m_impulseClamp; // - for (j = 0; j < 3; j++) - { - if (m_setting.m_impulseClamp > 0) - { - info->m_lowerLimit[j * info->rowskip] = -impulseClamp; - info->m_upperLimit[j * info->rowskip] = impulseClamp; - } - } - info->m_damping = m_setting.m_damping; -} - -void b3Point2PointConstraint::updateRHS(b3Scalar timeStep) -{ - (void)timeStep; -} - -///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). -///If no axis is provided, it uses the default axis for this constraint. -void b3Point2PointConstraint::setParam(int num, b3Scalar value, int axis) -{ - if (axis != -1) - { - b3AssertConstrParams(0); - } - else - { - switch (num) - { - case B3_CONSTRAINT_ERP: - case B3_CONSTRAINT_STOP_ERP: - m_erp = value; - m_flags |= B3_P2P_FLAGS_ERP; - break; - case B3_CONSTRAINT_CFM: - case B3_CONSTRAINT_STOP_CFM: - m_cfm = value; - m_flags |= B3_P2P_FLAGS_CFM; - break; - default: - b3AssertConstrParams(0); - } - } -} - -///return the local value of parameter -b3Scalar b3Point2PointConstraint::getParam(int num, int axis) const -{ - b3Scalar retVal(B3_INFINITY); - if (axis != -1) - { - b3AssertConstrParams(0); - } - else - { - switch (num) - { - case B3_CONSTRAINT_ERP: - case B3_CONSTRAINT_STOP_ERP: - b3AssertConstrParams(m_flags & B3_P2P_FLAGS_ERP); - retVal = m_erp; - break; - case B3_CONSTRAINT_CFM: - case B3_CONSTRAINT_STOP_CFM: - b3AssertConstrParams(m_flags & B3_P2P_FLAGS_CFM); - retVal = m_cfm; - break; - default: - b3AssertConstrParams(0); - } - } - return retVal; -} diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h deleted file mode 100644 index a6c21d20ae6..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h +++ /dev/null @@ -1,153 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_POINT2POINTCONSTRAINT_H -#define B3_POINT2POINTCONSTRAINT_H - -#include "Bullet3Common/b3Vector3.h" -//#include "b3JacobianEntry.h" -#include "b3TypedConstraint.h" - -class b3RigidBody; - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3Point2PointConstraintData b3Point2PointConstraintDoubleData -#define b3Point2PointConstraintDataName "b3Point2PointConstraintDoubleData" -#else -#define b3Point2PointConstraintData b3Point2PointConstraintFloatData -#define b3Point2PointConstraintDataName "b3Point2PointConstraintFloatData" -#endif //B3_USE_DOUBLE_PRECISION - -struct b3ConstraintSetting -{ - b3ConstraintSetting() : m_tau(b3Scalar(0.3)), - m_damping(b3Scalar(1.)), - m_impulseClamp(b3Scalar(0.)) - { - } - b3Scalar m_tau; - b3Scalar m_damping; - b3Scalar m_impulseClamp; -}; - -enum b3Point2PointFlags -{ - B3_P2P_FLAGS_ERP = 1, - B3_P2P_FLAGS_CFM = 2 -}; - -/// point to point constraint between two rigidbodies each with a pivotpoint that descibes the 'ballsocket' location in local space -B3_ATTRIBUTE_ALIGNED16(class) -b3Point2PointConstraint : public b3TypedConstraint -{ -#ifdef IN_PARALLELL_SOLVER -public: -#endif - - b3Vector3 m_pivotInA; - b3Vector3 m_pivotInB; - - int m_flags; - b3Scalar m_erp; - b3Scalar m_cfm; - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3ConstraintSetting m_setting; - - b3Point2PointConstraint(int rbA, int rbB, const b3Vector3& pivotInA, const b3Vector3& pivotInB); - - //b3Point2PointConstraint(int rbA,const b3Vector3& pivotInA); - - virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies); - - void getInfo1NonVirtual(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies); - - virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies); - - void getInfo2NonVirtual(b3ConstraintInfo2 * info, const b3Transform& body0_trans, const b3Transform& body1_trans); - - void updateRHS(b3Scalar timeStep); - - void setPivotA(const b3Vector3& pivotA) - { - m_pivotInA = pivotA; - } - - void setPivotB(const b3Vector3& pivotB) - { - m_pivotInB = pivotB; - } - - const b3Vector3& getPivotInA() const - { - return m_pivotInA; - } - - const b3Vector3& getPivotInB() const - { - return m_pivotInB; - } - - ///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). - ///If no axis is provided, it uses the default axis for this constraint. - virtual void setParam(int num, b3Scalar value, int axis = -1); - ///return the local value of parameter - virtual b3Scalar getParam(int num, int axis = -1) const; - - // virtual int calculateSerializeBufferSize() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - // virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; -}; - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3Point2PointConstraintFloatData -{ - b3TypedConstraintData m_typeConstraintData; - b3Vector3FloatData m_pivotInA; - b3Vector3FloatData m_pivotInB; -}; - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3Point2PointConstraintDoubleData -{ - b3TypedConstraintData m_typeConstraintData; - b3Vector3DoubleData m_pivotInA; - b3Vector3DoubleData m_pivotInB; -}; - -/* -B3_FORCE_INLINE int b3Point2PointConstraint::calculateSerializeBufferSize() const -{ - return sizeof(b3Point2PointConstraintData); - -} - - ///fills the dataBuffer and returns the struct name (and 0 on failure) -B3_FORCE_INLINE const char* b3Point2PointConstraint::serialize(void* dataBuffer, b3Serializer* serializer) const -{ - b3Point2PointConstraintData* p2pData = (b3Point2PointConstraintData*)dataBuffer; - - b3TypedConstraint::serialize(&p2pData->m_typeConstraintData,serializer); - m_pivotInA.serialize(p2pData->m_pivotInA); - m_pivotInB.serialize(p2pData->m_pivotInB); - - return b3Point2PointConstraintDataName; -} -*/ - -#endif //B3_POINT2POINTCONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h deleted file mode 100644 index c9056ec4a7a..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h +++ /dev/null @@ -1,281 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SOLVER_BODY_H -#define B3_SOLVER_BODY_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3TransformUtil.h" - -///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision -#ifdef B3_USE_SSE -#define USE_SIMD 1 -#endif // - -#ifdef USE_SIMD - -struct b3SimdScalar -{ - B3_FORCE_INLINE b3SimdScalar() - { - } - - B3_FORCE_INLINE b3SimdScalar(float fl) - : m_vec128(_mm_set1_ps(fl)) - { - } - - B3_FORCE_INLINE b3SimdScalar(__m128 v128) - : m_vec128(v128) - { - } - union { - __m128 m_vec128; - float m_floats[4]; - float x, y, z, w; - int m_ints[4]; - b3Scalar m_unusedPadding; - }; - B3_FORCE_INLINE __m128 get128() - { - return m_vec128; - } - - B3_FORCE_INLINE const __m128 get128() const - { - return m_vec128; - } - - B3_FORCE_INLINE void set128(__m128 v128) - { - m_vec128 = v128; - } - - B3_FORCE_INLINE operator __m128() - { - return m_vec128; - } - B3_FORCE_INLINE operator const __m128() const - { - return m_vec128; - } - - B3_FORCE_INLINE operator float() const - { - return m_floats[0]; - } -}; - -///@brief Return the elementwise product of two b3SimdScalar -B3_FORCE_INLINE b3SimdScalar -operator*(const b3SimdScalar& v1, const b3SimdScalar& v2) -{ - return b3SimdScalar(_mm_mul_ps(v1.get128(), v2.get128())); -} - -///@brief Return the elementwise product of two b3SimdScalar -B3_FORCE_INLINE b3SimdScalar -operator+(const b3SimdScalar& v1, const b3SimdScalar& v2) -{ - return b3SimdScalar(_mm_add_ps(v1.get128(), v2.get128())); -} - -#else -#define b3SimdScalar b3Scalar -#endif - -///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance. -B3_ATTRIBUTE_ALIGNED16(struct) -b3SolverBody -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - b3Transform m_worldTransform; - b3Vector3 m_deltaLinearVelocity; - b3Vector3 m_deltaAngularVelocity; - b3Vector3 m_angularFactor; - b3Vector3 m_linearFactor; - b3Vector3 m_invMass; - b3Vector3 m_pushVelocity; - b3Vector3 m_turnVelocity; - b3Vector3 m_linearVelocity; - b3Vector3 m_angularVelocity; - - union { - void* m_originalBody; - int m_originalBodyIndex; - }; - - int padding[3]; - - void setWorldTransform(const b3Transform& worldTransform) - { - m_worldTransform = worldTransform; - } - - const b3Transform& getWorldTransform() const - { - return m_worldTransform; - } - - B3_FORCE_INLINE void getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const - { - if (m_originalBody) - velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos); - else - velocity.setValue(0, 0, 0); - } - - B3_FORCE_INLINE void getAngularVelocity(b3Vector3 & angVel) const - { - if (m_originalBody) - angVel = m_angularVelocity + m_deltaAngularVelocity; - else - angVel.setValue(0, 0, 0); - } - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_pushVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_turnVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - const b3Vector3& getDeltaLinearVelocity() const - { - return m_deltaLinearVelocity; - } - - const b3Vector3& getDeltaAngularVelocity() const - { - return m_deltaAngularVelocity; - } - - const b3Vector3& getPushVelocity() const - { - return m_pushVelocity; - } - - const b3Vector3& getTurnVelocity() const - { - return m_turnVelocity; - } - - //////////////////////////////////////////////// - ///some internal methods, don't use them - - b3Vector3& internalGetDeltaLinearVelocity() - { - return m_deltaLinearVelocity; - } - - b3Vector3& internalGetDeltaAngularVelocity() - { - return m_deltaAngularVelocity; - } - - const b3Vector3& internalGetAngularFactor() const - { - return m_angularFactor; - } - - const b3Vector3& internalGetInvMass() const - { - return m_invMass; - } - - void internalSetInvMass(const b3Vector3& invMass) - { - m_invMass = invMass; - } - - b3Vector3& internalGetPushVelocity() - { - return m_pushVelocity; - } - - b3Vector3& internalGetTurnVelocity() - { - return m_turnVelocity; - } - - B3_FORCE_INLINE void internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const - { - velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos); - } - - B3_FORCE_INLINE void internalGetAngularVelocity(b3Vector3 & angVel) const - { - angVel = m_angularVelocity + m_deltaAngularVelocity; - } - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude) - { - //if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - void writebackVelocity() - { - //if (m_originalBody>=0) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //m_originalBody->setCompanionId(-1); - } - } - - void writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp) - { - (void)timeStep; - if (m_originalBody) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //correct the position/orientation based on push/turn recovery - b3Transform newTransform; - if (m_pushVelocity[0] != 0.f || m_pushVelocity[1] != 0 || m_pushVelocity[2] != 0 || m_turnVelocity[0] != 0.f || m_turnVelocity[1] != 0 || m_turnVelocity[2] != 0) - { - // b3Quaternion orn = m_worldTransform.getRotation(); - b3TransformUtil::integrateTransform(m_worldTransform, m_pushVelocity, m_turnVelocity * splitImpulseTurnErp, timeStep, newTransform); - m_worldTransform = newTransform; - } - //m_worldTransform.setRotation(orn); - //m_originalBody->setCompanionId(-1); - } - } -}; - -#endif //B3_SOLVER_BODY_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h deleted file mode 100644 index 1e9533fb780..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SOLVER_CONSTRAINT_H -#define B3_SOLVER_CONSTRAINT_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" -//#include "b3JacobianEntry.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -//#define NO_FRICTION_TANGENTIALS 1 -#include "b3SolverBody.h" - -///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints. -B3_ATTRIBUTE_ALIGNED16(struct) -b3SolverConstraint -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3Vector3 m_relpos1CrossNormal; - b3Vector3 m_contactNormal; - - b3Vector3 m_relpos2CrossNormal; - //b3Vector3 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - b3Vector3 m_angularComponentA; - b3Vector3 m_angularComponentB; - - mutable b3SimdScalar m_appliedPushImpulse; - mutable b3SimdScalar m_appliedImpulse; - int m_padding1; - int m_padding2; - b3Scalar m_friction; - b3Scalar m_jacDiagABInv; - b3Scalar m_rhs; - b3Scalar m_cfm; - - b3Scalar m_lowerLimit; - b3Scalar m_upperLimit; - b3Scalar m_rhsPenetration; - union { - void* m_originalContactPoint; - b3Scalar m_unusedPadding4; - }; - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - - enum b3SolverConstraintType - { - B3_SOLVER_CONTACT_1D = 0, - B3_SOLVER_FRICTION_1D - }; -}; - -typedef b3AlignedObjectArray b3ConstraintArray; - -#endif //B3_SOLVER_CONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp deleted file mode 100644 index eab17b9943a..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3TypedConstraint.h" -//#include "Bullet3Common/b3Serializer.h" - -#define B3_DEFAULT_DEBUGDRAW_SIZE b3Scalar(0.3f) - -b3TypedConstraint::b3TypedConstraint(b3TypedConstraintType type, int rbA, int rbB) - : b3TypedObject(type), - m_userConstraintType(-1), - m_userConstraintPtr((void*)-1), - m_breakingImpulseThreshold(B3_INFINITY), - m_isEnabled(true), - m_needsFeedback(false), - m_overrideNumSolverIterations(-1), - m_rbA(rbA), - m_rbB(rbB), - m_appliedImpulse(b3Scalar(0.)), - m_dbgDrawSize(B3_DEFAULT_DEBUGDRAW_SIZE), - m_jointFeedback(0) -{ -} - -b3Scalar b3TypedConstraint::getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact) -{ - if (lowLim > uppLim) - { - return b3Scalar(1.0f); - } - else if (lowLim == uppLim) - { - return b3Scalar(0.0f); - } - b3Scalar lim_fact = b3Scalar(1.0f); - b3Scalar delta_max = vel / timeFact; - if (delta_max < b3Scalar(0.0f)) - { - if ((pos >= lowLim) && (pos < (lowLim - delta_max))) - { - lim_fact = (lowLim - pos) / delta_max; - } - else if (pos < lowLim) - { - lim_fact = b3Scalar(0.0f); - } - else - { - lim_fact = b3Scalar(1.0f); - } - } - else if (delta_max > b3Scalar(0.0f)) - { - if ((pos <= uppLim) && (pos > (uppLim - delta_max))) - { - lim_fact = (uppLim - pos) / delta_max; - } - else if (pos > uppLim) - { - lim_fact = b3Scalar(0.0f); - } - else - { - lim_fact = b3Scalar(1.0f); - } - } - else - { - lim_fact = b3Scalar(0.0f); - } - return lim_fact; -} - -void b3AngularLimit::set(b3Scalar low, b3Scalar high, b3Scalar _softness, b3Scalar _biasFactor, b3Scalar _relaxationFactor) -{ - m_halfRange = (high - low) / 2.0f; - m_center = b3NormalizeAngle(low + m_halfRange); - m_softness = _softness; - m_biasFactor = _biasFactor; - m_relaxationFactor = _relaxationFactor; -} - -void b3AngularLimit::test(const b3Scalar angle) -{ - m_correction = 0.0f; - m_sign = 0.0f; - m_solveLimit = false; - - if (m_halfRange >= 0.0f) - { - b3Scalar deviation = b3NormalizeAngle(angle - m_center); - if (deviation < -m_halfRange) - { - m_solveLimit = true; - m_correction = -(deviation + m_halfRange); - m_sign = +1.0f; - } - else if (deviation > m_halfRange) - { - m_solveLimit = true; - m_correction = m_halfRange - deviation; - m_sign = -1.0f; - } - } -} - -b3Scalar b3AngularLimit::getError() const -{ - return m_correction * m_sign; -} - -void b3AngularLimit::fit(b3Scalar& angle) const -{ - if (m_halfRange > 0.0f) - { - b3Scalar relativeAngle = b3NormalizeAngle(angle - m_center); - if (!b3Equal(relativeAngle, m_halfRange)) - { - if (relativeAngle > 0.0f) - { - angle = getHigh(); - } - else - { - angle = getLow(); - } - } - } -} - -b3Scalar b3AngularLimit::getLow() const -{ - return b3NormalizeAngle(m_center - m_halfRange); -} - -b3Scalar b3AngularLimit::getHigh() const -{ - return b3NormalizeAngle(m_center + m_halfRange); -} diff --git a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h b/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h deleted file mode 100644 index 82075db2aeb..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h +++ /dev/null @@ -1,469 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2010 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TYPED_CONSTRAINT_H -#define B3_TYPED_CONSTRAINT_H - -#include "Bullet3Common/b3Scalar.h" -#include "b3SolverConstraint.h" - -class b3Serializer; - -//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility -enum b3TypedConstraintType -{ - B3_POINT2POINT_CONSTRAINT_TYPE = 3, - B3_HINGE_CONSTRAINT_TYPE, - B3_CONETWIST_CONSTRAINT_TYPE, - B3_D6_CONSTRAINT_TYPE, - B3_SLIDER_CONSTRAINT_TYPE, - B3_CONTACT_CONSTRAINT_TYPE, - B3_D6_SPRING_CONSTRAINT_TYPE, - B3_GEAR_CONSTRAINT_TYPE, - B3_FIXED_CONSTRAINT_TYPE, - B3_MAX_CONSTRAINT_TYPE -}; - -enum b3ConstraintParams -{ - B3_CONSTRAINT_ERP = 1, - B3_CONSTRAINT_STOP_ERP, - B3_CONSTRAINT_CFM, - B3_CONSTRAINT_STOP_CFM -}; - -#if 1 -#define b3AssertConstrParams(_par) b3Assert(_par) -#else -#define b3AssertConstrParams(_par) -#endif - -B3_ATTRIBUTE_ALIGNED16(struct) -b3JointFeedback -{ - b3Vector3 m_appliedForceBodyA; - b3Vector3 m_appliedTorqueBodyA; - b3Vector3 m_appliedForceBodyB; - b3Vector3 m_appliedTorqueBodyB; -}; - -struct b3RigidBodyData; - -///TypedConstraint is the baseclass for Bullet constraints and vehicles -B3_ATTRIBUTE_ALIGNED16(class) -b3TypedConstraint : public b3TypedObject -{ - int m_userConstraintType; - - union { - int m_userConstraintId; - void* m_userConstraintPtr; - }; - - b3Scalar m_breakingImpulseThreshold; - bool m_isEnabled; - bool m_needsFeedback; - int m_overrideNumSolverIterations; - - b3TypedConstraint& operator=(b3TypedConstraint& other) - { - b3Assert(0); - (void)other; - return *this; - } - -protected: - int m_rbA; - int m_rbB; - b3Scalar m_appliedImpulse; - b3Scalar m_dbgDrawSize; - b3JointFeedback* m_jointFeedback; - - ///internal method used by the constraint solver, don't use them directly - b3Scalar getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - virtual ~b3TypedConstraint(){}; - b3TypedConstraint(b3TypedConstraintType type, int bodyA, int bodyB); - - struct b3ConstraintInfo1 - { - int m_numConstraintRows, nub; - }; - - struct b3ConstraintInfo2 - { - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). - b3Scalar fps, erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - b3Scalar *m_J1linearAxis, *m_J1angularAxis, *m_J2linearAxis, *m_J2angularAxis; - - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - b3Scalar *m_constraintError, *cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). - b3Scalar *m_lowerLimit, *m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - int* findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - b3Scalar m_damping; - }; - - int getOverrideNumSolverIterations() const - { - return m_overrideNumSolverIterations; - } - - ///override the number of constraint solver iterations used to solve this constraint - ///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations - void setOverrideNumSolverIterations(int overideNumIterations) - { - m_overrideNumSolverIterations = overideNumIterations; - } - - ///internal method used by the constraint solver, don't use them directly - virtual void setupSolverConstraint(b3ConstraintArray & ca, int solverBodyA, int solverBodyB, b3Scalar timeStep) - { - (void)ca; - (void)solverBodyA; - (void)solverBodyB; - (void)timeStep; - } - - ///internal method used by the constraint solver, don't use them directly - virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies) = 0; - - ///internal method used by the constraint solver, don't use them directly - virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies) = 0; - - ///internal method used by the constraint solver, don't use them directly - void internalSetAppliedImpulse(b3Scalar appliedImpulse) - { - m_appliedImpulse = appliedImpulse; - } - ///internal method used by the constraint solver, don't use them directly - b3Scalar internalGetAppliedImpulse() - { - return m_appliedImpulse; - } - - b3Scalar getBreakingImpulseThreshold() const - { - return m_breakingImpulseThreshold; - } - - void setBreakingImpulseThreshold(b3Scalar threshold) - { - m_breakingImpulseThreshold = threshold; - } - - bool isEnabled() const - { - return m_isEnabled; - } - - void setEnabled(bool enabled) - { - m_isEnabled = enabled; - } - - ///internal method used by the constraint solver, don't use them directly - virtual void solveConstraintObsolete(b3SolverBody& /*bodyA*/, b3SolverBody& /*bodyB*/, b3Scalar /*timeStep*/){}; - - int getRigidBodyA() const - { - return m_rbA; - } - int getRigidBodyB() const - { - return m_rbB; - } - - int getRigidBodyA() - { - return m_rbA; - } - int getRigidBodyB() - { - return m_rbB; - } - - int getUserConstraintType() const - { - return m_userConstraintType; - } - - void setUserConstraintType(int userConstraintType) - { - m_userConstraintType = userConstraintType; - }; - - void setUserConstraintId(int uid) - { - m_userConstraintId = uid; - } - - int getUserConstraintId() const - { - return m_userConstraintId; - } - - void setUserConstraintPtr(void* ptr) - { - m_userConstraintPtr = ptr; - } - - void* getUserConstraintPtr() - { - return m_userConstraintPtr; - } - - void setJointFeedback(b3JointFeedback * jointFeedback) - { - m_jointFeedback = jointFeedback; - } - - const b3JointFeedback* getJointFeedback() const - { - return m_jointFeedback; - } - - b3JointFeedback* getJointFeedback() - { - return m_jointFeedback; - } - - int getUid() const - { - return m_userConstraintId; - } - - bool needsFeedback() const - { - return m_needsFeedback; - } - - ///enableFeedback will allow to read the applied linear and angular impulse - ///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information - void enableFeedback(bool needsFeedback) - { - m_needsFeedback = needsFeedback; - } - - ///getAppliedImpulse is an estimated total applied impulse. - ///This feedback could be used to determine breaking constraints or playing sounds. - b3Scalar getAppliedImpulse() const - { - b3Assert(m_needsFeedback); - return m_appliedImpulse; - } - - b3TypedConstraintType getConstraintType() const - { - return b3TypedConstraintType(m_objectType); - } - - void setDbgDrawSize(b3Scalar dbgDrawSize) - { - m_dbgDrawSize = dbgDrawSize; - } - b3Scalar getDbgDrawSize() - { - return m_dbgDrawSize; - } - - ///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). - ///If no axis is provided, it uses the default axis for this constraint. - virtual void setParam(int num, b3Scalar value, int axis = -1) = 0; - - ///return the local value of parameter - virtual b3Scalar getParam(int num, int axis = -1) const = 0; - - // virtual int calculateSerializeBufferSize() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - //virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; -}; - -// returns angle in range [-B3_2_PI, B3_2_PI], closest to one of the limits -// all arguments should be normalized angles (i.e. in range [-B3_PI, B3_PI]) -B3_FORCE_INLINE b3Scalar b3AdjustAngleToLimits(b3Scalar angleInRadians, b3Scalar angleLowerLimitInRadians, b3Scalar angleUpperLimitInRadians) -{ - if (angleLowerLimitInRadians >= angleUpperLimitInRadians) - { - return angleInRadians; - } - else if (angleInRadians < angleLowerLimitInRadians) - { - b3Scalar diffLo = b3Fabs(b3NormalizeAngle(angleLowerLimitInRadians - angleInRadians)); - b3Scalar diffHi = b3Fabs(b3NormalizeAngle(angleUpperLimitInRadians - angleInRadians)); - return (diffLo < diffHi) ? angleInRadians : (angleInRadians + B3_2_PI); - } - else if (angleInRadians > angleUpperLimitInRadians) - { - b3Scalar diffHi = b3Fabs(b3NormalizeAngle(angleInRadians - angleUpperLimitInRadians)); - b3Scalar diffLo = b3Fabs(b3NormalizeAngle(angleInRadians - angleLowerLimitInRadians)); - return (diffLo < diffHi) ? (angleInRadians - B3_2_PI) : angleInRadians; - } - else - { - return angleInRadians; - } -} - -// clang-format off -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3TypedConstraintData -{ - int m_bodyA; - int m_bodyB; - char *m_name; - - int m_objectType; - int m_userConstraintType; - int m_userConstraintId; - int m_needsFeedback; - - float m_appliedImpulse; - float m_dbgDrawSize; - - int m_disableCollisionsBetweenLinkedBodies; - int m_overrideNumSolverIterations; - - float m_breakingImpulseThreshold; - int m_isEnabled; - -}; - -// clang-format on - -/*B3_FORCE_INLINE int b3TypedConstraint::calculateSerializeBufferSize() const -{ - return sizeof(b3TypedConstraintData); -} -*/ - -class b3AngularLimit -{ -private: - b3Scalar - m_center, - m_halfRange, - m_softness, - m_biasFactor, - m_relaxationFactor, - m_correction, - m_sign; - - bool - m_solveLimit; - -public: - /// Default constructor initializes limit as inactive, allowing free constraint movement - b3AngularLimit() - : m_center(0.0f), - m_halfRange(-1.0f), - m_softness(0.9f), - m_biasFactor(0.3f), - m_relaxationFactor(1.0f), - m_correction(0.0f), - m_sign(0.0f), - m_solveLimit(false) - { - } - - /// Sets all limit's parameters. - /// When low > high limit becomes inactive. - /// When high - low > 2PI limit is ineffective too becouse no angle can exceed the limit - void set(b3Scalar low, b3Scalar high, b3Scalar _softness = 0.9f, b3Scalar _biasFactor = 0.3f, b3Scalar _relaxationFactor = 1.0f); - - /// Checks conastaint angle against limit. If limit is active and the angle violates the limit - /// correction is calculated. - void test(const b3Scalar angle); - - /// Returns limit's softness - inline b3Scalar getSoftness() const - { - return m_softness; - } - - /// Returns limit's bias factor - inline b3Scalar getBiasFactor() const - { - return m_biasFactor; - } - - /// Returns limit's relaxation factor - inline b3Scalar getRelaxationFactor() const - { - return m_relaxationFactor; - } - - /// Returns correction value evaluated when test() was invoked - inline b3Scalar getCorrection() const - { - return m_correction; - } - - /// Returns sign value evaluated when test() was invoked - inline b3Scalar getSign() const - { - return m_sign; - } - - /// Gives half of the distance between min and max limit angle - inline b3Scalar getHalfRange() const - { - return m_halfRange; - } - - /// Returns true when the last test() invocation recognized limit violation - inline bool isLimit() const - { - return m_solveLimit; - } - - /// Checks given angle against limit. If limit is active and angle doesn't fit it, the angle - /// returned is modified so it equals to the limit closest to given angle. - void fit(b3Scalar& angle) const; - - /// Returns correction value multiplied by sign value - b3Scalar getError() const; - - b3Scalar getLow() const; - - b3Scalar getHigh() const; -}; - -#endif //B3_TYPED_CONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp b/thirdparty/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp deleted file mode 100644 index f1080d9d5e1..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp +++ /dev/null @@ -1,447 +0,0 @@ -#include "b3CpuRigidBodyPipeline.h" - -#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" -#include "Bullet3Dynamics/shared/b3Inertia.h" - -struct b3CpuRigidBodyPipelineInternalData -{ - b3AlignedObjectArray m_rigidBodies; - b3AlignedObjectArray m_inertias; - b3AlignedObjectArray m_aabbWorldSpace; - - b3DynamicBvhBroadphase* m_bp; - b3CpuNarrowPhase* m_np; - b3Config m_config; -}; - -b3CpuRigidBodyPipeline::b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config) -{ - m_data = new b3CpuRigidBodyPipelineInternalData; - m_data->m_np = narrowphase; - m_data->m_bp = broadphaseDbvt; - m_data->m_config = config; -} - -b3CpuRigidBodyPipeline::~b3CpuRigidBodyPipeline() -{ - delete m_data; -} - -void b3CpuRigidBodyPipeline::updateAabbWorldSpace() -{ - for (int i = 0; i < this->getNumBodies(); i++) - { - b3RigidBodyData* body = &m_data->m_rigidBodies[i]; - b3Float4 position = body->m_pos; - b3Quat orientation = body->m_quat; - - int collidableIndex = body->m_collidableIdx; - b3Collidable& collidable = m_data->m_np->getCollidableCpu(collidableIndex); - int shapeIndex = collidable.m_shapeIndex; - - if (shapeIndex >= 0) - { - b3Aabb localAabb = m_data->m_np->getLocalSpaceAabb(shapeIndex); - b3Aabb& worldAabb = m_data->m_aabbWorldSpace[i]; - float margin = 0.f; - b3TransformAabb2(localAabb.m_minVec, localAabb.m_maxVec, margin, position, orientation, &worldAabb.m_minVec, &worldAabb.m_maxVec); - m_data->m_bp->setAabb(i, worldAabb.m_minVec, worldAabb.m_maxVec, 0); - } - } -} - -void b3CpuRigidBodyPipeline::computeOverlappingPairs() -{ - int numPairs = m_data->m_bp->getOverlappingPairCache()->getNumOverlappingPairs(); - m_data->m_bp->calculateOverlappingPairs(); - numPairs = m_data->m_bp->getOverlappingPairCache()->getNumOverlappingPairs(); - printf("numPairs=%d\n", numPairs); -} - -void b3CpuRigidBodyPipeline::computeContactPoints() -{ - b3AlignedObjectArray& pairs = m_data->m_bp->getOverlappingPairCache()->getOverlappingPairArray(); - - m_data->m_np->computeContacts(pairs, m_data->m_aabbWorldSpace, m_data->m_rigidBodies); -} -void b3CpuRigidBodyPipeline::stepSimulation(float deltaTime) -{ - //update world space aabb's - updateAabbWorldSpace(); - - //compute overlapping pairs - computeOverlappingPairs(); - - //compute contacts - computeContactPoints(); - - //solve contacts - - //update transforms - integrate(deltaTime); -} - -static inline float b3CalcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1, - const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1) -{ - return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1); -} - -static inline void b3SetLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1, - b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1) -{ - linear = -n; - angular0 = -b3Cross(r0, n); - angular1 = b3Cross(r1, n); -} - -static inline void b3SolveContact(b3ContactConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - b3Vector3 dLinVelA; - dLinVelA.setZero(); - b3Vector3 dAngVelA; - dAngVelA.setZero(); - b3Vector3 dLinVelB; - dLinVelB.setZero(); - b3Vector3 dAngVelB; - dAngVelB.setZero(); - - for (int ic = 0; ic < 4; ic++) - { - // dont necessary because this makes change to 0 - if (cs.m_jacCoeffInv[ic] == 0.f) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - b3SetLinearAndAngular((const b3Vector3&)-cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1); - - float rambdaDt = b3CalcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB) + - cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[ic]); - updated = b3Min(updated, maxRambdaDt[ic]); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - { - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - } - } -} - -static inline void b3SolveFriction(b3ContactConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; - - b3PlaneSpace1(n, tangent[0], tangent[1]); - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for (int i = 0; i < 2; i++) - { - b3SetLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1); - float rambdaDt = b3CalcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[i]); - updated = b3Min(updated, maxRambdaDt[i]); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - - { // angular damping for point constraint - b3Vector3 ab = (posB - posA).normalized(); - b3Vector3 ac = (center - posA).normalized(); - if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot(n, angVelA); - float angNB = b3Dot(n, angVelB); - - angVelA -= (angNA * 0.1f) * n; - angVelB -= (angNB * 0.1f) * n; - } - } -} - -struct b3SolveTask // : public ThreadPool::Task -{ - b3SolveTask(b3AlignedObjectArray& bodies, - b3AlignedObjectArray& shapes, - b3AlignedObjectArray& constraints, - int start, int nConstraints, - int maxNumBatches, - b3AlignedObjectArray* wgUsedBodies, int curWgidx) - : m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_wgUsedBodies(wgUsedBodies), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches) - { - } - - unsigned short int getType() { return 0; } - - void run(int tIdx) - { - b3AlignedObjectArray usedBodies; - //printf("run..............\n"); - - for (int bb = 0; bb < m_maxNumBatches; bb++) - { - usedBodies.resize(0); - for (int ic = m_nConstraints - 1; ic >= 0; ic--) - //for(int ic=0; ic& m_bodies; - b3AlignedObjectArray& m_shapes; - b3AlignedObjectArray& m_constraints; - b3AlignedObjectArray* m_wgUsedBodies; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; -}; - -void b3CpuRigidBodyPipeline::solveContactConstraints() -{ - int m_nIterations = 4; - - b3AlignedObjectArray contactConstraints; - // const b3AlignedObjectArray& contacts = m_data->m_np->getContacts(); - int n = contactConstraints.size(); - //convert contacts... - - int maxNumBatches = 250; - - for (int iter = 0; iter < m_nIterations; iter++) - { - b3SolveTask task(m_data->m_rigidBodies, m_data->m_inertias, contactConstraints, 0, n, maxNumBatches, 0, 0); - task.m_solveFriction = false; - task.run(0); - } - - for (int iter = 0; iter < m_nIterations; iter++) - { - b3SolveTask task(m_data->m_rigidBodies, m_data->m_inertias, contactConstraints, 0, n, maxNumBatches, 0, 0); - task.m_solveFriction = true; - task.run(0); - } -} - -void b3CpuRigidBodyPipeline::integrate(float deltaTime) -{ - float angDamping = 0.f; - b3Vector3 gravityAcceleration = b3MakeVector3(0, -9, 0); - - //integrate transforms (external forces/gravity should be moved into constraint solver) - for (int i = 0; i < m_data->m_rigidBodies.size(); i++) - { - b3IntegrateTransform(&m_data->m_rigidBodies[i], deltaTime, angDamping, gravityAcceleration); - } -} - -int b3CpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userData) -{ - b3RigidBodyData body; - int bodyIndex = m_data->m_rigidBodies.size(); - body.m_invMass = mass ? 1.f / mass : 0.f; - body.m_angVel.setValue(0, 0, 0); - body.m_collidableIdx = collidableIndex; - body.m_frictionCoeff = 0.3f; - body.m_linVel.setValue(0, 0, 0); - body.m_pos.setValue(position[0], position[1], position[2]); - body.m_quat.setValue(orientation[0], orientation[1], orientation[2], orientation[3]); - body.m_restituitionCoeff = 0.f; - - m_data->m_rigidBodies.push_back(body); - - if (collidableIndex >= 0) - { - b3Aabb& worldAabb = m_data->m_aabbWorldSpace.expand(); - - b3Aabb localAabb = m_data->m_np->getLocalSpaceAabb(collidableIndex); - b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]); - b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]); - - b3Scalar margin = 0.01f; - b3Transform t; - t.setIdentity(); - t.setOrigin(b3MakeVector3(position[0], position[1], position[2])); - t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3])); - b3TransformAabb(localAabbMin, localAabbMax, margin, t, worldAabb.m_minVec, worldAabb.m_maxVec); - - m_data->m_bp->createProxy(worldAabb.m_minVec, worldAabb.m_maxVec, bodyIndex, 0, 1, 1); - // b3Vector3 aabbMin,aabbMax; - // m_data->m_bp->getAabb(bodyIndex,aabbMin,aabbMax); - } - else - { - b3Error("registerPhysicsInstance using invalid collidableIndex\n"); - } - - return bodyIndex; -} - -const struct b3RigidBodyData* b3CpuRigidBodyPipeline::getBodyBuffer() const -{ - return m_data->m_rigidBodies.size() ? &m_data->m_rigidBodies[0] : 0; -} - -int b3CpuRigidBodyPipeline::getNumBodies() const -{ - return m_data->m_rigidBodies.size(); -} diff --git a/thirdparty/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.h b/thirdparty/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.h deleted file mode 100644 index 9c65419f26b..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_CPU_RIGIDBODY_PIPELINE_H -#define B3_CPU_RIGIDBODY_PIPELINE_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -class b3CpuRigidBodyPipeline -{ -protected: - struct b3CpuRigidBodyPipelineInternalData* m_data; - - int allocateCollidable(); - -public: - b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const struct b3Config& config); - virtual ~b3CpuRigidBodyPipeline(); - - virtual void stepSimulation(float deltaTime); - virtual void integrate(float timeStep); - virtual void updateAabbWorldSpace(); - virtual void computeOverlappingPairs(); - virtual void computeContactPoints(); - virtual void solveContactConstraints(); - - int registerConvexPolyhedron(class b3ConvexUtility* convex); - - int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData); - void writeAllInstancesToGpu(); - void copyConstraintsToHost(); - void setGravity(const float* grav); - void reset(); - - int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold); - int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold); - void removeConstraintByUid(int uid); - - void addConstraint(class b3TypedConstraint* constraint); - void removeConstraint(b3TypedConstraint* constraint); - - void castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults); - - const struct b3RigidBodyData* getBodyBuffer() const; - - int getNumBodies() const; -}; - -#endif //B3_CPU_RIGIDBODY_PIPELINE_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Dynamics/shared/b3ContactConstraint4.h b/thirdparty/bullet/Bullet3Dynamics/shared/b3ContactConstraint4.h deleted file mode 100644 index cf2eed0e7c4..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/shared/b3ContactConstraint4.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef B3_CONTACT_CONSTRAINT5_H -#define B3_CONTACT_CONSTRAINT5_H - -#include "Bullet3Common/shared/b3Float4.h" - -typedef struct b3ContactConstraint4 b3ContactConstraint4_t; - -struct b3ContactConstraint4 -{ - b3Float4 m_linear; //normal? - b3Float4 m_worldPos[4]; - b3Float4 m_center; // friction - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - float m_fJacCoeffInv[2]; // friction - float m_fAppliedRambdaDt[2]; // friction - - unsigned int m_bodyA; - unsigned int m_bodyB; - int m_batchIdx; - unsigned int m_paddings; -}; - -//inline void setFrictionCoeff(float value) { m_linear[3] = value; } -inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) -{ - return constraint->m_linear.w; -} - -#endif //B3_CONTACT_CONSTRAINT5_H diff --git a/thirdparty/bullet/Bullet3Dynamics/shared/b3ConvertConstraint4.h b/thirdparty/bullet/Bullet3Dynamics/shared/b3ConvertConstraint4.h deleted file mode 100644 index 3e72f1c3f21..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/shared/b3ConvertConstraint4.h +++ /dev/null @@ -1,148 +0,0 @@ - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -void b3PlaneSpace1(b3Float4ConstArg n, b3Float4* p, b3Float4* q); -void b3PlaneSpace1(b3Float4ConstArg n, b3Float4* p, b3Float4* q) -{ - if (b3Fabs(n.z) > 0.70710678f) - { - // choose p in y-z plane - float a = n.y * n.y + n.z * n.z; - float k = 1.f / sqrt(a); - p[0].x = 0; - p[0].y = -n.z * k; - p[0].z = n.y * k; - // set q = n x p - q[0].x = a * k; - q[0].y = -n.x * p[0].z; - q[0].z = n.x * p[0].y; - } - else - { - // choose p in x-y plane - float a = n.x * n.x + n.y * n.y; - float k = 1.f / sqrt(a); - p[0].x = -n.y * k; - p[0].y = n.x * k; - p[0].z = 0; - // set q = n x p - q[0].x = -n.z * p[0].y; - q[0].y = n.z * p[0].x; - q[0].z = a * k; - } -} - -void setLinearAndAngular(b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1) -{ - *linear = b3MakeFloat4(n.x, n.y, n.z, 0.f); - *angular0 = b3Cross3(r0, n); - *angular1 = -b3Cross3(r1, n); -} - -float calcRelVel(b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0, - b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1) -{ - return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1); -} - -float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1, - float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0; //b3Dot3F4(linear0, linear0)*invMass0; - float jmj1 = b3Dot3F4(mtMul3(angular0, *invInertia0), angular0); - float jmj2 = invMass1; //b3Dot3F4(linear1, linear1)*invMass1; - float jmj3 = b3Dot3F4(mtMul3(angular1, *invInertia1), angular1); - return -1.f / (jmj0 + jmj1 + jmj2 + jmj3); -} - -void setConstraint4(b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA, - b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, - __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff, - b3ContactConstraint4_t* dstC) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f / dt; - for (int ic = 0; ic < 4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear.w = 0.7f; //src->getFrictionCoeff() ); - for (int ic = 0; ic < 4; ic++) - { - b3Float4 r0 = src->m_worldPosB[ic] - posA; - b3Float4 r1 = src->m_worldPosB[ic] - posB; - - if (ic >= src->m_worldNormalOnB.w) //npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - b3Float4 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f; //src->getRestituitionCoeff(); - if (relVelN * relVelN < 0.004f) e = 0.f; - - dstC->m_b[ic] = e * relVelN; - //float penetration = src->m_worldPosB[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift) * positionConstraintCoeff * dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if (src->m_worldNormalOnB.w > 0) //npoints - { // prepare friction - b3Float4 center = b3MakeFloat4(0.f, 0.f, 0.f, 0.f); - for (int i = 0; i < src->m_worldNormalOnB.w; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB.w; - - b3Float4 tangent[2]; - b3PlaneSpace1(src->m_worldNormalOnB, &tangent[0], &tangent[1]); - - b3Float4 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for (int i = 0; i < 2; i++) - { - b3Float4 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for (int i = 0; i < 4; i++) - { - if (i < src->m_worldNormalOnB.w) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = b3MakeFloat4(0.f, 0.f, 0.f, 0.f); - } - } -} diff --git a/thirdparty/bullet/Bullet3Dynamics/shared/b3Inertia.h b/thirdparty/bullet/Bullet3Dynamics/shared/b3Inertia.h deleted file mode 100644 index 602a1335aa8..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/shared/b3Inertia.h +++ /dev/null @@ -1,14 +0,0 @@ - - -#ifndef B3_INERTIA_H -#define B3_INERTIA_H - -#include "Bullet3Common/shared/b3Mat3x3.h" - -struct b3Inertia -{ - b3Mat3x3 m_invInertiaWorld; - b3Mat3x3 m_initInvInertia; -}; - -#endif //B3_INERTIA_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Dynamics/shared/b3IntegrateTransforms.h b/thirdparty/bullet/Bullet3Dynamics/shared/b3IntegrateTransforms.h deleted file mode 100644 index 56d9118f95e..00000000000 --- a/thirdparty/bullet/Bullet3Dynamics/shared/b3IntegrateTransforms.h +++ /dev/null @@ -1,106 +0,0 @@ - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -inline void integrateSingleTransform(__global b3RigidBodyData_t* bodies, int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration) -{ - if (bodies[nodeID].m_invMass != 0.f) - { - float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); - - //angular velocity - { - b3Float4 axis; - //add some hardcoded angular damping - bodies[nodeID].m_angVel.x *= angularDamping; - bodies[nodeID].m_angVel.y *= angularDamping; - bodies[nodeID].m_angVel.z *= angularDamping; - - b3Float4 angvel = bodies[nodeID].m_angVel; - - float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel)); - - //limit the angular motion - if (fAngle * timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) - { - fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; - } - if (fAngle < 0.001f) - { - // use Taylor's expansions of sync function - axis = angvel * (0.5f * timeStep - (timeStep * timeStep * timeStep) * 0.020833333333f * fAngle * fAngle); - } - else - { - // sync(fAngle) = sin(c*fAngle)/t - axis = angvel * (b3Sin(0.5f * fAngle * timeStep) / fAngle); - } - - b3Quat dorn; - dorn.x = axis.x; - dorn.y = axis.y; - dorn.z = axis.z; - dorn.w = b3Cos(fAngle * timeStep * 0.5f); - b3Quat orn0 = bodies[nodeID].m_quat; - b3Quat predictedOrn = b3QuatMul(dorn, orn0); - predictedOrn = b3QuatNormalized(predictedOrn); - bodies[nodeID].m_quat = predictedOrn; - } - //linear velocity - bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep; - - //apply gravity - bodies[nodeID].m_linVel += gravityAcceleration * timeStep; - } -} - -inline void b3IntegrateTransform(__global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration) -{ - float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f); - - if ((body->m_invMass != 0.f)) - { - //angular velocity - { - b3Float4 axis; - //add some hardcoded angular damping - body->m_angVel.x *= angularDamping; - body->m_angVel.y *= angularDamping; - body->m_angVel.z *= angularDamping; - - b3Float4 angvel = body->m_angVel; - float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel)); - //limit the angular motion - if (fAngle * timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD) - { - fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep; - } - if (fAngle < 0.001f) - { - // use Taylor's expansions of sync function - axis = angvel * (0.5f * timeStep - (timeStep * timeStep * timeStep) * 0.020833333333f * fAngle * fAngle); - } - else - { - // sync(fAngle) = sin(c*fAngle)/t - axis = angvel * (b3Sin(0.5f * fAngle * timeStep) / fAngle); - } - b3Quat dorn; - dorn.x = axis.x; - dorn.y = axis.y; - dorn.z = axis.z; - dorn.w = b3Cos(fAngle * timeStep * 0.5f); - b3Quat orn0 = body->m_quat; - - b3Quat predictedOrn = b3QuatMul(dorn, orn0); - predictedOrn = b3QuatNormalized(predictedOrn); - body->m_quat = predictedOrn; - } - - //apply gravity - body->m_linVel += gravityAcceleration * timeStep; - - //linear velocity - body->m_pos += body->m_linVel * timeStep; - } -} diff --git a/thirdparty/bullet/Bullet3Geometry/b3AabbUtil.h b/thirdparty/bullet/Bullet3Geometry/b3AabbUtil.h deleted file mode 100644 index 04c52d8dc8a..00000000000 --- a/thirdparty/bullet/Bullet3Geometry/b3AabbUtil.h +++ /dev/null @@ -1,217 +0,0 @@ -/* -Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_AABB_UTIL2 -#define B3_AABB_UTIL2 - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3MinMax.h" - -B3_FORCE_INLINE void b3AabbExpand(b3Vector3& aabbMin, - b3Vector3& aabbMax, - const b3Vector3& expansionMin, - const b3Vector3& expansionMax) -{ - aabbMin = aabbMin + expansionMin; - aabbMax = aabbMax + expansionMax; -} - -/// conservative test for overlap between two aabbs -B3_FORCE_INLINE bool b3TestPointAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1, - const b3Vector3& point) -{ - bool overlap = true; - overlap = (aabbMin1.getX() > point.getX() || aabbMax1.getX() < point.getX()) ? false : overlap; - overlap = (aabbMin1.getZ() > point.getZ() || aabbMax1.getZ() < point.getZ()) ? false : overlap; - overlap = (aabbMin1.getY() > point.getY() || aabbMax1.getY() < point.getY()) ? false : overlap; - return overlap; -} - -/// conservative test for overlap between two aabbs -B3_FORCE_INLINE bool b3TestAabbAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1, - const b3Vector3& aabbMin2, const b3Vector3& aabbMax2) -{ - bool overlap = true; - overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap; - overlap = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ()) ? false : overlap; - overlap = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY()) ? false : overlap; - return overlap; -} - -/// conservative test for overlap between triangle and aabb -B3_FORCE_INLINE bool b3TestTriangleAgainstAabb2(const b3Vector3* vertices, - const b3Vector3& aabbMin, const b3Vector3& aabbMax) -{ - const b3Vector3& p1 = vertices[0]; - const b3Vector3& p2 = vertices[1]; - const b3Vector3& p3 = vertices[2]; - - if (b3Min(b3Min(p1[0], p2[0]), p3[0]) > aabbMax[0]) return false; - if (b3Max(b3Max(p1[0], p2[0]), p3[0]) < aabbMin[0]) return false; - - if (b3Min(b3Min(p1[2], p2[2]), p3[2]) > aabbMax[2]) return false; - if (b3Max(b3Max(p1[2], p2[2]), p3[2]) < aabbMin[2]) return false; - - if (b3Min(b3Min(p1[1], p2[1]), p3[1]) > aabbMax[1]) return false; - if (b3Max(b3Max(p1[1], p2[1]), p3[1]) < aabbMin[1]) return false; - return true; -} - -B3_FORCE_INLINE int b3Outcode(const b3Vector3& p, const b3Vector3& halfExtent) -{ - return (p.getX() < -halfExtent.getX() ? 0x01 : 0x0) | - (p.getX() > halfExtent.getX() ? 0x08 : 0x0) | - (p.getY() < -halfExtent.getY() ? 0x02 : 0x0) | - (p.getY() > halfExtent.getY() ? 0x10 : 0x0) | - (p.getZ() < -halfExtent.getZ() ? 0x4 : 0x0) | - (p.getZ() > halfExtent.getZ() ? 0x20 : 0x0); -} - -B3_FORCE_INLINE bool b3RayAabb2(const b3Vector3& rayFrom, - const b3Vector3& rayInvDirection, - const unsigned int raySign[3], - const b3Vector3 bounds[2], - b3Scalar& tmin, - b3Scalar lambda_min, - b3Scalar lambda_max) -{ - b3Scalar tmax, tymin, tymax, tzmin, tzmax; - tmin = (bounds[raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX(); - tmax = (bounds[1 - raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX(); - tymin = (bounds[raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY(); - tymax = (bounds[1 - raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY(); - - if ((tmin > tymax) || (tymin > tmax)) - return false; - - if (tymin > tmin) - tmin = tymin; - - if (tymax < tmax) - tmax = tymax; - - tzmin = (bounds[raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ(); - tzmax = (bounds[1 - raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ(); - - if ((tmin > tzmax) || (tzmin > tmax)) - return false; - if (tzmin > tmin) - tmin = tzmin; - if (tzmax < tmax) - tmax = tzmax; - return ((tmin < lambda_max) && (tmax > lambda_min)); -} - -B3_FORCE_INLINE bool b3RayAabb(const b3Vector3& rayFrom, - const b3Vector3& rayTo, - const b3Vector3& aabbMin, - const b3Vector3& aabbMax, - b3Scalar& param, b3Vector3& normal) -{ - b3Vector3 aabbHalfExtent = (aabbMax - aabbMin) * b3Scalar(0.5); - b3Vector3 aabbCenter = (aabbMax + aabbMin) * b3Scalar(0.5); - b3Vector3 source = rayFrom - aabbCenter; - b3Vector3 target = rayTo - aabbCenter; - int sourceOutcode = b3Outcode(source, aabbHalfExtent); - int targetOutcode = b3Outcode(target, aabbHalfExtent); - if ((sourceOutcode & targetOutcode) == 0x0) - { - b3Scalar lambda_enter = b3Scalar(0.0); - b3Scalar lambda_exit = param; - b3Vector3 r = target - source; - int i; - b3Scalar normSign = 1; - b3Vector3 hitNormal = b3MakeVector3(0, 0, 0); - int bit = 1; - - for (int j = 0; j < 2; j++) - { - for (i = 0; i != 3; ++i) - { - if (sourceOutcode & bit) - { - b3Scalar lambda = (-source[i] - aabbHalfExtent[i] * normSign) / r[i]; - if (lambda_enter <= lambda) - { - lambda_enter = lambda; - hitNormal.setValue(0, 0, 0); - hitNormal[i] = normSign; - } - } - else if (targetOutcode & bit) - { - b3Scalar lambda = (-source[i] - aabbHalfExtent[i] * normSign) / r[i]; - b3SetMin(lambda_exit, lambda); - } - bit <<= 1; - } - normSign = b3Scalar(-1.); - } - if (lambda_enter <= lambda_exit) - { - param = lambda_enter; - normal = hitNormal; - return true; - } - } - return false; -} - -B3_FORCE_INLINE void b3TransformAabb(const b3Vector3& halfExtents, b3Scalar margin, const b3Transform& t, b3Vector3& aabbMinOut, b3Vector3& aabbMaxOut) -{ - b3Vector3 halfExtentsWithMargin = halfExtents + b3MakeVector3(margin, margin, margin); - b3Matrix3x3 abs_b = t.getBasis().absolute(); - b3Vector3 center = t.getOrigin(); - b3Vector3 extent = halfExtentsWithMargin.dot3(abs_b[0], abs_b[1], abs_b[2]); - aabbMinOut = center - extent; - aabbMaxOut = center + extent; -} - -B3_FORCE_INLINE void b3TransformAabb(const b3Vector3& localAabbMin, const b3Vector3& localAabbMax, b3Scalar margin, const b3Transform& trans, b3Vector3& aabbMinOut, b3Vector3& aabbMaxOut) -{ - //b3Assert(localAabbMin.getX() <= localAabbMax.getX()); - //b3Assert(localAabbMin.getY() <= localAabbMax.getY()); - //b3Assert(localAabbMin.getZ() <= localAabbMax.getZ()); - b3Vector3 localHalfExtents = b3Scalar(0.5) * (localAabbMax - localAabbMin); - localHalfExtents += b3MakeVector3(margin, margin, margin); - - b3Vector3 localCenter = b3Scalar(0.5) * (localAabbMax + localAabbMin); - b3Matrix3x3 abs_b = trans.getBasis().absolute(); - b3Vector3 center = trans(localCenter); - b3Vector3 extent = localHalfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]); - aabbMinOut = center - extent; - aabbMaxOut = center + extent; -} - -#define B3_USE_BANCHLESS 1 -#ifdef B3_USE_BANCHLESS -//This block replaces the block below and uses no branches, and replaces the 8 bit return with a 32 bit return for improved performance (~3x on XBox 360) -B3_FORCE_INLINE unsigned b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1, const unsigned short int* aabbMax1, const unsigned short int* aabbMin2, const unsigned short int* aabbMax2) -{ - return static_cast(b3Select((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0]) & (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2]) & (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])), - 1, 0)); -} -#else -B3_FORCE_INLINE bool b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1, const unsigned short int* aabbMax1, const unsigned short int* aabbMin2, const unsigned short int* aabbMax2) -{ - bool overlap = true; - overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap; - overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap; - overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap; - return overlap; -} -#endif //B3_USE_BANCHLESS - -#endif //B3_AABB_UTIL2 diff --git a/thirdparty/bullet/Bullet3Geometry/b3ConvexHullComputer.cpp b/thirdparty/bullet/Bullet3Geometry/b3ConvexHullComputer.cpp deleted file mode 100644 index b37652456e8..00000000000 --- a/thirdparty/bullet/Bullet3Geometry/b3ConvexHullComputer.cpp +++ /dev/null @@ -1,2745 +0,0 @@ -/* -Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include - -#include "b3ConvexHullComputer.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3MinMax.h" -#include "Bullet3Common/b3Vector3.h" - -#ifdef __GNUC__ -#include -typedef int32_t btInt32_t; -typedef int64_t btInt64_t; -typedef uint32_t btUint32_t; -typedef uint64_t btUint64_t; -#elif defined(_MSC_VER) -typedef __int32 btInt32_t; -typedef __int64 btInt64_t; -typedef unsigned __int32 btUint32_t; -typedef unsigned __int64 btUint64_t; -#else -typedef int btInt32_t; -typedef long long int btInt64_t; -typedef unsigned int btUint32_t; -typedef unsigned long long int btUint64_t; -#endif - -//The definition of USE_X86_64_ASM is moved into the build system. You can enable it manually by commenting out the following lines -//#if (defined(__GNUC__) && defined(__x86_64__) && !defined(__ICL)) // || (defined(__ICL) && defined(_M_X64)) bug in Intel compiler, disable inline assembly -// #define USE_X86_64_ASM -//#endif - -//#define DEBUG_CONVEX_HULL -//#define SHOW_ITERATIONS - -#if defined(DEBUG_CONVEX_HULL) || defined(SHOW_ITERATIONS) -#include -#endif - -// Convex hull implementation based on Preparata and Hong -// Ole Kniemeyer, MAXON Computer GmbH -class b3ConvexHullInternal -{ -public: - class Point64 - { - public: - btInt64_t x; - btInt64_t y; - btInt64_t z; - - Point64(btInt64_t x, btInt64_t y, btInt64_t z) : x(x), y(y), z(z) - { - } - - bool isZero() - { - return (x == 0) && (y == 0) && (z == 0); - } - - btInt64_t dot(const Point64& b) const - { - return x * b.x + y * b.y + z * b.z; - } - }; - - class Point32 - { - public: - btInt32_t x; - btInt32_t y; - btInt32_t z; - int index; - - Point32() - { - } - - Point32(btInt32_t x, btInt32_t y, btInt32_t z) : x(x), y(y), z(z), index(-1) - { - } - - bool operator==(const Point32& b) const - { - return (x == b.x) && (y == b.y) && (z == b.z); - } - - bool operator!=(const Point32& b) const - { - return (x != b.x) || (y != b.y) || (z != b.z); - } - - bool isZero() - { - return (x == 0) && (y == 0) && (z == 0); - } - - Point64 cross(const Point32& b) const - { - return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); - } - - Point64 cross(const Point64& b) const - { - return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); - } - - btInt64_t dot(const Point32& b) const - { - return x * b.x + y * b.y + z * b.z; - } - - btInt64_t dot(const Point64& b) const - { - return x * b.x + y * b.y + z * b.z; - } - - Point32 operator+(const Point32& b) const - { - return Point32(x + b.x, y + b.y, z + b.z); - } - - Point32 operator-(const Point32& b) const - { - return Point32(x - b.x, y - b.y, z - b.z); - } - }; - - class Int128 - { - public: - btUint64_t low; - btUint64_t high; - - Int128() - { - } - - Int128(btUint64_t low, btUint64_t high) : low(low), high(high) - { - } - - Int128(btUint64_t low) : low(low), high(0) - { - } - - Int128(btInt64_t value) : low(value), high((value >= 0) ? 0 : (btUint64_t)-1LL) - { - } - - static Int128 mul(btInt64_t a, btInt64_t b); - - static Int128 mul(btUint64_t a, btUint64_t b); - - Int128 operator-() const - { - return Int128((btUint64_t) - (btInt64_t)low, ~high + (low == 0)); - } - - Int128 operator+(const Int128& b) const - { -#ifdef USE_X86_64_ASM - Int128 result; - __asm__( - "addq %[bl], %[rl]\n\t" - "adcq %[bh], %[rh]\n\t" - : [rl] "=r"(result.low), [rh] "=r"(result.high) - : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high) - : "cc"); - return result; -#else - btUint64_t lo = low + b.low; - return Int128(lo, high + b.high + (lo < low)); -#endif - } - - Int128 operator-(const Int128& b) const - { -#ifdef USE_X86_64_ASM - Int128 result; - __asm__( - "subq %[bl], %[rl]\n\t" - "sbbq %[bh], %[rh]\n\t" - : [rl] "=r"(result.low), [rh] "=r"(result.high) - : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high) - : "cc"); - return result; -#else - return *this + -b; -#endif - } - - Int128& operator+=(const Int128& b) - { -#ifdef USE_X86_64_ASM - __asm__( - "addq %[bl], %[rl]\n\t" - "adcq %[bh], %[rh]\n\t" - : [rl] "=r"(low), [rh] "=r"(high) - : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high) - : "cc"); -#else - btUint64_t lo = low + b.low; - if (lo < low) - { - ++high; - } - low = lo; - high += b.high; -#endif - return *this; - } - - Int128& operator++() - { - if (++low == 0) - { - ++high; - } - return *this; - } - - Int128 operator*(btInt64_t b) const; - - b3Scalar toScalar() const - { - return ((btInt64_t)high >= 0) ? b3Scalar(high) * (b3Scalar(0x100000000LL) * b3Scalar(0x100000000LL)) + b3Scalar(low) - : -(-*this).toScalar(); - } - - int getSign() const - { - return ((btInt64_t)high < 0) ? -1 : (high || low) ? 1 : 0; - } - - bool operator<(const Int128& b) const - { - return (high < b.high) || ((high == b.high) && (low < b.low)); - } - - int ucmp(const Int128& b) const - { - if (high < b.high) - { - return -1; - } - if (high > b.high) - { - return 1; - } - if (low < b.low) - { - return -1; - } - if (low > b.low) - { - return 1; - } - return 0; - } - }; - - class Rational64 - { - private: - btUint64_t m_numerator; - btUint64_t m_denominator; - int sign; - - public: - Rational64(btInt64_t numerator, btInt64_t denominator) - { - if (numerator > 0) - { - sign = 1; - m_numerator = (btUint64_t)numerator; - } - else if (numerator < 0) - { - sign = -1; - m_numerator = (btUint64_t)-numerator; - } - else - { - sign = 0; - m_numerator = 0; - } - if (denominator > 0) - { - m_denominator = (btUint64_t)denominator; - } - else if (denominator < 0) - { - sign = -sign; - m_denominator = (btUint64_t)-denominator; - } - else - { - m_denominator = 0; - } - } - - bool isNegativeInfinity() const - { - return (sign < 0) && (m_denominator == 0); - } - - bool isNaN() const - { - return (sign == 0) && (m_denominator == 0); - } - - int compare(const Rational64& b) const; - - b3Scalar toScalar() const - { - return sign * ((m_denominator == 0) ? B3_INFINITY : (b3Scalar)m_numerator / m_denominator); - } - }; - - class Rational128 - { - private: - Int128 numerator; - Int128 denominator; - int sign; - bool isInt64; - - public: - Rational128(btInt64_t value) - { - if (value > 0) - { - sign = 1; - this->numerator = value; - } - else if (value < 0) - { - sign = -1; - this->numerator = -value; - } - else - { - sign = 0; - this->numerator = (btUint64_t)0; - } - this->denominator = (btUint64_t)1; - isInt64 = true; - } - - Rational128(const Int128& numerator, const Int128& denominator) - { - sign = numerator.getSign(); - if (sign >= 0) - { - this->numerator = numerator; - } - else - { - this->numerator = -numerator; - } - int dsign = denominator.getSign(); - if (dsign >= 0) - { - this->denominator = denominator; - } - else - { - sign = -sign; - this->denominator = -denominator; - } - isInt64 = false; - } - - int compare(const Rational128& b) const; - - int compare(btInt64_t b) const; - - b3Scalar toScalar() const - { - return sign * ((denominator.getSign() == 0) ? B3_INFINITY : numerator.toScalar() / denominator.toScalar()); - } - }; - - class PointR128 - { - public: - Int128 x; - Int128 y; - Int128 z; - Int128 denominator; - - PointR128() - { - } - - PointR128(Int128 x, Int128 y, Int128 z, Int128 denominator) : x(x), y(y), z(z), denominator(denominator) - { - } - - b3Scalar xvalue() const - { - return x.toScalar() / denominator.toScalar(); - } - - b3Scalar yvalue() const - { - return y.toScalar() / denominator.toScalar(); - } - - b3Scalar zvalue() const - { - return z.toScalar() / denominator.toScalar(); - } - }; - - class Edge; - class Face; - - class Vertex - { - public: - Vertex* next; - Vertex* prev; - Edge* edges; - Face* firstNearbyFace; - Face* lastNearbyFace; - PointR128 point128; - Point32 point; - int copy; - - Vertex() : next(NULL), prev(NULL), edges(NULL), firstNearbyFace(NULL), lastNearbyFace(NULL), copy(-1) - { - } - -#ifdef DEBUG_CONVEX_HULL - void print() - { - b3Printf("V%d (%d, %d, %d)", point.index, point.x, point.y, point.z); - } - - void printGraph(); -#endif - - Point32 operator-(const Vertex& b) const - { - return point - b.point; - } - - Rational128 dot(const Point64& b) const - { - return (point.index >= 0) ? Rational128(point.dot(b)) - : Rational128(point128.x * b.x + point128.y * b.y + point128.z * b.z, point128.denominator); - } - - b3Scalar xvalue() const - { - return (point.index >= 0) ? b3Scalar(point.x) : point128.xvalue(); - } - - b3Scalar yvalue() const - { - return (point.index >= 0) ? b3Scalar(point.y) : point128.yvalue(); - } - - b3Scalar zvalue() const - { - return (point.index >= 0) ? b3Scalar(point.z) : point128.zvalue(); - } - - void receiveNearbyFaces(Vertex* src) - { - if (lastNearbyFace) - { - lastNearbyFace->nextWithSameNearbyVertex = src->firstNearbyFace; - } - else - { - firstNearbyFace = src->firstNearbyFace; - } - if (src->lastNearbyFace) - { - lastNearbyFace = src->lastNearbyFace; - } - for (Face* f = src->firstNearbyFace; f; f = f->nextWithSameNearbyVertex) - { - b3Assert(f->nearbyVertex == src); - f->nearbyVertex = this; - } - src->firstNearbyFace = NULL; - src->lastNearbyFace = NULL; - } - }; - - class Edge - { - public: - Edge* next; - Edge* prev; - Edge* reverse; - Vertex* target; - Face* face; - int copy; - - ~Edge() - { - next = NULL; - prev = NULL; - reverse = NULL; - target = NULL; - face = NULL; - } - - void link(Edge* n) - { - b3Assert(reverse->target == n->reverse->target); - next = n; - n->prev = this; - } - -#ifdef DEBUG_CONVEX_HULL - void print() - { - b3Printf("E%p : %d -> %d, n=%p p=%p (0 %d\t%d\t%d) -> (%d %d %d)", this, reverse->target->point.index, target->point.index, next, prev, - reverse->target->point.x, reverse->target->point.y, reverse->target->point.z, target->point.x, target->point.y, target->point.z); - } -#endif - }; - - class Face - { - public: - Face* next; - Vertex* nearbyVertex; - Face* nextWithSameNearbyVertex; - Point32 origin; - Point32 dir0; - Point32 dir1; - - Face() : next(NULL), nearbyVertex(NULL), nextWithSameNearbyVertex(NULL) - { - } - - void init(Vertex* a, Vertex* b, Vertex* c) - { - nearbyVertex = a; - origin = a->point; - dir0 = *b - *a; - dir1 = *c - *a; - if (a->lastNearbyFace) - { - a->lastNearbyFace->nextWithSameNearbyVertex = this; - } - else - { - a->firstNearbyFace = this; - } - a->lastNearbyFace = this; - } - - Point64 getNormal() - { - return dir0.cross(dir1); - } - }; - - template - class DMul - { - private: - static btUint32_t high(btUint64_t value) - { - return (btUint32_t)(value >> 32); - } - - static btUint32_t low(btUint64_t value) - { - return (btUint32_t)value; - } - - static btUint64_t mul(btUint32_t a, btUint32_t b) - { - return (btUint64_t)a * (btUint64_t)b; - } - - static void shlHalf(btUint64_t& value) - { - value <<= 32; - } - - static btUint64_t high(Int128 value) - { - return value.high; - } - - static btUint64_t low(Int128 value) - { - return value.low; - } - - static Int128 mul(btUint64_t a, btUint64_t b) - { - return Int128::mul(a, b); - } - - static void shlHalf(Int128& value) - { - value.high = value.low; - value.low = 0; - } - - public: - static void mul(UWord a, UWord b, UWord& resLow, UWord& resHigh) - { - UWord p00 = mul(low(a), low(b)); - UWord p01 = mul(low(a), high(b)); - UWord p10 = mul(high(a), low(b)); - UWord p11 = mul(high(a), high(b)); - UWord p0110 = UWord(low(p01)) + UWord(low(p10)); - p11 += high(p01); - p11 += high(p10); - p11 += high(p0110); - shlHalf(p0110); - p00 += p0110; - if (p00 < p0110) - { - ++p11; - } - resLow = p00; - resHigh = p11; - } - }; - -private: - class IntermediateHull - { - public: - Vertex* minXy; - Vertex* maxXy; - Vertex* minYx; - Vertex* maxYx; - - IntermediateHull() : minXy(NULL), maxXy(NULL), minYx(NULL), maxYx(NULL) - { - } - - void print(); - }; - - enum Orientation - { - NONE, - CLOCKWISE, - COUNTER_CLOCKWISE - }; - - template - class PoolArray - { - private: - T* array; - int size; - - public: - PoolArray* next; - - PoolArray(int size) : size(size), next(NULL) - { - array = (T*)b3AlignedAlloc(sizeof(T) * size, 16); - } - - ~PoolArray() - { - b3AlignedFree(array); - } - - T* init() - { - T* o = array; - for (int i = 0; i < size; i++, o++) - { - o->next = (i + 1 < size) ? o + 1 : NULL; - } - return array; - } - }; - - template - class Pool - { - private: - PoolArray* arrays; - PoolArray* nextArray; - T* freeObjects; - int arraySize; - - public: - Pool() : arrays(NULL), nextArray(NULL), freeObjects(NULL), arraySize(256) - { - } - - ~Pool() - { - while (arrays) - { - PoolArray* p = arrays; - arrays = p->next; - p->~PoolArray(); - b3AlignedFree(p); - } - } - - void reset() - { - nextArray = arrays; - freeObjects = NULL; - } - - void setArraySize(int arraySize) - { - this->arraySize = arraySize; - } - - T* newObject() - { - T* o = freeObjects; - if (!o) - { - PoolArray* p = nextArray; - if (p) - { - nextArray = p->next; - } - else - { - p = new (b3AlignedAlloc(sizeof(PoolArray), 16)) PoolArray(arraySize); - p->next = arrays; - arrays = p; - } - o = p->init(); - } - freeObjects = o->next; - return new (o) T(); - }; - - void freeObject(T* object) - { - object->~T(); - object->next = freeObjects; - freeObjects = object; - } - }; - - b3Vector3 scaling; - b3Vector3 center; - Pool vertexPool; - Pool edgePool; - Pool facePool; - b3AlignedObjectArray originalVertices; - int mergeStamp; - int minAxis; - int medAxis; - int maxAxis; - int usedEdgePairs; - int maxUsedEdgePairs; - - static Orientation getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t); - Edge* findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot); - void findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1); - - Edge* newEdgePair(Vertex* from, Vertex* to); - - void removeEdgePair(Edge* edge) - { - Edge* n = edge->next; - Edge* r = edge->reverse; - - b3Assert(edge->target && r->target); - - if (n != edge) - { - n->prev = edge->prev; - edge->prev->next = n; - r->target->edges = n; - } - else - { - r->target->edges = NULL; - } - - n = r->next; - - if (n != r) - { - n->prev = r->prev; - r->prev->next = n; - edge->target->edges = n; - } - else - { - edge->target->edges = NULL; - } - - edgePool.freeObject(edge); - edgePool.freeObject(r); - usedEdgePairs--; - } - - void computeInternal(int start, int end, IntermediateHull& result); - - bool mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1); - - void merge(IntermediateHull& h0, IntermediateHull& h1); - - b3Vector3 toBtVector(const Point32& v); - - b3Vector3 getBtNormal(Face* face); - - bool shiftFace(Face* face, b3Scalar amount, b3AlignedObjectArray stack); - -public: - Vertex* vertexList; - - void compute(const void* coords, bool doubleCoords, int stride, int count); - - b3Vector3 getCoordinates(const Vertex* v); - - b3Scalar shrink(b3Scalar amount, b3Scalar clampAmount); -}; - -b3ConvexHullInternal::Int128 b3ConvexHullInternal::Int128::operator*(btInt64_t b) const -{ - bool negative = (btInt64_t)high < 0; - Int128 a = negative ? -*this : *this; - if (b < 0) - { - negative = !negative; - b = -b; - } - Int128 result = mul(a.low, (btUint64_t)b); - result.high += a.high * (btUint64_t)b; - return negative ? -result : result; -} - -b3ConvexHullInternal::Int128 b3ConvexHullInternal::Int128::mul(btInt64_t a, btInt64_t b) -{ - Int128 result; - -#ifdef USE_X86_64_ASM - __asm__("imulq %[b]" - : "=a"(result.low), "=d"(result.high) - : "0"(a), [b] "r"(b) - : "cc"); - return result; - -#else - bool negative = a < 0; - if (negative) - { - a = -a; - } - if (b < 0) - { - negative = !negative; - b = -b; - } - DMul::mul((btUint64_t)a, (btUint64_t)b, result.low, result.high); - return negative ? -result : result; -#endif -} - -b3ConvexHullInternal::Int128 b3ConvexHullInternal::Int128::mul(btUint64_t a, btUint64_t b) -{ - Int128 result; - -#ifdef USE_X86_64_ASM - __asm__("mulq %[b]" - : "=a"(result.low), "=d"(result.high) - : "0"(a), [b] "r"(b) - : "cc"); - -#else - DMul::mul(a, b, result.low, result.high); -#endif - - return result; -} - -int b3ConvexHullInternal::Rational64::compare(const Rational64& b) const -{ - if (sign != b.sign) - { - return sign - b.sign; - } - else if (sign == 0) - { - return 0; - } - - // return (numerator * b.denominator > b.numerator * denominator) ? sign : (numerator * b.denominator < b.numerator * denominator) ? -sign : 0; - -#ifdef USE_X86_64_ASM - - int result; - btInt64_t tmp; - btInt64_t dummy; - __asm__( - "mulq %[bn]\n\t" - "movq %%rax, %[tmp]\n\t" - "movq %%rdx, %%rbx\n\t" - "movq %[tn], %%rax\n\t" - "mulq %[bd]\n\t" - "subq %[tmp], %%rax\n\t" - "sbbq %%rbx, %%rdx\n\t" // rdx:rax contains 128-bit-difference "numerator*b.denominator - b.numerator*denominator" - "setnsb %%bh\n\t" // bh=1 if difference is non-negative, bh=0 otherwise - "orq %%rdx, %%rax\n\t" - "setnzb %%bl\n\t" // bl=1 if difference if non-zero, bl=0 if it is zero - "decb %%bh\n\t" // now bx=0x0000 if difference is zero, 0xff01 if it is negative, 0x0001 if it is positive (i.e., same sign as difference) - "shll $16, %%ebx\n\t" // ebx has same sign as difference - : "=&b"(result), [tmp] "=&r"(tmp), "=a"(dummy) - : "a"(denominator), [bn] "g"(b.numerator), [tn] "g"(numerator), [bd] "g"(b.denominator) - : "%rdx", "cc"); - return result ? result ^ sign // if sign is +1, only bit 0 of result is inverted, which does not change the sign of result (and cannot result in zero) - // if sign is -1, all bits of result are inverted, which changes the sign of result (and again cannot result in zero) - : 0; - -#else - - return sign * Int128::mul(m_numerator, b.m_denominator).ucmp(Int128::mul(m_denominator, b.m_numerator)); - -#endif -} - -int b3ConvexHullInternal::Rational128::compare(const Rational128& b) const -{ - if (sign != b.sign) - { - return sign - b.sign; - } - else if (sign == 0) - { - return 0; - } - if (isInt64) - { - return -b.compare(sign * (btInt64_t)numerator.low); - } - - Int128 nbdLow, nbdHigh, dbnLow, dbnHigh; - DMul::mul(numerator, b.denominator, nbdLow, nbdHigh); - DMul::mul(denominator, b.numerator, dbnLow, dbnHigh); - - int cmp = nbdHigh.ucmp(dbnHigh); - if (cmp) - { - return cmp * sign; - } - return nbdLow.ucmp(dbnLow) * sign; -} - -int b3ConvexHullInternal::Rational128::compare(btInt64_t b) const -{ - if (isInt64) - { - btInt64_t a = sign * (btInt64_t)numerator.low; - return (a > b) ? 1 : (a < b) ? -1 : 0; - } - if (b > 0) - { - if (sign <= 0) - { - return -1; - } - } - else if (b < 0) - { - if (sign >= 0) - { - return 1; - } - b = -b; - } - else - { - return sign; - } - - return numerator.ucmp(denominator * b) * sign; -} - -b3ConvexHullInternal::Edge* b3ConvexHullInternal::newEdgePair(Vertex* from, Vertex* to) -{ - b3Assert(from && to); - Edge* e = edgePool.newObject(); - Edge* r = edgePool.newObject(); - e->reverse = r; - r->reverse = e; - e->copy = mergeStamp; - r->copy = mergeStamp; - e->target = to; - r->target = from; - e->face = NULL; - r->face = NULL; - usedEdgePairs++; - if (usedEdgePairs > maxUsedEdgePairs) - { - maxUsedEdgePairs = usedEdgePairs; - } - return e; -} - -bool b3ConvexHullInternal::mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1) -{ - Vertex* v0 = h0.maxYx; - Vertex* v1 = h1.minYx; - if ((v0->point.x == v1->point.x) && (v0->point.y == v1->point.y)) - { - b3Assert(v0->point.z < v1->point.z); - Vertex* v1p = v1->prev; - if (v1p == v1) - { - c0 = v0; - if (v1->edges) - { - b3Assert(v1->edges->next == v1->edges); - v1 = v1->edges->target; - b3Assert(v1->edges->next == v1->edges); - } - c1 = v1; - return false; - } - Vertex* v1n = v1->next; - v1p->next = v1n; - v1n->prev = v1p; - if (v1 == h1.minXy) - { - if ((v1n->point.x < v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y < v1p->point.y))) - { - h1.minXy = v1n; - } - else - { - h1.minXy = v1p; - } - } - if (v1 == h1.maxXy) - { - if ((v1n->point.x > v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y > v1p->point.y))) - { - h1.maxXy = v1n; - } - else - { - h1.maxXy = v1p; - } - } - } - - v0 = h0.maxXy; - v1 = h1.maxXy; - Vertex* v00 = NULL; - Vertex* v10 = NULL; - btInt32_t sign = 1; - - for (int side = 0; side <= 1; side++) - { - btInt32_t dx = (v1->point.x - v0->point.x) * sign; - if (dx > 0) - { - while (true) - { - btInt32_t dy = v1->point.y - v0->point.y; - - Vertex* w0 = side ? v0->next : v0->prev; - if (w0 != v0) - { - btInt32_t dx0 = (w0->point.x - v0->point.x) * sign; - btInt32_t dy0 = w0->point.y - v0->point.y; - if ((dy0 <= 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx <= dy * dx0)))) - { - v0 = w0; - dx = (v1->point.x - v0->point.x) * sign; - continue; - } - } - - Vertex* w1 = side ? v1->next : v1->prev; - if (w1 != v1) - { - btInt32_t dx1 = (w1->point.x - v1->point.x) * sign; - btInt32_t dy1 = w1->point.y - v1->point.y; - btInt32_t dxn = (w1->point.x - v0->point.x) * sign; - if ((dxn > 0) && (dy1 < 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx < dy * dx1)))) - { - v1 = w1; - dx = dxn; - continue; - } - } - - break; - } - } - else if (dx < 0) - { - while (true) - { - btInt32_t dy = v1->point.y - v0->point.y; - - Vertex* w1 = side ? v1->prev : v1->next; - if (w1 != v1) - { - btInt32_t dx1 = (w1->point.x - v1->point.x) * sign; - btInt32_t dy1 = w1->point.y - v1->point.y; - if ((dy1 >= 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx <= dy * dx1)))) - { - v1 = w1; - dx = (v1->point.x - v0->point.x) * sign; - continue; - } - } - - Vertex* w0 = side ? v0->prev : v0->next; - if (w0 != v0) - { - btInt32_t dx0 = (w0->point.x - v0->point.x) * sign; - btInt32_t dy0 = w0->point.y - v0->point.y; - btInt32_t dxn = (v1->point.x - w0->point.x) * sign; - if ((dxn < 0) && (dy0 > 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx < dy * dx0)))) - { - v0 = w0; - dx = dxn; - continue; - } - } - - break; - } - } - else - { - btInt32_t x = v0->point.x; - btInt32_t y0 = v0->point.y; - Vertex* w0 = v0; - Vertex* t; - while (((t = side ? w0->next : w0->prev) != v0) && (t->point.x == x) && (t->point.y <= y0)) - { - w0 = t; - y0 = t->point.y; - } - v0 = w0; - - btInt32_t y1 = v1->point.y; - Vertex* w1 = v1; - while (((t = side ? w1->prev : w1->next) != v1) && (t->point.x == x) && (t->point.y >= y1)) - { - w1 = t; - y1 = t->point.y; - } - v1 = w1; - } - - if (side == 0) - { - v00 = v0; - v10 = v1; - - v0 = h0.minXy; - v1 = h1.minXy; - sign = -1; - } - } - - v0->prev = v1; - v1->next = v0; - - v00->next = v10; - v10->prev = v00; - - if (h1.minXy->point.x < h0.minXy->point.x) - { - h0.minXy = h1.minXy; - } - if (h1.maxXy->point.x >= h0.maxXy->point.x) - { - h0.maxXy = h1.maxXy; - } - - h0.maxYx = h1.maxYx; - - c0 = v00; - c1 = v10; - - return true; -} - -void b3ConvexHullInternal::computeInternal(int start, int end, IntermediateHull& result) -{ - int n = end - start; - switch (n) - { - case 0: - result.minXy = NULL; - result.maxXy = NULL; - result.minYx = NULL; - result.maxYx = NULL; - return; - case 2: - { - Vertex* v = originalVertices[start]; - Vertex* w = v + 1; - if (v->point != w->point) - { - btInt32_t dx = v->point.x - w->point.x; - btInt32_t dy = v->point.y - w->point.y; - - if ((dx == 0) && (dy == 0)) - { - if (v->point.z > w->point.z) - { - Vertex* t = w; - w = v; - v = t; - } - b3Assert(v->point.z < w->point.z); - v->next = v; - v->prev = v; - result.minXy = v; - result.maxXy = v; - result.minYx = v; - result.maxYx = v; - } - else - { - v->next = w; - v->prev = w; - w->next = v; - w->prev = v; - - if ((dx < 0) || ((dx == 0) && (dy < 0))) - { - result.minXy = v; - result.maxXy = w; - } - else - { - result.minXy = w; - result.maxXy = v; - } - - if ((dy < 0) || ((dy == 0) && (dx < 0))) - { - result.minYx = v; - result.maxYx = w; - } - else - { - result.minYx = w; - result.maxYx = v; - } - } - - Edge* e = newEdgePair(v, w); - e->link(e); - v->edges = e; - - e = e->reverse; - e->link(e); - w->edges = e; - - return; - } - } - // lint -fallthrough - case 1: - { - Vertex* v = originalVertices[start]; - v->edges = NULL; - v->next = v; - v->prev = v; - - result.minXy = v; - result.maxXy = v; - result.minYx = v; - result.maxYx = v; - - return; - } - } - - int split0 = start + n / 2; - Point32 p = originalVertices[split0 - 1]->point; - int split1 = split0; - while ((split1 < end) && (originalVertices[split1]->point == p)) - { - split1++; - } - computeInternal(start, split0, result); - IntermediateHull hull1; - computeInternal(split1, end, hull1); -#ifdef DEBUG_CONVEX_HULL - b3Printf("\n\nMerge\n"); - result.print(); - hull1.print(); -#endif - merge(result, hull1); -#ifdef DEBUG_CONVEX_HULL - b3Printf("\n Result\n"); - result.print(); -#endif -} - -#ifdef DEBUG_CONVEX_HULL -void b3ConvexHullInternal::IntermediateHull::print() -{ - b3Printf(" Hull\n"); - for (Vertex* v = minXy; v;) - { - b3Printf(" "); - v->print(); - if (v == maxXy) - { - b3Printf(" maxXy"); - } - if (v == minYx) - { - b3Printf(" minYx"); - } - if (v == maxYx) - { - b3Printf(" maxYx"); - } - if (v->next->prev != v) - { - b3Printf(" Inconsistency"); - } - b3Printf("\n"); - v = v->next; - if (v == minXy) - { - break; - } - } - if (minXy) - { - minXy->copy = (minXy->copy == -1) ? -2 : -1; - minXy->printGraph(); - } -} - -void b3ConvexHullInternal::Vertex::printGraph() -{ - print(); - b3Printf("\nEdges\n"); - Edge* e = edges; - if (e) - { - do - { - e->print(); - b3Printf("\n"); - e = e->next; - } while (e != edges); - do - { - Vertex* v = e->target; - if (v->copy != copy) - { - v->copy = copy; - v->printGraph(); - } - e = e->next; - } while (e != edges); - } -} -#endif - -b3ConvexHullInternal::Orientation b3ConvexHullInternal::getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t) -{ - b3Assert(prev->reverse->target == next->reverse->target); - if (prev->next == next) - { - if (prev->prev == next) - { - Point64 n = t.cross(s); - Point64 m = (*prev->target - *next->reverse->target).cross(*next->target - *next->reverse->target); - b3Assert(!m.isZero()); - btInt64_t dot = n.dot(m); - b3Assert(dot != 0); - return (dot > 0) ? COUNTER_CLOCKWISE : CLOCKWISE; - } - return COUNTER_CLOCKWISE; - } - else if (prev->prev == next) - { - return CLOCKWISE; - } - else - { - return NONE; - } -} - -b3ConvexHullInternal::Edge* b3ConvexHullInternal::findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot) -{ - Edge* minEdge = NULL; - -#ifdef DEBUG_CONVEX_HULL - b3Printf("find max edge for %d\n", start->point.index); -#endif - Edge* e = start->edges; - if (e) - { - do - { - if (e->copy > mergeStamp) - { - Point32 t = *e->target - *start; - Rational64 cot(t.dot(sxrxs), t.dot(rxs)); -#ifdef DEBUG_CONVEX_HULL - b3Printf(" Angle is %f (%d) for ", (float)b3Atan(cot.toScalar()), (int)cot.isNaN()); - e->print(); -#endif - if (cot.isNaN()) - { - b3Assert(ccw ? (t.dot(s) < 0) : (t.dot(s) > 0)); - } - else - { - int cmp; - if (minEdge == NULL) - { - minCot = cot; - minEdge = e; - } - else if ((cmp = cot.compare(minCot)) < 0) - { - minCot = cot; - minEdge = e; - } - else if ((cmp == 0) && (ccw == (getOrientation(minEdge, e, s, t) == COUNTER_CLOCKWISE))) - { - minEdge = e; - } - } -#ifdef DEBUG_CONVEX_HULL - b3Printf("\n"); -#endif - } - e = e->next; - } while (e != start->edges); - } - return minEdge; -} - -void b3ConvexHullInternal::findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1) -{ - Edge* start0 = e0; - Edge* start1 = e1; - Point32 et0 = start0 ? start0->target->point : c0->point; - Point32 et1 = start1 ? start1->target->point : c1->point; - Point32 s = c1->point - c0->point; - Point64 normal = ((start0 ? start0 : start1)->target->point - c0->point).cross(s); - btInt64_t dist = c0->point.dot(normal); - b3Assert(!start1 || (start1->target->point.dot(normal) == dist)); - Point64 perp = s.cross(normal); - b3Assert(!perp.isZero()); - -#ifdef DEBUG_CONVEX_HULL - b3Printf(" Advancing %d %d (%p %p, %d %d)\n", c0->point.index, c1->point.index, start0, start1, start0 ? start0->target->point.index : -1, start1 ? start1->target->point.index : -1); -#endif - - btInt64_t maxDot0 = et0.dot(perp); - if (e0) - { - while (e0->target != stop0) - { - Edge* e = e0->reverse->prev; - if (e->target->point.dot(normal) < dist) - { - break; - } - b3Assert(e->target->point.dot(normal) == dist); - if (e->copy == mergeStamp) - { - break; - } - btInt64_t dot = e->target->point.dot(perp); - if (dot <= maxDot0) - { - break; - } - maxDot0 = dot; - e0 = e; - et0 = e->target->point; - } - } - - btInt64_t maxDot1 = et1.dot(perp); - if (e1) - { - while (e1->target != stop1) - { - Edge* e = e1->reverse->next; - if (e->target->point.dot(normal) < dist) - { - break; - } - b3Assert(e->target->point.dot(normal) == dist); - if (e->copy == mergeStamp) - { - break; - } - btInt64_t dot = e->target->point.dot(perp); - if (dot <= maxDot1) - { - break; - } - maxDot1 = dot; - e1 = e; - et1 = e->target->point; - } - } - -#ifdef DEBUG_CONVEX_HULL - b3Printf(" Starting at %d %d\n", et0.index, et1.index); -#endif - - btInt64_t dx = maxDot1 - maxDot0; - if (dx > 0) - { - while (true) - { - btInt64_t dy = (et1 - et0).dot(s); - - if (e0 && (e0->target != stop0)) - { - Edge* f0 = e0->next->reverse; - if (f0->copy > mergeStamp) - { - btInt64_t dx0 = (f0->target->point - et0).dot(perp); - btInt64_t dy0 = (f0->target->point - et0).dot(s); - if ((dx0 == 0) ? (dy0 < 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) >= 0))) - { - et0 = f0->target->point; - dx = (et1 - et0).dot(perp); - e0 = (e0 == start0) ? NULL : f0; - continue; - } - } - } - - if (e1 && (e1->target != stop1)) - { - Edge* f1 = e1->reverse->next; - if (f1->copy > mergeStamp) - { - Point32 d1 = f1->target->point - et1; - if (d1.dot(normal) == 0) - { - btInt64_t dx1 = d1.dot(perp); - btInt64_t dy1 = d1.dot(s); - btInt64_t dxn = (f1->target->point - et0).dot(perp); - if ((dxn > 0) && ((dx1 == 0) ? (dy1 < 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) > 0)))) - { - e1 = f1; - et1 = e1->target->point; - dx = dxn; - continue; - } - } - else - { - b3Assert((e1 == start1) && (d1.dot(normal) < 0)); - } - } - } - - break; - } - } - else if (dx < 0) - { - while (true) - { - btInt64_t dy = (et1 - et0).dot(s); - - if (e1 && (e1->target != stop1)) - { - Edge* f1 = e1->prev->reverse; - if (f1->copy > mergeStamp) - { - btInt64_t dx1 = (f1->target->point - et1).dot(perp); - btInt64_t dy1 = (f1->target->point - et1).dot(s); - if ((dx1 == 0) ? (dy1 > 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) <= 0))) - { - et1 = f1->target->point; - dx = (et1 - et0).dot(perp); - e1 = (e1 == start1) ? NULL : f1; - continue; - } - } - } - - if (e0 && (e0->target != stop0)) - { - Edge* f0 = e0->reverse->prev; - if (f0->copy > mergeStamp) - { - Point32 d0 = f0->target->point - et0; - if (d0.dot(normal) == 0) - { - btInt64_t dx0 = d0.dot(perp); - btInt64_t dy0 = d0.dot(s); - btInt64_t dxn = (et1 - f0->target->point).dot(perp); - if ((dxn < 0) && ((dx0 == 0) ? (dy0 > 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) < 0)))) - { - e0 = f0; - et0 = e0->target->point; - dx = dxn; - continue; - } - } - else - { - b3Assert((e0 == start0) && (d0.dot(normal) < 0)); - } - } - } - - break; - } - } -#ifdef DEBUG_CONVEX_HULL - b3Printf(" Advanced edges to %d %d\n", et0.index, et1.index); -#endif -} - -void b3ConvexHullInternal::merge(IntermediateHull& h0, IntermediateHull& h1) -{ - if (!h1.maxXy) - { - return; - } - if (!h0.maxXy) - { - h0 = h1; - return; - } - - mergeStamp--; - - Vertex* c0 = NULL; - Edge* toPrev0 = NULL; - Edge* firstNew0 = NULL; - Edge* pendingHead0 = NULL; - Edge* pendingTail0 = NULL; - Vertex* c1 = NULL; - Edge* toPrev1 = NULL; - Edge* firstNew1 = NULL; - Edge* pendingHead1 = NULL; - Edge* pendingTail1 = NULL; - Point32 prevPoint; - - if (mergeProjection(h0, h1, c0, c1)) - { - Point32 s = *c1 - *c0; - Point64 normal = Point32(0, 0, -1).cross(s); - Point64 t = s.cross(normal); - b3Assert(!t.isZero()); - - Edge* e = c0->edges; - Edge* start0 = NULL; - if (e) - { - do - { - btInt64_t dot = (*e->target - *c0).dot(normal); - b3Assert(dot <= 0); - if ((dot == 0) && ((*e->target - *c0).dot(t) > 0)) - { - if (!start0 || (getOrientation(start0, e, s, Point32(0, 0, -1)) == CLOCKWISE)) - { - start0 = e; - } - } - e = e->next; - } while (e != c0->edges); - } - - e = c1->edges; - Edge* start1 = NULL; - if (e) - { - do - { - btInt64_t dot = (*e->target - *c1).dot(normal); - b3Assert(dot <= 0); - if ((dot == 0) && ((*e->target - *c1).dot(t) > 0)) - { - if (!start1 || (getOrientation(start1, e, s, Point32(0, 0, -1)) == COUNTER_CLOCKWISE)) - { - start1 = e; - } - } - e = e->next; - } while (e != c1->edges); - } - - if (start0 || start1) - { - findEdgeForCoplanarFaces(c0, c1, start0, start1, NULL, NULL); - if (start0) - { - c0 = start0->target; - } - if (start1) - { - c1 = start1->target; - } - } - - prevPoint = c1->point; - prevPoint.z++; - } - else - { - prevPoint = c1->point; - prevPoint.x++; - } - - Vertex* first0 = c0; - Vertex* first1 = c1; - bool firstRun = true; - - while (true) - { - Point32 s = *c1 - *c0; - Point32 r = prevPoint - c0->point; - Point64 rxs = r.cross(s); - Point64 sxrxs = s.cross(rxs); - -#ifdef DEBUG_CONVEX_HULL - b3Printf("\n Checking %d %d\n", c0->point.index, c1->point.index); -#endif - Rational64 minCot0(0, 0); - Edge* min0 = findMaxAngle(false, c0, s, rxs, sxrxs, minCot0); - Rational64 minCot1(0, 0); - Edge* min1 = findMaxAngle(true, c1, s, rxs, sxrxs, minCot1); - if (!min0 && !min1) - { - Edge* e = newEdgePair(c0, c1); - e->link(e); - c0->edges = e; - - e = e->reverse; - e->link(e); - c1->edges = e; - return; - } - else - { - int cmp = !min0 ? 1 : !min1 ? -1 : minCot0.compare(minCot1); -#ifdef DEBUG_CONVEX_HULL - b3Printf(" -> Result %d\n", cmp); -#endif - if (firstRun || ((cmp >= 0) ? !minCot1.isNegativeInfinity() : !minCot0.isNegativeInfinity())) - { - Edge* e = newEdgePair(c0, c1); - if (pendingTail0) - { - pendingTail0->prev = e; - } - else - { - pendingHead0 = e; - } - e->next = pendingTail0; - pendingTail0 = e; - - e = e->reverse; - if (pendingTail1) - { - pendingTail1->next = e; - } - else - { - pendingHead1 = e; - } - e->prev = pendingTail1; - pendingTail1 = e; - } - - Edge* e0 = min0; - Edge* e1 = min1; - -#ifdef DEBUG_CONVEX_HULL - b3Printf(" Found min edges to %d %d\n", e0 ? e0->target->point.index : -1, e1 ? e1->target->point.index : -1); -#endif - - if (cmp == 0) - { - findEdgeForCoplanarFaces(c0, c1, e0, e1, NULL, NULL); - } - - if ((cmp >= 0) && e1) - { - if (toPrev1) - { - for (Edge *e = toPrev1->next, *n = NULL; e != min1; e = n) - { - n = e->next; - removeEdgePair(e); - } - } - - if (pendingTail1) - { - if (toPrev1) - { - toPrev1->link(pendingHead1); - } - else - { - min1->prev->link(pendingHead1); - firstNew1 = pendingHead1; - } - pendingTail1->link(min1); - pendingHead1 = NULL; - pendingTail1 = NULL; - } - else if (!toPrev1) - { - firstNew1 = min1; - } - - prevPoint = c1->point; - c1 = e1->target; - toPrev1 = e1->reverse; - } - - if ((cmp <= 0) && e0) - { - if (toPrev0) - { - for (Edge *e = toPrev0->prev, *n = NULL; e != min0; e = n) - { - n = e->prev; - removeEdgePair(e); - } - } - - if (pendingTail0) - { - if (toPrev0) - { - pendingHead0->link(toPrev0); - } - else - { - pendingHead0->link(min0->next); - firstNew0 = pendingHead0; - } - min0->link(pendingTail0); - pendingHead0 = NULL; - pendingTail0 = NULL; - } - else if (!toPrev0) - { - firstNew0 = min0; - } - - prevPoint = c0->point; - c0 = e0->target; - toPrev0 = e0->reverse; - } - } - - if ((c0 == first0) && (c1 == first1)) - { - if (toPrev0 == NULL) - { - pendingHead0->link(pendingTail0); - c0->edges = pendingTail0; - } - else - { - for (Edge *e = toPrev0->prev, *n = NULL; e != firstNew0; e = n) - { - n = e->prev; - removeEdgePair(e); - } - if (pendingTail0) - { - pendingHead0->link(toPrev0); - firstNew0->link(pendingTail0); - } - } - - if (toPrev1 == NULL) - { - pendingTail1->link(pendingHead1); - c1->edges = pendingTail1; - } - else - { - for (Edge *e = toPrev1->next, *n = NULL; e != firstNew1; e = n) - { - n = e->next; - removeEdgePair(e); - } - if (pendingTail1) - { - toPrev1->link(pendingHead1); - pendingTail1->link(firstNew1); - } - } - - return; - } - - firstRun = false; - } -} - -static bool b3PointCmp(const b3ConvexHullInternal::Point32& p, const b3ConvexHullInternal::Point32& q) -{ - return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z)))); -} - -void b3ConvexHullInternal::compute(const void* coords, bool doubleCoords, int stride, int count) -{ - b3Vector3 min = b3MakeVector3(b3Scalar(1e30), b3Scalar(1e30), b3Scalar(1e30)), max = b3MakeVector3(b3Scalar(-1e30), b3Scalar(-1e30), b3Scalar(-1e30)); - const char* ptr = (const char*)coords; - if (doubleCoords) - { - for (int i = 0; i < count; i++) - { - const double* v = (const double*)ptr; - b3Vector3 p = b3MakeVector3((b3Scalar)v[0], (b3Scalar)v[1], (b3Scalar)v[2]); - ptr += stride; - min.setMin(p); - max.setMax(p); - } - } - else - { - for (int i = 0; i < count; i++) - { - const float* v = (const float*)ptr; - b3Vector3 p = b3MakeVector3(v[0], v[1], v[2]); - ptr += stride; - min.setMin(p); - max.setMax(p); - } - } - - b3Vector3 s = max - min; - maxAxis = s.maxAxis(); - minAxis = s.minAxis(); - if (minAxis == maxAxis) - { - minAxis = (maxAxis + 1) % 3; - } - medAxis = 3 - maxAxis - minAxis; - - s /= b3Scalar(10216); - if (((medAxis + 1) % 3) != maxAxis) - { - s *= -1; - } - scaling = s; - - if (s[0] != 0) - { - s[0] = b3Scalar(1) / s[0]; - } - if (s[1] != 0) - { - s[1] = b3Scalar(1) / s[1]; - } - if (s[2] != 0) - { - s[2] = b3Scalar(1) / s[2]; - } - - center = (min + max) * b3Scalar(0.5); - - b3AlignedObjectArray points; - points.resize(count); - ptr = (const char*)coords; - if (doubleCoords) - { - for (int i = 0; i < count; i++) - { - const double* v = (const double*)ptr; - b3Vector3 p = b3MakeVector3((b3Scalar)v[0], (b3Scalar)v[1], (b3Scalar)v[2]); - ptr += stride; - p = (p - center) * s; - points[i].x = (btInt32_t)p[medAxis]; - points[i].y = (btInt32_t)p[maxAxis]; - points[i].z = (btInt32_t)p[minAxis]; - points[i].index = i; - } - } - else - { - for (int i = 0; i < count; i++) - { - const float* v = (const float*)ptr; - b3Vector3 p = b3MakeVector3(v[0], v[1], v[2]); - ptr += stride; - p = (p - center) * s; - points[i].x = (btInt32_t)p[medAxis]; - points[i].y = (btInt32_t)p[maxAxis]; - points[i].z = (btInt32_t)p[minAxis]; - points[i].index = i; - } - } - points.quickSort(b3PointCmp); - - vertexPool.reset(); - vertexPool.setArraySize(count); - originalVertices.resize(count); - for (int i = 0; i < count; i++) - { - Vertex* v = vertexPool.newObject(); - v->edges = NULL; - v->point = points[i]; - v->copy = -1; - originalVertices[i] = v; - } - - points.clear(); - - edgePool.reset(); - edgePool.setArraySize(6 * count); - - usedEdgePairs = 0; - maxUsedEdgePairs = 0; - - mergeStamp = -3; - - IntermediateHull hull; - computeInternal(0, count, hull); - vertexList = hull.minXy; -#ifdef DEBUG_CONVEX_HULL - b3Printf("max. edges %d (3v = %d)", maxUsedEdgePairs, 3 * count); -#endif -} - -b3Vector3 b3ConvexHullInternal::toBtVector(const Point32& v) -{ - b3Vector3 p; - p[medAxis] = b3Scalar(v.x); - p[maxAxis] = b3Scalar(v.y); - p[minAxis] = b3Scalar(v.z); - return p * scaling; -} - -b3Vector3 b3ConvexHullInternal::getBtNormal(Face* face) -{ - return toBtVector(face->dir0).cross(toBtVector(face->dir1)).normalized(); -} - -b3Vector3 b3ConvexHullInternal::getCoordinates(const Vertex* v) -{ - b3Vector3 p; - p[medAxis] = v->xvalue(); - p[maxAxis] = v->yvalue(); - p[minAxis] = v->zvalue(); - return p * scaling + center; -} - -b3Scalar b3ConvexHullInternal::shrink(b3Scalar amount, b3Scalar clampAmount) -{ - if (!vertexList) - { - return 0; - } - int stamp = --mergeStamp; - b3AlignedObjectArray stack; - vertexList->copy = stamp; - stack.push_back(vertexList); - b3AlignedObjectArray faces; - - Point32 ref = vertexList->point; - Int128 hullCenterX(0, 0); - Int128 hullCenterY(0, 0); - Int128 hullCenterZ(0, 0); - Int128 volume(0, 0); - - while (stack.size() > 0) - { - Vertex* v = stack[stack.size() - 1]; - stack.pop_back(); - Edge* e = v->edges; - if (e) - { - do - { - if (e->target->copy != stamp) - { - e->target->copy = stamp; - stack.push_back(e->target); - } - if (e->copy != stamp) - { - Face* face = facePool.newObject(); - face->init(e->target, e->reverse->prev->target, v); - faces.push_back(face); - Edge* f = e; - - Vertex* a = NULL; - Vertex* b = NULL; - do - { - if (a && b) - { - btInt64_t vol = (v->point - ref).dot((a->point - ref).cross(b->point - ref)); - b3Assert(vol >= 0); - Point32 c = v->point + a->point + b->point + ref; - hullCenterX += vol * c.x; - hullCenterY += vol * c.y; - hullCenterZ += vol * c.z; - volume += vol; - } - - b3Assert(f->copy != stamp); - f->copy = stamp; - f->face = face; - - a = b; - b = f->target; - - f = f->reverse->prev; - } while (f != e); - } - e = e->next; - } while (e != v->edges); - } - } - - if (volume.getSign() <= 0) - { - return 0; - } - - b3Vector3 hullCenter; - hullCenter[medAxis] = hullCenterX.toScalar(); - hullCenter[maxAxis] = hullCenterY.toScalar(); - hullCenter[minAxis] = hullCenterZ.toScalar(); - hullCenter /= 4 * volume.toScalar(); - hullCenter *= scaling; - - int faceCount = faces.size(); - - if (clampAmount > 0) - { - b3Scalar minDist = B3_INFINITY; - for (int i = 0; i < faceCount; i++) - { - b3Vector3 normal = getBtNormal(faces[i]); - b3Scalar dist = normal.dot(toBtVector(faces[i]->origin) - hullCenter); - if (dist < minDist) - { - minDist = dist; - } - } - - if (minDist <= 0) - { - return 0; - } - - amount = b3Min(amount, minDist * clampAmount); - } - - unsigned int seed = 243703; - for (int i = 0; i < faceCount; i++, seed = 1664525 * seed + 1013904223) - { - b3Swap(faces[i], faces[seed % faceCount]); - } - - for (int i = 0; i < faceCount; i++) - { - if (!shiftFace(faces[i], amount, stack)) - { - return -amount; - } - } - - return amount; -} - -bool b3ConvexHullInternal::shiftFace(Face* face, b3Scalar amount, b3AlignedObjectArray stack) -{ - b3Vector3 origShift = getBtNormal(face) * -amount; - if (scaling[0] != 0) - { - origShift[0] /= scaling[0]; - } - if (scaling[1] != 0) - { - origShift[1] /= scaling[1]; - } - if (scaling[2] != 0) - { - origShift[2] /= scaling[2]; - } - Point32 shift((btInt32_t)origShift[medAxis], (btInt32_t)origShift[maxAxis], (btInt32_t)origShift[minAxis]); - if (shift.isZero()) - { - return true; - } - Point64 normal = face->getNormal(); -#ifdef DEBUG_CONVEX_HULL - b3Printf("\nShrinking face (%d %d %d) (%d %d %d) (%d %d %d) by (%d %d %d)\n", - face->origin.x, face->origin.y, face->origin.z, face->dir0.x, face->dir0.y, face->dir0.z, face->dir1.x, face->dir1.y, face->dir1.z, shift.x, shift.y, shift.z); -#endif - btInt64_t origDot = face->origin.dot(normal); - Point32 shiftedOrigin = face->origin + shift; - btInt64_t shiftedDot = shiftedOrigin.dot(normal); - b3Assert(shiftedDot <= origDot); - if (shiftedDot >= origDot) - { - return false; - } - - Edge* intersection = NULL; - - Edge* startEdge = face->nearbyVertex->edges; -#ifdef DEBUG_CONVEX_HULL - b3Printf("Start edge is "); - startEdge->print(); - b3Printf(", normal is (%lld %lld %lld), shifted dot is %lld\n", normal.x, normal.y, normal.z, shiftedDot); -#endif - Rational128 optDot = face->nearbyVertex->dot(normal); - int cmp = optDot.compare(shiftedDot); -#ifdef SHOW_ITERATIONS - int n = 0; -#endif - if (cmp >= 0) - { - Edge* e = startEdge; - do - { -#ifdef SHOW_ITERATIONS - n++; -#endif - Rational128 dot = e->target->dot(normal); - b3Assert(dot.compare(origDot) <= 0); -#ifdef DEBUG_CONVEX_HULL - b3Printf("Moving downwards, edge is "); - e->print(); - b3Printf(", dot is %f (%f %lld)\n", (float)dot.toScalar(), (float)optDot.toScalar(), shiftedDot); -#endif - if (dot.compare(optDot) < 0) - { - int c = dot.compare(shiftedDot); - optDot = dot; - e = e->reverse; - startEdge = e; - if (c < 0) - { - intersection = e; - break; - } - cmp = c; - } - e = e->prev; - } while (e != startEdge); - - if (!intersection) - { - return false; - } - } - else - { - Edge* e = startEdge; - do - { -#ifdef SHOW_ITERATIONS - n++; -#endif - Rational128 dot = e->target->dot(normal); - b3Assert(dot.compare(origDot) <= 0); -#ifdef DEBUG_CONVEX_HULL - b3Printf("Moving upwards, edge is "); - e->print(); - b3Printf(", dot is %f (%f %lld)\n", (float)dot.toScalar(), (float)optDot.toScalar(), shiftedDot); -#endif - if (dot.compare(optDot) > 0) - { - cmp = dot.compare(shiftedDot); - if (cmp >= 0) - { - intersection = e; - break; - } - optDot = dot; - e = e->reverse; - startEdge = e; - } - e = e->prev; - } while (e != startEdge); - - if (!intersection) - { - return true; - } - } - -#ifdef SHOW_ITERATIONS - b3Printf("Needed %d iterations to find initial intersection\n", n); -#endif - - if (cmp == 0) - { - Edge* e = intersection->reverse->next; -#ifdef SHOW_ITERATIONS - n = 0; -#endif - while (e->target->dot(normal).compare(shiftedDot) <= 0) - { -#ifdef SHOW_ITERATIONS - n++; -#endif - e = e->next; - if (e == intersection->reverse) - { - return true; - } -#ifdef DEBUG_CONVEX_HULL - b3Printf("Checking for outwards edge, current edge is "); - e->print(); - b3Printf("\n"); -#endif - } -#ifdef SHOW_ITERATIONS - b3Printf("Needed %d iterations to check for complete containment\n", n); -#endif - } - - Edge* firstIntersection = NULL; - Edge* faceEdge = NULL; - Edge* firstFaceEdge = NULL; - -#ifdef SHOW_ITERATIONS - int m = 0; -#endif - while (true) - { -#ifdef SHOW_ITERATIONS - m++; -#endif -#ifdef DEBUG_CONVEX_HULL - b3Printf("Intersecting edge is "); - intersection->print(); - b3Printf("\n"); -#endif - if (cmp == 0) - { - Edge* e = intersection->reverse->next; - startEdge = e; -#ifdef SHOW_ITERATIONS - n = 0; -#endif - while (true) - { -#ifdef SHOW_ITERATIONS - n++; -#endif - if (e->target->dot(normal).compare(shiftedDot) >= 0) - { - break; - } - intersection = e->reverse; - e = e->next; - if (e == startEdge) - { - return true; - } - } -#ifdef SHOW_ITERATIONS - b3Printf("Needed %d iterations to advance intersection\n", n); -#endif - } - -#ifdef DEBUG_CONVEX_HULL - b3Printf("Advanced intersecting edge to "); - intersection->print(); - b3Printf(", cmp = %d\n", cmp); -#endif - - if (!firstIntersection) - { - firstIntersection = intersection; - } - else if (intersection == firstIntersection) - { - break; - } - - int prevCmp = cmp; - Edge* prevIntersection = intersection; - Edge* prevFaceEdge = faceEdge; - - Edge* e = intersection->reverse; -#ifdef SHOW_ITERATIONS - n = 0; -#endif - while (true) - { -#ifdef SHOW_ITERATIONS - n++; -#endif - e = e->reverse->prev; - b3Assert(e != intersection->reverse); - cmp = e->target->dot(normal).compare(shiftedDot); -#ifdef DEBUG_CONVEX_HULL - b3Printf("Testing edge "); - e->print(); - b3Printf(" -> cmp = %d\n", cmp); -#endif - if (cmp >= 0) - { - intersection = e; - break; - } - } -#ifdef SHOW_ITERATIONS - b3Printf("Needed %d iterations to find other intersection of face\n", n); -#endif - - if (cmp > 0) - { - Vertex* removed = intersection->target; - e = intersection->reverse; - if (e->prev == e) - { - removed->edges = NULL; - } - else - { - removed->edges = e->prev; - e->prev->link(e->next); - e->link(e); - } -#ifdef DEBUG_CONVEX_HULL - b3Printf("1: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z); -#endif - - Point64 n0 = intersection->face->getNormal(); - Point64 n1 = intersection->reverse->face->getNormal(); - btInt64_t m00 = face->dir0.dot(n0); - btInt64_t m01 = face->dir1.dot(n0); - btInt64_t m10 = face->dir0.dot(n1); - btInt64_t m11 = face->dir1.dot(n1); - btInt64_t r0 = (intersection->face->origin - shiftedOrigin).dot(n0); - btInt64_t r1 = (intersection->reverse->face->origin - shiftedOrigin).dot(n1); - Int128 det = Int128::mul(m00, m11) - Int128::mul(m01, m10); - b3Assert(det.getSign() != 0); - Vertex* v = vertexPool.newObject(); - v->point.index = -1; - v->copy = -1; - v->point128 = PointR128(Int128::mul(face->dir0.x * r0, m11) - Int128::mul(face->dir0.x * r1, m01) + Int128::mul(face->dir1.x * r1, m00) - Int128::mul(face->dir1.x * r0, m10) + det * shiftedOrigin.x, - Int128::mul(face->dir0.y * r0, m11) - Int128::mul(face->dir0.y * r1, m01) + Int128::mul(face->dir1.y * r1, m00) - Int128::mul(face->dir1.y * r0, m10) + det * shiftedOrigin.y, - Int128::mul(face->dir0.z * r0, m11) - Int128::mul(face->dir0.z * r1, m01) + Int128::mul(face->dir1.z * r1, m00) - Int128::mul(face->dir1.z * r0, m10) + det * shiftedOrigin.z, - det); - v->point.x = (btInt32_t)v->point128.xvalue(); - v->point.y = (btInt32_t)v->point128.yvalue(); - v->point.z = (btInt32_t)v->point128.zvalue(); - intersection->target = v; - v->edges = e; - - stack.push_back(v); - stack.push_back(removed); - stack.push_back(NULL); - } - - if (cmp || prevCmp || (prevIntersection->reverse->next->target != intersection->target)) - { - faceEdge = newEdgePair(prevIntersection->target, intersection->target); - if (prevCmp == 0) - { - faceEdge->link(prevIntersection->reverse->next); - } - if ((prevCmp == 0) || prevFaceEdge) - { - prevIntersection->reverse->link(faceEdge); - } - if (cmp == 0) - { - intersection->reverse->prev->link(faceEdge->reverse); - } - faceEdge->reverse->link(intersection->reverse); - } - else - { - faceEdge = prevIntersection->reverse->next; - } - - if (prevFaceEdge) - { - if (prevCmp > 0) - { - faceEdge->link(prevFaceEdge->reverse); - } - else if (faceEdge != prevFaceEdge->reverse) - { - stack.push_back(prevFaceEdge->target); - while (faceEdge->next != prevFaceEdge->reverse) - { - Vertex* removed = faceEdge->next->target; - removeEdgePair(faceEdge->next); - stack.push_back(removed); -#ifdef DEBUG_CONVEX_HULL - b3Printf("2: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z); -#endif - } - stack.push_back(NULL); - } - } - faceEdge->face = face; - faceEdge->reverse->face = intersection->face; - - if (!firstFaceEdge) - { - firstFaceEdge = faceEdge; - } - } -#ifdef SHOW_ITERATIONS - b3Printf("Needed %d iterations to process all intersections\n", m); -#endif - - if (cmp > 0) - { - firstFaceEdge->reverse->target = faceEdge->target; - firstIntersection->reverse->link(firstFaceEdge); - firstFaceEdge->link(faceEdge->reverse); - } - else if (firstFaceEdge != faceEdge->reverse) - { - stack.push_back(faceEdge->target); - while (firstFaceEdge->next != faceEdge->reverse) - { - Vertex* removed = firstFaceEdge->next->target; - removeEdgePair(firstFaceEdge->next); - stack.push_back(removed); -#ifdef DEBUG_CONVEX_HULL - b3Printf("3: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z); -#endif - } - stack.push_back(NULL); - } - - b3Assert(stack.size() > 0); - vertexList = stack[0]; - -#ifdef DEBUG_CONVEX_HULL - b3Printf("Removing part\n"); -#endif -#ifdef SHOW_ITERATIONS - n = 0; -#endif - int pos = 0; - while (pos < stack.size()) - { - int end = stack.size(); - while (pos < end) - { - Vertex* kept = stack[pos++]; -#ifdef DEBUG_CONVEX_HULL - kept->print(); -#endif - bool deeper = false; - Vertex* removed; - while ((removed = stack[pos++]) != NULL) - { -#ifdef SHOW_ITERATIONS - n++; -#endif - kept->receiveNearbyFaces(removed); - while (removed->edges) - { - if (!deeper) - { - deeper = true; - stack.push_back(kept); - } - stack.push_back(removed->edges->target); - removeEdgePair(removed->edges); - } - } - if (deeper) - { - stack.push_back(NULL); - } - } - } -#ifdef SHOW_ITERATIONS - b3Printf("Needed %d iterations to remove part\n", n); -#endif - - stack.resize(0); - face->origin = shiftedOrigin; - - return true; -} - -static int getVertexCopy(b3ConvexHullInternal::Vertex* vertex, b3AlignedObjectArray& vertices) -{ - int index = vertex->copy; - if (index < 0) - { - index = vertices.size(); - vertex->copy = index; - vertices.push_back(vertex); -#ifdef DEBUG_CONVEX_HULL - b3Printf("Vertex %d gets index *%d\n", vertex->point.index, index); -#endif - } - return index; -} - -b3Scalar b3ConvexHullComputer::compute(const void* coords, bool doubleCoords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp) -{ - if (count <= 0) - { - vertices.clear(); - edges.clear(); - faces.clear(); - return 0; - } - - b3ConvexHullInternal hull; - hull.compute(coords, doubleCoords, stride, count); - - b3Scalar shift = 0; - if ((shrink > 0) && ((shift = hull.shrink(shrink, shrinkClamp)) < 0)) - { - vertices.clear(); - edges.clear(); - faces.clear(); - return shift; - } - - vertices.resize(0); - edges.resize(0); - faces.resize(0); - - b3AlignedObjectArray oldVertices; - getVertexCopy(hull.vertexList, oldVertices); - int copied = 0; - while (copied < oldVertices.size()) - { - b3ConvexHullInternal::Vertex* v = oldVertices[copied]; - vertices.push_back(hull.getCoordinates(v)); - b3ConvexHullInternal::Edge* firstEdge = v->edges; - if (firstEdge) - { - int firstCopy = -1; - int prevCopy = -1; - b3ConvexHullInternal::Edge* e = firstEdge; - do - { - if (e->copy < 0) - { - int s = edges.size(); - edges.push_back(Edge()); - edges.push_back(Edge()); - Edge* c = &edges[s]; - Edge* r = &edges[s + 1]; - e->copy = s; - e->reverse->copy = s + 1; - c->reverse = 1; - r->reverse = -1; - c->targetVertex = getVertexCopy(e->target, oldVertices); - r->targetVertex = copied; -#ifdef DEBUG_CONVEX_HULL - b3Printf(" CREATE: Vertex *%d has edge to *%d\n", copied, c->getTargetVertex()); -#endif - } - if (prevCopy >= 0) - { - edges[e->copy].next = prevCopy - e->copy; - } - else - { - firstCopy = e->copy; - } - prevCopy = e->copy; - e = e->next; - } while (e != firstEdge); - edges[firstCopy].next = prevCopy - firstCopy; - } - copied++; - } - - for (int i = 0; i < copied; i++) - { - b3ConvexHullInternal::Vertex* v = oldVertices[i]; - b3ConvexHullInternal::Edge* firstEdge = v->edges; - if (firstEdge) - { - b3ConvexHullInternal::Edge* e = firstEdge; - do - { - if (e->copy >= 0) - { -#ifdef DEBUG_CONVEX_HULL - b3Printf("Vertex *%d has edge to *%d\n", i, edges[e->copy].getTargetVertex()); -#endif - faces.push_back(e->copy); - b3ConvexHullInternal::Edge* f = e; - do - { -#ifdef DEBUG_CONVEX_HULL - b3Printf(" Face *%d\n", edges[f->copy].getTargetVertex()); -#endif - f->copy = -1; - f = f->reverse->prev; - } while (f != e); - } - e = e->next; - } while (e != firstEdge); - } - } - - return shift; -} diff --git a/thirdparty/bullet/Bullet3Geometry/b3ConvexHullComputer.h b/thirdparty/bullet/Bullet3Geometry/b3ConvexHullComputer.h deleted file mode 100644 index 8852c5a524f..00000000000 --- a/thirdparty/bullet/Bullet3Geometry/b3ConvexHullComputer.h +++ /dev/null @@ -1,99 +0,0 @@ -/* -Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_CONVEX_HULL_COMPUTER_H -#define B3_CONVEX_HULL_COMPUTER_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -/// Convex hull implementation based on Preparata and Hong -/// See http://code.google.com/p/bullet/issues/detail?id=275 -/// Ole Kniemeyer, MAXON Computer GmbH -class b3ConvexHullComputer -{ -private: - b3Scalar compute(const void* coords, bool doubleCoords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp); - -public: - class Edge - { - private: - int next; - int reverse; - int targetVertex; - - friend class b3ConvexHullComputer; - - public: - int getSourceVertex() const - { - return (this + reverse)->targetVertex; - } - - int getTargetVertex() const - { - return targetVertex; - } - - const Edge* getNextEdgeOfVertex() const // clockwise list of all edges of a vertex - { - return this + next; - } - - const Edge* getNextEdgeOfFace() const // counter-clockwise list of all edges of a face - { - return (this + reverse)->getNextEdgeOfVertex(); - } - - const Edge* getReverseEdge() const - { - return this + reverse; - } - }; - - // Vertices of the output hull - b3AlignedObjectArray vertices; - - // Edges of the output hull - b3AlignedObjectArray edges; - - // Faces of the convex hull. Each entry is an index into the "edges" array pointing to an edge of the face. Faces are planar n-gons - b3AlignedObjectArray faces; - - /* - Compute convex hull of "count" vertices stored in "coords". "stride" is the difference in bytes - between the addresses of consecutive vertices. If "shrink" is positive, the convex hull is shrunken - by that amount (each face is moved by "shrink" length units towards the center along its normal). - If "shrinkClamp" is positive, "shrink" is clamped to not exceed "shrinkClamp * innerRadius", where "innerRadius" - is the minimum distance of a face to the center of the convex hull. - - The returned value is the amount by which the hull has been shrunken. If it is negative, the amount was so large - that the resulting convex hull is empty. - - The output convex hull can be found in the member variables "vertices", "edges", "faces". - */ - b3Scalar compute(const float* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp) - { - return compute(coords, false, stride, count, shrink, shrinkClamp); - } - - // same as above, but double precision - b3Scalar compute(const double* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp) - { - return compute(coords, true, stride, count, shrink, shrinkClamp); - } -}; - -#endif //B3_CONVEX_HULL_COMPUTER_H diff --git a/thirdparty/bullet/Bullet3Geometry/b3GeometryUtil.cpp b/thirdparty/bullet/Bullet3Geometry/b3GeometryUtil.cpp deleted file mode 100644 index 1c5d5a73377..00000000000 --- a/thirdparty/bullet/Bullet3Geometry/b3GeometryUtil.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* -Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3GeometryUtil.h" - -/* - Make sure this dummy function never changes so that it - can be used by probes that are checking whether the - library is actually installed. -*/ -extern "C" -{ - void b3BulletMathProbe(); - - void b3BulletMathProbe() {} -} - -bool b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray& planeEquations, const b3Vector3& point, b3Scalar margin) -{ - int numbrushes = planeEquations.size(); - for (int i = 0; i < numbrushes; i++) - { - const b3Vector3& N1 = planeEquations[i]; - b3Scalar dist = b3Scalar(N1.dot(point)) + b3Scalar(N1[3]) - margin; - if (dist > b3Scalar(0.)) - { - return false; - } - } - return true; -} - -bool b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray& vertices, b3Scalar margin) -{ - int numvertices = vertices.size(); - for (int i = 0; i < numvertices; i++) - { - const b3Vector3& N1 = vertices[i]; - b3Scalar dist = b3Scalar(planeNormal.dot(N1)) + b3Scalar(planeNormal[3]) - margin; - if (dist > b3Scalar(0.)) - { - return false; - } - } - return true; -} - -bool notExist(const b3Vector3& planeEquation, const b3AlignedObjectArray& planeEquations); - -bool notExist(const b3Vector3& planeEquation, const b3AlignedObjectArray& planeEquations) -{ - int numbrushes = planeEquations.size(); - for (int i = 0; i < numbrushes; i++) - { - const b3Vector3& N1 = planeEquations[i]; - if (planeEquation.dot(N1) > b3Scalar(0.999)) - { - return false; - } - } - return true; -} - -void b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray& vertices, b3AlignedObjectArray& planeEquationsOut) -{ - const int numvertices = vertices.size(); - // brute force: - for (int i = 0; i < numvertices; i++) - { - const b3Vector3& N1 = vertices[i]; - - for (int j = i + 1; j < numvertices; j++) - { - const b3Vector3& N2 = vertices[j]; - - for (int k = j + 1; k < numvertices; k++) - { - const b3Vector3& N3 = vertices[k]; - - b3Vector3 planeEquation, edge0, edge1; - edge0 = N2 - N1; - edge1 = N3 - N1; - b3Scalar normalSign = b3Scalar(1.); - for (int ww = 0; ww < 2; ww++) - { - planeEquation = normalSign * edge0.cross(edge1); - if (planeEquation.length2() > b3Scalar(0.0001)) - { - planeEquation.normalize(); - if (notExist(planeEquation, planeEquationsOut)) - { - planeEquation[3] = -planeEquation.dot(N1); - - //check if inside, and replace supportingVertexOut if needed - if (areVerticesBehindPlane(planeEquation, vertices, b3Scalar(0.01))) - { - planeEquationsOut.push_back(planeEquation); - } - } - } - normalSign = b3Scalar(-1.); - } - } - } - } -} - -void b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray& planeEquations, b3AlignedObjectArray& verticesOut) -{ - const int numbrushes = planeEquations.size(); - // brute force: - for (int i = 0; i < numbrushes; i++) - { - const b3Vector3& N1 = planeEquations[i]; - - for (int j = i + 1; j < numbrushes; j++) - { - const b3Vector3& N2 = planeEquations[j]; - - for (int k = j + 1; k < numbrushes; k++) - { - const b3Vector3& N3 = planeEquations[k]; - - b3Vector3 n2n3; - n2n3 = N2.cross(N3); - b3Vector3 n3n1; - n3n1 = N3.cross(N1); - b3Vector3 n1n2; - n1n2 = N1.cross(N2); - - if ((n2n3.length2() > b3Scalar(0.0001)) && - (n3n1.length2() > b3Scalar(0.0001)) && - (n1n2.length2() > b3Scalar(0.0001))) - { - //point P out of 3 plane equations: - - // d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 ) - //P = ------------------------------------------------------------------------- - // N1 . ( N2 * N3 ) - - b3Scalar quotient = (N1.dot(n2n3)); - if (b3Fabs(quotient) > b3Scalar(0.000001)) - { - quotient = b3Scalar(-1.) / quotient; - n2n3 *= N1[3]; - n3n1 *= N2[3]; - n1n2 *= N3[3]; - b3Vector3 potentialVertex = n2n3; - potentialVertex += n3n1; - potentialVertex += n1n2; - potentialVertex *= quotient; - - //check if inside, and replace supportingVertexOut if needed - if (isPointInsidePlanes(planeEquations, potentialVertex, b3Scalar(0.01))) - { - verticesOut.push_back(potentialVertex); - } - } - } - } - } - } -} diff --git a/thirdparty/bullet/Bullet3Geometry/b3GeometryUtil.h b/thirdparty/bullet/Bullet3Geometry/b3GeometryUtil.h deleted file mode 100644 index 9a7bf7e3a7e..00000000000 --- a/thirdparty/bullet/Bullet3Geometry/b3GeometryUtil.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GEOMETRY_UTIL_H -#define B3_GEOMETRY_UTIL_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -///The b3GeometryUtil helper class provides a few methods to convert between plane equations and vertices. -class b3GeometryUtil -{ -public: - static void getPlaneEquationsFromVertices(b3AlignedObjectArray& vertices, b3AlignedObjectArray& planeEquationsOut); - - static void getVerticesFromPlaneEquations(const b3AlignedObjectArray& planeEquations, b3AlignedObjectArray& verticesOut); - - static bool isInside(const b3AlignedObjectArray& vertices, const b3Vector3& planeNormal, b3Scalar margin); - - static bool isPointInsidePlanes(const b3AlignedObjectArray& planeEquations, const b3Vector3& point, b3Scalar margin); - - static bool areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray& vertices, b3Scalar margin); -}; - -#endif //B3_GEOMETRY_UTIL_H diff --git a/thirdparty/bullet/Bullet3Geometry/b3GrahamScan2dConvexHull.h b/thirdparty/bullet/Bullet3Geometry/b3GrahamScan2dConvexHull.h deleted file mode 100644 index 8881c9a6384..00000000000 --- a/thirdparty/bullet/Bullet3Geometry/b3GrahamScan2dConvexHull.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GRAHAM_SCAN_2D_CONVEX_HULL_H -#define B3_GRAHAM_SCAN_2D_CONVEX_HULL_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -struct b3GrahamVector3 : public b3Vector3 -{ - b3GrahamVector3(const b3Vector3& org, int orgIndex) - : b3Vector3(org), - m_orgIndex(orgIndex) - { - } - b3Scalar m_angle; - int m_orgIndex; -}; - -struct b3AngleCompareFunc -{ - b3Vector3 m_anchor; - b3AngleCompareFunc(const b3Vector3& anchor) - : m_anchor(anchor) - { - } - bool operator()(const b3GrahamVector3& a, const b3GrahamVector3& b) const - { - if (a.m_angle != b.m_angle) - return a.m_angle < b.m_angle; - else - { - b3Scalar al = (a - m_anchor).length2(); - b3Scalar bl = (b - m_anchor).length2(); - if (al != bl) - return al < bl; - else - { - return a.m_orgIndex < b.m_orgIndex; - } - } - } -}; - -inline void b3GrahamScanConvexHull2D(b3AlignedObjectArray& originalPoints, b3AlignedObjectArray& hull, const b3Vector3& normalAxis) -{ - b3Vector3 axis0, axis1; - b3PlaneSpace1(normalAxis, axis0, axis1); - - if (originalPoints.size() <= 1) - { - for (int i = 0; i < originalPoints.size(); i++) - hull.push_back(originalPoints[0]); - return; - } - //step1 : find anchor point with smallest projection on axis0 and move it to first location - for (int i = 0; i < originalPoints.size(); i++) - { - // const b3Vector3& left = originalPoints[i]; - // const b3Vector3& right = originalPoints[0]; - b3Scalar projL = originalPoints[i].dot(axis0); - b3Scalar projR = originalPoints[0].dot(axis0); - if (projL < projR) - { - originalPoints.swap(0, i); - } - } - - //also precompute angles - originalPoints[0].m_angle = -1e30f; - for (int i = 1; i < originalPoints.size(); i++) - { - b3Vector3 xvec = axis0; - b3Vector3 ar = originalPoints[i] - originalPoints[0]; - originalPoints[i].m_angle = b3Cross(xvec, ar).dot(normalAxis) / ar.length(); - } - - //step 2: sort all points, based on 'angle' with this anchor - b3AngleCompareFunc comp(originalPoints[0]); - originalPoints.quickSortInternal(comp, 1, originalPoints.size() - 1); - - int i; - for (i = 0; i < 2; i++) - hull.push_back(originalPoints[i]); - - //step 3: keep all 'convex' points and discard concave points (using back tracking) - for (; i != originalPoints.size(); i++) - { - bool isConvex = false; - while (!isConvex && hull.size() > 1) - { - b3Vector3& a = hull[hull.size() - 2]; - b3Vector3& b = hull[hull.size() - 1]; - isConvex = b3Cross(a - b, a - originalPoints[i]).dot(normalAxis) > 0; - if (!isConvex) - hull.pop_back(); - else - hull.push_back(originalPoints[i]); - } - } -} - -#endif //B3_GRAHAM_SCAN_2D_CONVEX_HULL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h deleted file mode 100644 index b296992525f..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h +++ /dev/null @@ -1,42 +0,0 @@ - -#ifndef B3_GPU_BROADPHASE_INTERFACE_H -#define B3_GPU_BROADPHASE_INTERFACE_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3Vector3.h" -#include "b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" - -class b3GpuBroadphaseInterface -{ -public: - typedef class b3GpuBroadphaseInterface*(CreateFunc)(cl_context ctx, cl_device_id device, cl_command_queue q); - - virtual ~b3GpuBroadphaseInterface() - { - } - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) = 0; - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) = 0; - - virtual void calculateOverlappingPairs(int maxPairs) = 0; - virtual void calculateOverlappingPairsHost(int maxPairs) = 0; - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu() = 0; - - virtual cl_mem getAabbBufferWS() = 0; - virtual int getNumOverlap() = 0; - virtual cl_mem getOverlappingPairBuffer() = 0; - - virtual b3OpenCLArray& getAllAabbsGPU() = 0; - virtual b3AlignedObjectArray& getAllAabbsCPU() = 0; - - virtual b3OpenCLArray& getOverlappingPairsGPU() = 0; - virtual b3OpenCLArray& getSmallAabbIndicesGPU() = 0; - virtual b3OpenCLArray& getLargeAabbIndicesGPU() = 0; -}; - -#endif //B3_GPU_BROADPHASE_INTERFACE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp deleted file mode 100644 index e714fadac30..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ /dev/null @@ -1,338 +0,0 @@ - -#include "b3GpuGridBroadphase.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "kernels/gridBroadphaseKernels.h" -#include "kernels/sapKernels.h" -//#include "kernels/gridBroadphase.cl" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" -#define B3_GRID_BROADPHASE_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl" - -cl_kernel kCalcHashAABB; -cl_kernel kClearCellStart; -cl_kernel kFindCellStart; -cl_kernel kFindOverlappingPairs; -cl_kernel m_copyAabbsKernel; -cl_kernel m_sap2Kernel; - -//int maxPairsPerBody = 64; -int maxBodiesPerCell = 256; //?? - -b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q) - : m_context(ctx), - m_device(device), - m_queue(q), - m_allAabbsGPU1(ctx, q), - m_smallAabbsMappingGPU(ctx, q), - m_largeAabbsMappingGPU(ctx, q), - m_gpuPairs(ctx, q), - - m_hashGpu(ctx, q), - - m_cellStartGpu(ctx, q), - m_paramsGPU(ctx, q) -{ - b3Vector3 gridSize = b3MakeVector3(3, 3, 3); - b3Vector3 invGridSize = b3MakeVector3(1.f / gridSize[0], 1.f / gridSize[1], 1.f / gridSize[2]); - - m_paramsCPU.m_gridSize[0] = 128; - m_paramsCPU.m_gridSize[1] = 128; - m_paramsCPU.m_gridSize[2] = 128; - m_paramsCPU.m_gridSize[3] = maxBodiesPerCell; - m_paramsCPU.setMaxBodiesPerCell(maxBodiesPerCell); - m_paramsCPU.m_invCellSize[0] = invGridSize[0]; - m_paramsCPU.m_invCellSize[1] = invGridSize[1]; - m_paramsCPU.m_invCellSize[2] = invGridSize[2]; - m_paramsCPU.m_invCellSize[3] = 0.f; - m_paramsGPU.push_back(m_paramsCPU); - - cl_int errNum = 0; - - { - const char* sapSrc = sapCL; - cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH); - b3Assert(errNum == CL_SUCCESS); - m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg); - m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg); - b3Assert(errNum == CL_SUCCESS); - } - - { - cl_program gridProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, gridBroadphaseCL, &errNum, "", B3_GRID_BROADPHASE_PATH); - b3Assert(errNum == CL_SUCCESS); - - kCalcHashAABB = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kCalcHashAABB", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - - kClearCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kClearCellStart", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - - kFindCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kFindCellStart", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - - kFindOverlappingPairs = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kFindOverlappingPairs", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - } - - m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue); -} -b3GpuGridBroadphase::~b3GpuGridBroadphase() -{ - clReleaseKernel(kCalcHashAABB); - clReleaseKernel(kClearCellStart); - clReleaseKernel(kFindCellStart); - clReleaseKernel(kFindOverlappingPairs); - clReleaseKernel(m_sap2Kernel); - clReleaseKernel(m_copyAabbsKernel); - - delete m_sorter; -} - -void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size(); //NOT userPtr; - m_smallAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); - - m_allAabbsCPU1.push_back(aabb); -} -void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size(); //NOT userPtr; - m_largeAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); - - m_allAabbsCPU1.push_back(aabb); -} - -void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) -{ - B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs"); - - if (0) - { - calculateOverlappingPairsHost(maxPairs); - /* - b3AlignedObjectArray cpuPairs; - m_gpuPairs.copyToHost(cpuPairs); - printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size()); - for (int i=0;i pairCount(m_context, m_queue); - pairCount.push_back(0); - m_gpuPairs.resize(maxPairs); //numSmallAabbs*maxPairsPerBody); - - { - int numLargeAabbs = m_largeAabbsMappingGPU.size(); - if (numLargeAabbs && numSmallAabbs) - { - B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU1.getBufferCL()), - b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_gpuPairs.getBufferCL()), - b3BufferInfoCL(pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbs); - launcher.setConst(numSmallAabbs); - launcher.setConst(0); //axis is not used - launcher.setConst(maxPairs); - //@todo: use actual maximum work item sizes of the device instead of hardcoded values - launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64); - - int numPairs = pairCount.at(0); - - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - } - } - } - - if (numSmallAabbs) - { - B3_PROFILE("gridKernel"); - m_hashGpu.resize(numSmallAabbs); - { - B3_PROFILE("kCalcHashAABB"); - b3LauncherCL launch(m_queue, kCalcHashAABB, "kCalcHashAABB"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_allAabbsGPU1.getBufferCL()); - launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(this->m_paramsGPU.getBufferCL()); - launch.launch1D(numSmallAabbs); - } - - m_sorter->execute(m_hashGpu); - - int numCells = this->m_paramsCPU.m_gridSize[0] * this->m_paramsCPU.m_gridSize[1] * this->m_paramsCPU.m_gridSize[2]; - m_cellStartGpu.resize(numCells); - //b3AlignedObjectArray cellStartCpu; - - { - B3_PROFILE("kClearCellStart"); - b3LauncherCL launch(m_queue, kClearCellStart, "kClearCellStart"); - launch.setConst(numCells); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - launch.launch1D(numCells); - //m_cellStartGpu.copyToHost(cellStartCpu); - //printf("??\n"); - } - - { - B3_PROFILE("kFindCellStart"); - b3LauncherCL launch(m_queue, kFindCellStart, "kFindCellStart"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - launch.launch1D(numSmallAabbs); - //m_cellStartGpu.copyToHost(cellStartCpu); - //printf("??\n"); - } - - { - B3_PROFILE("kFindOverlappingPairs"); - - b3LauncherCL launch(m_queue, kFindOverlappingPairs, "kFindOverlappingPairs"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_allAabbsGPU1.getBufferCL()); - launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - - launch.setBuffer(m_paramsGPU.getBufferCL()); - //launch.setBuffer(0); - launch.setBuffer(pairCount.getBufferCL()); - launch.setBuffer(m_gpuPairs.getBufferCL()); - - launch.setConst(maxPairs); - launch.launch1D(numSmallAabbs); - - int numPairs = pairCount.at(0); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - } - - m_gpuPairs.resize(numPairs); - - if (0) - { - b3AlignedObjectArray pairsCpu; - m_gpuPairs.copyToHost(pairsCpu); - - int sz = m_gpuPairs.size(); - printf("m_gpuPairs.size()=%d\n", sz); - for (int i = 0; i < m_gpuPairs.size(); i++) - { - printf("pair %d = %d,%d\n", i, pairsCpu[i].x, pairsCpu[i].y); - } - - printf("?!?\n"); - } - } - } - - //calculateOverlappingPairsHost(maxPairs); -} -void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - m_hostPairs.resize(0); - m_allAabbsGPU1.copyToHost(m_allAabbsCPU1); - for (int i = 0; i < m_allAabbsCPU1.size(); i++) - { - for (int j = i + 1; j < m_allAabbsCPU1.size(); j++) - { - if (b3TestAabbAgainstAabb2(m_allAabbsCPU1[i].m_minVec, m_allAabbsCPU1[i].m_maxVec, - m_allAabbsCPU1[j].m_minVec, m_allAabbsCPU1[j].m_maxVec)) - { - b3Int4 pair; - int a = m_allAabbsCPU1[j].m_minIndices[3]; - int b = m_allAabbsCPU1[i].m_minIndices[3]; - if (a <= b) - { - pair.x = a; - pair.y = b; //store the original index in the unsorted aabb array - } - else - { - pair.x = b; - pair.y = a; //store the original index in the unsorted aabb array - } - - if (m_hostPairs.size() < maxPairs) - { - m_hostPairs.push_back(pair); - } - } - } - } - - m_gpuPairs.copyFromHost(m_hostPairs); -} - -//call writeAabbsToGpu after done making all changes (createProxy etc) -void b3GpuGridBroadphase::writeAabbsToGpu() -{ - m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1); - m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); - m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); -} - -cl_mem b3GpuGridBroadphase::getAabbBufferWS() -{ - return this->m_allAabbsGPU1.getBufferCL(); -} -int b3GpuGridBroadphase::getNumOverlap() -{ - return m_gpuPairs.size(); -} -cl_mem b3GpuGridBroadphase::getOverlappingPairBuffer() -{ - return m_gpuPairs.getBufferCL(); -} - -b3OpenCLArray& b3GpuGridBroadphase::getAllAabbsGPU() -{ - return m_allAabbsGPU1; -} - -b3AlignedObjectArray& b3GpuGridBroadphase::getAllAabbsCPU() -{ - return m_allAabbsCPU1; -} - -b3OpenCLArray& b3GpuGridBroadphase::getOverlappingPairsGPU() -{ - return m_gpuPairs; -} -b3OpenCLArray& b3GpuGridBroadphase::getSmallAabbIndicesGPU() -{ - return m_smallAabbsMappingGPU; -} -b3OpenCLArray& b3GpuGridBroadphase::getLargeAabbIndicesGPU() -{ - return m_largeAabbsMappingGPU; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h deleted file mode 100644 index b76cb43b68d..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef B3_GPU_GRID_BROADPHASE_H -#define B3_GPU_GRID_BROADPHASE_H - -#include "b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -struct b3ParamsGridBroadphaseCL -{ - float m_invCellSize[4]; - int m_gridSize[4]; - - int getMaxBodiesPerCell() const - { - return m_gridSize[3]; - } - - void setMaxBodiesPerCell(int maxOverlap) - { - m_gridSize[3] = maxOverlap; - } -}; - -class b3GpuGridBroadphase : public b3GpuBroadphaseInterface -{ -protected: - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3OpenCLArray m_allAabbsGPU1; - b3AlignedObjectArray m_allAabbsCPU1; - - b3OpenCLArray m_smallAabbsMappingGPU; - b3AlignedObjectArray m_smallAabbsMappingCPU; - - b3OpenCLArray m_largeAabbsMappingGPU; - b3AlignedObjectArray m_largeAabbsMappingCPU; - - b3AlignedObjectArray m_hostPairs; - b3OpenCLArray m_gpuPairs; - - b3OpenCLArray m_hashGpu; - b3OpenCLArray m_cellStartGpu; - - b3ParamsGridBroadphaseCL m_paramsCPU; - b3OpenCLArray m_paramsGPU; - - class b3RadixSort32CL* m_sorter; - -public: - b3GpuGridBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q); - virtual ~b3GpuGridBroadphase(); - - static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuGridBroadphase(ctx, device, q); - } - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual cl_mem getAabbBufferWS(); - virtual int getNumOverlap(); - virtual cl_mem getOverlappingPairBuffer(); - - virtual b3OpenCLArray& getAllAabbsGPU(); - virtual b3AlignedObjectArray& getAllAabbsCPU(); - - virtual b3OpenCLArray& getOverlappingPairsGPU(); - virtual b3OpenCLArray& getSmallAabbIndicesGPU(); - virtual b3OpenCLArray& getLargeAabbIndicesGPU(); -}; - -#endif //B3_GPU_GRID_BROADPHASE_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp deleted file mode 100644 index 616fc34f3a0..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ /dev/null @@ -1,557 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#include "b3GpuParallelLinearBvh.h" - -b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) : m_queue(queue), - m_radixSorter(context, device, queue), - - m_rootNodeIndex(context, queue), - m_maxDistanceFromRoot(context, queue), - m_temp(context, queue), - - m_internalNodeAabbs(context, queue), - m_internalNodeLeafIndexRanges(context, queue), - m_internalNodeChildNodes(context, queue), - m_internalNodeParentNodes(context, queue), - - m_commonPrefixes(context, queue), - m_commonPrefixLengths(context, queue), - m_distanceFromRoot(context, queue), - - m_leafNodeParentNodes(context, queue), - m_mortonCodesAndAabbIndicies(context, queue), - m_mergedAabb(context, queue), - m_leafNodeAabbs(context, queue), - - m_largeAabbs(context, queue) -{ - m_rootNodeIndex.resize(1); - m_maxDistanceFromRoot.resize(1); - m_temp.resize(1); - - // - const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl"; - - const char* kernelSource = parallelLinearBvhCL; //parallelLinearBvhCL.h - cl_int error; - char* additionalMacros = 0; - m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH); - b3Assert(m_parallelLinearBvhProgram); - - m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "separateAabbs", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_separateAabbsKernel); - m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_findAllNodesMergedAabbKernel); - m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_assignMortonCodesAndAabbIndiciesKernel); - - m_computeAdjacentPairCommonPrefixKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "computeAdjacentPairCommonPrefix", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_computeAdjacentPairCommonPrefixKernel); - m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "buildBinaryRadixTreeLeafNodes", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_buildBinaryRadixTreeLeafNodesKernel); - m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "buildBinaryRadixTreeInternalNodes", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_buildBinaryRadixTreeInternalNodesKernel); - m_findDistanceFromRootKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "findDistanceFromRoot", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_findDistanceFromRootKernel); - m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "buildBinaryRadixTreeAabbsRecursive", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel); - - m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "findLeafIndexRanges", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_findLeafIndexRangesKernel); - - m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhCalculateOverlappingPairsKernel); - m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhRayTraverseKernel); - m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhLargeAabbAabbTest", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhLargeAabbAabbTestKernel); - m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhLargeAabbRayTest", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhLargeAabbRayTestKernel); -} - -b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() -{ - clReleaseKernel(m_separateAabbsKernel); - clReleaseKernel(m_findAllNodesMergedAabbKernel); - clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel); - - clReleaseKernel(m_computeAdjacentPairCommonPrefixKernel); - clReleaseKernel(m_buildBinaryRadixTreeLeafNodesKernel); - clReleaseKernel(m_buildBinaryRadixTreeInternalNodesKernel); - clReleaseKernel(m_findDistanceFromRootKernel); - clReleaseKernel(m_buildBinaryRadixTreeAabbsRecursiveKernel); - - clReleaseKernel(m_findLeafIndexRangesKernel); - - clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); - clReleaseKernel(m_plbvhRayTraverseKernel); - clReleaseKernel(m_plbvhLargeAabbAabbTestKernel); - clReleaseKernel(m_plbvhLargeAabbRayTestKernel); - - clReleaseProgram(m_parallelLinearBvhProgram); -} - -void b3GpuParallelLinearBvh::build(const b3OpenCLArray& worldSpaceAabbs, const b3OpenCLArray& smallAabbIndices, - const b3OpenCLArray& largeAabbIndices) -{ - B3_PROFILE("b3ParallelLinearBvh::build()"); - - int numLargeAabbs = largeAabbIndices.size(); - int numSmallAabbs = smallAabbIndices.size(); - - //Since all AABBs(both large and small) are input as a contiguous array, - //with 2 additional arrays used to indicate the indices of large and small AABBs, - //it is necessary to separate the AABBs so that the large AABBs will not degrade the quality of the BVH. - { - B3_PROFILE("Separate large and small AABBs"); - - m_largeAabbs.resize(numLargeAabbs); - m_leafNodeAabbs.resize(numSmallAabbs); - - //Write large AABBs into m_largeAabbs - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(worldSpaceAabbs.getBufferCL()), - b3BufferInfoCL(largeAabbIndices.getBufferCL()), - - b3BufferInfoCL(m_largeAabbs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbs); - - launcher.launch1D(numLargeAabbs); - } - - //Write small AABBs into m_leafNodeAabbs - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(worldSpaceAabbs.getBufferCL()), - b3BufferInfoCL(smallAabbIndices.getBufferCL()), - - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - - launcher.launch1D(numSmallAabbs); - } - - clFinish(m_queue); - } - - // - int numLeaves = numSmallAabbs; //Number of leaves in the BVH == Number of rigid bodies with small AABBs - int numInternalNodes = numLeaves - 1; - - if (numLeaves < 2) - { - //Number of leaf nodes is checked in calculateOverlappingPairs() and testRaysAgainstBvhAabbs(), - //so it does not matter if numLeaves == 0 and rootNodeIndex == -1 - int rootNodeIndex = numLeaves - 1; - m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1); - - //Since the AABBs need to be rearranged(sorted) for the BVH construction algorithm, - //m_mortonCodesAndAabbIndicies.m_value is used to map a sorted AABB index to the unsorted AABB index - //instead of directly moving the AABBs. It needs to be set for the ray cast traversal kernel to work. - //( m_mortonCodesAndAabbIndicies[].m_value == unsorted index == index of m_leafNodeAabbs ) - if (numLeaves == 1) - { - b3SortData leaf; - leaf.m_value = 0; //1 leaf so index is always 0; leaf.m_key does not need to be set - - m_mortonCodesAndAabbIndicies.resize(1); - m_mortonCodesAndAabbIndicies.copyFromHostPointer(&leaf, 1); - } - - return; - } - - // - { - m_internalNodeAabbs.resize(numInternalNodes); - m_internalNodeLeafIndexRanges.resize(numInternalNodes); - m_internalNodeChildNodes.resize(numInternalNodes); - m_internalNodeParentNodes.resize(numInternalNodes); - - m_commonPrefixes.resize(numInternalNodes); - m_commonPrefixLengths.resize(numInternalNodes); - m_distanceFromRoot.resize(numInternalNodes); - - m_leafNodeParentNodes.resize(numLeaves); - m_mortonCodesAndAabbIndicies.resize(numLeaves); - m_mergedAabb.resize(numLeaves); - } - - //Find the merged AABB of all small AABBs; this is used to define the size of - //each cell in the virtual grid for the next kernel(2^10 cells in each dimension). - { - B3_PROFILE("Find AABB of merged nodes"); - - m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs); //Need to make a copy since the kernel modifies the array - - for (int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2; - numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_mergedAabb.getBufferCL()) //Resulting AABB is stored in m_mergedAabb[0] - }; - - b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabbKernel, "m_findAllNodesMergedAabbKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numAabbsNeedingMerge); - - launcher.launch1D(numAabbsNeedingMerge); - } - - clFinish(m_queue); - } - - //Insert the center of the AABBs into a virtual grid, - //then convert the discrete grid coordinates into a morton code - //For each element in m_mortonCodesAndAabbIndicies, set - // m_key == morton code (value to sort by) - // m_value == small AABB index - { - B3_PROFILE("Assign morton codes"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_mergedAabb.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } - - // - { - B3_PROFILE("Sort leaves by morton codes"); - - m_radixSorter.execute(m_mortonCodesAndAabbIndicies); - clFinish(m_queue); - } - - // - constructBinaryRadixTree(); - - //Since it is a sorted binary radix tree, each internal node contains a contiguous subset of leaf node indices. - //The root node contains leaf node indices in the range [0, numLeafNodes - 1]. - //The child nodes of each node split their parent's index range into 2 contiguous halves. - // - //For example, if the root has indices [0, 31], its children might partition that range into [0, 11] and [12, 31]. - //The next level in the tree could then split those ranges into [0, 2], [3, 11], [12, 22], and [23, 31]. - // - //This property can be used for optimizing calculateOverlappingPairs(), to avoid testing each AABB pair twice - { - B3_PROFILE("m_findLeafIndexRangesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } -} - -void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray& out_overlappingPairs) -{ - int maxPairs = out_overlappingPairs.size(); - b3OpenCLArray& numPairsGpu = m_temp; - - int reset = 0; - numPairsGpu.copyFromHostPointer(&reset, 1); - - // - if (m_leafNodeAabbs.size() > 1) - { - B3_PROFILE("PLBVH small-small AABB test"); - - int numQueryAabbs = m_leafNodeAabbs.size(); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - - b3BufferInfoCL(m_rootNodeIndex.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - - b3BufferInfoCL(numPairsGpu.getBufferCL()), - b3BufferInfoCL(out_overlappingPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhCalculateOverlappingPairsKernel, "m_plbvhCalculateOverlappingPairsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxPairs); - launcher.setConst(numQueryAabbs); - - launcher.launch1D(numQueryAabbs); - clFinish(m_queue); - } - - int numLargeAabbRigids = m_largeAabbs.size(); - if (numLargeAabbRigids > 0 && m_leafNodeAabbs.size() > 0) - { - B3_PROFILE("PLBVH large-small AABB test"); - - int numQueryAabbs = m_leafNodeAabbs.size(); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_largeAabbs.getBufferCL()), - - b3BufferInfoCL(numPairsGpu.getBufferCL()), - b3BufferInfoCL(out_overlappingPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxPairs); - launcher.setConst(numLargeAabbRigids); - launcher.setConst(numQueryAabbs); - - launcher.launch1D(numQueryAabbs); - clFinish(m_queue); - } - - // - int numPairs = -1; - numPairsGpu.copyToHostPointer(&numPairs, 1); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - numPairsGpu.copyFromHostPointer(&maxPairs, 1); - } - - out_overlappingPairs.resize(numPairs); -} - -void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray& rays, - b3OpenCLArray& out_numRayRigidPairs, b3OpenCLArray& out_rayRigidPairs) -{ - B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()"); - - int numRays = rays.size(); - int maxRayRigidPairs = out_rayRigidPairs.size(); - - int reset = 0; - out_numRayRigidPairs.copyFromHostPointer(&reset, 1); - - // - if (m_leafNodeAabbs.size() > 0) - { - B3_PROFILE("PLBVH ray test small AABB"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - - b3BufferInfoCL(m_rootNodeIndex.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - - b3BufferInfoCL(rays.getBufferCL()), - - b3BufferInfoCL(out_numRayRigidPairs.getBufferCL()), - b3BufferInfoCL(out_rayRigidPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); - } - - int numLargeAabbRigids = m_largeAabbs.size(); - if (numLargeAabbRigids > 0) - { - B3_PROFILE("PLBVH ray test large AABB"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_largeAabbs.getBufferCL()), - b3BufferInfoCL(rays.getBufferCL()), - - b3BufferInfoCL(out_numRayRigidPairs.getBufferCL()), - b3BufferInfoCL(out_rayRigidPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbRigids); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); - } - - // - int numRayRigidPairs = -1; - out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1); - - if (numRayRigidPairs > maxRayRigidPairs) - b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs); -} - -void b3GpuParallelLinearBvh::constructBinaryRadixTree() -{ - B3_PROFILE("b3GpuParallelLinearBvh::constructBinaryRadixTree()"); - - int numLeaves = m_leafNodeAabbs.size(); - int numInternalNodes = numLeaves - 1; - - //Each internal node is placed in between 2 leaf nodes. - //By using this arrangement and computing the common prefix between - //these 2 adjacent leaf nodes, it is possible to quickly construct a binary radix tree. - { - B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - b3BufferInfoCL(m_commonPrefixes.getBufferCL()), - b3BufferInfoCL(m_commonPrefixLengths.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_computeAdjacentPairCommonPrefixKernel, "m_computeAdjacentPairCommonPrefixKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //For each leaf node, select its parent node by - //comparing the 2 nearest internal nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_commonPrefixLengths.getBufferCL()), - b3BufferInfoCL(m_leafNodeParentNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } - - //For each internal node, perform 2 binary searches among the other internal nodes - //to its left and right to find its potential parent nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_commonPrefixes.getBufferCL()), - b3BufferInfoCL(m_commonPrefixLengths.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeParentNodes.getBufferCL()), - b3BufferInfoCL(m_rootNodeIndex.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Find the number of nodes separating each internal node and the root node - //so that the AABBs can be set using the next kernel. - //Also determine the maximum number of nodes separating an internal node and the root node. - { - B3_PROFILE("m_findDistanceFromRootKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_rootNodeIndex.getBufferCL()), - b3BufferInfoCL(m_internalNodeParentNodes.getBufferCL()), - b3BufferInfoCL(m_maxDistanceFromRoot.getBufferCL()), - b3BufferInfoCL(m_distanceFromRoot.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Starting from the internal nodes nearest to the leaf nodes, recursively move up - //the tree towards the root to set the AABBs of each internal node; each internal node - //checks its children and merges their AABBs - { - B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel"); - - int maxDistanceFromRoot = -1; - { - B3_PROFILE("copy maxDistanceFromRoot to CPU"); - m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1); - clFinish(m_queue); - } - - for (int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_distanceFromRoot.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_internalNodeAabbs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxDistanceFromRoot); - launcher.setConst(distanceFromRoot); - launcher.setConst(numInternalNodes); - - //It may seem inefficent to launch a thread for each internal node when a - //much smaller number of nodes is actually processed, but this is actually - //faster than determining the exact nodes that are ready to merge their child AABBs. - launcher.launch1D(numInternalNodes); - } - - clFinish(m_queue); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h deleted file mode 100644 index b3907751298..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H -#define B3_GPU_PARALLEL_LINEAR_BVH_H - -//#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" - -#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h" - -#define b3Int64 cl_long - -///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame -///@remarks -///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details. -///@par -///Related papers: \n -///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n -///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n -///@par -///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages: -/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid) -/// - [fully parallel] Sort morton codes -/// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH) -/// - [somewhat parallel] Set internal node AABBs -///@par -///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages. -///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree. -///Instead of searching for the child nodes of each internal node, we search for the parent node of each node. -///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs. -class b3GpuParallelLinearBvh -{ - cl_command_queue m_queue; - - cl_program m_parallelLinearBvhProgram; - - cl_kernel m_separateAabbsKernel; - cl_kernel m_findAllNodesMergedAabbKernel; - cl_kernel m_assignMortonCodesAndAabbIndiciesKernel; - - //Binary radix tree construction kernels - cl_kernel m_computeAdjacentPairCommonPrefixKernel; - cl_kernel m_buildBinaryRadixTreeLeafNodesKernel; - cl_kernel m_buildBinaryRadixTreeInternalNodesKernel; - cl_kernel m_findDistanceFromRootKernel; - cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel; - - cl_kernel m_findLeafIndexRangesKernel; - - //Traversal kernels - cl_kernel m_plbvhCalculateOverlappingPairsKernel; - cl_kernel m_plbvhRayTraverseKernel; - cl_kernel m_plbvhLargeAabbAabbTestKernel; - cl_kernel m_plbvhLargeAabbRayTestKernel; - - b3RadixSort32CL m_radixSorter; - - //1 element - b3OpenCLArray m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node - b3OpenCLArray m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node - b3OpenCLArray m_temp; //Used to hold the number of pairs in calculateOverlappingPairs() - - //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1) - b3OpenCLArray m_internalNodeAabbs; - b3OpenCLArray m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index - b3OpenCLArray m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node - b3OpenCLArray m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal - - //1 element per internal node; for binary radix tree construction - b3OpenCLArray m_commonPrefixes; - b3OpenCLArray m_commonPrefixLengths; - b3OpenCLArray m_distanceFromRoot; //Number of internal nodes between this node and the root - - //1 element per leaf node (leaf nodes only include small AABBs) - b3OpenCLArray m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal - b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs - b3OpenCLArray m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes - b3OpenCLArray m_leafNodeAabbs; //Contains only small AABBs - - //1 element per large AABB, which is not stored in the BVH - b3OpenCLArray m_largeAabbs; - -public: - b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue); - virtual ~b3GpuParallelLinearBvh(); - - ///Must be called before any other function - void build(const b3OpenCLArray& worldSpaceAabbs, const b3OpenCLArray& smallAabbIndices, - const b3OpenCLArray& largeAabbIndices); - - ///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs. - ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs. - ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized. - void calculateOverlappingPairs(b3OpenCLArray& out_overlappingPairs); - - ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections; - ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough. - ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index. - ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded. - void testRaysAgainstBvhAabbs(const b3OpenCLArray& rays, - b3OpenCLArray& out_numRayRigidPairs, b3OpenCLArray& out_rayRigidPairs); - -private: - void constructBinaryRadixTree(); -}; - -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp deleted file mode 100644 index 62ea7a32df4..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#include "b3GpuParallelLinearBvhBroadphase.h" - -b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) : m_plbvh(context, device, queue), - - m_overlappingPairsGpu(context, queue), - - m_aabbsGpu(context, queue), - m_smallAabbsMappingGpu(context, queue), - m_largeAabbsMappingGpu(context, queue) -{ -} - -void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int newAabbIndex = m_aabbsCpu.size(); - - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = newAabbIndex; - - m_smallAabbsMappingCpu.push_back(newAabbIndex); - - m_aabbsCpu.push_back(aabb); -} -void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int newAabbIndex = m_aabbsCpu.size(); - - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = newAabbIndex; - - m_largeAabbsMappingCpu.push_back(newAabbIndex); - - m_aabbsCpu.push_back(aabb); -} - -void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs) -{ - //Reconstruct BVH - m_plbvh.build(m_aabbsGpu, m_smallAabbsMappingGpu, m_largeAabbsMappingGpu); - - // - m_overlappingPairsGpu.resize(maxPairs); - m_plbvh.calculateOverlappingPairs(m_overlappingPairsGpu); -} -void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - b3Assert(0); //CPU version not implemented -} - -void b3GpuParallelLinearBvhBroadphase::writeAabbsToGpu() -{ - m_aabbsGpu.copyFromHost(m_aabbsCpu); - m_smallAabbsMappingGpu.copyFromHost(m_smallAabbsMappingCpu); - m_largeAabbsMappingGpu.copyFromHost(m_largeAabbsMappingCpu); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h deleted file mode 100644 index dda0eea7bea..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#ifndef B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H -#define B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H - -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" - -#include "b3GpuParallelLinearBvh.h" - -class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface -{ - b3GpuParallelLinearBvh m_plbvh; - - b3OpenCLArray m_overlappingPairsGpu; - - b3OpenCLArray m_aabbsGpu; - b3OpenCLArray m_smallAabbsMappingGpu; - b3OpenCLArray m_largeAabbsMappingGpu; - - b3AlignedObjectArray m_aabbsCpu; - b3AlignedObjectArray m_smallAabbsMappingCpu; - b3AlignedObjectArray m_largeAabbsMappingCpu; - -public: - b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue); - virtual ~b3GpuParallelLinearBvhBroadphase() {} - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); } - virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); } - - virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); } - virtual b3OpenCLArray& getAllAabbsGPU() { return m_aabbsGpu; } - - virtual b3OpenCLArray& getOverlappingPairsGPU() { return m_overlappingPairsGpu; } - virtual b3OpenCLArray& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; } - virtual b3OpenCLArray& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; } - - virtual b3AlignedObjectArray& getAllAabbsCPU() { return m_aabbsCpu; } - - static b3GpuBroadphaseInterface* CreateFunc(cl_context context, cl_device_id device, cl_command_queue queue) - { - return new b3GpuParallelLinearBvhBroadphase(context, device, queue); - } -}; - -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp deleted file mode 100644 index 4126d03ed0a..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ /dev/null @@ -1,1298 +0,0 @@ - -bool searchIncremental3dSapOnGpu = true; -#include -#include "b3GpuSapBroadphase.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/sapKernels.h" - -#include "Bullet3Common/b3MinMax.h" - -#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" - -/* - - - - - - - b3OpenCLArray m_pairCount; - - - b3OpenCLArray m_allAabbsGPU; - b3AlignedObjectArray m_allAabbsCPU; - - virtual b3OpenCLArray& getAllAabbsGPU() - { - return m_allAabbsGPU; - } - virtual b3AlignedObjectArray& getAllAabbsCPU() - { - return m_allAabbsCPU; - } - - b3OpenCLArray m_sum; - b3OpenCLArray m_sum2; - b3OpenCLArray m_dst; - - b3OpenCLArray m_smallAabbsMappingGPU; - b3AlignedObjectArray m_smallAabbsMappingCPU; - - b3OpenCLArray m_largeAabbsMappingGPU; - b3AlignedObjectArray m_largeAabbsMappingCPU; - - - b3OpenCLArray m_overlappingPairs; - - //temporary gpu work memory - b3OpenCLArray m_gpuSmallSortData; - b3OpenCLArray m_gpuSmallSortedAabbs; - - class b3PrefixScanFloat4CL* m_prefixScanFloat4; - */ - -b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType) - : m_context(ctx), - m_device(device), - m_queue(q), - - m_objectMinMaxIndexGPUaxis0(ctx, q), - m_objectMinMaxIndexGPUaxis1(ctx, q), - m_objectMinMaxIndexGPUaxis2(ctx, q), - m_objectMinMaxIndexGPUaxis0prev(ctx, q), - m_objectMinMaxIndexGPUaxis1prev(ctx, q), - m_objectMinMaxIndexGPUaxis2prev(ctx, q), - m_sortedAxisGPU0(ctx, q), - m_sortedAxisGPU1(ctx, q), - m_sortedAxisGPU2(ctx, q), - m_sortedAxisGPU0prev(ctx, q), - m_sortedAxisGPU1prev(ctx, q), - m_sortedAxisGPU2prev(ctx, q), - m_addedHostPairsGPU(ctx, q), - m_removedHostPairsGPU(ctx, q), - m_addedCountGPU(ctx, q), - m_removedCountGPU(ctx, q), - m_currentBuffer(-1), - m_pairCount(ctx, q), - m_allAabbsGPU(ctx, q), - m_sum(ctx, q), - m_sum2(ctx, q), - m_dst(ctx, q), - m_smallAabbsMappingGPU(ctx, q), - m_largeAabbsMappingGPU(ctx, q), - m_overlappingPairs(ctx, q), - m_gpuSmallSortData(ctx, q), - m_gpuSmallSortedAabbs(ctx, q) -{ - const char* sapSrc = sapCL; - - cl_int errNum = 0; - - b3Assert(m_context); - b3Assert(m_device); - cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH); - b3Assert(errNum == CL_SUCCESS); - - b3Assert(errNum == CL_SUCCESS); -#ifndef __APPLE__ - m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context, m_device, m_queue); -#else - m_prefixScanFloat4 = 0; -#endif - m_sapKernel = 0; - - switch (kernelType) - { - case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU: - { - m_sapKernel = 0; - break; - } - case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBruteForce", &errNum, sapProg); - break; - } - - case B3_GPU_SAP_KERNEL_ORIGINAL: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelOriginal", &errNum, sapProg); - break; - } - case B3_GPU_SAP_KERNEL_BARRIER: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBarrier", &errNum, sapProg); - break; - } - case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg); - break; - } - - default: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg); - b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory"); - } - }; - - m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg); - b3Assert(errNum == CL_SUCCESS); - - m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "prepareSumVarianceKernel", &errNum, sapProg); - b3Assert(errNum == CL_SUCCESS); - - m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "flipFloatKernel", &errNum, sapProg); - - m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg); - - m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "scatterKernel", &errNum, sapProg); - - m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue); -} - -b3GpuSapBroadphase::~b3GpuSapBroadphase() -{ - delete m_sorter; - delete m_prefixScanFloat4; - - clReleaseKernel(m_scatterKernel); - clReleaseKernel(m_flipFloatKernel); - clReleaseKernel(m_copyAabbsKernel); - clReleaseKernel(m_sapKernel); - clReleaseKernel(m_sap2Kernel); - clReleaseKernel(m_prepareSumVarianceKernel); -} - -/// conservative test for overlap between two aabbs -static bool TestAabbAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1, - const b3Vector3& aabbMin2, const b3Vector3& aabbMax2) -{ - bool overlap = true; - overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap; - overlap = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ()) ? false : overlap; - overlap = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY()) ? false : overlap; - return overlap; -} - -//http://stereopsis.com/radix.html -static unsigned int FloatFlip(float fl) -{ - unsigned int f = *(unsigned int*)&fl; - unsigned int mask = -(int)(f >> 31) | 0x80000000; - return f ^ mask; -}; - -void b3GpuSapBroadphase::init3dSap() -{ - if (m_currentBuffer < 0) - { - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - - m_currentBuffer = 0; - for (int axis = 0; axis < 3; axis++) - { - for (int buf = 0; buf < 2; buf++) - { - int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = 2 * totalNumAabbs; - m_sortedAxisCPU[axis][buf].resize(numEndPoints); - - if (buf == m_currentBuffer) - { - for (int i = 0; i < totalNumAabbs; i++) - { - m_sortedAxisCPU[axis][buf][i * 2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis]) - 1; - m_sortedAxisCPU[axis][buf][i * 2].m_value = i * 2; - m_sortedAxisCPU[axis][buf][i * 2 + 1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis]) + 1; - m_sortedAxisCPU[axis][buf][i * 2 + 1].m_value = i * 2 + 1; - } - } - } - } - - for (int axis = 0; axis < 3; axis++) - { - m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); - } - - for (int axis = 0; axis < 3; axis++) - { - //int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); - m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints); - for (int i = 0; i < numEndPoints; i++) - { - int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - int newDest = destIndex / 2; - if (destIndex & 1) - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i; - } - else - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i; - } - } - } - } -} - -static bool b3PairCmp(const b3Int4& p, const b3Int4& q) -{ - return ((p.x < q.x) || ((p.x == q.x) && (p.y < q.y))); -} - -static bool operator==(const b3Int4& a, const b3Int4& b) -{ - return a.x == b.x && a.y == b.y; -}; - -static bool operator<(const b3Int4& a, const b3Int4& b) -{ - return a.x < b.x || (a.x == b.x && a.y < b.y); -}; - -static bool operator>(const b3Int4& a, const b3Int4& b) -{ - return a.x > b.x || (a.x == b.x && a.y > b.y); -}; - -b3AlignedObjectArray addedHostPairs; -b3AlignedObjectArray removedHostPairs; - -b3AlignedObjectArray preAabbs; - -void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() -{ - //static int framepje = 0; - //printf("framepje=%d\n",framepje++); - - B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap"); - - addedHostPairs.resize(0); - removedHostPairs.resize(0); - - b3Assert(m_currentBuffer >= 0); - - { - preAabbs.resize(m_allAabbsCPU.size()); - for (int i = 0; i < preAabbs.size(); i++) - { - preAabbs[i] = m_allAabbsCPU[i]; - } - } - - if (m_currentBuffer < 0) - return; - { - B3_PROFILE("m_allAabbsGPU.copyToHost"); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - } - - b3AlignedObjectArray allPairs; - { - B3_PROFILE("m_overlappingPairs.copyToHost"); - m_overlappingPairs.copyToHost(allPairs); - } - if (0) - { - { - printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n", - m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1], m_allAabbsCPU[40].m_min[2], - m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1], m_allAabbsCPU[40].m_max[2]); - } - - { - printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n", - m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1], m_allAabbsCPU[53].m_min[2], - m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1], m_allAabbsCPU[53].m_max[2]); - } - - { - b3Int4 newPair; - newPair.x = 40; - newPair.y = 53; - int index = allPairs.findBinarySearch(newPair); - printf("hasPair(40,53)=%d out of %d\n", index, allPairs.size()); - - { - int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max, (const b3Vector3&)m_allAabbsCPU[53].m_min, (const b3Vector3&)m_allAabbsCPU[53].m_max); - printf("overlap=%d\n", overlap); - } - - if (preAabbs.size()) - { - int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max, (const b3Vector3&)preAabbs[53].m_min, (const b3Vector3&)preAabbs[53].m_max); - printf("prevoverlap=%d\n", prevOverlap); - } - else - { - printf("unknown prevoverlap\n"); - } - } - } - - if (0) - { - for (int i = 0; i < m_allAabbsCPU.size(); i++) - { - //printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]); - } - - for (int axis = 0; axis < 3; axis++) - { - for (int buf = 0; buf < 2; buf++) - { - b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size() * 2); - } - } - } - - m_currentBuffer = 1 - m_currentBuffer; - - int totalNumAabbs = m_allAabbsCPU.size(); - - { - B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)"); - for (int i = 0; i < totalNumAabbs; i++) - { - unsigned int keyMin[3]; - unsigned int keyMax[3]; - for (int axis = 0; axis < 3; axis++) - { - float vmin = m_allAabbsCPU[i].m_min[axis]; - float vmax = m_allAabbsCPU[i].m_max[axis]; - keyMin[axis] = FloatFlip(vmin); - keyMax[axis] = FloatFlip(vmax); - - m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_key = keyMin[axis] - 1; - m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_value = i * 2; - m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_key = keyMax[axis] + 1; - m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_value = i * 2 + 1; - } - //printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]); - } - } - - { - B3_PROFILE("sort m_sortedAxisCPU"); - for (int axis = 0; axis < 3; axis++) - m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); - } - -#if 0 - if (0) - { - for (int axis=0;axis<3;axis++) - { - //printf("axis %d\n",axis); - for (int i=0;i m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) - overlap = false; - } - - // b3Assert(overlap2==overlap); - - bool prevOverlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x)) - prevOverlap = false; - } - - //b3Assert(overlap==overlap2); - - if (dmin < 0) - { - if (overlap && !prevOverlap) - { - //add a pair - b3Int4 newPair; - if (i <= otherIndex) - { - newPair.x = i; - newPair.y = otherIndex; - } - else - { - newPair.x = otherIndex; - newPair.y = i; - } - addedHostPairs.push_back(newPair); - } - } - else - { - if (!overlap && prevOverlap) - { - //remove a pair - b3Int4 removedPair; - if (i <= otherIndex) - { - removedPair.x = i; - removedPair.y = otherIndex; - } - else - { - removedPair.x = otherIndex; - removedPair.y = i; - } - removedHostPairs.push_back(removedPair); - } - } //otherisMax - } //if (dmin<0) - } //if (otherIndex!=i) - } //for (int j= - } - - if (dmax != 0) - { - int stepMax = dmax < 0 ? -1 : 1; - for (int j = prevMaxIndex; j != curMaxIndex; j += stepMax) - { - int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; - int otherIndex = otherIndex2 / 2; - if (otherIndex != i) - { - //bool otherIsMin = ((otherIndex2&1)==0); - //if (otherIsMin) - { - //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); - //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); - - bool overlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) - overlap = false; - } - //b3Assert(overlap2==overlap); - - bool prevOverlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x)) - prevOverlap = false; - } - - if (dmax > 0) - { - if (overlap && !prevOverlap) - { - //add a pair - b3Int4 newPair; - if (i <= otherIndex) - { - newPair.x = i; - newPair.y = otherIndex; - } - else - { - newPair.x = otherIndex; - newPair.y = i; - } - addedHostPairs.push_back(newPair); - } - } - else - { - if (!overlap && prevOverlap) - { - //if (otherIndex2&1==0) -> min? - //remove a pair - b3Int4 removedPair; - if (i <= otherIndex) - { - removedPair.x = i; - removedPair.y = otherIndex; - } - else - { - removedPair.x = otherIndex; - removedPair.y = i; - } - removedHostPairs.push_back(removedPair); - } - } - - } //if (dmin<0) - } //if (otherIndex!=i) - } //for (int j= - } - } //for (int otherbuffer - } //for (int axis=0; - } //for (int i=0;i removedPositions; - - { - B3_PROFILE("actual removing"); - for (int i = 0; i < removedHostPairs.size(); i++) - { - b3Int4 removedPair = removedHostPairs[i]; - if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y)) - { - int index1 = allPairs.findBinarySearch(removedPair); - - //#ifdef _DEBUG - - int index2 = allPairs.findLinearSearch(removedPair); - b3Assert(index1 == index2); - - //b3Assert(index1!=allPairs.size()); - if (index1 < allPairs.size()) - //#endif//_DEBUG - { - uniqueRemovedPairs++; - removedPositions.push_back(index1); - { - //printf("framepje(%d) remove pair(%d):%d,%d\n",framepje,i,removedPair.x,removedPair.y); - } - } - } - prevPair = removedPair; - } - - if (uniqueRemovedPairs) - { - for (int i = 0; i < removedPositions.size(); i++) - { - allPairs[removedPositions[i]].x = INT_MAX; - allPairs[removedPositions[i]].y = INT_MAX; - } - allPairs.quickSort(b3PairCmp); - allPairs.resize(allPairs.size() - uniqueRemovedPairs); - } - } - //if (uniqueRemovedPairs) - // printf("uniqueRemovedPairs=%d\n",uniqueRemovedPairs); - //printf("removedHostPairs.size = %d\n",removedHostPairs.size()); - - prevPair.x = -1; - prevPair.y = -1; - - int uniqueAddedPairs = 0; - b3AlignedObjectArray actualAddedPairs; - - { - B3_PROFILE("actual adding"); - for (int i = 0; i < addedHostPairs.size(); i++) - { - b3Int4 newPair = addedHostPairs[i]; - if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y)) - { - //#ifdef _DEBUG - int index1 = allPairs.findBinarySearch(newPair); - - int index2 = allPairs.findLinearSearch(newPair); - b3Assert(index1 == index2); - - b3Assert(index1 == allPairs.size()); - if (index1 != allPairs.size()) - { - printf("??\n"); - } - - if (index1 == allPairs.size()) - //#endif //_DEBUG - { - uniqueAddedPairs++; - actualAddedPairs.push_back(newPair); - } - } - prevPair = newPair; - } - for (int i = 0; i < actualAddedPairs.size(); i++) - { - //printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y); - allPairs.push_back(actualAddedPairs[i]); - } - } - - //if (uniqueAddedPairs) - // printf("uniqueAddedPairs=%d\n", uniqueAddedPairs); - - { - B3_PROFILE("m_overlappingPairs.copyFromHost"); - m_overlappingPairs.copyFromHost(allPairs); - } -} - -void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - //test - // if (m_currentBuffer>=0) - // return calculateOverlappingPairsHostIncremental3Sap(); - - b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - - int axis = 0; - { - B3_PROFILE("CPU compute best variance axis"); - b3Vector3 s = b3MakeVector3(0, 0, 0), s2 = b3MakeVector3(0, 0, 0); - int numRigidBodies = m_smallAabbsMappingCPU.size(); - - for (int i = 0; i < numRigidBodies; i++) - { - b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - - b3Vector3 maxAabb = b3MakeVector3(aabb.m_max[0], aabb.m_max[1], aabb.m_max[2]); - b3Vector3 minAabb = b3MakeVector3(aabb.m_min[0], aabb.m_min[1], aabb.m_min[2]); - b3Vector3 centerAabb = (maxAabb + minAabb) * 0.5f; - - s += centerAabb; - s2 += centerAabb * centerAabb; - } - b3Vector3 v = s2 - (s * s) / (float)numRigidBodies; - - if (v[1] > v[0]) - axis = 1; - if (v[2] > v[axis]) - axis = 2; - } - - b3AlignedObjectArray hostPairs; - - { - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - for (int i = 0; i < numSmallAabbs; i++) - { - b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - //float reference = smallAabbi.m_max[axis]; - - for (int j = i + 1; j < numSmallAabbs; j++) - { - b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]]; - - if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, - (b3Vector3&)smallAabbj.m_min, (b3Vector3&)smallAabbj.m_max)) - { - b3Int4 pair; - int a = smallAabbi.m_minIndices[3]; - int b = smallAabbj.m_minIndices[3]; - if (a <= b) - { - pair.x = a; //store the original index in the unsorted aabb array - pair.y = b; - } - else - { - pair.x = b; //store the original index in the unsorted aabb array - pair.y = a; - } - hostPairs.push_back(pair); - } - } - } - } - - { - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - for (int i = 0; i < numSmallAabbs; i++) - { - b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - - //float reference = smallAabbi.m_max[axis]; - int numLargeAabbs = m_largeAabbsMappingCPU.size(); - - for (int j = 0; j < numLargeAabbs; j++) - { - b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]]; - if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, - (b3Vector3&)largeAabbj.m_min, (b3Vector3&)largeAabbj.m_max)) - { - b3Int4 pair; - int a = largeAabbj.m_minIndices[3]; - int b = smallAabbi.m_minIndices[3]; - if (a <= b) - { - pair.x = a; - pair.y = b; //store the original index in the unsorted aabb array - } - else - { - pair.x = b; - pair.y = a; //store the original index in the unsorted aabb array - } - - hostPairs.push_back(pair); - } - } - } - } - - if (hostPairs.size() > maxPairs) - { - hostPairs.resize(maxPairs); - } - - if (hostPairs.size()) - { - m_overlappingPairs.copyFromHost(hostPairs); - } - else - { - m_overlappingPairs.resize(0); - } - - //init3dSap(); -} - -void b3GpuSapBroadphase::reset() -{ - m_allAabbsGPU.resize(0); - m_allAabbsCPU.resize(0); - - m_smallAabbsMappingGPU.resize(0); - m_smallAabbsMappingCPU.resize(0); - - m_pairCount.resize(0); - - m_largeAabbsMappingGPU.resize(0); - m_largeAabbsMappingCPU.resize(0); -} - -void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) -{ - if (m_sapKernel == 0) - { - calculateOverlappingPairsHost(maxPairs); - return; - } - - //if (m_currentBuffer>=0) - // return calculateOverlappingPairsHostIncremental3Sap(); - - //calculateOverlappingPairsHost(maxPairs); - - B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs"); - - int axis = 0; - - { - //bool syncOnHost = false; - - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - if (m_prefixScanFloat4 && numSmallAabbs) - { - B3_PROFILE("GPU compute best variance axis"); - - if (m_dst.size() != (numSmallAabbs + 1)) - { - m_dst.resize(numSmallAabbs + 128); - m_sum.resize(numSmallAabbs + 128); - m_sum2.resize(numSmallAabbs + 128); - m_sum.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow? - m_sum2.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow? - } - - b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel, "m_prepareSumVarianceKernel"); - launcher.setBuffer(m_allAabbsGPU.getBufferCL()); - - launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launcher.setBuffer(m_sum.getBufferCL()); - launcher.setBuffer(m_sum2.getBufferCL()); - launcher.setConst(numSmallAabbs); - int num = numSmallAabbs; - launcher.launch1D(num); - - b3Vector3 s; - b3Vector3 s2; - m_prefixScanFloat4->execute(m_sum, m_dst, numSmallAabbs + 1, &s); - m_prefixScanFloat4->execute(m_sum2, m_dst, numSmallAabbs + 1, &s2); - - b3Vector3 v = s2 - (s * s) / (float)numSmallAabbs; - - if (v[1] > v[0]) - axis = 1; - if (v[2] > v[axis]) - axis = 2; - } - - m_gpuSmallSortData.resize(numSmallAabbs); - -#if 1 - if (m_smallAabbsMappingGPU.size()) - { - B3_PROFILE("flipFloatKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true), - b3BufferInfoCL(m_gpuSmallSortData.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_flipFloatKernel, "m_flipFloatKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - launcher.setConst(axis); - - int num = numSmallAabbs; - launcher.launch1D(num); - clFinish(m_queue); - } - - if (m_gpuSmallSortData.size()) - { - B3_PROFILE("gpu radix sort"); - m_sorter->execute(m_gpuSmallSortData); - clFinish(m_queue); - } - - m_gpuSmallSortedAabbs.resize(numSmallAabbs); - if (numSmallAabbs) - { - B3_PROFILE("scatterKernel"); - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true), - b3BufferInfoCL(m_gpuSmallSortData.getBufferCL(), true), - b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_scatterKernel, "m_scatterKernel "); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - int num = numSmallAabbs; - launcher.launch1D(num); - clFinish(m_queue); - } - - m_overlappingPairs.resize(maxPairs); - - m_pairCount.resize(0); - m_pairCount.push_back(0); - int numPairs = 0; - - { - int numLargeAabbs = m_largeAabbsMappingGPU.size(); - if (numLargeAabbs && numSmallAabbs) - { - //@todo - B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU.getBufferCL()), - b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_overlappingPairs.getBufferCL()), - b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbs); - launcher.setConst(numSmallAabbs); - launcher.setConst(axis); - launcher.setConst(maxPairs); - //@todo: use actual maximum work item sizes of the device instead of hardcoded values - launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64); - - numPairs = m_pairCount.at(0); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - } - } - } - if (m_gpuSmallSortedAabbs.size()) - { - B3_PROFILE("sapKernel"); - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL()), b3BufferInfoCL(m_overlappingPairs.getBufferCL()), b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sapKernel, "m_sapKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - launcher.setConst(axis); - launcher.setConst(maxPairs); - - int num = numSmallAabbs; -#if 0 - int buffSize = launcher.getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; - for (int i=0;i maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - m_pairCount.resize(0); - m_pairCount.push_back(maxPairs); - } - } - -#else - int numPairs = 0; - - b3LauncherCL launcher(m_queue, m_sapKernel); - - const char* fileName = "m_sapKernelArgs.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); - int num = *(int*)&buf[serializedBytes]; - launcher.launch1D(num); - - b3OpenCLArray pairCount(m_context, m_queue); - int numElements = launcher.m_arrays[2]->size() / sizeof(int); - pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(), numElements); - numPairs = pairCount.at(0); - //printf("overlapping pairs = %d\n",numPairs); - b3AlignedObjectArray hostOoverlappingPairs; - b3OpenCLArray tmpGpuPairs(m_context, m_queue); - tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(), numPairs); - - tmpGpuPairs.copyToHost(hostOoverlappingPairs); - m_overlappingPairs.copyFromHost(hostOoverlappingPairs); - //printf("hello %d\n", m_overlappingPairs.size()); - free(buf); - fclose(f); - } - else - { - printf("error: cannot find file %s\n", fileName); - } - - clFinish(m_queue); - -#endif - - m_overlappingPairs.resize(numPairs); - - } //B3_PROFILE("GPU_RADIX SORT"); - //init3dSap(); -} - -void b3GpuSapBroadphase::writeAabbsToGpu() -{ - m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); - m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); - - m_allAabbsGPU.copyFromHost(m_allAabbsCPU); //might not be necessary, the 'setupGpuAabbsFull' already takes care of this -} - -void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int index = userPtr; - b3SapAabb aabb; - for (int i = 0; i < 4; i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - } - aabb.m_minIndices[3] = index; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); - m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size()); - - m_allAabbsCPU.push_back(aabb); -} - -void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int index = userPtr; - b3SapAabb aabb; - for (int i = 0; i < 4; i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - } - aabb.m_minIndices[3] = index; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); - m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size()); - - m_allAabbsCPU.push_back(aabb); -} - -cl_mem b3GpuSapBroadphase::getAabbBufferWS() -{ - return m_allAabbsGPU.getBufferCL(); -} - -int b3GpuSapBroadphase::getNumOverlap() -{ - return m_overlappingPairs.size(); -} -cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer() -{ - return m_overlappingPairs.getBufferCL(); -} - -b3OpenCLArray& b3GpuSapBroadphase::getOverlappingPairsGPU() -{ - return m_overlappingPairs; -} -b3OpenCLArray& b3GpuSapBroadphase::getSmallAabbIndicesGPU() -{ - return m_smallAabbsMappingGPU; -} -b3OpenCLArray& b3GpuSapBroadphase::getLargeAabbIndicesGPU() -{ - return m_largeAabbsMappingGPU; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h deleted file mode 100644 index d17590b14a0..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef B3_GPU_SAP_BROADPHASE_H -#define B3_GPU_SAP_BROADPHASE_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 -class b3Vector3; -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -#include "b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" - -#include "b3GpuBroadphaseInterface.h" - -class b3GpuSapBroadphase : public b3GpuBroadphaseInterface -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - cl_kernel m_flipFloatKernel; - cl_kernel m_scatterKernel; - cl_kernel m_copyAabbsKernel; - cl_kernel m_sapKernel; - cl_kernel m_sap2Kernel; - cl_kernel m_prepareSumVarianceKernel; - - class b3RadixSort32CL* m_sorter; - - ///test for 3d SAP - b3AlignedObjectArray m_sortedAxisCPU[3][2]; - b3AlignedObjectArray m_objectMinMaxIndexCPU[3][2]; - b3OpenCLArray m_objectMinMaxIndexGPUaxis0; - b3OpenCLArray m_objectMinMaxIndexGPUaxis1; - b3OpenCLArray m_objectMinMaxIndexGPUaxis2; - b3OpenCLArray m_objectMinMaxIndexGPUaxis0prev; - b3OpenCLArray m_objectMinMaxIndexGPUaxis1prev; - b3OpenCLArray m_objectMinMaxIndexGPUaxis2prev; - - b3OpenCLArray m_sortedAxisGPU0; - b3OpenCLArray m_sortedAxisGPU1; - b3OpenCLArray m_sortedAxisGPU2; - b3OpenCLArray m_sortedAxisGPU0prev; - b3OpenCLArray m_sortedAxisGPU1prev; - b3OpenCLArray m_sortedAxisGPU2prev; - - b3OpenCLArray m_addedHostPairsGPU; - b3OpenCLArray m_removedHostPairsGPU; - b3OpenCLArray m_addedCountGPU; - b3OpenCLArray m_removedCountGPU; - - int m_currentBuffer; - -public: - b3OpenCLArray m_pairCount; - - b3OpenCLArray m_allAabbsGPU; - b3AlignedObjectArray m_allAabbsCPU; - - virtual b3OpenCLArray& getAllAabbsGPU() - { - return m_allAabbsGPU; - } - virtual b3AlignedObjectArray& getAllAabbsCPU() - { - return m_allAabbsCPU; - } - - b3OpenCLArray m_sum; - b3OpenCLArray m_sum2; - b3OpenCLArray m_dst; - - b3OpenCLArray m_smallAabbsMappingGPU; - b3AlignedObjectArray m_smallAabbsMappingCPU; - - b3OpenCLArray m_largeAabbsMappingGPU; - b3AlignedObjectArray m_largeAabbsMappingCPU; - - b3OpenCLArray m_overlappingPairs; - - //temporary gpu work memory - b3OpenCLArray m_gpuSmallSortData; - b3OpenCLArray m_gpuSmallSortedAabbs; - - class b3PrefixScanFloat4CL* m_prefixScanFloat4; - - enum b3GpuSapKernelType - { - B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU = 1, - B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU, - B3_GPU_SAP_KERNEL_ORIGINAL, - B3_GPU_SAP_KERNEL_BARRIER, - B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY - }; - - b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType = B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); - virtual ~b3GpuSapBroadphase(); - - static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU); - } - - static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU); - } - - static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_ORIGINAL); - } - static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BARRIER); - } - static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); - } - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - void reset(); - - void init3dSap(); - virtual void calculateOverlappingPairsHostIncremental3Sap(); - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual cl_mem getAabbBufferWS(); - virtual int getNumOverlap(); - virtual cl_mem getOverlappingPairBuffer(); - - virtual b3OpenCLArray& getOverlappingPairsGPU(); - virtual b3OpenCLArray& getSmallAabbIndicesGPU(); - virtual b3OpenCLArray& getLargeAabbIndicesGPU(); -}; - -#endif //B3_GPU_SAP_BROADPHASE_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h deleted file mode 100644 index 60570f2605c..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef B3_SAP_AABB_H -#define B3_SAP_AABB_H - -#include "Bullet3Common/b3Scalar.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" - -///just make sure that the b3Aabb is 16-byte aligned -B3_ATTRIBUTE_ALIGNED16(struct) -b3SapAabb : public b3Aabb{ - - }; - -#endif //B3_SAP_AABB_H diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl deleted file mode 100644 index ded4796d337..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl +++ /dev/null @@ -1,216 +0,0 @@ - - -int getPosHash(int4 gridPos, __global float4* pParams) -{ - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x &= gridDim.x - 1; - gridPos.y &= gridDim.y - 1; - gridPos.z &= gridDim.z - 1; - int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; - return hash; -} - -int4 getGridPos(float4 worldPos, __global float4* pParams) -{ - int4 gridPos; - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1); - gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1); - gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1); - return gridPos; -} - - -// calculate grid hash value for each body using its AABB -__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams ) -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - float4 bbMin = allpAABB[smallAabbMapping[index]*2]; - float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - pos.w = 0.f; - // get address in grid - int4 gridPos = getGridPos(pos, pParams); - int gridHash = getPosHash(gridPos, pParams); - // store grid hash and body index - int2 hashVal; - hashVal.x = gridHash; - hashVal.y = index; - pHash[index] = hashVal; -} - -__kernel void kClearCellStart( int numCells, - __global int* pCellStart ) -{ - int index = get_global_id(0); - if(index >= numCells) - { - return; - } - pCellStart[index] = -1; -} - -__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart ) -{ - __local int sharedHash[513]; - int index = get_global_id(0); - int2 sortedData; - - if(index < numObjects) - { - sortedData = pHash[index]; - // Load hash data into shared memory so that we can look - // at neighboring body's hash value without loading - // two hash values per thread - sharedHash[get_local_id(0) + 1] = sortedData.x; - if((index > 0) && (get_local_id(0) == 0)) - { - // first thread in block must load neighbor body hash - sharedHash[0] = pHash[index-1].x; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - if(index < numObjects) - { - if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)])) - { - cellStart[sortedData.x] = index; - } - } -} - -int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1) -{ - return (min0.x <= max1.x)&& (min1.x <= max0.x) && - (min0.y <= max1.y)&& (min1.y <= max0.y) && - (min0.z <= max1.z)&& (min1.z <= max0.z); -} - - - - -//search for AABB 'index' against other AABBs' in this cell -void findPairsInCell( int numObjects, - int4 gridPos, - int index, - __global int2* pHash, - __global int* pCellStart, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global float4* pParams, - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) -{ - int4 pGridDim = *((__global int4*)(pParams + 1)); - int maxBodiesPerCell = pGridDim.w; - int gridHash = getPosHash(gridPos, pParams); - // get start of bucket for this cell - int bucketStart = pCellStart[gridHash]; - if (bucketStart == -1) - { - return; // cell empty - } - // iterate over bodies in this cell - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - int handleIndex = as_int(min0.w); - - int bucketEnd = bucketStart + maxBodiesPerCell; - bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd; - for(int index2 = bucketStart; index2 < bucketEnd; index2++) - { - int2 cellData = pHash[index2]; - if (cellData.x != gridHash) - { - break; // no longer in same bucket - } - int unsorted_indx2 = cellData.y; - //if (unsorted_indx2 < unsorted_indx) // check not colliding with self - if (unsorted_indx2 != unsorted_indx) // check not colliding with self - { - float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0]; - float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1]; - if(testAABBOverlap(min0, max0, min1, max1)) - { - if (pairCount) - { - int handleIndex2 = as_int(min1.w); - if (handleIndex= numObjects) - { - return; - } - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - // get address in grid - int4 gridPosA = getGridPos(pos, pParams); - int4 gridPosB; - // examine only neighbouring cells - for(int z=-1; z<=1; z++) - { - gridPosB.z = gridPosA.z + z; - for(int y=-1; y<=1; y++) - { - gridPosB.y = gridPosA.y + y; - for(int x=-1; x<=1; x++) - { - gridPosB.x = gridPosA.x + x; - findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs); - } - } - } -} - - - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h deleted file mode 100644 index 01854177869..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h +++ /dev/null @@ -1,198 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* gridBroadphaseCL = - "int getPosHash(int4 gridPos, __global float4* pParams)\n" - "{\n" - " int4 gridDim = *((__global int4*)(pParams + 1));\n" - " gridPos.x &= gridDim.x - 1;\n" - " gridPos.y &= gridDim.y - 1;\n" - " gridPos.z &= gridDim.z - 1;\n" - " int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n" - " return hash;\n" - "} \n" - "int4 getGridPos(float4 worldPos, __global float4* pParams)\n" - "{\n" - " int4 gridPos;\n" - " int4 gridDim = *((__global int4*)(pParams + 1));\n" - " gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n" - " gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n" - " gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n" - " return gridPos;\n" - "}\n" - "// calculate grid hash value for each body using its AABB\n" - "__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numObjects)\n" - " {\n" - " return;\n" - " }\n" - " float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n" - " float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n" - " float4 pos;\n" - " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" - " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" - " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" - " pos.w = 0.f;\n" - " // get address in grid\n" - " int4 gridPos = getGridPos(pos, pParams);\n" - " int gridHash = getPosHash(gridPos, pParams);\n" - " // store grid hash and body index\n" - " int2 hashVal;\n" - " hashVal.x = gridHash;\n" - " hashVal.y = index;\n" - " pHash[index] = hashVal;\n" - "}\n" - "__kernel void kClearCellStart( int numCells, \n" - " __global int* pCellStart )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numCells)\n" - " {\n" - " return;\n" - " }\n" - " pCellStart[index] = -1;\n" - "}\n" - "__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n" - "{\n" - " __local int sharedHash[513];\n" - " int index = get_global_id(0);\n" - " int2 sortedData;\n" - " if(index < numObjects)\n" - " {\n" - " sortedData = pHash[index];\n" - " // Load hash data into shared memory so that we can look \n" - " // at neighboring body's hash value without loading\n" - " // two hash values per thread\n" - " sharedHash[get_local_id(0) + 1] = sortedData.x;\n" - " if((index > 0) && (get_local_id(0) == 0))\n" - " {\n" - " // first thread in block must load neighbor body hash\n" - " sharedHash[0] = pHash[index-1].x;\n" - " }\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(index < numObjects)\n" - " {\n" - " if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n" - " {\n" - " cellStart[sortedData.x] = index;\n" - " }\n" - " }\n" - "}\n" - "int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n" - "{\n" - " return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n" - " (min0.y <= max1.y)&& (min1.y <= max0.y) && \n" - " (min0.z <= max1.z)&& (min1.z <= max0.z); \n" - "}\n" - "//search for AABB 'index' against other AABBs' in this cell\n" - "void findPairsInCell( int numObjects,\n" - " int4 gridPos,\n" - " int index,\n" - " __global int2* pHash,\n" - " __global int* pCellStart,\n" - " __global float4* allpAABB, \n" - " __global const int* smallAabbMapping,\n" - " __global float4* pParams,\n" - " volatile __global int* pairCount,\n" - " __global int4* pPairBuff2,\n" - " int maxPairs\n" - " )\n" - "{\n" - " int4 pGridDim = *((__global int4*)(pParams + 1));\n" - " int maxBodiesPerCell = pGridDim.w;\n" - " int gridHash = getPosHash(gridPos, pParams);\n" - " // get start of bucket for this cell\n" - " int bucketStart = pCellStart[gridHash];\n" - " if (bucketStart == -1)\n" - " {\n" - " return; // cell empty\n" - " }\n" - " // iterate over bodies in this cell\n" - " int2 sortedData = pHash[index];\n" - " int unsorted_indx = sortedData.y;\n" - " float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n" - " float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" - " int handleIndex = as_int(min0.w);\n" - " \n" - " int bucketEnd = bucketStart + maxBodiesPerCell;\n" - " bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n" - " for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n" - " {\n" - " int2 cellData = pHash[index2];\n" - " if (cellData.x != gridHash)\n" - " {\n" - " break; // no longer in same bucket\n" - " }\n" - " int unsorted_indx2 = cellData.y;\n" - " //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n" - " if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n" - " { \n" - " float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n" - " float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n" - " if(testAABBOverlap(min0, max0, min1, max1))\n" - " {\n" - " if (pairCount)\n" - " {\n" - " int handleIndex2 = as_int(min1.w);\n" - " if (handleIndex= numObjects)\n" - " {\n" - " return;\n" - " }\n" - " int2 sortedData = pHash[index];\n" - " int unsorted_indx = sortedData.y;\n" - " float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n" - " float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" - " float4 pos;\n" - " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" - " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" - " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" - " // get address in grid\n" - " int4 gridPosA = getGridPos(pos, pParams);\n" - " int4 gridPosB; \n" - " // examine only neighbouring cells\n" - " for(int z=-1; z<=1; z++) \n" - " {\n" - " gridPosB.z = gridPosA.z + z;\n" - " for(int y=-1; y<=1; y++) \n" - " {\n" - " gridPosB.y = gridPosA.y + y;\n" - " for(int x=-1; x<=1; x++) \n" - " {\n" - " gridPosB.x = gridPosA.x + x;\n" - " findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n" - " }\n" - " }\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl deleted file mode 100644 index c375b9bf37e..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ /dev/null @@ -1,767 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -typedef float b3Scalar; -typedef float4 b3Vector3; -#define b3Max max -#define b3Min min -#define b3Sqrt sqrt - -typedef struct -{ - unsigned int m_key; - unsigned int m_value; -} SortDataCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} b3AabbCL; - - -unsigned int interleaveBits(unsigned int x) -{ - //........ ........ ......12 3456789A //x - //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits - - //......12 3456789A ......12 3456789A //x ^ (x << 16) - //11111111 ........ ........ 11111111 //0x FF 00 00 FF - //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF; - - //......12 ........ 3456789A 3456789A //x ^ (x << 8) - //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F - //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F; - - //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4) - //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3 - //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3; - - //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2) - //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49 - //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249; - - //........ ........ ......11 11111111 //0x000003FF - x &= 0x000003FF; //Clear all bits above bit 10 - - x = (x ^ (x << 16)) & 0xFF0000FF; - x = (x ^ (x << 8)) & 0x0300F00F; - x = (x ^ (x << 4)) & 0x030C30C3; - x = (x ^ (x << 2)) & 0x09249249; - - return x; -} -unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z) -{ - return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2; -} - -__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate) -{ - int separatedAabbIndex = get_global_id(0); - if(separatedAabbIndex >= numAabbsToSeparate) return; - - int unseparatedAabbIndex = aabbIndices[separatedAabbIndex]; - out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex]; -} - -//Should replace with an optimized parallel reduction -__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge) -{ - //Each time this kernel is added to the command queue, - //the number of AABBs needing to be merged is halved - // - //Example with 159 AABBs: - // numRemainingAabbs == 159 / 2 + 159 % 2 == 80 - // numMergedAabbs == 159 - 80 == 79 - //So, indices [0, 78] are merged with [0 + 80, 78 + 80] - - int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2; - int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs; - - int aabbIndex = get_global_id(0); - if(aabbIndex >= numMergedAabbs) return; - - int otherAabbIndex = aabbIndex + numRemainingAabbs; - - b3AabbCL aabb = out_mergedAabb[aabbIndex]; - b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min); - mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max); - out_mergedAabb[aabbIndex] = mergedAabb; -} - -__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, - __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs) -{ - int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index - if(leafNodeIndex >= numAabbs) return; - - b3AabbCL mergedAabb = mergedAabbOfAllNodes[0]; - b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f; - b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024; - - b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex]; - b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f; - b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter; - - //Quantize into integer coordinates - //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size - b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize; - - int4 discretePosition; - discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) ); - discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) ); - discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) ); - - //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023] - discretePosition = b3Max( -512, b3Min(discretePosition, 511) ); - discretePosition += 512; - - //Interleave bits(assign a morton code, also known as a z-curve) - unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z); - - // - SortDataCL mortonCodeIndexPair; - mortonCodeIndexPair.m_key = mortonCode; - mortonCodeIndexPair.m_value = leafNodeIndex; - - out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair; -} - -#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128 - -//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes. -//If it is set, then the index is for an internal node; otherwise, it is a leaf node. -//In both cases, the bit should be cleared to access the actual node index. -int isLeafNode(int index) { return (index >> 31 == 0); } -int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); } -int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); } - -//From sap.cl -#define NEW_PAIR_MARKER -1 - -bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -//From sap.cl - -__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - - __global SortDataCL* mortonCodesAndAabbIndices, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numQueryAabbs) -{ - //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since - //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent) - int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(queryBvhNodeIndex >= numQueryAabbs) return; - - int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value; - b3AabbCL queryAabb = rigidAabbs[queryRigidIndex]; - - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //Optimization - if the BVH is structured as a binary radix tree, then - //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]). - //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself. - { - int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y; - if(highestLeafIndex <= queryBvhNodeIndex) continue; - } - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) ) - { - if(isLeaf) - { - int4 pair; - pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3]; - pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - - } -} - - -//From rayCastKernels.cl -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; -//From rayCastKernels.cl - -b3Vector3 b3Vector3_normalize(b3Vector3 v) -{ - b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f}; - return normalize(normal); //OpenCL normalize == vector4 normalize -} -b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; } -b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } - -int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) -{ - //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ). - //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane. - // - //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane - //and min.x will be the far plane; otherwise, it is reversed. - // - //In order for there to be a collision, the t_min and t_max of each pair must overlap. - //This can be tested for by selecting the highest t_min and lowest t_max and comparing them. - - int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y) - - //When using vector types, the select() function checks the most signficant bit, - //but isless() sets the least significant bit. - isNegative <<= 31; - - //select(b, a, condition) == condition ? a : b - //When using select() with vector types, (condition[i]) is true if its most significant bit is 1 - b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection; - b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection; - - b3Scalar t_min_final = 0.0f; - b3Scalar t_max_final = rayLength; - - //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. - //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4]) - //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN. - t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) ); - t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) ); - - return (t_min_final <= t_max_final); -} - -__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - __global SortDataCL* mortonCodesAndAabbIndices, - - __global b3RayInfo* rays, - - __global int* out_numRayRigidPairs, - __global int2* out_rayRigidPairs, - int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - // - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - // - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) ) - { - if(isLeaf) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - } -} - -__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids) -{ - int smallAabbIndex = get_global_id(0); - if(smallAabbIndex >= numSmallAabbRigids) return; - - b3AabbCL smallAabb = smallAabbs[smallAabbIndex]; - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL largeAabb = largeAabbs[i]; - if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) ) - { - int4 pair; - pair.x = largeAabb.m_minIndices[3]; - pair.y = smallAabb.m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - } -} -__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays, - __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs, - int numLargeAabbRigids, int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL rigidAabb = largeRigidAabbs[i]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) ) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabb.m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - } -} - - -//Set so that it is always greater than the actual common prefixes, and never selected as a parent node. -//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve. -//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node. -//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve). -#define B3_PLBVH_INVALID_COMMON_PREFIX 128 - -#define B3_PLBVH_ROOT_NODE_MARKER -1 - -#define b3Int64 long - -int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); } -b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) -{ - //This function only needs to return (i & j) in order for the algorithm to work, - //but it may help with debugging to mask out the lower bits. - - b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j); - - b3Int64 sharedBits = i & j; - b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0 - - return sharedBits & bitmask; -} - -//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths -int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB) -{ - return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) ); -} - -__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices, - __global b3Int64* out_commonPrefixes, - __global int* out_commonPrefixLengths, - int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if (internalNodeIndex >= numInternalNodes) return; - - //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index, - //and the number of internal nodes is always numLeafNodes - 1 - int leftLeafIndex = internalNodeIndex; - int rightLeafIndex = internalNodeIndex + 1; - - int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key; - int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key; - - //Binary radix tree construction algorithm does not work if there are duplicate morton codes. - //Append the index of each leaf node to each morton code so that there are no duplicates. - //The algorithm also requires that the morton codes are sorted in ascending order; this requirement - //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true. - // - //upsample(a, b) == ( ((b3Int64)a) << 32) | b - b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex); - b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex); - - out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); - out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); -} - - -__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes, - __global int2* out_childNodes, int numLeafNodes) -{ - int leafNodeIndex = get_global_id(0); - if (leafNodeIndex >= numLeafNodes) return; - - int numInternalNodes = numLeafNodes - 1; - - int leftSplitIndex = leafNodeIndex - 1; - int rightSplitIndex = leafNodeIndex; - - int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - //Parent node is the highest adjacent common prefix that is lower than the node's common prefix - //Leaf nodes are considered as having the highest common prefix - int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix); - - //Handle cases for the edge nodes; the first and last node - //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX - if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false; - if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true; - - int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex; - out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex; - - int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - int isLeaf = 1; - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex); -} - -__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths, - __global int2* out_childNodes, - __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex, - int numInternalNodes) -{ - int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - b3Int64 nodePrefix = commonPrefixes[internalNodeIndex]; - int nodePrefixLength = commonPrefixLengths[internalNodeIndex]; - -//#define USE_LINEAR_SEARCH -#ifdef USE_LINEAR_SEARCH - int leftIndex = -1; - int rightIndex = -1; - - //Find nearest element to left with a lower common prefix - for(int i = internalNodeIndex - 1; i >= 0; --i) - { - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - leftIndex = i; - break; - } - } - - //Find nearest element to right with a lower common prefix - for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i) - { - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - rightIndex = i; - break; - } - } - -#else //Use binary search - - //Find nearest element to left with a lower common prefix - int leftIndex = -1; - { - int lower = 0; - int upper = internalNodeIndex - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int right = mid + 1; - if(right < internalNodeIndex) - { - b3Int64 rightPrefix = commonPrefixes[right]; - int rightPrefixLength = commonPrefixLengths[right]; - - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - lower = right; - leftIndex = right; - } - else - { - leftIndex = mid; - break; - } - } - else - { - leftIndex = mid; - break; - } - } - else upper = mid - 1; - } - } - - //Find nearest element to right with a lower common prefix - int rightIndex = -1; - { - int lower = internalNodeIndex + 1; - int upper = numInternalNodes - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int left = mid - 1; - if(left > internalNodeIndex) - { - b3Int64 leftPrefix = commonPrefixes[left]; - int leftPrefixLength = commonPrefixLengths[left]; - - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - upper = left; - rightIndex = left; - } - else - { - rightIndex = mid; - break; - } - } - else - { - rightIndex = mid; - break; - } - } - else lower = mid + 1; - } - } -#endif - - //Select parent - { - int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength); - - if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false; - else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true; - - int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex; - - int isRootNode = (leftIndex == -1 && rightIndex == -1); - out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER; - - int isLeaf = 0; - if(!isRootNode) - { - int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } - else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } -} - -__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes, - __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes) -{ - if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0); - - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - // - int distanceFromRoot = 0; - { - int parentIndex = internalNodeParentNodes[internalNodeIndex]; - while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER) - { - parentIndex = internalNodeParentNodes[parentIndex]; - ++distanceFromRoot; - } - } - out_distanceFromRoot[internalNodeIndex] = distanceFromRoot; - - // - __local int localMaxDistanceFromRoot; - if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0; - barrier(CLK_LOCAL_MEM_FENCE); - - atomic_max(&localMaxDistanceFromRoot, distanceFromRoot); - barrier(CLK_LOCAL_MEM_FENCE); - - if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot); -} - -__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices, - __global int2* childNodes, - __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs, - int maxDistanceFromRoot, int processedDistance, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int distance = distanceFromRoot[internalNodeIndex]; - - if(distance == processedDistance) - { - int leftChildIndex = childNodes[internalNodeIndex].x; - int rightChildIndex = childNodes[internalNodeIndex].y; - - int isLeftChildLeaf = isLeafNode(leftChildIndex); - int isRightChildLeaf = isLeafNode(rightChildIndex); - - leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex); - rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex); - - //leftRigidIndex/rightRigidIndex is not used if internal node - int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1; - int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1; - - b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex]; - b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min); - mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max); - internalNodeAabbs[internalNodeIndex] = mergedAabb; - } -} - -__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int numLeafNodes = numInternalNodes + 1; - - int2 childNodes = internalNodeChildNodes[internalNodeIndex]; - - int2 leafIndexRange; //x == min leaf index, y == max leaf index - - //Find lowest leaf index covered by this internal node - { - int lowestIndex = childNodes.x; //childNodes.x == Left child - while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x; - leafIndexRange.x = lowestIndex; - } - - //Find highest leaf index covered by this internal node - { - int highestIndex = childNodes.y; //childNodes.y == Right child - while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y; - leafIndexRange.y = highestIndex; - } - - // - out_leafIndexRanges[internalNodeIndex] = leafIndexRange; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h deleted file mode 100644 index c02877dde9c..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ /dev/null @@ -1,728 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* parallelLinearBvhCL = - "/*\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Initial Author Jackson Lee, 2014\n" - "typedef float b3Scalar;\n" - "typedef float4 b3Vector3;\n" - "#define b3Max max\n" - "#define b3Min min\n" - "#define b3Sqrt sqrt\n" - "typedef struct\n" - "{\n" - " unsigned int m_key;\n" - " unsigned int m_value;\n" - "} SortDataCL;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} b3AabbCL;\n" - "unsigned int interleaveBits(unsigned int x)\n" - "{\n" - " //........ ........ ......12 3456789A //x\n" - " //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n" - " \n" - " //......12 3456789A ......12 3456789A //x ^ (x << 16)\n" - " //11111111 ........ ........ 11111111 //0x FF 00 00 FF\n" - " //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;\n" - " \n" - " //......12 ........ 3456789A 3456789A //x ^ (x << 8)\n" - " //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F\n" - " //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;\n" - " \n" - " //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)\n" - " //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3\n" - " //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;\n" - " \n" - " //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)\n" - " //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49\n" - " //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;\n" - " \n" - " //........ ........ ......11 11111111 //0x000003FF\n" - " x &= 0x000003FF; //Clear all bits above bit 10\n" - " \n" - " x = (x ^ (x << 16)) & 0xFF0000FF;\n" - " x = (x ^ (x << 8)) & 0x0300F00F;\n" - " x = (x ^ (x << 4)) & 0x030C30C3;\n" - " x = (x ^ (x << 2)) & 0x09249249;\n" - " \n" - " return x;\n" - "}\n" - "unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n" - "{\n" - " return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n" - "}\n" - "__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n" - "{\n" - " int separatedAabbIndex = get_global_id(0);\n" - " if(separatedAabbIndex >= numAabbsToSeparate) return;\n" - " int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n" - " out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n" - "}\n" - "//Should replace with an optimized parallel reduction\n" - "__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n" - "{\n" - " //Each time this kernel is added to the command queue, \n" - " //the number of AABBs needing to be merged is halved\n" - " //\n" - " //Example with 159 AABBs:\n" - " // numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n" - " // numMergedAabbs == 159 - 80 == 79\n" - " //So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n" - " \n" - " int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n" - " int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n" - " \n" - " int aabbIndex = get_global_id(0);\n" - " if(aabbIndex >= numMergedAabbs) return;\n" - " \n" - " int otherAabbIndex = aabbIndex + numRemainingAabbs;\n" - " \n" - " b3AabbCL aabb = out_mergedAabb[aabbIndex];\n" - " b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n" - " \n" - " b3AabbCL mergedAabb;\n" - " mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n" - " mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n" - " out_mergedAabb[aabbIndex] = mergedAabb;\n" - "}\n" - "__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n" - " __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n" - "{\n" - " int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n" - " if(leafNodeIndex >= numAabbs) return;\n" - " \n" - " b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n" - " b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n" - " b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n" - " \n" - " b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n" - " b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n" - " b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n" - " \n" - " //Quantize into integer coordinates\n" - " //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n" - " b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n" - " \n" - " int4 discretePosition;\n" - " discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n" - " discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n" - " discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n" - " \n" - " //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n" - " discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n" - " discretePosition += 512;\n" - " \n" - " //Interleave bits(assign a morton code, also known as a z-curve)\n" - " unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n" - " \n" - " //\n" - " SortDataCL mortonCodeIndexPair;\n" - " mortonCodeIndexPair.m_key = mortonCode;\n" - " mortonCodeIndexPair.m_value = leafNodeIndex;\n" - " \n" - " out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n" - "}\n" - "#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n" - "//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n" - "//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n" - "//In both cases, the bit should be cleared to access the actual node index.\n" - "int isLeafNode(int index) { return (index >> 31 == 0); }\n" - "int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n" - "int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n" - "//From sap.cl\n" - "#define NEW_PAIR_MARKER -1\n" - "bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "//From sap.cl\n" - "__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n" - " __global int* rootNodeIndex, \n" - " __global int2* internalNodeChildIndices, \n" - " __global b3AabbCL* internalNodeAabbs,\n" - " __global int2* internalNodeLeafIndexRanges,\n" - " \n" - " __global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" - " int maxPairs, int numQueryAabbs)\n" - "{\n" - " //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since\n" - " //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n" - " int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" - " if(queryBvhNodeIndex >= numQueryAabbs) return;\n" - " \n" - " int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n" - " b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n" - " \n" - " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" - " \n" - " int stackSize = 1;\n" - " stack[0] = *rootNodeIndex;\n" - " \n" - " while(stackSize)\n" - " {\n" - " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" - " --stackSize;\n" - " \n" - " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" - " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" - " \n" - " //Optimization - if the BVH is structured as a binary radix tree, then\n" - " //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n" - " //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n" - " {\n" - " int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n" - " if(highestLeafIndex <= queryBvhNodeIndex) continue;\n" - " }\n" - " \n" - " //bvhRigidIndex is not used if internal node\n" - " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" - " \n" - " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" - " if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n" - " {\n" - " if(isLeaf)\n" - " {\n" - " int4 pair;\n" - " pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n" - " pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" - " pair.z = NEW_PAIR_MARKER;\n" - " pair.w = NEW_PAIR_MARKER;\n" - " \n" - " int pairIndex = atomic_inc(out_numPairs);\n" - " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" - " }\n" - " \n" - " if(!isLeaf) //Internal node\n" - " {\n" - " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" - " {\n" - " //Error\n" - " }\n" - " else\n" - " {\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" - " }\n" - " }\n" - " }\n" - " \n" - " }\n" - "}\n" - "//From rayCastKernels.cl\n" - "typedef struct\n" - "{\n" - " float4 m_from;\n" - " float4 m_to;\n" - "} b3RayInfo;\n" - "//From rayCastKernels.cl\n" - "b3Vector3 b3Vector3_normalize(b3Vector3 v)\n" - "{\n" - " b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n" - " return normalize(normal); //OpenCL normalize == vector4 normalize\n" - "}\n" - "b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n" - "b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n" - "int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" - "{\n" - " //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n" - " //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.\n" - " //\n" - " //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n" - " //and min.x will be the far plane; otherwise, it is reversed.\n" - " //\n" - " //In order for there to be a collision, the t_min and t_max of each pair must overlap.\n" - " //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n" - " \n" - " int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)\n" - " \n" - " //When using vector types, the select() function checks the most signficant bit, \n" - " //but isless() sets the least significant bit.\n" - " isNegative <<= 31;\n" - " //select(b, a, condition) == condition ? a : b\n" - " //When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n" - " b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" - " b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" - " \n" - " b3Scalar t_min_final = 0.0f;\n" - " b3Scalar t_max_final = rayLength;\n" - " \n" - " //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n" - " //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n" - " //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n" - " t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n" - " t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n" - " \n" - " return (t_min_final <= t_max_final);\n" - "}\n" - "__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" - " __global int* rootNodeIndex, \n" - " __global int2* internalNodeChildIndices, \n" - " __global b3AabbCL* internalNodeAabbs,\n" - " __global int2* internalNodeLeafIndexRanges,\n" - " __global SortDataCL* mortonCodesAndAabbIndices,\n" - " \n" - " __global b3RayInfo* rays,\n" - " \n" - " __global int* out_numRayRigidPairs, \n" - " __global int2* out_rayRigidPairs,\n" - " int maxRayRigidPairs, int numRays)\n" - "{\n" - " int rayIndex = get_global_id(0);\n" - " if(rayIndex >= numRays) return;\n" - " \n" - " //\n" - " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" - " b3Vector3 rayTo = rays[rayIndex].m_to;\n" - " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" - " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" - " \n" - " //\n" - " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" - " \n" - " int stackSize = 1;\n" - " stack[0] = *rootNodeIndex;\n" - " \n" - " while(stackSize)\n" - " {\n" - " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" - " --stackSize;\n" - " \n" - " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" - " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" - " \n" - " //bvhRigidIndex is not used if internal node\n" - " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" - " \n" - " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" - " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )\n" - " {\n" - " if(isLeaf)\n" - " {\n" - " int2 rayRigidPair;\n" - " rayRigidPair.x = rayIndex;\n" - " rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" - " \n" - " int pairIndex = atomic_inc(out_numRayRigidPairs);\n" - " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" - " }\n" - " \n" - " if(!isLeaf) //Internal node\n" - " {\n" - " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" - " {\n" - " //Error\n" - " }\n" - " else\n" - " {\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n" - " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" - " int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n" - "{\n" - " int smallAabbIndex = get_global_id(0);\n" - " if(smallAabbIndex >= numSmallAabbRigids) return;\n" - " \n" - " b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n" - " for(int i = 0; i < numLargeAabbRigids; ++i)\n" - " {\n" - " b3AabbCL largeAabb = largeAabbs[i];\n" - " if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n" - " {\n" - " int4 pair;\n" - " pair.x = largeAabb.m_minIndices[3];\n" - " pair.y = smallAabb.m_minIndices[3];\n" - " pair.z = NEW_PAIR_MARKER;\n" - " pair.w = NEW_PAIR_MARKER;\n" - " \n" - " int pairIndex = atomic_inc(out_numPairs);\n" - " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" - " }\n" - " }\n" - "}\n" - "__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n" - " __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n" - " int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n" - "{\n" - " int rayIndex = get_global_id(0);\n" - " if(rayIndex >= numRays) return;\n" - " \n" - " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" - " b3Vector3 rayTo = rays[rayIndex].m_to;\n" - " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" - " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" - " \n" - " for(int i = 0; i < numLargeAabbRigids; ++i)\n" - " {\n" - " b3AabbCL rigidAabb = largeRigidAabbs[i];\n" - " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n" - " {\n" - " int2 rayRigidPair;\n" - " rayRigidPair.x = rayIndex;\n" - " rayRigidPair.y = rigidAabb.m_minIndices[3];\n" - " \n" - " int pairIndex = atomic_inc(out_numRayRigidPairs);\n" - " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" - " }\n" - " }\n" - "}\n" - "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n" - "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n" - "//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n" - "//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n" - "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n" - "#define B3_PLBVH_ROOT_NODE_MARKER -1\n" - "#define b3Int64 long\n" - "int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n" - "b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n" - "{\n" - " //This function only needs to return (i & j) in order for the algorithm to work,\n" - " //but it may help with debugging to mask out the lower bits.\n" - " b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n" - " b3Int64 sharedBits = i & j;\n" - " b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0\n" - " \n" - " return sharedBits & bitmask;\n" - "}\n" - "//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n" - "int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n" - "{\n" - " return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n" - "}\n" - "__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global b3Int64* out_commonPrefixes,\n" - " __global int* out_commonPrefixLengths,\n" - " int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if (internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n" - " //and the number of internal nodes is always numLeafNodes - 1\n" - " int leftLeafIndex = internalNodeIndex;\n" - " int rightLeafIndex = internalNodeIndex + 1;\n" - " \n" - " int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n" - " int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n" - " \n" - " //Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n" - " //Append the index of each leaf node to each morton code so that there are no duplicates.\n" - " //The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n" - " //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n" - " //\n" - " //upsample(a, b) == ( ((b3Int64)a) << 32) | b\n" - " b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n" - " b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n" - " \n" - " out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" - " out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" - "}\n" - "__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n" - " __global int2* out_childNodes, int numLeafNodes)\n" - "{\n" - " int leafNodeIndex = get_global_id(0);\n" - " if (leafNodeIndex >= numLeafNodes) return;\n" - " \n" - " int numInternalNodes = numLeafNodes - 1;\n" - " \n" - " int leftSplitIndex = leafNodeIndex - 1;\n" - " int rightSplitIndex = leafNodeIndex;\n" - " \n" - " int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " \n" - " //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n" - " //Leaf nodes are considered as having the highest common prefix\n" - " int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n" - " \n" - " //Handle cases for the edge nodes; the first and last node\n" - " //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n" - " if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n" - " if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n" - " \n" - " int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n" - " out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n" - " \n" - " int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa\n" - " \n" - " //out_childNodesAsInt[0] == int2.x == left child\n" - " //out_childNodesAsInt[1] == int2.y == right child\n" - " int isLeaf = 1;\n" - " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" - " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n" - "}\n" - "__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n" - " __global int2* out_childNodes,\n" - " __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n" - " int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n" - " int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n" - " \n" - "//#define USE_LINEAR_SEARCH\n" - "#ifdef USE_LINEAR_SEARCH\n" - " int leftIndex = -1;\n" - " int rightIndex = -1;\n" - " \n" - " //Find nearest element to left with a lower common prefix\n" - " for(int i = internalNodeIndex - 1; i >= 0; --i)\n" - " {\n" - " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" - " if(nodeLeftSharedPrefixLength < nodePrefixLength)\n" - " {\n" - " leftIndex = i;\n" - " break;\n" - " }\n" - " }\n" - " \n" - " //Find nearest element to right with a lower common prefix\n" - " for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n" - " {\n" - " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" - " if(nodeRightSharedPrefixLength < nodePrefixLength)\n" - " {\n" - " rightIndex = i;\n" - " break;\n" - " }\n" - " }\n" - " \n" - "#else //Use binary search\n" - " //Find nearest element to left with a lower common prefix\n" - " int leftIndex = -1;\n" - " {\n" - " int lower = 0;\n" - " int upper = internalNodeIndex - 1;\n" - " \n" - " while(lower <= upper)\n" - " {\n" - " int mid = (lower + upper) / 2;\n" - " b3Int64 midPrefix = commonPrefixes[mid];\n" - " int midPrefixLength = commonPrefixLengths[mid];\n" - " \n" - " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" - " if(nodeMidSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " int right = mid + 1;\n" - " if(right < internalNodeIndex)\n" - " {\n" - " b3Int64 rightPrefix = commonPrefixes[right];\n" - " int rightPrefixLength = commonPrefixLengths[right];\n" - " \n" - " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n" - " if(nodeRightSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " lower = right;\n" - " leftIndex = right;\n" - " }\n" - " else \n" - " {\n" - " leftIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else \n" - " {\n" - " leftIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else upper = mid - 1;\n" - " }\n" - " }\n" - " \n" - " //Find nearest element to right with a lower common prefix\n" - " int rightIndex = -1;\n" - " {\n" - " int lower = internalNodeIndex + 1;\n" - " int upper = numInternalNodes - 1;\n" - " \n" - " while(lower <= upper)\n" - " {\n" - " int mid = (lower + upper) / 2;\n" - " b3Int64 midPrefix = commonPrefixes[mid];\n" - " int midPrefixLength = commonPrefixLengths[mid];\n" - " \n" - " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" - " if(nodeMidSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " int left = mid - 1;\n" - " if(left > internalNodeIndex)\n" - " {\n" - " b3Int64 leftPrefix = commonPrefixes[left];\n" - " int leftPrefixLength = commonPrefixLengths[left];\n" - " \n" - " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n" - " if(nodeLeftSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " upper = left;\n" - " rightIndex = left;\n" - " }\n" - " else \n" - " {\n" - " rightIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else \n" - " {\n" - " rightIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else lower = mid + 1;\n" - " }\n" - " }\n" - "#endif\n" - " \n" - " //Select parent\n" - " {\n" - " int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " \n" - " int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n" - " \n" - " if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n" - " else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n" - " \n" - " int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n" - " \n" - " int isRootNode = (leftIndex == -1 && rightIndex == -1);\n" - " out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n" - " \n" - " int isLeaf = 0;\n" - " if(!isRootNode)\n" - " {\n" - " int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa\n" - " \n" - " //out_childNodesAsInt[0] == int2.x == left child\n" - " //out_childNodesAsInt[1] == int2.y == right child\n" - " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" - " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" - " }\n" - " else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" - " }\n" - "}\n" - "__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n" - " __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n" - "{\n" - " if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " //\n" - " int distanceFromRoot = 0;\n" - " {\n" - " int parentIndex = internalNodeParentNodes[internalNodeIndex];\n" - " while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n" - " {\n" - " parentIndex = internalNodeParentNodes[parentIndex];\n" - " ++distanceFromRoot;\n" - " }\n" - " }\n" - " out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n" - " \n" - " //\n" - " __local int localMaxDistanceFromRoot;\n" - " if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n" - "}\n" - "__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global int2* childNodes,\n" - " __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n" - " int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " int distance = distanceFromRoot[internalNodeIndex];\n" - " \n" - " if(distance == processedDistance)\n" - " {\n" - " int leftChildIndex = childNodes[internalNodeIndex].x;\n" - " int rightChildIndex = childNodes[internalNodeIndex].y;\n" - " \n" - " int isLeftChildLeaf = isLeafNode(leftChildIndex);\n" - " int isRightChildLeaf = isLeafNode(rightChildIndex);\n" - " \n" - " leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n" - " rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n" - " \n" - " //leftRigidIndex/rightRigidIndex is not used if internal node\n" - " int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n" - " int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n" - " \n" - " b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n" - " b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n" - " \n" - " b3AabbCL mergedAabb;\n" - " mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n" - " mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n" - " internalNodeAabbs[internalNodeIndex] = mergedAabb;\n" - " }\n" - "}\n" - "__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " int numLeafNodes = numInternalNodes + 1;\n" - " \n" - " int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n" - " \n" - " int2 leafIndexRange; //x == min leaf index, y == max leaf index\n" - " \n" - " //Find lowest leaf index covered by this internal node\n" - " {\n" - " int lowestIndex = childNodes.x; //childNodes.x == Left child\n" - " while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n" - " leafIndexRange.x = lowestIndex;\n" - " }\n" - " \n" - " //Find highest leaf index covered by this internal node\n" - " {\n" - " int highestIndex = childNodes.y; //childNodes.y == Right child\n" - " while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n" - " leafIndexRange.y = highestIndex;\n" - " }\n" - " \n" - " //\n" - " out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl deleted file mode 100644 index 93f77a64335..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl +++ /dev/null @@ -1,389 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define NEW_PAIR_MARKER -1 - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -/// conservative test for overlap between two aabbs -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2); -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - - -__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numUnsortedAabbs) - return; - - int j = get_global_id(1); - if (j>=numUnSortedAabbs2) - return; - - - __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]]; - __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]]; - - if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2)) - { - int4 myPair; - - int xIndex = unsortedAabbPtr[0].m_minIndices[3]; - int yIndex = unsortedAabbPtr2[0].m_minIndices[3]; - if (xIndex>yIndex) - { - int tmp = xIndex; - xIndex=yIndex; - yIndex=tmp; - } - - myPair.x = xIndex; - myPair.y = yIndex; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - - int curPair = atomic_inc (pairCount); - if (curPair=numObjects) - return; - for (int j=i+1;j=numObjects) - return; - for (int j=i+1;j=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1])) - { - int4 myPair; - myPair.x = myAabb.m_minIndices[3]; - myPair.y = localAabbs[localCount+localId+1].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair> 31) | 0x80000000; - return f ^ mask; -} -float IFloatFlip(unsigned int f); -float IFloatFlip(unsigned int f) -{ - unsigned int mask = ((f >> 31) - 1) | 0x80000000; - unsigned int fl = f ^ mask; - return *(float*)&fl; -} - - - - -__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - int src = destAabbs[i].m_maxIndices[3]; - destAabbs[i] = allAabbs[src]; - destAabbs[i].m_maxIndices[3] = src; -} - - -__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - - sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]); - sortData[i].y = i; - -} - - -__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]]; -} - - - -__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs) -{ - int i = get_global_id(0); - if (i>=numAabbs) - return; - - btAabbCL smallAabb = allAabbs[smallAabbMapping[i]]; - - float4 s; - s = (smallAabb.m_max+smallAabb.m_min)*0.5f; - sum[i]=s; - sum2[i]=s*s; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h deleted file mode 100644 index d6999b94cb3..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ /dev/null @@ -1,341 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* sapCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#define NEW_PAIR_MARKER -1\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "/// conservative test for overlap between two aabbs\n" - "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numUnsortedAabbs)\n" - " return;\n" - " int j = get_global_id(1);\n" - " if (j>=numUnSortedAabbs2)\n" - " return;\n" - " __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" - " __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" - " if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" - " {\n" - " int4 myPair;\n" - " \n" - " int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" - " int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" - " if (xIndex>yIndex)\n" - " {\n" - " int tmp = xIndex;\n" - " xIndex=yIndex;\n" - " yIndex=tmp;\n" - " }\n" - " \n" - " myPair.x = xIndex;\n" - " myPair.y = yIndex;\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair=numObjects)\n" - " return;\n" - " for (int j=i+1;j=numObjects)\n" - " return;\n" - " for (int j=i+1;j=numObjects && !localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (!localBreak)\n" - " {\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair=numObjects && !localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (!localBreak)\n" - " {\n" - " if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = myAabb.m_minIndices[3];\n" - " myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair> 31) | 0x80000000;\n" - " return f ^ mask;\n" - "}\n" - "float IFloatFlip(unsigned int f);\n" - "float IFloatFlip(unsigned int f)\n" - "{\n" - " unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n" - " unsigned int fl = f ^ mask;\n" - " return *(float*)&fl;\n" - "}\n" - "__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " int src = destAabbs[i].m_maxIndices[3];\n" - " destAabbs[i] = allAabbs[src];\n" - " destAabbs[i].m_maxIndices[3] = src;\n" - "}\n" - "__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " \n" - " \n" - " sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n" - " sortData[i].y = i;\n" - " \n" - "}\n" - "__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " \n" - " sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n" - "}\n" - "__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numAabbs)\n" - " return;\n" - " \n" - " btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n" - " \n" - " float4 s;\n" - " s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n" - " sum[i]=s;\n" - " sum2[i]=s*s; \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLInclude.h b/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLInclude.h deleted file mode 100644 index 61465382630..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLInclude.h +++ /dev/null @@ -1,51 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_OPENCL_INCLUDE_H -#define B3_OPENCL_INCLUDE_H - -#ifdef B3_USE_CLEW -#include "clew/clew.h" -#else - -#ifdef __APPLE__ -#ifdef USE_MINICL -#include -#else -#include -#include //clLogMessagesToStderrAPPLE -#endif -#else -#ifdef USE_MINICL -#include -#else -#include -#ifdef _WIN32 -#include "CL/cl_gl.h" -#endif //_WIN32 -#endif -#endif //__APPLE__ -#endif //B3_USE_CLEW - -#include -#include -#define oclCHECKERROR(a, b) \ - if ((a) != (b)) \ - { \ - printf("OCL Error : %d\n", (a)); \ - assert((a) == (b)); \ - } - -#endif //B3_OPENCL_INCLUDE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp b/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp deleted file mode 100644 index fe54ea5ec90..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp +++ /dev/null @@ -1,963 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org -Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//Original author: Roman Ponomarev -//Mostly Reimplemented by Erwin Coumans - -bool gDebugForceLoadingFromSource = false; -bool gDebugSkipLoadingBinary = false; - -#include "Bullet3Common/b3Logging.h" - -#include - -#ifdef _WIN32 -#pragma warning(disable : 4996) -#endif -#include "b3OpenCLUtils.h" -//#include "b3OpenCLInclude.h" - -#include -#include - -#define B3_MAX_CL_DEVICES 16 //who needs 16 devices? - -#ifdef _WIN32 -#include -#endif - -#include -#define b3Assert assert -#ifndef _WIN32 -#include - -#endif - -static const char* sCachedBinaryPath = "cache"; - -//Set the preferred platform vendor using the OpenCL SDK -static const char* spPlatformVendor = -#if defined(CL_PLATFORM_MINI_CL) - "MiniCL, SCEA"; -#elif defined(CL_PLATFORM_AMD) - "Advanced Micro Devices, Inc."; -#elif defined(CL_PLATFORM_NVIDIA) - "NVIDIA Corporation"; -#elif defined(CL_PLATFORM_INTEL) - "Intel(R) Corporation"; -#elif defined(B3_USE_CLEW) - "clew (OpenCL Extension Wrangler library)"; -#else - "Unknown Vendor"; -#endif - -#ifndef CL_PLATFORM_MINI_CL -#ifdef _WIN32 -#ifndef B3_USE_CLEW -#include "CL/cl_gl.h" -#endif //B3_USE_CLEW -#endif //_WIN32 -#endif - -void MyFatalBreakAPPLE(const char* errstr, - const void* private_info, - size_t cb, - void* user_data) -{ - const char* patloc = strstr(errstr, "Warning"); - //find out if it is a warning or error, exit if error - - if (patloc) - { - b3Warning("Warning: %s\n", errstr); - } - else - { - b3Error("Error: %s\n", errstr); - b3Assert(0); - } -} - -#ifdef B3_USE_CLEW - -int b3OpenCLUtils_clewInit() -{ - int result = -1; - -#ifdef _WIN32 - const char* cl = "OpenCL.dll"; -#elif defined __APPLE__ - const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL"; -#else //presumable Linux? \ - //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so - const char* cl = "libOpenCL.so.1"; - result = clewInit(cl); - if (result != CLEW_SUCCESS) - { - cl = "libOpenCL.so"; - } - else - { - clewExit(); - } -#endif - result = clewInit(cl); - if (result != CLEW_SUCCESS) - { - b3Error("clewInit failed with error code %d\n", result); - } - else - { - b3Printf("clewInit succesfull using %s\n", cl); - } - return result; -} -#endif - -int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_platform_id pPlatforms[10] = {0}; - - cl_uint numPlatforms = 0; - cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms); - //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) - *pErrNum = ciErrNum; - } - return numPlatforms; -} - -const char* b3OpenCLUtils_getSdkVendorName() -{ - return spPlatformVendor; -} - -void b3OpenCLUtils_setCachePath(const char* path) -{ - sCachedBinaryPath = path; -} - -cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_platform_id platform = 0; - unsigned int platformIndex = (unsigned int)platformIndex0; - cl_uint numPlatforms; - cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - - if (platformIndex < numPlatforms) - { - cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms); - ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) - *pErrNum = ciErrNum; - return platform; - } - - platform = platforms[platformIndex]; - - free(platforms); - } - - return platform; -} - -void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo) -{ - b3Assert(platform); - cl_int ciErrNum; - ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); -} - -void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform) -{ - b3OpenCLPlatformInfo platformInfo; - b3OpenCLUtils::getPlatformInfo(platform, &platformInfo); - b3Printf("Platform info:\n"); - b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor); - b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName); - b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion); -} - -cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex) -{ - cl_context retContext = 0; - cl_int ciErrNum = 0; - cl_uint num_entries; - cl_device_id devices[B3_MAX_CL_DEVICES]; - cl_uint num_devices; - cl_context_properties* cprops; - - /* - * If we could find our platform, use it. Otherwise pass a NULL and get whatever the - * implementation thinks we should be using. - */ - cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0}; - cps[0] = CL_CONTEXT_PLATFORM; - cps[1] = (cl_context_properties)platform; -#ifdef _WIN32 -#ifndef B3_USE_CLEW - if (pGLContext && pGLDC) - { - cps[2] = CL_GL_CONTEXT_KHR; - cps[3] = (cl_context_properties)pGLContext; - cps[4] = CL_WGL_HDC_KHR; - cps[5] = (cl_context_properties)pGLDC; - } -#endif //B3_USE_CLEW -#endif //_WIN32 - num_entries = B3_MAX_CL_DEVICES; - - num_devices = -1; - - ciErrNum = clGetDeviceIDs( - platform, - deviceType, - num_entries, - devices, - &num_devices); - - if (ciErrNum < 0) - { - b3Printf("clGetDeviceIDs returned %d\n", ciErrNum); - return 0; - } - cprops = (NULL == platform) ? NULL : cps; - - if (!num_devices) - return 0; - - if (pGLContext) - { - //search for the GPU that relates to the OpenCL context - unsigned int i; - for (i = 0; i < num_devices; i++) - { - retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum); - if (ciErrNum == CL_SUCCESS) - break; - } - } - else - { - if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices) - { - //create a context of the preferred device index - retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum); - } - else - { - //create a context of all devices -#if defined(__APPLE__) - retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum); -#else - b3Printf("numDevices=%d\n", num_devices); - - retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum); -#endif - } - } - if (pErrNum != NULL) - { - *pErrNum = ciErrNum; - }; - - return retContext; -} - -cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_uint numPlatforms; - cl_context retContext = 0; - unsigned int i; - - cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) *pErrNum = ciErrNum; - return NULL; - } - if (numPlatforms > 0) - { - cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms); - ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) - *pErrNum = ciErrNum; - free(platforms); - return NULL; - } - - for (i = 0; i < numPlatforms; ++i) - { - char pbuf[128]; - ciErrNum = clGetPlatformInfo(platforms[i], - CL_PLATFORM_VENDOR, - sizeof(pbuf), - pbuf, - NULL); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) *pErrNum = ciErrNum; - return NULL; - } - - if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex) - { - cl_platform_id tmpPlatform = platforms[0]; - platforms[0] = platforms[i]; - platforms[i] = tmpPlatform; - break; - } - else - { - if (!strcmp(pbuf, spPlatformVendor)) - { - cl_platform_id tmpPlatform = platforms[0]; - platforms[0] = platforms[i]; - platforms[i] = tmpPlatform; - } - } - } - - for (i = 0; i < numPlatforms; ++i) - { - cl_platform_id platform = platforms[i]; - assert(platform); - - retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex); - - if (retContext) - { - // printf("OpenCL platform details:\n"); - b3OpenCLPlatformInfo platformInfo; - - b3OpenCLUtils::getPlatformInfo(platform, &platformInfo); - - if (retPlatformId) - *retPlatformId = platform; - - break; - } - } - - free(platforms); - } - return retContext; -} - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the nth device from the context -//! -//! @return the id or -1 when out of range -//! @param cxMainContext OpenCL context -//! @param device_idx index of the device of interest -////////////////////////////////////////////////////////////////////////////// -cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex) -{ - assert(cxMainContext); - - size_t szParmDataBytes; - cl_device_id* cdDevices; - cl_device_id device; - - // get the list of devices associated with context - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); - - if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex) - { - return (cl_device_id)-1; - } - - cdDevices = (cl_device_id*)malloc(szParmDataBytes); - - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); - - device = cdDevices[deviceIndex]; - free(cdDevices); - - return device; -} - -int b3OpenCLUtils_getNumDevices(cl_context cxMainContext) -{ - size_t szParamDataBytes; - int device_count; - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes); - device_count = (int)szParamDataBytes / sizeof(cl_device_id); - return device_count; -} - -void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info) -{ - // CL_DEVICE_NAME - clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL); - - // CL_DEVICE_VENDOR - clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL); - - // CL_DRIVER_VERSION - clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL); - - // CL_DEVICE_INFO - clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL); - - // CL_DEVICE_MAX_COMPUTE_UNITS - clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL); - - // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL); - - // CL_DEVICE_MAX_WORK_ITEM_SIZES - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL); - - // CL_DEVICE_MAX_WORK_GROUP_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL); - - // CL_DEVICE_MAX_CLOCK_FREQUENCY - clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL); - - // CL_DEVICE_ADDRESS_BITS - clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL); - - // CL_DEVICE_MAX_MEM_ALLOC_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL); - - // CL_DEVICE_GLOBAL_MEM_SIZE - clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL); - - // CL_DEVICE_ERROR_CORRECTION_SUPPORT - clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL); - - // CL_DEVICE_LOCAL_MEM_TYPE - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL); - - // CL_DEVICE_LOCAL_MEM_SIZE - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL); - - // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL); - - // CL_DEVICE_QUEUE_PROPERTIES - clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL); - - // CL_DEVICE_IMAGE_SUPPORT - clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL); - - // CL_DEVICE_MAX_READ_IMAGE_ARGS - clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL); - - // CL_DEVICE_MAX_WRITE_IMAGE_ARGS - clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL); - - // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH - clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL); - - // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines - clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL); - - // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL); -} - -void b3OpenCLUtils_printDeviceInfo(cl_device_id device) -{ - b3OpenCLDeviceInfo info; - b3OpenCLUtils::getDeviceInfo(device, &info); - b3Printf("Device Info:\n"); - b3Printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName); - b3Printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor); - b3Printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion); - - if (info.m_deviceType & CL_DEVICE_TYPE_CPU) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); - if (info.m_deviceType & CL_DEVICE_TYPE_GPU) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); - if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); - if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); - - b3Printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits); - b3Printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims); - b3Printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]); - b3Printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize); - b3Printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency); - b3Printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits); - b3Printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024))); - b3Printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024))); - b3Printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no"); - b3Printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global"); - b3Printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024)); - b3Printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024)); - if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) - b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); - if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE) - b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); - - b3Printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport); - - b3Printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs); - b3Printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs); - b3Printf("\n CL_DEVICE_IMAGE "); - b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth); - b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight); - b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth); - b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight); - b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth); - if (*info.m_deviceExtensions != 0) - { - b3Printf("\n CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions); - } - else - { - b3Printf(" CL_DEVICE_EXTENSIONS: None\n"); - } - b3Printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); - b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n", - info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble); -} - -static const char* strip2(const char* name, const char* pattern) -{ - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char* oriptr; - const char* patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; (patloc = strstr(oriptr, pattern)); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; -} - -cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching) -{ - const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : ""; - - if (disableBinaryCaching) - { - //kernelSourceOrg = 0; - } - - cl_program m_cpProgram = 0; - cl_int status; - - char binaryFileName[B3_MAX_STRING_LENGTH]; - - char deviceName[256]; - char driverVersion[256]; - const char* strippedName; - int fileUpToDate = 0; -#ifdef _WIN32 - int binaryFileValid = 0; -#endif - if (!disableBinaryCaching && clFileNameForCaching) - { - clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL); - clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL); - - strippedName = strip2(clFileNameForCaching, "\\"); - strippedName = strip2(strippedName, "/"); - -#ifdef _MSC_VER - sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion); -#else - sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion); -#endif - } - if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource)) - { -#ifdef _WIN32 - char* bla = 0; - - //printf("searching for %s\n", binaryFileName); - - FILETIME modtimeBinary; - CreateDirectoryA(sCachedBinaryPath, 0); - { - HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - if (binaryFileHandle == INVALID_HANDLE_VALUE) - { - DWORD errorCode; - errorCode = GetLastError(); - switch (errorCode) - { - case ERROR_FILE_NOT_FOUND: - { - b3Warning("\nCached file not found %s\n", binaryFileName); - break; - } - case ERROR_PATH_NOT_FOUND: - { - b3Warning("\nCached file path not found %s\n", binaryFileName); - break; - } - default: - { - b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode); - } - } - } - else - { - if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0) - { - DWORD errorCode; - errorCode = GetLastError(); - b3Warning("\nGetFileTime errorCode = %d\n", errorCode); - } - else - { - binaryFileValid = 1; - } - CloseHandle(binaryFileHandle); - } - - if (binaryFileValid) - { - HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - - if (srcFileHandle == INVALID_HANDLE_VALUE) - { - const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"}; - for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++) - { - char relativeFileName[1024]; - sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching); - srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - } - } - - if (srcFileHandle != INVALID_HANDLE_VALUE) - { - FILETIME modtimeSrc; - if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0) - { - DWORD errorCode; - errorCode = GetLastError(); - b3Warning("\nGetFileTime errorCode = %d\n", errorCode); - } - if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime))) - { - fileUpToDate = 1; - } - else - { - b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName); - } - CloseHandle(srcFileHandle); - } - else - { -#ifdef _DEBUG - DWORD errorCode; - errorCode = GetLastError(); - switch (errorCode) - { - case ERROR_FILE_NOT_FOUND: - { - b3Warning("\nSrc file not found %s\n", clFileNameForCaching); - break; - } - case ERROR_PATH_NOT_FOUND: - { - b3Warning("\nSrc path not found %s\n", clFileNameForCaching); - break; - } - default: - { - b3Warning("\nnSrc file reading errorCode = %d\n", errorCode); - } - } - - //we should make sure the src file exists so we can verify the timestamp with binary - // assert(0); - b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName); - fileUpToDate = true; -#else - //if we cannot find the source, assume it is OK in release builds - fileUpToDate = true; -#endif - } - } - } - -#else - fileUpToDate = true; - if (mkdir(sCachedBinaryPath, 0777) == -1) - { - } - else - { - b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath); - } -#endif //_WIN32 - } - - if (fileUpToDate) - { -#ifdef _MSC_VER - FILE* file; - if (fopen_s(&file, binaryFileName, "rb") != 0) - file = 0; -#else - FILE* file = fopen(binaryFileName, "rb"); -#endif - - if (file) - { - size_t binarySize = 0; - char* binary = 0; - - fseek(file, 0L, SEEK_END); - binarySize = ftell(file); - rewind(file); - binary = (char*)malloc(sizeof(char) * binarySize); - int bytesRead; - bytesRead = fread(binary, sizeof(char), binarySize, file); - fclose(file); - - m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status); - b3Assert(status == CL_SUCCESS); - status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0); - b3Assert(status == CL_SUCCESS); - - if (status != CL_SUCCESS) - { - char* build_log; - size_t ret_val_size; - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1)); - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - build_log[ret_val_size] = '\0'; - b3Error("%s\n", build_log); - free(build_log); - b3Assert(0); - m_cpProgram = 0; - - b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName); - } - else - { - b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName); - } - free(binary); - } - else - { - b3Warning("Cannot open cached binary: %s\n", binaryFileName); - } - } - - if (!m_cpProgram) - { - cl_int localErrNum; - char* compileFlags; - int flagsize; - - const char* kernelSource = kernelSourceOrg; - - if (!kernelSourceOrg || gDebugForceLoadingFromSource) - { - if (clFileNameForCaching) - { - FILE* file = fopen(clFileNameForCaching, "rb"); - //in many cases the relative path is a few levels up the directory hierarchy, so try it - if (!file) - { - const char* prefix[] = {"../", "../../", "../../../", "../../../../"}; - for (int i = 0; !file && i < 3; i++) - { - char relativeFileName[1024]; - sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching); - file = fopen(relativeFileName, "rb"); - } - } - - if (file) - { - char* kernelSrc = 0; - fseek(file, 0L, SEEK_END); - int kernelSize = ftell(file); - rewind(file); - kernelSrc = (char*)malloc(kernelSize + 1); - int readBytes; - readBytes = fread((void*)kernelSrc, 1, kernelSize, file); - kernelSrc[kernelSize] = 0; - fclose(file); - kernelSource = kernelSrc; - } - } - } - - size_t program_length = kernelSource ? strlen(kernelSource) : 0; -#ifdef MAC //or __APPLE__? - char* flags = "-cl-mad-enable -DMAC "; -#else - const char* flags = ""; -#endif - - m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum); - if (localErrNum != CL_SUCCESS) - { - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - // Build the program with 'mad' Optimization option - - flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5); - compileFlags = (char*)malloc(flagsize); -#ifdef _MSC_VER - sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros); -#else - sprintf(compileFlags, "%s %s", flags, additionalMacros); -#endif - localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL); - if (localErrNum != CL_SUCCESS) - { - char* build_log; - size_t ret_val_size; - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1)); - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - - // to be carefully, terminate with \0 - // there's no information in the reference whether the string is 0 terminated or not - build_log[ret_val_size] = '\0'; - - b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log); - free(build_log); - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - if (!disableBinaryCaching && clFileNameForCaching) - { // write to binary - - cl_uint numAssociatedDevices; - status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0); - b3Assert(status == CL_SUCCESS); - if (numAssociatedDevices == 1) - { - size_t binarySize; - char* binary; - - status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0); - b3Assert(status == CL_SUCCESS); - - binary = (char*)malloc(sizeof(char) * binarySize); - - status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0); - b3Assert(status == CL_SUCCESS); - - { - FILE* file = 0; -#ifdef _MSC_VER - if (fopen_s(&file, binaryFileName, "wb") != 0) - file = 0; -#else - file = fopen(binaryFileName, "wb"); -#endif - if (file) - { - fwrite(binary, sizeof(char), binarySize, file); - fclose(file); - } - else - { - b3Warning("cannot write file %s\n", binaryFileName); - } - } - - free(binary); - } - } - - free(compileFlags); - } - return m_cpProgram; -} - -cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros) -{ - cl_kernel kernel; - cl_int localErrNum; - - cl_program m_cpProgram = prog; - - b3Printf("compiling kernel %s ", kernelName); - - if (!m_cpProgram) - { - m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false); - } - - // Create the kernel - kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum); - if (localErrNum != CL_SUCCESS) - { - b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName); - assert(0); - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - if (!prog && m_cpProgram) - { - clReleaseProgram(m_cpProgram); - } - b3Printf("ready. \n"); - - if (pErrNum) - *pErrNum = CL_SUCCESS; - return kernel; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.h b/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.h deleted file mode 100644 index 6c82eed2a6b..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.h +++ /dev/null @@ -1,190 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org -Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//original author: Roman Ponomarev -//cleanup by Erwin Coumans - -#ifndef B3_OPENCL_UTILS_H -#define B3_OPENCL_UTILS_H - -#include "b3OpenCLInclude.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - ///C API for OpenCL utilities: convenience functions, see below for C++ API - - /// CL Context optionally takes a GL context. This is a generic type because we don't really want this code - /// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. - cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* platformId); - - int b3OpenCLUtils_getNumDevices(cl_context cxMainContext); - - cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int nr); - - void b3OpenCLUtils_printDeviceInfo(cl_device_id device); - - cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros); - - //optional - cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum, const char* additionalMacros, const char* srcFileNameForCaching, bool disableBinaryCaching); - - //the following optional APIs provide access using specific platform information - int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum); - - ///get the nr'th platform, where nr is in the range [0..getNumPlatforms) - cl_platform_id b3OpenCLUtils_getPlatform(int nr, cl_int* pErrNum); - - void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform); - - const char* b3OpenCLUtils_getSdkVendorName(); - - ///set the path (directory/folder) where the compiled OpenCL kernel are stored - void b3OpenCLUtils_setCachePath(const char* path); - - cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex); - -#ifdef __cplusplus -} - -#define B3_MAX_STRING_LENGTH 1024 - -typedef struct -{ - char m_deviceName[B3_MAX_STRING_LENGTH]; - char m_deviceVendor[B3_MAX_STRING_LENGTH]; - char m_driverVersion[B3_MAX_STRING_LENGTH]; - char m_deviceExtensions[B3_MAX_STRING_LENGTH]; - - cl_device_type m_deviceType; - cl_uint m_computeUnits; - size_t m_workitemDims; - size_t m_workItemSize[3]; - size_t m_image2dMaxWidth; - size_t m_image2dMaxHeight; - size_t m_image3dMaxWidth; - size_t m_image3dMaxHeight; - size_t m_image3dMaxDepth; - size_t m_workgroupSize; - cl_uint m_clockFrequency; - cl_ulong m_constantBufferSize; - cl_ulong m_localMemSize; - cl_ulong m_globalMemSize; - cl_bool m_errorCorrectionSupport; - cl_device_local_mem_type m_localMemType; - cl_uint m_maxReadImageArgs; - cl_uint m_maxWriteImageArgs; - - cl_uint m_addressBits; - cl_ulong m_maxMemAllocSize; - cl_command_queue_properties m_queueProperties; - cl_bool m_imageSupport; - cl_uint m_vecWidthChar; - cl_uint m_vecWidthShort; - cl_uint m_vecWidthInt; - cl_uint m_vecWidthLong; - cl_uint m_vecWidthFloat; - cl_uint m_vecWidthDouble; - -} b3OpenCLDeviceInfo; - -struct b3OpenCLPlatformInfo -{ - char m_platformVendor[B3_MAX_STRING_LENGTH]; - char m_platformName[B3_MAX_STRING_LENGTH]; - char m_platformVersion[B3_MAX_STRING_LENGTH]; - - b3OpenCLPlatformInfo() - { - m_platformVendor[0] = 0; - m_platformName[0] = 0; - m_platformVersion[0] = 0; - } -}; - -///C++ API for OpenCL utilities: convenience functions -struct b3OpenCLUtils -{ - /// CL Context optionally takes a GL context. This is a generic type because we don't really want this code - /// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. - static inline cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex = -1, cl_platform_id* platformId = 0) - { - return b3OpenCLUtils_createContextFromType(deviceType, pErrNum, pGLCtx, pGLDC, preferredDeviceIndex, preferredPlatformIndex, platformId); - } - - static inline int getNumDevices(cl_context cxMainContext) - { - return b3OpenCLUtils_getNumDevices(cxMainContext); - } - static inline cl_device_id getDevice(cl_context cxMainContext, int nr) - { - return b3OpenCLUtils_getDevice(cxMainContext, nr); - } - - static void getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info); - - static inline void printDeviceInfo(cl_device_id device) - { - b3OpenCLUtils_printDeviceInfo(device); - } - - static inline cl_kernel compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum = 0, cl_program prog = 0, const char* additionalMacros = "") - { - return b3OpenCLUtils_compileCLKernelFromString(clContext, device, kernelSource, kernelName, pErrNum, prog, additionalMacros); - } - - //optional - static inline cl_program compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum = 0, const char* additionalMacros = "", const char* srcFileNameForCaching = 0, bool disableBinaryCaching = false) - { - return b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, srcFileNameForCaching, disableBinaryCaching); - } - - //the following optional APIs provide access using specific platform information - static inline int getNumPlatforms(cl_int* pErrNum = 0) - { - return b3OpenCLUtils_getNumPlatforms(pErrNum); - } - ///get the nr'th platform, where nr is in the range [0..getNumPlatforms) - static inline cl_platform_id getPlatform(int nr, cl_int* pErrNum = 0) - { - return b3OpenCLUtils_getPlatform(nr, pErrNum); - } - - static void getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo); - - static inline void printPlatformInfo(cl_platform_id platform) - { - b3OpenCLUtils_printPlatformInfo(platform); - } - - static inline const char* getSdkVendorName() - { - return b3OpenCLUtils_getSdkVendorName(); - } - static inline cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex = -1) - { - return b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLCtx, pGLDC, preferredDeviceIndex, preferredPlatformIndex); - } - static void setCachePath(const char* path) - { - b3OpenCLUtils_setCachePath(path); - } -}; - -#endif //__cplusplus - -#endif // B3_OPENCL_UTILS_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h deleted file mode 100644 index 27835bb7474..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef B3_BVH_INFO_H -#define B3_BVH_INFO_H - -#include "Bullet3Common/b3Vector3.h" - -struct b3BvhInfo -{ - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - b3Vector3 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; -}; - -#endif //B3_BVH_INFO_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp deleted file mode 100644 index 867b3e0a9ce..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp +++ /dev/null @@ -1,253 +0,0 @@ - -#if 0 -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3ContactCache.h" -#include "Bullet3Common/b3Transform.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -b3Scalar gContactBreakingThreshold = b3Scalar(0.02); - -///gContactCalcArea3Points will approximate the convex hull area using 3 points -///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower -bool gContactCalcArea3Points = true; - - - - -static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3) -{ - // It calculates possible 3 area constructed from random 4 points and returns the biggest one. - - b3Vector3 a[3],b[3]; - a[0] = p0 - p1; - a[1] = p0 - p2; - a[2] = p0 - p3; - b[0] = p2 - p3; - b[1] = p1 - p3; - b[2] = p1 - p2; - - //todo: Following 3 cross production can be easily optimized by SIMD. - b3Vector3 tmp0 = a[0].cross(b[0]); - b3Vector3 tmp1 = a[1].cross(b[1]); - b3Vector3 tmp2 = a[2].cross(b[2]); - - return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2()); -} -#if 0 - -//using localPointA for all points -int b3ContactCache::sortCachedPoints(const b3Vector3& pt) -{ - //calculate 4 possible cases areas, and take biggest area - //also need to keep 'deepest' - - int maxPenetrationIndex = -1; -#define KEEP_DEEPEST_POINT 1 -#ifdef KEEP_DEEPEST_POINT - b3Scalar maxPenetration = pt.getDistance(); - for (int i=0;i<4;i++) - { - if (m_pointCache[i].getDistance() < maxPenetration) - { - maxPenetrationIndex = i; - maxPenetration = m_pointCache[i].getDistance(); - } - } -#endif //KEEP_DEEPEST_POINT - - b3Scalar res0(b3Scalar(0.)),res1(b3Scalar(0.)),res2(b3Scalar(0.)),res3(b3Scalar(0.)); - - if (gContactCalcArea3Points) - { - if (maxPenetrationIndex != 0) - { - b3Vector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - b3Vector3 cross = a0.cross(b0); - res0 = cross.length2(); - } - if (maxPenetrationIndex != 1) - { - b3Vector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - b3Vector3 cross = a1.cross(b1); - res1 = cross.length2(); - } - - if (maxPenetrationIndex != 2) - { - b3Vector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 cross = a2.cross(b2); - res2 = cross.length2(); - } - - if (maxPenetrationIndex != 3) - { - b3Vector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 cross = a3.cross(b3); - res3 = cross.length2(); - } - } - else - { - if(maxPenetrationIndex != 0) { - res0 = calcArea4Points(pt.m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 1) { - res1 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 2) { - res2 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 3) { - res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA); - } - } - b3Vector4 maxvec(res0,res1,res2,res3); - int biggestarea = maxvec.closestAxis4(); - return biggestarea; - -} - - -int b3ContactCache::getCacheEntry(const b3Vector3& newPoint) const -{ - b3Scalar shortestDist = getContactBreakingThreshold() * getContactBreakingThreshold(); - int size = getNumContacts(); - int nearestPoint = -1; - for( int i = 0; i < size; i++ ) - { - const b3Vector3 &mp = m_pointCache[i]; - - b3Vector3 diffA = mp.m_localPointA- newPoint.m_localPointA; - const b3Scalar distToManiPoint = diffA.dot(diffA); - if( distToManiPoint < shortestDist ) - { - shortestDist = distToManiPoint; - nearestPoint = i; - } - } - return nearestPoint; -} - -int b3ContactCache::addManifoldPoint(const b3Vector3& newPoint) -{ - b3Assert(validContactDistance(newPoint)); - - int insertIndex = getNumContacts(); - if (insertIndex == MANIFOLD_CACHE_SIZE) - { -#if MANIFOLD_CACHE_SIZE >= 4 - //sort cache so best points come first, based on area - insertIndex = sortCachedPoints(newPoint); -#else - insertIndex = 0; -#endif - clearUserCache(m_pointCache[insertIndex]); - - } else - { - m_cachedPoints++; - - - } - if (insertIndex<0) - insertIndex=0; - - //b3Assert(m_pointCache[insertIndex].m_userPersistentData==0); - m_pointCache[insertIndex] = newPoint; - return insertIndex; -} - -#endif - -bool b3ContactCache::validContactDistance(const b3Vector3& pt) -{ - return pt.w <= gContactBreakingThreshold; -} - -void b3ContactCache::removeContactPoint(struct b3Contact4Data& newContactCache,int i) -{ - int numContacts = b3Contact4Data_getNumPoints(&newContactCache); - if (i!=(numContacts-1)) - { - b3Swap(newContactCache.m_localPosA[i],newContactCache.m_localPosA[numContacts-1]); - b3Swap(newContactCache.m_localPosB[i],newContactCache.m_localPosB[numContacts-1]); - b3Swap(newContactCache.m_worldPosB[i],newContactCache.m_worldPosB[numContacts-1]); - } - b3Contact4Data_setNumPoints(&newContactCache,numContacts-1); - -} - - -void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& contacts) -{ - - int numContacts = b3Contact4Data_getNumPoints(&contacts); - - - int i; - /// first refresh worldspace positions and distance - for (i=numContacts-1;i>=0;i--) - { - b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); - b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); - contacts.m_worldPosB[i] = worldPosB; - float distance = (worldPosA - worldPosB).dot(contacts.m_worldNormalOnB); - contacts.m_worldPosB[i].w = distance; - } - - /// then - b3Scalar distance2d; - b3Vector3 projectedDifference,projectedPoint; - for (i=numContacts-1;i>=0;i--) - { - b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); - b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); - b3Vector3&pt = contacts.m_worldPosB[i]; - //contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction) - if (!validContactDistance(pt)) - { - removeContactPoint(contacts,i); - } else - { - //contact also becomes invalid when relative movement orthogonal to normal exceeds margin - projectedPoint = worldPosA - contacts.m_worldNormalOnB * contacts.m_worldPosB[i].w; - projectedDifference = contacts.m_worldPosB[i] - projectedPoint; - distance2d = projectedDifference.dot(projectedDifference); - if (distance2d > gContactBreakingThreshold*gContactBreakingThreshold ) - { - removeContactPoint(contacts,i); - } else - { - ////contact point processed callback - //if (gContactProcessedCallback) - // (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1); - } - } - } - - -} - -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h deleted file mode 100644 index a15fd0b2a9e..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h +++ /dev/null @@ -1,62 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_CONTACT_CACHE_H -#define B3_CONTACT_CACHE_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3AlignedAllocator.h" - -///maximum contact breaking and merging threshold -extern b3Scalar gContactBreakingThreshold; - -#define MANIFOLD_CACHE_SIZE 4 - -///b3ContactCache is a contact point cache, it stays persistent as long as objects are overlapping in the broadphase. -///Those contact points are created by the collision narrow phase. -///The cache can be empty, or hold 1,2,3 or 4 points. Some collision algorithms (GJK) might only add one point at a time. -///updates/refreshes old contact points, and throw them away if necessary (distance becomes too large) -///reduces the cache to 4 points, when more then 4 points are added, using following rules: -///the contact point with deepest penetration is always kept, and it tries to maximuze the area covered by the points -///note that some pairs of objects might have more then one contact manifold. -B3_ATTRIBUTE_ALIGNED16(class) -b3ContactCache -{ - /// sort cached points so most isolated points come first - int sortCachedPoints(const b3Vector3& pt); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int addManifoldPoint(const b3Vector3& newPoint); - - /*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex) - { - b3Assert(validContactDistance(newPoint)); - m_pointCache[insertIndex] = newPoint; - } - */ - - static bool validContactDistance(const b3Vector3& pt); - - /// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin - static void refreshContactPoints(const b3Transform& trA, const b3Transform& trB, struct b3Contact4Data& newContactCache); - - static void removeContactPoint(struct b3Contact4Data & newContactCache, int i); -}; - -#endif //B3_CONTACT_CACHE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp deleted file mode 100644 index 54a104c5c86..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ /dev/null @@ -1,4408 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -bool findSeparatingAxisOnGpu = true; -bool splitSearchSepAxisConcave = false; -bool splitSearchSepAxisConvex = true; -bool useMprGpu = true; //use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... -bool bvhTraversalKernelGPU = true; -bool findConcaveSeparatingAxisKernelGPU = true; -bool clipConcaveFacesAndFindContactsCPU = false; //false;//true; -bool clipConvexFacesAndFindContactsCPU = false; //false;//true; -bool reduceConcaveContactsOnGPU = true; //false; -bool reduceConvexContactsOnGPU = true; //false; -bool findConvexClippingFacesGPU = true; -bool useGjk = false; ///option for CPU/host testing, when findSeparatingAxisOnGpu = false -bool useGjkContacts = false; //////option for CPU/host testing when findSeparatingAxisOnGpu = false - -static int myframecount = 0; ///for testing - -///This file was written by Erwin Coumans -///Separating axis rest based on work from Pierre Terdiman, see -///And contact clipping based on work from Simon Hobbs - -//#define B3_DEBUG_SAT_FACE - -//#define CHECK_ON_HOST - -#ifdef CHECK_ON_HOST -//#define PERSISTENT_CONTACTS_HOST -#endif - -int b3g_actualSATPairTests = 0; - -#include "b3ConvexHullContact.h" -#include //memcpy -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" - -#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -typedef b3AlignedObjectArray b3VertexArray; - -#include //for FLT_MAX -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -//#include "AdlQuaternion.h" - -#include "kernels/satKernels.h" -#include "kernels/mprKernels.h" - -#include "kernels/satConcaveKernels.h" - -#include "kernels/satClipHullContacts.h" -#include "kernels/bvhTraversal.h" -#include "kernels/primitiveContacts.h" - -#include "Bullet3Geometry/b3AabbUtil.h" - -#define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" -#define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl" - -#define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" - -#define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" -#define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" -#define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" - -#ifndef __global -#define __global -#endif - -#ifndef __kernel -#define __kernel -#endif - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" - -#define dot3F4 b3Dot - -GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q) - : m_context(ctx), - m_device(device), - m_queue(q), - - m_findSeparatingAxisKernel(0), - m_findSeparatingAxisVertexFaceKernel(0), - m_findSeparatingAxisEdgeEdgeKernel(0), - m_unitSphereDirections(m_context, m_queue), - - m_totalContactsOut(m_context, m_queue), - m_sepNormals(m_context, m_queue), - m_dmins(m_context, m_queue), - - m_hasSeparatingNormals(m_context, m_queue), - m_concaveSepNormals(m_context, m_queue), - m_concaveHasSeparatingNormals(m_context, m_queue), - m_numConcavePairsOut(m_context, m_queue), - - m_gpuCompoundPairs(m_context, m_queue), - - m_gpuCompoundSepNormals(m_context, m_queue), - m_gpuHasCompoundSepNormals(m_context, m_queue), - - m_numCompoundPairsOut(m_context, m_queue) -{ - m_totalContactsOut.push_back(0); - - cl_int errNum = 0; - - if (1) - { - const char* mprSrc = mprKernelsCL; - - const char* srcConcave = satConcaveKernelsCL; - char flags[1024] = {0}; - //#ifdef CL_PLATFORM_INTEL - // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); - //#endif - m_mprPenetrationKernel = 0; - m_findSeparatingAxisUnitSphereKernel = 0; - - if (useMprGpu) - { - cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg); - b3Assert(m_mprPenetrationKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg); - b3Assert(m_findSeparatingAxisUnitSphereKernel); - b3Assert(errNum == CL_SUCCESS); - - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - m_unitSphereDirections.resize(numDirections); - m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true); - } - - cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH); - b3Assert(errNum == CL_SUCCESS); - - cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg); - b3Assert(m_findSeparatingAxisKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg); - b3Assert(m_findSeparatingAxisVertexFaceKernel); - - m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg); - b3Assert(m_findSeparatingAxisVertexFaceKernel); - - m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg); - b3Assert(m_findConcaveSeparatingAxisKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg); - b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg); - b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg); - b3Assert(m_findCompoundPairsKernel); - b3Assert(errNum == CL_SUCCESS); - m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg); - b3Assert(m_processCompoundPairsKernel); - b3Assert(errNum == CL_SUCCESS); - } - - if (1) - { - const char* srcClip = satClipKernelsCL; - - char flags[1024] = {0}; - //#ifdef CL_PLATFORM_INTEL - // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); - //#endif - - cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); - // b3Assert(errNum==CL_SUCCESS); - - m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, - "newContactReductionKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - } - else - { - m_clipHullHullKernel = 0; - m_clipCompoundsHullHullKernel = 0; - m_findClippingFacesKernel = 0; - m_newContactReductionKernel = 0; - m_clipFacesAndFindContacts = 0; - m_clipHullHullConcaveConvexKernel = 0; - // m_extractManifoldAndAddContactKernel = 0; - } - - if (1) - { - const char* srcBvh = bvhTraversalKernelCL; - cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, ""); - b3Assert(errNum == CL_SUCCESS); - } - - { - const char* primitiveContactsSrc = primitiveContactsKernelsCL; - cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, ""); - b3Assert(errNum == CL_SUCCESS); - - m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg); - b3Assert(errNum == CL_SUCCESS); - b3Assert(m_findConcaveSphereContactsKernel); - - m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, ""); - b3Assert(errNum == CL_SUCCESS); - b3Assert(m_processCompoundPairsPrimitivesKernel); - } -} - -GpuSatCollision::~GpuSatCollision() -{ - if (m_findSeparatingAxisVertexFaceKernel) - clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); - - if (m_findSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); - - if (m_findSeparatingAxisUnitSphereKernel) - clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); - - if (m_mprPenetrationKernel) - clReleaseKernel(m_mprPenetrationKernel); - - if (m_findSeparatingAxisKernel) - clReleaseKernel(m_findSeparatingAxisKernel); - - if (m_findConcaveSeparatingAxisVertexFaceKernel) - clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); - - if (m_findConcaveSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); - - if (m_findConcaveSeparatingAxisKernel) - clReleaseKernel(m_findConcaveSeparatingAxisKernel); - - if (m_findCompoundPairsKernel) - clReleaseKernel(m_findCompoundPairsKernel); - - if (m_processCompoundPairsKernel) - clReleaseKernel(m_processCompoundPairsKernel); - - if (m_findClippingFacesKernel) - clReleaseKernel(m_findClippingFacesKernel); - - if (m_clipFacesAndFindContacts) - clReleaseKernel(m_clipFacesAndFindContacts); - if (m_newContactReductionKernel) - clReleaseKernel(m_newContactReductionKernel); - if (m_primitiveContactsKernel) - clReleaseKernel(m_primitiveContactsKernel); - - if (m_findConcaveSphereContactsKernel) - clReleaseKernel(m_findConcaveSphereContactsKernel); - - if (m_processCompoundPairsPrimitivesKernel) - clReleaseKernel(m_processCompoundPairsPrimitivesKernel); - - if (m_clipHullHullKernel) - clReleaseKernel(m_clipHullHullKernel); - if (m_clipCompoundsHullHullKernel) - clReleaseKernel(m_clipCompoundsHullHullKernel); - - if (m_clipHullHullConcaveConvexKernel) - clReleaseKernel(m_clipHullHullConcaveConvexKernel); - // if (m_extractManifoldAndAddContactKernel) - // clReleaseKernel(m_extractManifoldAndAddContactKernel); - - if (m_bvhTraversalKernel) - clReleaseKernel(m_bvhTraversalKernel); -} - -struct MyTriangleCallback : public b3NodeOverlapCallback -{ - int m_bodyIndexA; - int m_bodyIndexB; - - virtual void processNode(int subPart, int triangleIndex) - { - printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB); - printf("triangleIndex %d\n", triangleIndex); - } -}; - -#define float4 b3Vector3 -#define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w) - -float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) -{ - float4 n = planeEqn; - n[3] = 0.f; - float dist = dot3F4(n, point) + planeEqn[3]; - *closestPointOnFace = point - dist * n; - return dist; -} - -#define cross3(a, b) (a.cross(b)) -b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) -{ - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(*pos); - tr.setRotation(*orn); - b3Vector3 res = tr(*v); - return res; -} - -inline bool IsPointInPolygon(const float4& p, - const b3GpuFace* face, - const float4* baseVertex, - const int* convexIndices, - float4* out) -{ - float4 a; - float4 b; - float4 ab; - float4 ap; - float4 v; - - float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f); - - if (face->m_numIndices < 2) - return false; - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]]; - b = v0; - - for (unsigned i = 0; i != face->m_numIndices; ++i) - { - a = b; - float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; - b = vi; - ab = b - a; - ap = p - a; - v = cross3(ab, plane); - - if (b3Dot(ap, v) > 0.f) - { - float ab_m2 = b3Dot(ab, ab); - float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; - out[0].x = s * a.x + rt * b.x; - out[0].y = s * a.y + rt * b.y; - out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; -} - -#define normalize3(a) (a.normalize()) - -int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if (nPoints == 0) - return 0; - - if (nPoints <= 4) - return nPoints; - - if (nPoints > 64) - nPoints = 64; - - float4 center = b3MakeVector3(0, 0, 0, 0); - { - for (int i = 0; i < nPoints; i++) - center += p[i]; - center /= (float)nPoints; - } - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3(nearNormal, aVector); - float4 v = cross3(nearNormal, u); - u = normalize3(u); - v = normalize3(v); - - //keep point with deepest penetration - float minW = FLT_MAX; - - int minIndex = -1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for (int ie = 0; ie < nPoints; ie++) - { - if (p[ie].w < minW) - { - minW = p[ie].w; - minIndex = ie; - } - float f; - float4 r = p[ie] - center; - f = dot3F4(u, r); - if (f < maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4(-u, r); - if (f < maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - f = dot3F4(v, r); - if (f < maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4(-v, r); - if (f < maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; -} - -#define MAX_VERTS 1024 - -inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray& vertices, b3Scalar& min, b3Scalar& max) -{ - min = FLT_MAX; - max = -FLT_MAX; - int numVerts = hull.m_numVertices; - - const float4 localDir = b3QuatRotate(orn.inverse(), dir); - - b3Scalar offset = dot3F4(pos, dir); - - for (int i = 0; i < numVerts; i++) - { - //b3Vector3 pt = trans * vertices[m_vertexOffset+i]; - //b3Scalar dp = pt.dot(dir); - //b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; - b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir); - //b3Assert(dp==dpL); - if (dp < min) min = dp; - if (dp > max) max = dp; - } - if (min > max) - { - b3Scalar tmp = min; - min = max; - max = tmp; - } - min += offset; - max += offset; -} - -static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA, const b3Quaternion& ornA, - const float4& posB, const b3Quaternion& ornB, - const float4& sep_axis, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& verticesB, b3Scalar& depth) -{ - b3Scalar Min0, Max0; - b3Scalar Min1, Max1; - project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0); - project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1); - - if (Max0 < Min1 || Max1 < Min0) - return false; - - b3Scalar d0 = Max0 - Min1; - assert(d0 >= 0.0f); - b3Scalar d1 = Max1 - Min0; - assert(d1 >= 0.0f); - depth = d0 < d1 ? d0 : d1; - return true; -} - -inline bool IsAlmostZero(const b3Vector3& v) -{ - if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false; - return true; -} - -static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA1, - const b3Quaternion& ornA, - const float4& posB1, - const b3Quaternion& ornB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& uniqueEdgesA, - const b3AlignedObjectArray& facesA, - const b3AlignedObjectArray& indicesA, - const b3AlignedObjectArray& verticesB, - const b3AlignedObjectArray& uniqueEdgesB, - const b3AlignedObjectArray& facesB, - const b3AlignedObjectArray& indicesB, - - b3Vector3& sep) -{ - B3_PROFILE("findSeparatingAxis"); - - b3g_actualSATPairTests++; - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - //#ifdef TEST_INTERNAL_OBJECTS - float4 c0local = (float4&)hullA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = (float4&)hullB.m_localCenter; - float4 c1 = transform(&c1local, &posB, &ornB); - const float4 deltaC2 = c0 - c1; - //#endif - - b3Scalar dmin = FLT_MAX; - int curPlaneTests = 0; - - int numFacesA = hullA.m_numFaces; - // Test normals from hullA - for (int i = 0; i < numFacesA; i++) - { - const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane; - float4 faceANormalWS = b3QuatRotate(ornA, normal); - - if (dot3F4(deltaC2, faceANormalWS) < 0) - faceANormalWS *= -1.f; - - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d)) - return false; - - if (d < dmin) - { - dmin = d; - sep = (b3Vector3&)faceANormalWS; - } - } - - int numFacesB = hullB.m_numFaces; - // Test normals from hullB - for (int i = 0; i < numFacesB; i++) - { - float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane; - float4 WorldNormal = b3QuatRotate(ornB, normal); - - if (dot3F4(deltaC2, WorldNormal) < 0) - { - WorldNormal *= -1.f; - } - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d)) - return false; - - if (d < dmin) - { - dmin = d; - sep = (b3Vector3&)WorldNormal; - } - } - - int curEdgeEdge = 0; - // Test edges - for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++) - { - const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0]; - float4 edge0World = b3QuatRotate(ornA, (float4&)edge0); - - for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++) - { - const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1]; - float4 edge1World = b3QuatRotate(ornB, (float4&)edge1); - - float4 crossje = cross3(edge0World, edge1World); - - curEdgeEdge++; - if (!IsAlmostZero((b3Vector3&)crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(deltaC2, crossje) < 0) - crossje *= -1.f; - -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar dist; - if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist)) - return false; - - if (dist < dmin) - { - dmin = dist; - sep = (b3Vector3&)crossje; - } - } - } - } - - if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f) - sep = -sep; - - return true; -} - -bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, - const b3Float4& posA1, - const b3Quat& ornA, - const b3Float4& posB1, - const b3Quat& ornB, - const b3Float4& DeltaC2, - __global const b3AlignedObjectArray& vertices, - __global const b3AlignedObjectArray& uniqueEdges, - __global const b3AlignedObjectArray& faces, - __global const b3AlignedObjectArray& indices, - float4* sep, - float* dmin) -{ - // int i = get_global_id(0); - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - //int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++) - { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0]; - float4 edge0World = b3QuatRotate(ornA, edge0); - - for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++) - { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1]; - float4 edge1World = b3QuatRotate(ornB, edge1); - - float4 crossje = cross3(edge0World, edge1World); - - curEdgeEdge++; - if (!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2, crossje) < 0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0, Max0; - float Min1, Max1; - project(*hullA, posA, ornA, crossje, vertices, Min0, Max0); - project(*hullB, posB, ornB, crossje, vertices, Min1, Max1); - - if (Max0 < Min1 || Max1 < Min0) - result = false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0 < d1 ? d0 : d1; - result = true; - } - - if (dist < *dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - } - - if ((dot3F4(-DeltaC2, *sep)) > 0.0f) - { - *sep = -(*sep); - } - return true; -} - -__inline float4 lerp3(const float4& a, const float4& b, float t) -{ - return b3MakeVector3(a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut) -{ - int ve; - float ds, de; - int numVertsOut = 0; - if (numVertsIn < 2) - return 0; - - float4 firstVertex = pVtxIn[numVertsIn - 1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex = pVtxIn[ve]; - - de = dot3F4(planeNormalWS, endVertex) + planeEqWS; - - if (ds < 0) - { - if (de < 0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - } - } - else - { - if (de < 0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - -int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, - const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, - //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA = -1; - { - float dmin = FLT_MAX; - for (int face = 0; face < hullA->m_numFaces; face++) - { - const float4 Normal = b3MakeVector3( - facesA[hullA->m_faceOffset + face].m_plane.x, - facesA[hullA->m_faceOffset + face].m_plane.y, - facesA[hullA->m_faceOffset + face].m_plane.z, 0.f); - const float4 faceANormalWS = b3QuatRotate(ornA, Normal); - - float d = dot3F4(faceANormalWS, separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA < 0) - return numContactsOut; - - b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - // int numContacts = numWorldVertsB1; - int numVerticesA = polyA.m_numIndices; - for (int e0 = 0; e0 < numVerticesA; e0++) - { - const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]]; - const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = b3QuatRotate(ornA, edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); - float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1); - float4 worldA1 = transform(&a, &posA, &ornA); - float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS = planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal); - float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA); - for (int i = 0; i < numVertsIn; i++) - { - float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS; - if (depth <= minDist) - { - depth = minDist; - } - if (numContactsOut < contactCapacity) - { - if (depth <= maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth); - //printf("depth=%f\n",depth); - } - } - else - { - b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity); - } - } - } - - return numContactsOut; -} - -static int clipHullAgainstHull(const float4& separatingNormal, - const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, - const b3AlignedObjectArray& verticesB, const b3AlignedObjectArray& facesB, const b3AlignedObjectArray& indicesB, - - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1 = 0; - - B3_PROFILE("clipHullAgainstHull"); - - // float curMaxDist=maxDist; - int closestFaceB = -1; - float dmax = -FLT_MAX; - - { - //B3_PROFILE("closestFaceB"); - if (hullB.m_numFaces != 1) - { - //printf("wtf\n"); - } - static bool once = true; - //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); - - for (int face = 0; face < hullB.m_numFaces; face++) - { -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("face %d\n", face); - const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face]; - if (once) - { - for (int i = 0; i < faceB->m_numIndices; i++) - { - float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]]; - printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z); - } - } -#endif //BT_DEBUG_SAT_FACE \ - //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) - { - const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x, - facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f); - const float4 WorldNormal = b3QuatRotate(ornB, Normal); -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z); -#endif - float d = dot3F4(WorldNormal, separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - once = false; - } - - b3Assert(closestFaceB >= 0); - { - //B3_PROFILE("worldVertsB1"); - const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB]; - const int numVertices = polyB.m_numIndices; - for (int e0 = 0; e0 < numVertices; e0++) - { - const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB); - } - } - - if (closestFaceB >= 0) - { - //B3_PROFILE("clipFaceAgainstHull"); - numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, - posA, ornA, - worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist, - verticesA, facesA, indicesA, - contactsOut, contactCapacity); - } - - return numContactsOut; -} - -#define PARALLEL_SUM(v, n) \ - for (int j = 1; j < n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) \ - for (int ie = 0; ie < n; ie++) \ - { \ - execution; \ - } -#define REDUCE_MAX(v, n) \ - { \ - int i = 0; \ - for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \ - } -#define REDUCE_MIN(v, n) \ - { \ - int i = 0; \ - for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \ - } - -int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if (nPoints == 0) - return 0; - - if (nPoints <= 4) - return nPoints; - - if (nPoints > 64) - nPoints = 64; - - float4 center = make_float4(0, 0, 0, 0); - { - for (int i = 0; i < nPoints; i++) - center += p[i]; - center /= (float)nPoints; - } - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3(nearNormal, aVector); - float4 v = cross3(nearNormal, u); - u = normalize3(u); - v = normalize3(v); - - //keep point with deepest penetration - float minW = FLT_MAX; - - int minIndex = -1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for (int ie = 0; ie < nPoints; ie++) - { - if (p[ie].w < minW) - { - minW = p[ie].w; - minIndex = ie; - } - float f; - float4 r = p[ie] - center; - f = dot3F4(u, r); - if (f < maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4(-u, r); - if (f < maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - f = dot3F4(v, r); - if (f < maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4(-v, r); - if (f < maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; -} - -int clipHullHullSingle( - int bodyIndexA, int bodyIndexB, - const float4& posA, - const b3Quaternion& ornA, - const float4& posB, - const b3Quaternion& ornB, - - int collidableIndexA, int collidableIndexB, - - const b3AlignedObjectArray* bodyBuf, - b3AlignedObjectArray* globalContactOut, - int& nContacts, - - const b3AlignedObjectArray& hostConvexDataA, - const b3AlignedObjectArray& hostConvexDataB, - - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& uniqueEdgesA, - const b3AlignedObjectArray& facesA, - const b3AlignedObjectArray& indicesA, - - const b3AlignedObjectArray& verticesB, - const b3AlignedObjectArray& uniqueEdgesB, - const b3AlignedObjectArray& facesB, - const b3AlignedObjectArray& indicesB, - - const b3AlignedObjectArray& hostCollidablesA, - const b3AlignedObjectArray& hostCollidablesB, - const b3Vector3& sepNormalWorldSpace, - int maxContactCapacity) -{ - int contactIndex = -1; - b3ConvexPolyhedronData hullA, hullB; - - b3Collidable colA = hostCollidablesA[collidableIndexA]; - hullA = hostConvexDataA[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - b3Collidable colB = hostCollidablesB[collidableIndexB]; - hullB = hostConvexDataB[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - float4 contactsOut[MAX_VERTS]; - int localContactCapacity = MAX_VERTS; - -#ifdef _WIN32 - b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); - b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); -#endif - - { - float4 worldVertsB1[MAX_VERTS]; - float4 worldVertsB2[MAX_VERTS]; - int capacityWorldVerts = MAX_VERTS; - - float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f); - int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; - int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; - - b3Scalar minDist = -1; - b3Scalar maxDist = 0.; - - b3Transform trA, trB; - { - //B3_PROFILE("transform computation"); - //trA.setIdentity(); - trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z)); - trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w)); - - //trB.setIdentity(); - trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z)); - trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w)); - } - - b3Quaternion trAorn = trA.getRotation(); - b3Quaternion trBorn = trB.getRotation(); - - int numContactsOut = clipHullAgainstHull(hostNormal, - hostConvexDataA.at(shapeA), - hostConvexDataB.at(shapeB), - (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, - (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, - worldVertsB1, worldVertsB2, capacityWorldVerts, - minDist, maxDist, - verticesA, facesA, indicesA, - verticesB, facesB, indicesB, - - contactsOut, localContactCapacity); - - if (numContactsOut > 0) - { - B3_PROFILE("overlap"); - - float4 normalOnSurfaceB = (float4&)hostNormal; - - b3Int4 contactIdx; - contactIdx.x = 0; - contactIdx.y = 1; - contactIdx.z = 2; - contactIdx.w = 3; - - int numPoints = 0; - - { - // B3_PROFILE("extractManifold"); - numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); - } - - b3Assert(numPoints); - - if (nContacts < maxContactCapacity) - { - contactIndex = nContacts; - globalContactOut->expand(); - b3Contact4& contact = globalContactOut->at(nContacts); - contact.m_batchIdx = 0; //i; - contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; - contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; - - contact.m_frictionCoeffCmp = 45874; - contact.m_restituitionCoeffCmp = 0; - - // float distance = 0.f; - for (int p = 0; p < numPoints; p++) - { - contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]]; //check if it is actually on B - contact.m_worldNormalOnB = normalOnSurfaceB; - } - //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); - contact.m_worldNormalOnB.w = (b3Scalar)numPoints; - nContacts++; - } - else - { - b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity); - } - } - } - return contactIndex; -} - -void computeContactPlaneConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex]; - - b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - - // int numContactsOut = 0; - // int numWorldVertsB1= 0; - - b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal); - float planeConstant = planeEq.w; - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(posB); - convexWorldTransform.setRotation(ornB); - b3Transform planeTransform; - planeTransform.setIdentity(); - planeTransform.setOrigin(posA); - planeTransform.setRotation(ornA); - - b3Transform planeInConvex; - planeInConvex = convexWorldTransform.inverse() * planeTransform; - b3Transform convexInPlane; - convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal; - float maxDot = -1e30; - int hitVertex = -1; - b3Vector3 hitVtx; - -#define MAX_PLANE_CONVEX_POINTS 64 - - b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - b3Int4 contactIdx; - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - for (int i = 0; i < hullB->m_numVertices; i++) - { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i]; - float curDot = vtx.dot(planeNormalInConvex); - - if (curDot > maxDot) - { - hitVertex = i; - maxDot = curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints == MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints < MAX_PLANE_CONVEX_POINTS) - { - b3Vector3 vtxWorld = convexWorldTransform * vtx; - b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; - float dist = planeNormal.dot(vtxInPlane) - planeConstant; - if (dist < 0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - } - - int numReducedPoints = 0; - - numReducedPoints = numPoints; - - if (numPoints > 4) - { - numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints > 0) - { - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; - for (int i = 0; i < numReducedPoints; i++) - { - b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; - c->m_worldPosB[i] = pOnB1; - } - c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - } //if (dstIdx < numPairs) - } - - // printf("computeContactPlaneConvex\n"); -} - -B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) -{ - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (quantization.x), - (b3Scalar)(vecIn[1]) / (quantization.y), - (b3Scalar)(vecIn[2]) / (quantization.z)); - vecOut += bvhAabbMin; - return vecOut; -} - -void traverseTreeTree() -{ -} - -#include "Bullet3Common/shared/b3Mat3x3.h" - -int numAabbChecks = 0; -int maxNumAabbChecks = 0; -int maxDepth = 0; - -// work-in-progress -__kernel void findCompoundPairsKernel( - int pairIndex, - int bodyIndexA, - int bodyIndexB, - int collidableIndexA, - int collidableIndexB, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray& vertices, - __global const b3AlignedObjectArray& aabbsWorldSpace, - __global const b3AlignedObjectArray& aabbsLocalSpace, - __global const b3GpuChildShape* gpuChildShapes, - __global b3Int4* gpuCompoundPairsOut, - __global int* numCompoundPairsOut, - int maxNumCompoundPairsCapacity, - b3AlignedObjectArray& treeNodesCPU, - b3AlignedObjectArray& subTreesCPU, - b3AlignedObjectArray& bvhInfoCPU) -{ - numAabbChecks = 0; - maxNumAabbChecks = 0; - // int i = pairIndex; - { - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0)) - { - return; - } - - if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; - int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; - int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees; - int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; - int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; - - int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - - b3Transform transA; - transA.setIdentity(); - transA.setOrigin(posA); - transA.setRotation(ornA); - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - b3Transform transB; - transB.setIdentity(); - transB.setOrigin(posB); - transB.setRotation(ornB); - - for (int p = 0; p < numSubTreesA; p++) - { - b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p]; - //bvhInfoCPU[bvhA].m_quantization - b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - - b3Vector3 aabbAMinOut, aabbAMaxOut; - float margin = 0.f; - b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - - for (int q = 0; q < numSubTreesB; q++) - { - b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q]; - - b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - - b3Vector3 aabbBMinOut, aabbBMaxOut; - float margin = 0.f; - b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks = 0; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); - if (aabbOverlap) - { - int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset; - // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; - - int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset; - // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; - - b3AlignedObjectArray nodeStack; - b3Int2 node0; - node0.x = startNodeIndexA; - node0.y = startNodeIndexB; - - int maxStackDepth = 1024; - nodeStack.resize(maxStackDepth); - int depth = 0; - nodeStack[depth++] = node0; - - do - { - if (depth > maxDepth) - { - maxDepth = depth; - printf("maxDepth=%d\n", maxDepth); - } - b3Int2 node = nodeStack[--depth]; - - b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - - b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - - float margin = 0.f; - b3Vector3 aabbAMinOut, aabbAMaxOut; - b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - - b3Vector3 aabbBMinOut, aabbBMaxOut; - b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks++; - bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); - if (nodeOverlap) - { - bool isLeafA = treeNodesCPU[node.x].isLeafNode(); - bool isLeafB = treeNodesCPU[node.y].isLeafNode(); - bool isInternalA = !isLeafA; - bool isInternalB = !isLeafB; - - //fail, even though it might hit two leaf nodes - if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB)) - { - b3Error("Error: traversal exceeded maxStackDepth\n"); - continue; - } - - if (isInternalA) - { - int nodeAleftChild = node.x + 1; - bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode(); - int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex(); - - if (isInternalB) - { - int nodeBleftChild = node.y + 1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); - - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); - } - else - { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y); - } - } - else - { - if (isInternalB) - { - int nodeBleftChild = node.y + 1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); - nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild); - } - else - { - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex(); - int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex(); - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB); - } - } - } - } - } while (depth); - maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks); - } - } - } - - return; - } - - if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenA = collidables[collidableIndexA].m_numChildShapes; - for (int c = 0; c < numChildrenA; c++) - { - int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c; - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; - b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - - b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; - - b3Transform transA; - transA.setIdentity(); - transA.setOrigin(newPosA); - transA.setRotation(newOrnA); - b3Scalar margin = 0.0f; - - b3Vector3 aabbAMinOut, aabbAMaxOut; - - b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - - if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b = 0; b < numChildrenB; b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB, &posB, &ornB); - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - - b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; - - b3Transform transB; - transB.setIdentity(); - transB.setOrigin(newPosB); - transB.setRotation(newOrnB); - - b3Vector3 aabbBMinOut, aabbBMaxOut; - b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks++; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); - if (aabbOverlap) - { - /* - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - */ - { // - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB); - } - } // - } //fi(1) - } //for (int b=0 - } //if (collidables[collidableIndexB]. - else //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - if (1) - { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = newOrnA; - float4 c0; - c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 c1; - c1 = transform(&c1local, &posB, &ornB); - // const float4 DeltaC2 = c0 - c1; - - { - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1); - } //if (compoundPairIdx& vertices, - __global const b3AlignedObjectArray& uniqueEdges, - __global const b3AlignedObjectArray& faces, - __global const b3AlignedObjectArray& indices, - __global b3Aabb* aabbs, - __global const b3GpuChildShape* gpuChildShapes, - __global b3AlignedObjectArray& gpuCompoundSepNormalsOut, - __global b3AlignedObjectArray& gpuHasCompoundSepNormalsOut, - int numCompoundPairs, - int i) -{ - // int i = get_global_id(0); - if (i < numCompoundPairs) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; - b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - posA = newPosA; - ornA = newOrnA; - } - else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB >= 0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - posB = newPosB; - ornB = newOrnB; - } - else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - gpuHasCompoundSepNormalsOut[i] = 0; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) - { - return; - } - - int hasSeparatingAxis = 5; - - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - posA.w = 0.f; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local, &posB, &ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1, 0, 0, 0); - // bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin); - - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } - else - { - bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } - else //(!sepB) - { - bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin); - if (sepEE) - { - gpuCompoundSepNormalsOut[i] = sepNormal; //fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - } //sepEE - } //(!sepB) - } //(!sepA) - } -} - -__kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray& vertices, - __global const b3AlignedObjectArray& uniqueEdges, - __global const b3AlignedObjectArray& faces, - __global const b3AlignedObjectArray& indices, - __global const b3GpuChildShape* gpuChildShapes, - __global const b3AlignedObjectArray& gpuCompoundSepNormalsOut, - __global const b3AlignedObjectArray& gpuHasCompoundSepNormalsOut, - __global struct b3Contact4Data* globalContactsOut, - int* nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity, int i) -{ - // int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity = 64; - - float minDist = -1e30f; - float maxDist = 0.0f; - - if (i < numCompoundPairs) - { - if (gpuHasCompoundSepNormalsOut[i]) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; - b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - posA = newPosA; - ornA = newOrnA; - } - else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB >= 0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - posB = newPosB; - ornB = newOrnB; - } - else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - convexShapes[shapeIndexA], convexShapes[shapeIndexB], - posA, ornA, - posB, ornB, - worldVertsB1, worldVertsB2, capacityWorldVerts, - minDist, maxDist, - vertices, faces, indices, - vertices, faces, indices, - localContactsOut, localContactCapacity); - - if (numLocalContactsOut > 0) - { - float4 normal = -gpuCompoundSepNormalsOut[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - b3Int4 contactIdx; // = {-1,-1,-1,-1}; - - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int dstIdx; - dstIdx = b3AtomicInc(nGlobalContactsOut); - if ((dstIdx + nReducedContacts) < maxContactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut + dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f * 0xffff); - c->m_frictionCoeffCmp = (0.7f * 0xffff); - c->m_batchIdx = pairIndex; - int bodyA = gpuCompoundPairs[pairIndex].x; - int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i = 0; i < nReducedContacts; i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx.s[i]]; - } - b3Contact4Data_setNumPoints(c, nReducedContacts); - } - - } // if (numContactsOut>0) - } // if (gpuHasCompoundSepNormalsOut[i]) - } // if (i& hostAabbsWorldSpace, - const b3AlignedObjectArray& hostAabbsLocalSpace, - - const b3AlignedObjectArray& convexVertices, - const b3AlignedObjectArray& hostUniqueEdges, - const b3AlignedObjectArray& convexIndices, - const b3AlignedObjectArray& faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - b3AlignedObjectArray& treeNodesCPU, - b3AlignedObjectArray& subTreesCPU, - b3AlignedObjectArray& bvhInfoCPU) -{ - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); - - b3AlignedObjectArray cpuCompoundPairsOut; - int numCompoundPairsOut = 0; - int maxNumCompoundPairsCapacity = 8192; //1024; - cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); - - // work-in-progress - findCompoundPairsKernel( - pairIndex, - bodyIndexA, bodyIndexB, - collidableIndexA, collidableIndexB, - rigidBodies, - collidables, - convexShapes, - convexVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - cpuChildShapes, - &cpuCompoundPairsOut[0], - &numCompoundPairsOut, - maxNumCompoundPairsCapacity, - treeNodesCPU, - subTreesCPU, - bvhInfoCPU); - - printf("maxNumAabbChecks=%d\n", maxNumAabbChecks); - if (numCompoundPairsOut > maxNumCompoundPairsCapacity) - { - b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity); - numCompoundPairsOut = maxNumCompoundPairsCapacity; - } - b3AlignedObjectArray cpuCompoundSepNormalsOut; - b3AlignedObjectArray cpuHasCompoundSepNormalsOut; - cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); - cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); - - for (int i = 0; i < numCompoundPairsOut; i++) - { - processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes, - cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i); - } - - for (int i = 0; i < numCompoundPairsOut; i++) - { - clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes, - cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i); - } - /* - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - - int shapeIndexA = collidables[childColIndexA].m_shapeIndex; - - - bool foundSepAxis = findSeparatingAxis(hullA,hullB, - posA, - ornA, - posB, - ornB, - - convexVertices,uniqueEdges,faces,convexIndices, - convexVertices,uniqueEdges,faces,convexIndices, - - sepNormalWorldSpace - ); - */ - - /* - if (foundSepAxis) - { - - - contactIndex = clipHullHullSingle( - bodyIndexA, bodyIndexB, - posA,ornA, - posB,ornB, - collidableIndexA, collidableIndexB, - &rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - - } - */ - - // return contactIndex; - - /* - - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int c=0;cm_numVertices; i++) - { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i]; - float curDot = vtx.dot(planeNormalInConvex); - - if (curDot > maxDot) - { - hitVertex = i; - maxDot = curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints == MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints < MAX_PLANE_CONVEX_POINTS) - { - b3Vector3 vtxWorld = convexWorldTransform * vtx; - b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; - float dist = planeNormal.dot(vtxInPlane) - planeConstant; - if (dist < 0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - } - - int numReducedPoints = 0; - - numReducedPoints = numPoints; - - if (numPoints > 4) - { - numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints > 0) - { - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; - for (int i = 0; i < numReducedPoints; i++) - { - b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; - c->m_worldPosB[i] = pOnB1; - } - c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - } //if (dstIdx < numPairs) - } - } -} - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - float radius = collidables[collidableIndexA].m_radius; - float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; - b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; - - float4 pos = rigidBodies[bodyIndexB].m_pos; - - b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; - - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(pos); - tr.setRotation(quat); - b3Transform trInv = tr.inverse(); - - float4 spherePos = trInv(spherePos1); - - int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; - int shapeIndex = collidables[collidableIndex].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = b3MakeVector3(0, 0, 0, 0); - // float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); - float minDist = -1000000.f; // TODO: What is the largest/smallest float? - bool bCollide = true; - int region = -1; - float4 localHitNormal; - for (int f = 0; f < numFaces; f++) - { - b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f]; - float4 planeEqn; - float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f); - float4 n1 = localPlaneNormal; //quatRotate(quat,localPlaneNormal); - planeEqn = n1; - planeEqn[3] = face.m_plane.w; - - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - if (dist > radius) - { - bCollide = false; - break; - } - - if (dist > 0) - { - //might hit an edge or vertex - b3Vector3 out; - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist > minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region = 1; - } - } - else - { - b3Vector3 tmp = spherePos - out; - b3Scalar l2 = tmp.length2(); - if (l2 < radius * radius) - { - dist = b3Sqrt(l2); - if (dist > minDist) - { - minDist = dist; - closestPnt = out; - localHitNormal = tmp / dist; - region = 2; - } - } - else - { - bCollide = false; - break; - } - } - } - else - { - if (dist > minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region = 3; - } - } - } - static int numChecks = 0; - numChecks++; - - if (bCollide && minDist > -10000) - { - float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal; //-hitNormalWorld; - float4 pOnB1 = tr(closestPnt); - //printf("dist ,%f,",minDist); - float actualDepth = minDist - radius; - if (actualDepth < 0) - { - //printf("actualDepth = ,%f,", actualDepth); - //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); - //printf("region=,%d,\n", region); - pOnB1[3] = actualDepth; - - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB1; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; - c->m_worldPosB[0] = pOnB1; - int numPoints = 1; - c->m_worldNormalOnB.w = (b3Scalar)numPoints; - } //if (dstIdx < numPairs) - } - } //if (hasCollision) -} - -int computeContactConvexConvex2( - int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3AlignedObjectArray& rigidBodies, - const b3AlignedObjectArray& collidables, - const b3AlignedObjectArray& convexShapes, - const b3AlignedObjectArray& convexVertices, - const b3AlignedObjectArray& uniqueEdges, - const b3AlignedObjectArray& convexIndices, - const b3AlignedObjectArray& faces, - b3AlignedObjectArray& globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - const b3AlignedObjectArray& oldContacts) -{ - int contactIndex = -1; - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - - b3ConvexPolyhedronData hullA, hullB; - - b3Vector3 sepNormalWorldSpace; - - b3Collidable colA = collidables[collidableIndexA]; - hullA = convexShapes[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - b3Collidable colB = collidables[collidableIndexB]; - hullB = convexShapes[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - // int contactCapacity = MAX_VERTS; - //int numContactsOut=0; - -#ifdef _WIN32 - b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); - b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); -#endif - - bool foundSepAxis = findSeparatingAxis(hullA, hullB, - posA, - ornA, - posB, - ornB, - - convexVertices, uniqueEdges, faces, convexIndices, - convexVertices, uniqueEdges, faces, convexIndices, - - sepNormalWorldSpace); - - if (foundSepAxis) - { - contactIndex = clipHullHullSingle( - bodyIndexA, bodyIndexB, - posA, ornA, - posB, ornB, - collidableIndexA, collidableIndexB, - &rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - } - - return contactIndex; -} - -void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray* pairs, int nPairs, - const b3OpenCLArray* bodyBuf, - b3OpenCLArray* contactOut, int& nContacts, - const b3OpenCLArray* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray& convexData, - const b3OpenCLArray& gpuVertices, - const b3OpenCLArray& gpuUniqueEdges, - const b3OpenCLArray& gpuFaces, - const b3OpenCLArray& gpuIndices, - const b3OpenCLArray& gpuCollidables, - const b3OpenCLArray& gpuChildShapes, - - const b3OpenCLArray& clAabbsWorldSpace, - const b3OpenCLArray& clAabbsLocalSpace, - - b3OpenCLArray& worldVertsB1GPU, - b3OpenCLArray& clippingFacesOutGPU, - b3OpenCLArray& worldNormalsAGPU, - b3OpenCLArray& worldVertsA1GPU, - b3OpenCLArray& worldVertsB2GPU, - b3AlignedObjectArray& bvhDataUnused, - b3OpenCLArray* treeNodesGPU, - b3OpenCLArray* subTreesGPU, - b3OpenCLArray* bvhInfo, - - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray& triangleConvexPairsOut, - int& numTriConvexPairsOut) -{ - myframecount++; - - if (!nPairs) - return; - -#ifdef CHECK_ON_HOST - - b3AlignedObjectArray treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - - b3AlignedObjectArray bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray hostAabbsLocalSpace; - clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray hostTriangleConvexPairs; - - b3AlignedObjectArray hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - - b3AlignedObjectArray oldHostContacts; - - if (oldContacts->size()) - { - oldContacts->copyToHost(oldHostContacts); - } - - hostContacts.resize(maxContactCapacity); - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - //printf("convex-sphere\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("convex-plane\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("plane-convex\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0], - nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU); - // printf("convex-plane\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("convex-plane\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("plane-convex\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts); - //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - - if (contactIndex >= 0) - { - // printf("convex convex contactIndex = %d\n",contactIndex); - hostPairs[i].z = contactIndex; - } - // printf("plane-convex\n"); - } - } - - if (hostPairs.size()) - { - pairs->copyFromHost(hostPairs); - } - - hostContacts.resize(nContacts); - if (nContacts) - { - contactOut->copyFromHost(hostContacts); - } - else - { - contactOut->resize(0); - } - - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - //printf("(HOST) nContacts = %d\n",nContacts); - -#else - - { - if (nPairs) - { - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - - B3_PROFILE("primitiveContactsKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - launcher.setConst(maxContactCapacity); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } - } - -#endif //CHECK_ON_HOST - - B3_PROFILE("computeConvexConvexContactsGPUSAT"); - // printf("nContacts = %d\n",nContacts); - - m_sepNormals.resize(nPairs); - m_hasSeparatingNormals.resize(nPairs); - - int concaveCapacity = maxTriConvexPairCapacity; - m_concaveSepNormals.resize(concaveCapacity); - m_concaveHasSeparatingNormals.resize(concaveCapacity); - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(0); - - m_gpuCompoundPairs.resize(compoundPairCapacity); - - m_gpuCompoundSepNormals.resize(compoundPairCapacity); - - m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); - - m_numCompoundPairsOut.resize(0); - m_numCompoundPairsOut.push_back(0); - - int numCompoundPairs = 0; - - int numConcavePairs = 0; - - { - clFinish(m_queue); - if (findSeparatingAxisOnGpu) - { - m_dmins.resize(nPairs); - if (splitSearchSepAxisConvex) - { - if (useMprGpu) - { - nContacts = m_totalContactsOut.at(0); - { - B3_PROFILE("mprPenetrationKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - - launcher.setConst(maxContactCapacity); - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - /* - b3AlignedObjectArrayhostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArrayhostSepAxis; - m_sepNormals.copyToHost(hostSepAxis); - */ - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); - if (nContacts > maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - } - - if (1) - { - if (1) - { - { - B3_PROFILE("findSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - - { - B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL()), - b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true) - - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numDirections); - launcher.setConst(nPairs); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - if (useMprGpu) - { - B3_PROFILE("findSeparatingAxisUnitSphereKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - launcher.setConst(numDirections); - - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - } - else - { - B3_PROFILE("findSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - else - { - B3_PROFILE("findSeparatingAxisKernel CPU"); - - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray hostConvexShapeData; - convexData.copyToHost(hostConvexShapeData); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray hostHasSepAxis; - hostHasSepAxis.resize(nPairs); - b3AlignedObjectArray hostSepAxis; - hostSepAxis.resize(nPairs); - - b3AlignedObjectArray hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray hostFaces; - gpuFaces.copyToHost(hostFaces); - - b3AlignedObjectArray hostIndices; - gpuIndices.copyToHost(hostIndices); - - b3AlignedObjectArray hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - hostContacts.resize(maxContactCapacity); - int nGlobalContactsOut = nContacts; - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - hostHasSepAxis[i] = 0; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0)) - { - continue; - } - - if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL)) - { - continue; - } - - float dmin = FLT_MAX; - - b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; - b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; - b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; - b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; - b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat; - b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat; - - if (useGjk) - { - //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR - { - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; - b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); - b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB); - b3Vector3 DeltaC2 = c0 - c1; - - b3Vector3 sepAxis; - - bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - - if (hasSepAxisA) - { - bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - if (hasSepAxisB) - { - bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin, false); - - if (hasEdgeEdge) - { - hostHasSepAxis[i] = 1; - hostSepAxis[i] = sepAxis; - hostSepAxis[i].w = dmin; - } - } - } - } - - if (hostHasSepAxis[i]) - { - int pairIndex = i; - - bool useMpr = true; - if (useMpr) - { - int res = 0; - float depth = 0.f; - b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0); - b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0); - - float depthOut; - b3Vector3 dirOut; - b3Vector3 posOut; - - //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); - res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut); - depth = depthOut; - sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z); - resultPointOnBWorld = posOut; - //hostHasSepAxis[i] = 0; - - if (res == 0) - { - //add point? - //printf("depth = %f\n",depth); - //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); - //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); - - float dist = 0.f; - - const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; - const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; - - if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist)) - { - if (depth > dist) - { - float diff = depth - dist; - - static float maxdiff = 0.f; - if (maxdiff < diff) - { - maxdiff = diff; - printf("maxdiff = %20.10f\n", maxdiff); - } - } - } - if (depth > dmin) - { - b3Vector3 oldAxis = hostSepAxis[i]; - depth = dmin; - sepAxis2 = oldAxis; - } - - if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist)) - { - if (depth > dist) - { - float diff = depth - dist; - //printf("?diff = %f\n",diff ); - static float maxdiff = 0.f; - if (maxdiff < diff) - { - maxdiff = diff; - printf("maxdiff = %20.10f\n", maxdiff); - } - } - //this is used for SAT - //hostHasSepAxis[i] = 1; - //hostSepAxis[i] = sepAxis2; - - //add contact point - - //int contactIndex = nGlobalContactsOut; - b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); - nGlobalContactsOut++; - newContact.m_batchIdx = 0; //i; - newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; - newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; - - newContact.m_frictionCoeffCmp = 45874; - newContact.m_restituitionCoeffCmp = 0; - - static float maxDepth = 0.f; - - if (depth > maxDepth) - { - maxDepth = depth; - printf("MPR maxdepth = %f\n", maxDepth); - } - - resultPointOnBWorld.w = -depth; - newContact.m_worldPosB[0] = resultPointOnBWorld; - //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; - newContact.m_worldNormalOnB = sepAxis2; - newContact.m_worldNormalOnB.w = (b3Scalar)1; - } - else - { - printf("rejected\n"); - } - } - } - else - { - //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - b3AlignedObjectArray oldHostContacts; - int result; - result = computeContactConvexConvex2( //hostPairs, - pairIndex, - bodyIndexA, bodyIndexB, - collidableIndexA, collidableIndexB, - hostBodyBuf, - hostCollidables, - hostConvexShapeData, - hostVertices, - hostUniqueEdges, - hostIndices, - hostFaces, - hostContacts, - nGlobalContactsOut, - maxContactCapacity, - oldHostContacts - //hostHasSepAxis, - //hostSepAxis - - ); - } //mpr - } //hostHasSepAxis[i] = 1; - } - else - { - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; - b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); - b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB); - b3Vector3 DeltaC2 = c0 - c1; - - b3Vector3 sepAxis; - - bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - - if (hasSepAxisA) - { - bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - if (hasSepAxisB) - { - bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin, true); - - if (hasEdgeEdge) - { - hostHasSepAxis[i] = 1; - hostSepAxis[i] = sepAxis; - } - } - } - } - } - - if (useGjkContacts) //nGlobalContactsOut>0) - { - //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); - nContacts = nGlobalContactsOut; - contactOut->copyFromHost(hostContacts); - - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - } - - m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); - m_sepNormals.copyFromHost(hostSepAxis); - - /* - //double-check results from GPU (comment-out the 'else' so both paths are executed - b3AlignedObjectArray checkHasSepAxis; - m_hasSeparatingNormals.copyToHost(checkHasSepAxis); - static int frameCount = 0; - frameCount++; - for (int i=0;igetBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()), - b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()), - b3BufferInfoCL(subTreesGPU->getBufferCL()), - b3BufferInfoCL(treeNodesGPU->getBufferCL()), - b3BufferInfoCL(bvhInfo->getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - launcher.setConst(compoundPairCapacity); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - - numCompoundPairs = m_numCompoundPairsOut.at(0); - //printf("numCompoundPairs =%d\n",numCompoundPairs ); - if (numCompoundPairs) - { - //printf("numCompoundPairs=%d\n",numCompoundPairs); - } - } - else - { - b3AlignedObjectArray treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - - b3AlignedObjectArray bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray hostAabbsLocalSpace; - clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray cpuCompoundPairsOut; - cpuCompoundPairsOut.resize(compoundPairCapacity); - - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - - for (int pairIndex = 0; pairIndex < nPairs; pairIndex++) - { - int bodyIndexA = hostPairs[pairIndex].x; - int bodyIndexB = hostPairs[pairIndex].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - if (cpuChildShapes.size()) - { - findCompoundPairsKernel( - pairIndex, - bodyIndexA, - bodyIndexB, - collidableIndexA, - collidableIndexB, - &hostBodyBuf[0], - &hostCollidables[0], - &hostConvexData[0], - hostVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - &cpuChildShapes[0], - &cpuCompoundPairsOut[0], - &numCompoundPairs, - compoundPairCapacity, - treeNodesCPU, - subTreesCPU, - bvhInfoCPU); - } - } - - m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true); - if (numCompoundPairs) - { - b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0]; - m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true); - } - //cpuCompoundPairsOut - } - if (numCompoundPairs) - { - printf("numCompoundPairs=%d\n", numCompoundPairs); - } - - if (numCompoundPairs > compoundPairCapacity) - { - b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); - numCompoundPairs = compoundPairCapacity; - } - - m_gpuCompoundPairs.resize(numCompoundPairs); - m_gpuHasCompoundSepNormals.resize(numCompoundPairs); - m_gpuCompoundSepNormals.resize(numCompoundPairs); - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsPrimitivesKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numCompoundPairs); - launcher.setConst(maxContactCapacity); - - int num = numCompoundPairs; - launcher.launch1D(num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); - if (nContacts > maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()), - b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numCompoundPairs); - - int num = numCompoundPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - //printf("numConcave = %d\n",numConcave); - - // printf("hostNormals.size()=%d\n",hostNormals.size()); - //int numPairs = pairCount.at(0); - } - int vertexFaceCapacity = 64; - - { - //now perform the tree query on GPU - - if (treeNodesGPU->size() && treeNodesGPU->size()) - { - if (bvhTraversalKernelGPU) - { - B3_PROFILE("m_bvhTraversalKernel"); - - numConcavePairs = m_numConcavePairsOut.at(0); - - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel"); - launcher.setBuffer(pairs->getBufferCL()); - launcher.setBuffer(bodyBuf->getBufferCL()); - launcher.setBuffer(gpuCollidables.getBufferCL()); - launcher.setBuffer(clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer(triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer(m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer(subTreesGPU->getBufferCL()); - launcher.setBuffer(treeNodesGPU->getBufferCL()); - launcher.setBuffer(bvhInfo->getBufferCL()); - - launcher.setConst(nPairs); - launcher.setConst(maxTriConvexPairCapacity); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - numConcavePairs = m_numConcavePairsOut.at(0); - } - else - { - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - //int maxTriConvexPairCapacity, - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - - //int numTriConvexPairsOutHost=0; - numConcavePairs = 0; - //m_numConcavePairsOut - - b3AlignedObjectArray treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - //compute it... - - volatile int hostNumConcavePairsOut = 0; - - // - for (int i = 0; i < nPairs; i++) - { - b3BvhTraversal(&hostPairs.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostAabbsWorldSpace.at(0), - &triangleConvexPairsOutHost.at(0), - &hostNumConcavePairsOut, - &subTreesCPU.at(0), - &treeNodesCPU.at(0), - &bvhInfoCPU.at(0), - nPairs, - maxTriConvexPairCapacity, - i); - } - numConcavePairs = hostNumConcavePairsOut; - - if (hostNumConcavePairsOut) - { - triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); - triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); - } - // - - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(numConcavePairs); - } - - //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); - - if (numConcavePairs > maxTriConvexPairCapacity) - { - static int exceeded_maxTriConvexPairCapacity_count = 0; - b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", - numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++); - numConcavePairs = maxTriConvexPairCapacity; - } - triangleConvexPairsOut.resize(numConcavePairs); - - if (numConcavePairs) - { - clippingFacesOutGPU.resize(numConcavePairs); - worldNormalsAGPU.resize(numConcavePairs); - worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs)); - worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs)); - - if (findConcaveSeparatingAxisKernelGPU) - { - /* - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - */ - - //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - if (splitSearchSepAxisConcave) - { - //printf("numConcavePairs = %d\n",numConcavePairs); - m_dmins.resize(numConcavePairs); - { - B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(numConcavePairs); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - } - // numConcavePairs = 0; - if (1) - { - B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(numConcavePairs); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - // numConcavePairs = 0; - } - else - { - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(numConcavePairs); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - else - { - b3AlignedObjectArray clippingFacesOutCPU; - b3AlignedObjectArray worldVertsA1CPU; - b3AlignedObjectArray worldNormalsACPU; - b3AlignedObjectArray worldVertsB1CPU; - b3AlignedObjectArray concaveHasSeparatingNormalsCPU; - - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); - - b3GpuChildShape* childShapePointerCPU = 0; - if (cpuChildShapes.size()) - childShapePointerCPU = &cpuChildShapes.at(0); - - clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - worldNormalsACPU.resize(worldNormalsAGPU.size()); - worldVertsB1CPU.resize(worldVertsB1GPU.size()); - - for (int i = 0; i < numConcavePairs; i++) - { - b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), - &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU, - &hostAabbsWorldSpace.at(0), - &concaveSepNormalsHost.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - vertexFaceCapacity, - numConcavePairs, i); - }; - - m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - } - // b3AlignedObjectArray cpuCompoundSepNormals; - // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); - // b3AlignedObjectArray cpuConcavePairs; - // triangleConvexPairsOut.copyToHost(cpuConcavePairs); - } - } - } - - if (numConcavePairs) - { - if (numConcavePairs) - { - B3_PROFILE("findConcaveSphereContactsKernel"); - nContacts = m_totalContactsOut.at(0); - // printf("nContacts1 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - - launcher.setConst(numConcavePairs); - launcher.setConst(maxContactCapacity); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); - - //printf("nContacts2 = %d\n",nContacts); - - if (nContacts >= maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - } - -#ifdef __APPLE__ - bool contactClippingOnGpu = true; -#else - bool contactClippingOnGpu = true; -#endif - - if (contactClippingOnGpu) - { - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - // printf("nContacts3 = %d\n",nContacts); - - //B3_PROFILE("clipHullHullKernel"); - - bool breakupConcaveConvexKernel = true; - -#ifdef __APPLE__ - //actually, some Apple OpenCL platform/device combinations work fine... - breakupConcaveConvexKernel = true; -#endif - //concave-convex contact clipping - if (numConcavePairs) - { - // printf("numConcavePairs = %d\n", numConcavePairs); - // nContacts = m_totalContactsOut.at(0); - // printf("nContacts before = %d\n", nContacts); - - if (breakupConcaveConvexKernel) - { - worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs); - - //clipFacesAndFindContacts - - if (clipConcaveFacesAndFindContactsCPU) - { - b3AlignedObjectArray clippingFacesOutCPU; - b3AlignedObjectArray worldVertsA1CPU; - b3AlignedObjectArray worldNormalsACPU; - b3AlignedObjectArray worldVertsB1CPU; - - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - worldNormalsAGPU.copyToHost(worldNormalsACPU); - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - b3AlignedObjectArray concaveHasSeparatingNormalsCPU; - m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); - - b3AlignedObjectArray concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - - b3AlignedObjectArray worldVertsB2CPU; - worldVertsB2CPU.resize(worldVertsB2GPU.size()); - - for (int i = 0; i < numConcavePairs; i++) - { - clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - } - else - { - if (1) - { - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - - launcher.setConst(numConcavePairs); - int debugMode = 0; - launcher.setConst(debugMode); - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - //int bla = m_totalContactsOut.at(0); - } - } - //contactReduction - { - int newContactCapacity = nContacts + numConcavePairs; - contactOut->reserve(newContactCapacity); - if (reduceConcaveContactsOnGPU) - { - // printf("newReservation = %d\n",newReservation); - { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst(numConcavePairs); - int num = numConcavePairs; - - launcher.launch1D(num); - } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - } - else - { - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray concaveHasSeparatingNormalsCPU; - m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); - - b3AlignedObjectArray concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - - b3AlignedObjectArray hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - hostContacts.resize(newContactCapacity); - - b3AlignedObjectArray clippingFacesOutCPU; - b3AlignedObjectArray worldVertsB2CPU; - - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - for (int i = 0; i < numConcavePairs; i++) - { - b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &hostContacts.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - numConcavePairs, - i); - } - - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - // nContacts = m_totalContactsOut.at(0); - //contactOut->resize(nContacts); - hostContacts.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContacts); - } - } - //re-use? - } - else - { - B3_PROFILE("clipHullHullConcaveConvexKernel"); - nContacts = m_totalContactsOut.at(0); - int newContactCapacity = contactOut->capacity(); - - //printf("contactOut5 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(newContactCapacity); - launcher.setConst(numConcavePairs); - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - //printf("contactOut6 = %d\n",nContacts); - b3AlignedObjectArray cpuContacts; - contactOut->copyToHost(cpuContacts); - } - // printf("nContacts after = %d\n", nContacts); - } //numConcavePairs - - //convex-convex contact clipping - - bool breakupKernel = false; - -#ifdef __APPLE__ - breakupKernel = true; -#endif - -#ifdef CHECK_ON_HOST - bool computeConvexConvex = false; -#else - bool computeConvexConvex = true; -#endif //CHECK_ON_HOST - if (computeConvexConvex) - { - B3_PROFILE("clipHullHullKernel"); - if (breakupKernel) - { - worldVertsB1GPU.resize(vertexFaceCapacity * nPairs); - clippingFacesOutGPU.resize(nPairs); - worldNormalsAGPU.resize(nPairs); - worldVertsA1GPU.resize(vertexFaceCapacity * nPairs); - worldVertsB2GPU.resize(vertexFaceCapacity * nPairs); - - if (findConvexClippingFacesGPU) - { - B3_PROFILE("findClippingFacesKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(nPairs); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - else - { - float minDist = -1e30f; - float maxDist = 0.02f; - - b3AlignedObjectArray hostConvexData; - convexData.copyToHost(hostConvexData); - b3AlignedObjectArray hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray hostHasSepNormals; - m_hasSeparatingNormals.copyToHost(hostHasSepNormals); - b3AlignedObjectArray cpuSepNormals; - m_sepNormals.copyToHost(cpuSepNormals); - - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - b3AlignedObjectArray worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - b3AlignedObjectArray clippingFacesOutCPU; - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - - b3AlignedObjectArray worldNormalsACPU; - worldNormalsACPU.resize(nPairs); - - b3AlignedObjectArray worldVertsA1CPU; - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - - b3AlignedObjectArray hostVertices; - gpuVertices.copyToHost(hostVertices); - b3AlignedObjectArray hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray hostIndices; - gpuIndices.copyToHost(hostIndices); - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - if (hostHasSepNormals[i]) - { - b3FindClippingFaces(cpuSepNormals[i], - &hostConvexData[shapeIndexA], - &hostConvexData[shapeIndexB], - hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat, - hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat, - &worldVertsA1CPU.at(0), &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - vertexFaceCapacity, minDist, maxDist, - &hostVertices.at(0), &hostFaces.at(0), - &hostIndices.at(0), - &hostVertices.at(0), &hostFaces.at(0), - &hostIndices.at(0), &clippingFacesOutCPU.at(0), i); - } - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - } - - ///clip face B against face A, reduce contacts and append them to a global contact array - if (1) - { - if (clipConvexFacesAndFindContactsCPU) - { - //b3AlignedObjectArray hostPairs; - //pairs->copyToHost(hostPairs); - - b3AlignedObjectArray hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - - b3AlignedObjectArray hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray worldVertsB2CPU; - worldVertsB2CPU.resize(vertexFaceCapacity * nPairs); - - b3AlignedObjectArray worldVertsA1CPU; - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - b3AlignedObjectArray worldNormalsACPU; - worldNormalsAGPU.copyToHost(worldNormalsACPU); - - b3AlignedObjectArray worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - /* - __global const b3Float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global b3Int4* clippingFacesOut, - __global b3Float4* worldVertsA1, - __global b3Float4* worldNormalsA1, - __global b3Float4* worldVertsB1, - __global b3Float4* worldVertsB2, - int vertexFaceCapacity, - int pairIndex - */ - for (int i = 0; i < nPairs; i++) - { - clipFacesAndFindContactsKernel( - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostClippingFaces.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(hostClippingFaces); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - } - else - { - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - - launcher.setConst(nPairs); - int debugMode = 0; - launcher.setConst(debugMode); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - { - nContacts = m_totalContactsOut.at(0); - //printf("nContacts = %d\n",nContacts); - - int newContactCapacity = nContacts + nPairs; - contactOut->reserve(newContactCapacity); - - if (reduceConvexContactsOnGPU) - { - { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst(nPairs); - int num = nPairs; - - launcher.launch1D(num); - } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } - else - { - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray hostContactsOut; - contactOut->copyToHost(hostContactsOut); - hostContactsOut.resize(newContactCapacity); - - b3AlignedObjectArray hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray worldVertsB2CPU; - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - for (int i = 0; i < nPairs; i++) - { - b3NewContactReductionKernel(&hostPairs.at(0), - &hostBodyBuf.at(0), - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostContactsOut.at(0), - &hostClippingFaces.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - nPairs, - i); - } - - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - hostContactsOut.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContactsOut); - } - // b3Contact4 pt = contactOut->at(0); - // printf("nContacts = %d\n",nContacts); - } - } - } - else //breakupKernel - { - if (nPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - launcher.setConst(maxContactCapacity); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts >= maxContactCapacity) - { - b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - contactOut->resize(nContacts); - } - } - - int nCompoundsPairs = m_gpuCompoundPairs.size(); - - if (nCompoundsPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true), - b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nCompoundsPairs); - launcher.setConst(maxContactCapacity); - - int num = nCompoundsPairs; - launcher.launch1D(num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts > maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - contactOut->resize(nContacts); - } //if nCompoundsPairs - } - } //contactClippingOnGpu - - //printf("nContacts end = %d\n",nContacts); - - //printf("frameCount = %d\n",frameCount++); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h deleted file mode 100644 index 53e8c4ed4d6..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ /dev/null @@ -1,106 +0,0 @@ - -#ifndef _CONVEX_HULL_CONTACT_H -#define _CONVEX_HULL_CONTACT_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "b3OptimizedBvh.h" -#include "b3BvhInfo.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" - -//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" - -struct GpuSatCollision -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - cl_kernel m_findSeparatingAxisKernel; - cl_kernel m_mprPenetrationKernel; - cl_kernel m_findSeparatingAxisUnitSphereKernel; - - cl_kernel m_findSeparatingAxisVertexFaceKernel; - cl_kernel m_findSeparatingAxisEdgeEdgeKernel; - - cl_kernel m_findConcaveSeparatingAxisKernel; - cl_kernel m_findConcaveSeparatingAxisVertexFaceKernel; - cl_kernel m_findConcaveSeparatingAxisEdgeEdgeKernel; - - cl_kernel m_findCompoundPairsKernel; - cl_kernel m_processCompoundPairsKernel; - - cl_kernel m_clipHullHullKernel; - cl_kernel m_clipCompoundsHullHullKernel; - - cl_kernel m_clipFacesAndFindContacts; - cl_kernel m_findClippingFacesKernel; - - cl_kernel m_clipHullHullConcaveConvexKernel; - // cl_kernel m_extractManifoldAndAddContactKernel; - cl_kernel m_newContactReductionKernel; - - cl_kernel m_bvhTraversalKernel; - cl_kernel m_primitiveContactsKernel; - cl_kernel m_findConcaveSphereContactsKernel; - - cl_kernel m_processCompoundPairsPrimitivesKernel; - - b3OpenCLArray m_unitSphereDirections; - - b3OpenCLArray m_totalContactsOut; - - b3OpenCLArray m_sepNormals; - b3OpenCLArray m_dmins; - - b3OpenCLArray m_hasSeparatingNormals; - b3OpenCLArray m_concaveSepNormals; - b3OpenCLArray m_concaveHasSeparatingNormals; - b3OpenCLArray m_numConcavePairsOut; - b3OpenCLArray m_gpuCompoundPairs; - b3OpenCLArray m_gpuCompoundSepNormals; - b3OpenCLArray m_gpuHasCompoundSepNormals; - b3OpenCLArray m_numCompoundPairsOut; - - GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q); - virtual ~GpuSatCollision(); - - void computeConvexConvexContactsGPUSAT(b3OpenCLArray* pairs, int nPairs, - const b3OpenCLArray* bodyBuf, - b3OpenCLArray* contactOut, int& nContacts, - const b3OpenCLArray* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray& hostConvexData, - const b3OpenCLArray& vertices, - const b3OpenCLArray& uniqueEdges, - const b3OpenCLArray& faces, - const b3OpenCLArray& indices, - const b3OpenCLArray& gpuCollidables, - const b3OpenCLArray& gpuChildShapes, - - const b3OpenCLArray& clAabbsWorldSpace, - const b3OpenCLArray& clAabbsLocalSpace, - - b3OpenCLArray& worldVertsB1GPU, - b3OpenCLArray& clippingFacesOutGPU, - b3OpenCLArray& worldNormalsAGPU, - b3OpenCLArray& worldVertsA1GPU, - b3OpenCLArray& worldVertsB2GPU, - b3AlignedObjectArray& bvhData, - b3OpenCLArray* treeNodesGPU, - b3OpenCLArray* subTreesGPU, - b3OpenCLArray* bvhInfo, - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray& triangleConvexPairs, - int& numTriConvexPairsOut); -}; - -#endif //_CONVEX_HULL_CONTACT_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h deleted file mode 100644 index c4cf7000765..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef CONVEX_POLYHEDRON_CL -#define CONVEX_POLYHEDRON_CL - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - -#endif //CONVEX_POLYHEDRON_CL diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp deleted file mode 100644 index 3a554fe5b42..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp +++ /dev/null @@ -1,1062 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the -use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software in a -product, an acknowledgment in the product documentation would be appreciated -but is not required. -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* -GJK-EPA collision solver by Nathanael Presson, 2008 -*/ - -#include "b3GjkEpa.h" - -#include "b3SupportMappings.h" - -namespace gjkepa2_impl2 -{ -// Config - -/* GJK */ -#define GJK_MAX_ITERATIONS 128 -#define GJK_ACCURACY ((b3Scalar)0.0001) -#define GJK_MIN_DISTANCE ((b3Scalar)0.0001) -#define GJK_DUPLICATED_EPS ((b3Scalar)0.0001) -#define GJK_SIMPLEX2_EPS ((b3Scalar)0.0) -#define GJK_SIMPLEX3_EPS ((b3Scalar)0.0) -#define GJK_SIMPLEX4_EPS ((b3Scalar)0.0) - -/* EPA */ -#define EPA_MAX_VERTICES 64 -#define EPA_MAX_FACES (EPA_MAX_VERTICES * 2) -#define EPA_MAX_ITERATIONS 255 -#define EPA_ACCURACY ((b3Scalar)0.0001) -#define EPA_FALLBACK (10 * EPA_ACCURACY) -#define EPA_PLANE_EPS ((b3Scalar)0.00001) -#define EPA_INSIDE_EPS ((b3Scalar)0.01) - -// Shorthands - -// MinkowskiDiff -struct b3MinkowskiDiff -{ - const b3ConvexPolyhedronData* m_shapes[2]; - - b3Matrix3x3 m_toshape1; - b3Transform m_toshape0; - - bool m_enableMargin; - - void EnableMargin(bool enable) - { - m_enableMargin = enable; - } - inline b3Vector3 Support0(const b3Vector3& d, const b3AlignedObjectArray& verticesA) const - { - if (m_enableMargin) - { - return localGetSupportVertexWithMargin(d, m_shapes[0], verticesA, 0.f); - } - else - { - return localGetSupportVertexWithoutMargin(d, m_shapes[0], verticesA); - } - } - inline b3Vector3 Support1(const b3Vector3& d, const b3AlignedObjectArray& verticesB) const - { - if (m_enableMargin) - { - return m_toshape0 * (localGetSupportVertexWithMargin(m_toshape1 * d, m_shapes[1], verticesB, 0.f)); - } - else - { - return m_toshape0 * (localGetSupportVertexWithoutMargin(m_toshape1 * d, m_shapes[1], verticesB)); - } - } - - inline b3Vector3 Support(const b3Vector3& d, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& verticesB) const - { - return (Support0(d, verticesA) - Support1(-d, verticesB)); - } - b3Vector3 Support(const b3Vector3& d, unsigned int index, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& verticesB) const - { - if (index) - return (Support1(d, verticesA)); - else - return (Support0(d, verticesB)); - } -}; - -typedef b3MinkowskiDiff tShape; - -// GJK -struct b3GJK -{ - /* Types */ - struct sSV - { - b3Vector3 d, w; - }; - struct sSimplex - { - sSV* c[4]; - b3Scalar p[4]; - unsigned int rank; - }; - struct eStatus - { - enum _ - { - Valid, - Inside, - Failed - }; - }; - /* Fields */ - tShape m_shape; - const b3AlignedObjectArray& m_verticesA; - const b3AlignedObjectArray& m_verticesB; - b3Vector3 m_ray; - b3Scalar m_distance; - sSimplex m_simplices[2]; - sSV m_store[4]; - sSV* m_free[4]; - unsigned int m_nfree; - unsigned int m_current; - sSimplex* m_simplex; - eStatus::_ m_status; - /* Methods */ - b3GJK(const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& verticesB) - : m_verticesA(verticesA), m_verticesB(verticesB) - { - Initialize(); - } - void Initialize() - { - m_ray = b3MakeVector3(0, 0, 0); - m_nfree = 0; - m_status = eStatus::Failed; - m_current = 0; - m_distance = 0; - } - eStatus::_ Evaluate(const tShape& shapearg, const b3Vector3& guess) - { - unsigned int iterations = 0; - b3Scalar sqdist = 0; - b3Scalar alpha = 0; - b3Vector3 lastw[4]; - unsigned int clastw = 0; - /* Initialize solver */ - m_free[0] = &m_store[0]; - m_free[1] = &m_store[1]; - m_free[2] = &m_store[2]; - m_free[3] = &m_store[3]; - m_nfree = 4; - m_current = 0; - m_status = eStatus::Valid; - m_shape = shapearg; - m_distance = 0; - /* Initialize simplex */ - m_simplices[0].rank = 0; - m_ray = guess; - const b3Scalar sqrl = m_ray.length2(); - appendvertice(m_simplices[0], sqrl > 0 ? -m_ray : b3MakeVector3(1, 0, 0)); - m_simplices[0].p[0] = 1; - m_ray = m_simplices[0].c[0]->w; - sqdist = sqrl; - lastw[0] = - lastw[1] = - lastw[2] = - lastw[3] = m_ray; - /* Loop */ - do - { - const unsigned int next = 1 - m_current; - sSimplex& cs = m_simplices[m_current]; - sSimplex& ns = m_simplices[next]; - /* Check zero */ - const b3Scalar rl = m_ray.length(); - if (rl < GJK_MIN_DISTANCE) - { /* Touching or inside */ - m_status = eStatus::Inside; - break; - } - /* Append new vertice in -'v' direction */ - appendvertice(cs, -m_ray); - const b3Vector3& w = cs.c[cs.rank - 1]->w; - bool found = false; - for (unsigned int i = 0; i < 4; ++i) - { - if ((w - lastw[i]).length2() < GJK_DUPLICATED_EPS) - { - found = true; - break; - } - } - if (found) - { /* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - else - { /* Update lastw */ - lastw[clastw = (clastw + 1) & 3] = w; - } - /* Check for termination */ - const b3Scalar omega = b3Dot(m_ray, w) / rl; - alpha = b3Max(omega, alpha); - if (((rl - alpha) - (GJK_ACCURACY * rl)) <= 0) - { /* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - /* Reduce simplex */ - b3Scalar weights[4]; - unsigned int mask = 0; - switch (cs.rank) - { - case 2: - sqdist = projectorigin(cs.c[0]->w, - cs.c[1]->w, - weights, mask); - break; - case 3: - sqdist = projectorigin(cs.c[0]->w, - cs.c[1]->w, - cs.c[2]->w, - weights, mask); - break; - case 4: - sqdist = projectorigin(cs.c[0]->w, - cs.c[1]->w, - cs.c[2]->w, - cs.c[3]->w, - weights, mask); - break; - } - if (sqdist >= 0) - { /* Valid */ - ns.rank = 0; - m_ray = b3MakeVector3(0, 0, 0); - m_current = next; - for (unsigned int i = 0, ni = cs.rank; i < ni; ++i) - { - if (mask & (1 << i)) - { - ns.c[ns.rank] = cs.c[i]; - ns.p[ns.rank++] = weights[i]; - m_ray += cs.c[i]->w * weights[i]; - } - else - { - m_free[m_nfree++] = cs.c[i]; - } - } - if (mask == 15) m_status = eStatus::Inside; - } - else - { /* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - m_status = ((++iterations) < GJK_MAX_ITERATIONS) ? m_status : eStatus::Failed; - } while (m_status == eStatus::Valid); - m_simplex = &m_simplices[m_current]; - switch (m_status) - { - case eStatus::Valid: - m_distance = m_ray.length(); - break; - case eStatus::Inside: - m_distance = 0; - break; - default: - { - } - } - return (m_status); - } - bool EncloseOrigin() - { - switch (m_simplex->rank) - { - case 1: - { - for (unsigned int i = 0; i < 3; ++i) - { - b3Vector3 axis = b3MakeVector3(0, 0, 0); - axis[i] = 1; - appendvertice(*m_simplex, axis); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - appendvertice(*m_simplex, -axis); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - } - } - break; - case 2: - { - const b3Vector3 d = m_simplex->c[1]->w - m_simplex->c[0]->w; - for (unsigned int i = 0; i < 3; ++i) - { - b3Vector3 axis = b3MakeVector3(0, 0, 0); - axis[i] = 1; - const b3Vector3 p = b3Cross(d, axis); - if (p.length2() > 0) - { - appendvertice(*m_simplex, p); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - appendvertice(*m_simplex, -p); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - } - } - } - break; - case 3: - { - const b3Vector3 n = b3Cross(m_simplex->c[1]->w - m_simplex->c[0]->w, - m_simplex->c[2]->w - m_simplex->c[0]->w); - if (n.length2() > 0) - { - appendvertice(*m_simplex, n); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - appendvertice(*m_simplex, -n); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - } - } - break; - case 4: - { - if (b3Fabs(det(m_simplex->c[0]->w - m_simplex->c[3]->w, - m_simplex->c[1]->w - m_simplex->c[3]->w, - m_simplex->c[2]->w - m_simplex->c[3]->w)) > 0) - return (true); - } - break; - } - return (false); - } - /* Internals */ - void getsupport(const b3Vector3& d, sSV& sv) const - { - sv.d = d / d.length(); - sv.w = m_shape.Support(sv.d, m_verticesA, m_verticesB); - } - void removevertice(sSimplex& simplex) - { - m_free[m_nfree++] = simplex.c[--simplex.rank]; - } - void appendvertice(sSimplex& simplex, const b3Vector3& v) - { - simplex.p[simplex.rank] = 0; - simplex.c[simplex.rank] = m_free[--m_nfree]; - getsupport(v, *simplex.c[simplex.rank++]); - } - static b3Scalar det(const b3Vector3& a, const b3Vector3& b, const b3Vector3& c) - { - return (a.y * b.z * c.x + a.z * b.x * c.y - - a.x * b.z * c.y - a.y * b.x * c.z + - a.x * b.y * c.z - a.z * b.y * c.x); - } - static b3Scalar projectorigin(const b3Vector3& a, - const b3Vector3& b, - b3Scalar* w, unsigned int& m) - { - const b3Vector3 d = b - a; - const b3Scalar l = d.length2(); - if (l > GJK_SIMPLEX2_EPS) - { - const b3Scalar t(l > 0 ? -b3Dot(a, d) / l : 0); - if (t >= 1) - { - w[0] = 0; - w[1] = 1; - m = 2; - return (b.length2()); - } - else if (t <= 0) - { - w[0] = 1; - w[1] = 0; - m = 1; - return (a.length2()); - } - else - { - w[0] = 1 - (w[1] = t); - m = 3; - return ((a + d * t).length2()); - } - } - return (-1); - } - static b3Scalar projectorigin(const b3Vector3& a, - const b3Vector3& b, - const b3Vector3& c, - b3Scalar* w, unsigned int& m) - { - static const unsigned int imd3[] = {1, 2, 0}; - const b3Vector3* vt[] = {&a, &b, &c}; - const b3Vector3 dl[] = {a - b, b - c, c - a}; - const b3Vector3 n = b3Cross(dl[0], dl[1]); - const b3Scalar l = n.length2(); - if (l > GJK_SIMPLEX3_EPS) - { - b3Scalar mindist = -1; - b3Scalar subw[2] = {0.f, 0.f}; - unsigned int subm(0); - for (unsigned int i = 0; i < 3; ++i) - { - if (b3Dot(*vt[i], b3Cross(dl[i], n)) > 0) - { - const unsigned int j = imd3[i]; - const b3Scalar subd(projectorigin(*vt[i], *vt[j], subw, subm)); - if ((mindist < 0) || (subd < mindist)) - { - mindist = subd; - m = static_cast(((subm & 1) ? 1 << i : 0) + ((subm & 2) ? 1 << j : 0)); - w[i] = subw[0]; - w[j] = subw[1]; - w[imd3[j]] = 0; - } - } - } - if (mindist < 0) - { - const b3Scalar d = b3Dot(a, n); - const b3Scalar s = b3Sqrt(l); - const b3Vector3 p = n * (d / l); - mindist = p.length2(); - m = 7; - w[0] = (b3Cross(dl[1], b - p)).length() / s; - w[1] = (b3Cross(dl[2], c - p)).length() / s; - w[2] = 1 - (w[0] + w[1]); - } - return (mindist); - } - return (-1); - } - static b3Scalar projectorigin(const b3Vector3& a, - const b3Vector3& b, - const b3Vector3& c, - const b3Vector3& d, - b3Scalar* w, unsigned int& m) - { - static const unsigned int imd3[] = {1, 2, 0}; - const b3Vector3* vt[] = {&a, &b, &c, &d}; - const b3Vector3 dl[] = {a - d, b - d, c - d}; - const b3Scalar vl = det(dl[0], dl[1], dl[2]); - const bool ng = (vl * b3Dot(a, b3Cross(b - c, a - b))) <= 0; - if (ng && (b3Fabs(vl) > GJK_SIMPLEX4_EPS)) - { - b3Scalar mindist = -1; - b3Scalar subw[3] = {0.f, 0.f, 0.f}; - unsigned int subm(0); - for (unsigned int i = 0; i < 3; ++i) - { - const unsigned int j = imd3[i]; - const b3Scalar s = vl * b3Dot(d, b3Cross(dl[i], dl[j])); - if (s > 0) - { - const b3Scalar subd = projectorigin(*vt[i], *vt[j], d, subw, subm); - if ((mindist < 0) || (subd < mindist)) - { - mindist = subd; - m = static_cast((subm & 1 ? 1 << i : 0) + - (subm & 2 ? 1 << j : 0) + - (subm & 4 ? 8 : 0)); - w[i] = subw[0]; - w[j] = subw[1]; - w[imd3[j]] = 0; - w[3] = subw[2]; - } - } - } - if (mindist < 0) - { - mindist = 0; - m = 15; - w[0] = det(c, b, d) / vl; - w[1] = det(a, c, d) / vl; - w[2] = det(b, a, d) / vl; - w[3] = 1 - (w[0] + w[1] + w[2]); - } - return (mindist); - } - return (-1); - } -}; - -// EPA -struct b3EPA -{ - /* Types */ - typedef b3GJK::sSV sSV; - struct sFace - { - b3Vector3 n; - b3Scalar d; - sSV* c[3]; - sFace* f[3]; - sFace* l[2]; - unsigned char e[3]; - unsigned char pass; - }; - struct sList - { - sFace* root; - unsigned int count; - sList() : root(0), count(0) {} - }; - struct sHorizon - { - sFace* cf; - sFace* ff; - unsigned int nf; - sHorizon() : cf(0), ff(0), nf(0) {} - }; - struct eStatus - { - enum _ - { - Valid, - Touching, - Degenerated, - NonConvex, - InvalidHull, - OutOfFaces, - OutOfVertices, - AccuraryReached, - FallBack, - Failed - }; - }; - /* Fields */ - eStatus::_ m_status; - b3GJK::sSimplex m_result; - b3Vector3 m_normal; - b3Scalar m_depth; - sSV m_sv_store[EPA_MAX_VERTICES]; - sFace m_fc_store[EPA_MAX_FACES]; - unsigned int m_nextsv; - sList m_hull; - sList m_stock; - /* Methods */ - b3EPA() - { - Initialize(); - } - - static inline void bind(sFace* fa, unsigned int ea, sFace* fb, unsigned int eb) - { - fa->e[ea] = (unsigned char)eb; - fa->f[ea] = fb; - fb->e[eb] = (unsigned char)ea; - fb->f[eb] = fa; - } - static inline void append(sList& list, sFace* face) - { - face->l[0] = 0; - face->l[1] = list.root; - if (list.root) list.root->l[0] = face; - list.root = face; - ++list.count; - } - static inline void remove(sList& list, sFace* face) - { - if (face->l[1]) face->l[1]->l[0] = face->l[0]; - if (face->l[0]) face->l[0]->l[1] = face->l[1]; - if (face == list.root) list.root = face->l[1]; - --list.count; - } - - void Initialize() - { - m_status = eStatus::Failed; - m_normal = b3MakeVector3(0, 0, 0); - m_depth = 0; - m_nextsv = 0; - for (unsigned int i = 0; i < EPA_MAX_FACES; ++i) - { - append(m_stock, &m_fc_store[EPA_MAX_FACES - i - 1]); - } - } - eStatus::_ Evaluate(b3GJK& gjk, const b3Vector3& guess) - { - b3GJK::sSimplex& simplex = *gjk.m_simplex; - if ((simplex.rank > 1) && gjk.EncloseOrigin()) - { - /* Clean up */ - while (m_hull.root) - { - sFace* f = m_hull.root; - remove(m_hull, f); - append(m_stock, f); - } - m_status = eStatus::Valid; - m_nextsv = 0; - /* Orient simplex */ - if (gjk.det(simplex.c[0]->w - simplex.c[3]->w, - simplex.c[1]->w - simplex.c[3]->w, - simplex.c[2]->w - simplex.c[3]->w) < 0) - { - b3Swap(simplex.c[0], simplex.c[1]); - b3Swap(simplex.p[0], simplex.p[1]); - } - /* Build initial hull */ - sFace* tetra[] = {newface(simplex.c[0], simplex.c[1], simplex.c[2], true), - newface(simplex.c[1], simplex.c[0], simplex.c[3], true), - newface(simplex.c[2], simplex.c[1], simplex.c[3], true), - newface(simplex.c[0], simplex.c[2], simplex.c[3], true)}; - if (m_hull.count == 4) - { - sFace* best = findbest(); - sFace outer = *best; - unsigned int pass = 0; - unsigned int iterations = 0; - bind(tetra[0], 0, tetra[1], 0); - bind(tetra[0], 1, tetra[2], 0); - bind(tetra[0], 2, tetra[3], 0); - bind(tetra[1], 1, tetra[3], 2); - bind(tetra[1], 2, tetra[2], 1); - bind(tetra[2], 2, tetra[3], 1); - m_status = eStatus::Valid; - for (; iterations < EPA_MAX_ITERATIONS; ++iterations) - { - if (m_nextsv < EPA_MAX_VERTICES) - { - sHorizon horizon; - sSV* w = &m_sv_store[m_nextsv++]; - bool valid = true; - best->pass = (unsigned char)(++pass); - gjk.getsupport(best->n, *w); - const b3Scalar wdist = b3Dot(best->n, w->w) - best->d; - if (wdist > EPA_ACCURACY) - { - for (unsigned int j = 0; (j < 3) && valid; ++j) - { - valid &= expand(pass, w, - best->f[j], best->e[j], - horizon); - } - if (valid && (horizon.nf >= 3)) - { - bind(horizon.cf, 1, horizon.ff, 2); - remove(m_hull, best); - append(m_stock, best); - best = findbest(); - outer = *best; - } - else - { - m_status = eStatus::Failed; - //m_status=eStatus::InvalidHull; - break; - } - } - else - { - m_status = eStatus::AccuraryReached; - break; - } - } - else - { - m_status = eStatus::OutOfVertices; - break; - } - } - const b3Vector3 projection = outer.n * outer.d; - m_normal = outer.n; - m_depth = outer.d; - m_result.rank = 3; - m_result.c[0] = outer.c[0]; - m_result.c[1] = outer.c[1]; - m_result.c[2] = outer.c[2]; - m_result.p[0] = b3Cross(outer.c[1]->w - projection, - outer.c[2]->w - projection) - .length(); - m_result.p[1] = b3Cross(outer.c[2]->w - projection, - outer.c[0]->w - projection) - .length(); - m_result.p[2] = b3Cross(outer.c[0]->w - projection, - outer.c[1]->w - projection) - .length(); - const b3Scalar sum = m_result.p[0] + m_result.p[1] + m_result.p[2]; - m_result.p[0] /= sum; - m_result.p[1] /= sum; - m_result.p[2] /= sum; - return (m_status); - } - } - /* Fallback */ - m_status = eStatus::FallBack; - m_normal = -guess; - const b3Scalar nl = m_normal.length(); - if (nl > 0) - m_normal = m_normal / nl; - else - m_normal = b3MakeVector3(1, 0, 0); - m_depth = 0; - m_result.rank = 1; - m_result.c[0] = simplex.c[0]; - m_result.p[0] = 1; - return (m_status); - } - bool getedgedist(sFace* face, sSV* a, sSV* b, b3Scalar& dist) - { - const b3Vector3 ba = b->w - a->w; - const b3Vector3 n_ab = b3Cross(ba, face->n); // Outward facing edge normal direction, on triangle plane - const b3Scalar a_dot_nab = b3Dot(a->w, n_ab); // Only care about the sign to determine inside/outside, so not normalization required - - if (a_dot_nab < 0) - { - // Outside of edge a->b - - const b3Scalar ba_l2 = ba.length2(); - const b3Scalar a_dot_ba = b3Dot(a->w, ba); - const b3Scalar b_dot_ba = b3Dot(b->w, ba); - - if (a_dot_ba > 0) - { - // Pick distance vertex a - dist = a->w.length(); - } - else if (b_dot_ba < 0) - { - // Pick distance vertex b - dist = b->w.length(); - } - else - { - // Pick distance to edge a->b - const b3Scalar a_dot_b = b3Dot(a->w, b->w); - dist = b3Sqrt(b3Max((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (b3Scalar)0)); - } - - return true; - } - - return false; - } - sFace* newface(sSV* a, sSV* b, sSV* c, bool forced) - { - if (m_stock.root) - { - sFace* face = m_stock.root; - remove(m_stock, face); - append(m_hull, face); - face->pass = 0; - face->c[0] = a; - face->c[1] = b; - face->c[2] = c; - face->n = b3Cross(b->w - a->w, c->w - a->w); - const b3Scalar l = face->n.length(); - const bool v = l > EPA_ACCURACY; - - if (v) - { - if (!(getedgedist(face, a, b, face->d) || - getedgedist(face, b, c, face->d) || - getedgedist(face, c, a, face->d))) - { - // Origin projects to the interior of the triangle - // Use distance to triangle plane - face->d = b3Dot(a->w, face->n) / l; - } - - face->n /= l; - if (forced || (face->d >= -EPA_PLANE_EPS)) - { - return face; - } - else - m_status = eStatus::NonConvex; - } - else - m_status = eStatus::Degenerated; - - remove(m_hull, face); - append(m_stock, face); - return 0; - } - m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces; - return 0; - } - sFace* findbest() - { - sFace* minf = m_hull.root; - b3Scalar mind = minf->d * minf->d; - for (sFace* f = minf->l[1]; f; f = f->l[1]) - { - const b3Scalar sqd = f->d * f->d; - if (sqd < mind) - { - minf = f; - mind = sqd; - } - } - return (minf); - } - bool expand(unsigned int pass, sSV* w, sFace* f, unsigned int e, sHorizon& horizon) - { - static const unsigned int i1m3[] = {1, 2, 0}; - static const unsigned int i2m3[] = {2, 0, 1}; - if (f->pass != pass) - { - const unsigned int e1 = i1m3[e]; - if ((b3Dot(f->n, w->w) - f->d) < -EPA_PLANE_EPS) - { - sFace* nf = newface(f->c[e1], f->c[e], w, false); - if (nf) - { - bind(nf, 0, f, e); - if (horizon.cf) - bind(horizon.cf, 1, nf, 2); - else - horizon.ff = nf; - horizon.cf = nf; - ++horizon.nf; - return (true); - } - } - else - { - const unsigned int e2 = i2m3[e]; - f->pass = (unsigned char)pass; - if (expand(pass, w, f->f[e1], f->e[e1], horizon) && - expand(pass, w, f->f[e2], f->e[e2], horizon)) - { - remove(m_hull, f); - append(m_stock, f); - return (true); - } - } - } - return (false); - } -}; - -// -static void Initialize(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& verticesB, - b3GjkEpaSolver2::sResults& results, - tShape& shape, - bool withmargins) -{ - /* Results */ - results.witnesses[0] = - results.witnesses[1] = b3MakeVector3(0, 0, 0); - results.status = b3GjkEpaSolver2::sResults::Separated; - /* Shape */ - shape.m_shapes[0] = hullA; - shape.m_shapes[1] = hullB; - shape.m_toshape1 = transB.getBasis().transposeTimes(transA.getBasis()); - shape.m_toshape0 = transA.inverseTimes(transB); - shape.EnableMargin(withmargins); -} - -} // namespace gjkepa2_impl2 - -// -// Api -// - -using namespace gjkepa2_impl2; - -// -int b3GjkEpaSolver2::StackSizeRequirement() -{ - return (sizeof(b3GJK) + sizeof(b3EPA)); -} - -// -bool b3GjkEpaSolver2::Distance(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& verticesB, - const b3Vector3& guess, - sResults& results) -{ - tShape shape; - Initialize(transA, transB, hullA, hullB, verticesA, verticesB, results, shape, false); - b3GJK gjk(verticesA, verticesB); - b3GJK::eStatus::_ gjk_status = gjk.Evaluate(shape, guess); - if (gjk_status == b3GJK::eStatus::Valid) - { - b3Vector3 w0 = b3MakeVector3(0, 0, 0); - b3Vector3 w1 = b3MakeVector3(0, 0, 0); - for (unsigned int i = 0; i < gjk.m_simplex->rank; ++i) - { - const b3Scalar p = gjk.m_simplex->p[i]; - w0 += shape.Support(gjk.m_simplex->c[i]->d, 0, verticesA, verticesB) * p; - w1 += shape.Support(-gjk.m_simplex->c[i]->d, 1, verticesA, verticesB) * p; - } - results.witnesses[0] = transA * w0; - results.witnesses[1] = transA * w1; - results.normal = w0 - w1; - results.distance = results.normal.length(); - results.normal /= results.distance > GJK_MIN_DISTANCE ? results.distance : 1; - return (true); - } - else - { - results.status = gjk_status == b3GJK::eStatus::Inside ? sResults::Penetrating : sResults::GJK_Failed; - return (false); - } -} - -// -bool b3GjkEpaSolver2::Penetration(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& verticesB, - const b3Vector3& guess, - sResults& results, - bool usemargins) -{ - tShape shape; - Initialize(transA, transB, hullA, hullB, verticesA, verticesB, results, shape, usemargins); - b3GJK gjk(verticesA, verticesB); - b3GJK::eStatus::_ gjk_status = gjk.Evaluate(shape, guess); - switch (gjk_status) - { - case b3GJK::eStatus::Inside: - { - b3EPA epa; - b3EPA::eStatus::_ epa_status = epa.Evaluate(gjk, -guess); - if (epa_status != b3EPA::eStatus::Failed) - { - b3Vector3 w0 = b3MakeVector3(0, 0, 0); - for (unsigned int i = 0; i < epa.m_result.rank; ++i) - { - w0 += shape.Support(epa.m_result.c[i]->d, 0, verticesA, verticesB) * epa.m_result.p[i]; - } - results.status = sResults::Penetrating; - results.witnesses[0] = transA * w0; - results.witnesses[1] = transA * (w0 - epa.m_normal * epa.m_depth); - results.normal = -epa.m_normal; - results.distance = -epa.m_depth; - return (true); - } - else - results.status = sResults::EPA_Failed; - } - break; - case b3GJK::eStatus::Failed: - results.status = sResults::GJK_Failed; - break; - default: - { - } - } - return (false); -} - -#if 0 -// -b3Scalar b3GjkEpaSolver2::SignedDistance(const b3Vector3& position, - b3Scalar margin, - const b3Transform& transA, - const b3ConvexPolyhedronData& hullA, - const b3AlignedObjectArray& verticesA, - sResults& results) -{ - tShape shape; - btSphereShape shape1(margin); - b3Transform wtrs1(b3Quaternion(0,0,0,1),position); - Initialize(shape0,wtrs0,&shape1,wtrs1,results,shape,false); - GJK gjk; - GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,b3Vector3(1,1,1)); - if(gjk_status==GJK::eStatus::Valid) - { - b3Vector3 w0=b3Vector3(0,0,0); - b3Vector3 w1=b3Vector3(0,0,0); - for(unsigned int i=0;irank;++i) - { - const b3Scalar p=gjk.m_simplex->p[i]; - w0+=shape.Support( gjk.m_simplex->c[i]->d,0)*p; - w1+=shape.Support(-gjk.m_simplex->c[i]->d,1)*p; - } - results.witnesses[0] = wtrs0*w0; - results.witnesses[1] = wtrs0*w1; - const b3Vector3 delta= results.witnesses[1]- - results.witnesses[0]; - const b3Scalar margin= shape0->getMarginNonVirtual()+ - shape1.getMarginNonVirtual(); - const b3Scalar length= delta.length(); - results.normal = delta/length; - results.witnesses[0] += results.normal*margin; - return(length-margin); - } - else - { - if(gjk_status==GJK::eStatus::Inside) - { - if(Penetration(shape0,wtrs0,&shape1,wtrs1,gjk.m_ray,results)) - { - const b3Vector3 delta= results.witnesses[0]- - results.witnesses[1]; - const b3Scalar length= delta.length(); - if (length >= B3_EPSILON) - results.normal = delta/length; - return(-length); - } - } - } - return(B3_INFINITY); -} - -// -bool b3GjkEpaSolver2::SignedDistance(const btConvexShape* shape0, - const b3Transform& wtrs0, - const btConvexShape* shape1, - const b3Transform& wtrs1, - const b3Vector3& guess, - sResults& results) -{ - if(!Distance(shape0,wtrs0,shape1,wtrs1,guess,results)) - return(Penetration(shape0,wtrs0,shape1,wtrs1,guess,results,false)); - else - return(true); -} -#endif - -/* Symbols cleanup */ - -#undef GJK_MAX_ITERATIONS -#undef GJK_ACCURACY -#undef GJK_MIN_DISTANCE -#undef GJK_DUPLICATED_EPS -#undef GJK_SIMPLEX2_EPS -#undef GJK_SIMPLEX3_EPS -#undef GJK_SIMPLEX4_EPS - -#undef EPA_MAX_VERTICES -#undef EPA_MAX_FACES -#undef EPA_MAX_ITERATIONS -#undef EPA_ACCURACY -#undef EPA_FALLBACK -#undef EPA_PLANE_EPS -#undef EPA_INSIDE_EPS diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h deleted file mode 100644 index 1a67a511803..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the -use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software in a -product, an acknowledgment in the product documentation would be appreciated -but is not required. -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* -GJK-EPA collision solver by Nathanael Presson, 2008 -*/ -#ifndef B3_GJK_EPA2_H -#define B3_GJK_EPA2_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - -///btGjkEpaSolver contributed under zlib by Nathanael Presson -struct b3GjkEpaSolver2 -{ - struct sResults - { - enum eStatus - { - Separated, /* Shapes doesnt penetrate */ - Penetrating, /* Shapes are penetrating */ - GJK_Failed, /* GJK phase fail, no big issue, shapes are probably just 'touching' */ - EPA_Failed /* EPA phase fail, bigger problem, need to save parameters, and debug */ - } status; - b3Vector3 witnesses[2]; - b3Vector3 normal; - b3Scalar distance; - }; - - static int StackSizeRequirement(); - - static bool Distance(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& verticesB, - const b3Vector3& guess, - sResults& results); - - static bool Penetration(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray& verticesA, - const b3AlignedObjectArray& verticesB, - const b3Vector3& guess, - sResults& results, - bool usemargins = true); -#if 0 -static b3Scalar SignedDistance( const b3Vector3& position, - b3Scalar margin, - const btConvexShape* shape, - const btTransform& wtrs, - sResults& results); - -static bool SignedDistance( const btConvexShape* shape0,const btTransform& wtrs0, - const btConvexShape* shape1,const btTransform& wtrs1, - const b3Vector3& guess, - sResults& results); -#endif -}; - -#endif //B3_GJK_EPA2_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp deleted file mode 100644 index 4938fa17afb..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp +++ /dev/null @@ -1,363 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3OptimizedBvh.h" -#include "b3StridingMeshInterface.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -b3OptimizedBvh::b3OptimizedBvh() -{ -} - -b3OptimizedBvh::~b3OptimizedBvh() -{ -} - -void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) -{ - m_useQuantization = useQuantizedAabbCompression; - - // NodeArray triangleNodes; - - struct NodeTriangleCallback : public b3InternalTriangleIndexCallback - { - NodeArray& m_triangleNodes; - - NodeTriangleCallback& operator=(NodeTriangleCallback& other) - { - m_triangleNodes.copyFromArray(other.m_triangleNodes); - return *this; - } - - NodeTriangleCallback(NodeArray& triangleNodes) - : m_triangleNodes(triangleNodes) - { - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) - { - b3OptimizedBvhNode node; - b3Vector3 aabbMin, aabbMax; - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangle[0]); - aabbMax.setMax(triangle[0]); - aabbMin.setMin(triangle[1]); - aabbMax.setMax(triangle[1]); - aabbMin.setMin(triangle[2]); - aabbMax.setMax(triangle[2]); - - //with quantization? - node.m_aabbMinOrg = aabbMin; - node.m_aabbMaxOrg = aabbMax; - - node.m_escapeIndex = -1; - - //for child nodes - node.m_subPart = partId; - node.m_triangleIndex = triangleIndex; - m_triangleNodes.push_back(node); - } - }; - struct QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback - { - QuantizedNodeArray& m_triangleNodes; - const b3QuantizedBvh* m_optimizedTree; // for quantization - - QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other) - { - m_triangleNodes.copyFromArray(other.m_triangleNodes); - m_optimizedTree = other.m_optimizedTree; - return *this; - } - - QuantizedNodeTriangleCallback(QuantizedNodeArray& triangleNodes, const b3QuantizedBvh* tree) - : m_triangleNodes(triangleNodes), m_optimizedTree(tree) - { - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) - { - // The partId and triangle index must fit in the same (positive) integer - b3Assert(partId < (1 << MAX_NUM_PARTS_IN_BITS)); - b3Assert(triangleIndex < (1 << (31 - MAX_NUM_PARTS_IN_BITS))); - //negative indices are reserved for escapeIndex - b3Assert(triangleIndex >= 0); - - b3QuantizedBvhNode node; - b3Vector3 aabbMin, aabbMax; - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangle[0]); - aabbMax.setMax(triangle[0]); - aabbMin.setMin(triangle[1]); - aabbMax.setMax(triangle[1]); - aabbMin.setMin(triangle[2]); - aabbMax.setMax(triangle[2]); - - //PCK: add these checks for zero dimensions of aabb - const b3Scalar MIN_AABB_DIMENSION = b3Scalar(0.002); - const b3Scalar MIN_AABB_HALF_DIMENSION = b3Scalar(0.001); - if (aabbMax.getX() - aabbMin.getX() < MIN_AABB_DIMENSION) - { - aabbMax.setX(aabbMax.getX() + MIN_AABB_HALF_DIMENSION); - aabbMin.setX(aabbMin.getX() - MIN_AABB_HALF_DIMENSION); - } - if (aabbMax.getY() - aabbMin.getY() < MIN_AABB_DIMENSION) - { - aabbMax.setY(aabbMax.getY() + MIN_AABB_HALF_DIMENSION); - aabbMin.setY(aabbMin.getY() - MIN_AABB_HALF_DIMENSION); - } - if (aabbMax.getZ() - aabbMin.getZ() < MIN_AABB_DIMENSION) - { - aabbMax.setZ(aabbMax.getZ() + MIN_AABB_HALF_DIMENSION); - aabbMin.setZ(aabbMin.getZ() - MIN_AABB_HALF_DIMENSION); - } - - m_optimizedTree->quantize(&node.m_quantizedAabbMin[0], aabbMin, 0); - m_optimizedTree->quantize(&node.m_quantizedAabbMax[0], aabbMax, 1); - - node.m_escapeIndexOrTriangleIndex = (partId << (31 - MAX_NUM_PARTS_IN_BITS)) | triangleIndex; - - m_triangleNodes.push_back(node); - } - }; - - int numLeafNodes = 0; - - if (m_useQuantization) - { - //initialize quantization values - setQuantizationValues(bvhAabbMin, bvhAabbMax); - - QuantizedNodeTriangleCallback callback(m_quantizedLeafNodes, this); - - triangles->InternalProcessAllTriangles(&callback, m_bvhAabbMin, m_bvhAabbMax); - - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_quantizedLeafNodes.size(); - - m_quantizedContiguousNodes.resize(2 * numLeafNodes); - } - else - { - NodeTriangleCallback callback(m_leafNodes); - - b3Vector3 aabbMin = b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - b3Vector3 aabbMax = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - - triangles->InternalProcessAllTriangles(&callback, aabbMin, aabbMax); - - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_leafNodes.size(); - - m_contiguousNodes.resize(2 * numLeafNodes); - } - - m_curNodeIndex = 0; - - buildTree(0, numLeafNodes); - - ///if the entire tree is small then subtree size, we need to create a header info for the tree - if (m_useQuantization && !m_SubtreeHeaders.size()) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); - subtree.m_rootNodeIndex = 0; - subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary - m_quantizedLeafNodes.clear(); - m_leafNodes.clear(); -} - -void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface, const b3Vector3& aabbMin, const b3Vector3& aabbMax) -{ - if (m_useQuantization) - { - setQuantizationValues(aabbMin, aabbMax); - - updateBvhNodes(meshInterface, 0, m_curNodeIndex, 0); - - ///now update all subtree headers - - int i; - for (i = 0; i < m_SubtreeHeaders.size(); i++) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); - } - } - else - { - } -} - -void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface, const b3Vector3& aabbMin, const b3Vector3& aabbMax) -{ - //incrementally initialize quantization values - b3Assert(m_useQuantization); - - b3Assert(aabbMin.getX() > m_bvhAabbMin.getX()); - b3Assert(aabbMin.getY() > m_bvhAabbMin.getY()); - b3Assert(aabbMin.getZ() > m_bvhAabbMin.getZ()); - - b3Assert(aabbMax.getX() < m_bvhAabbMax.getX()); - b3Assert(aabbMax.getY() < m_bvhAabbMax.getY()); - b3Assert(aabbMax.getZ() < m_bvhAabbMax.getZ()); - - ///we should update all quantization values, using updateBvhNodes(meshInterface); - ///but we only update chunks that overlap the given aabb - - unsigned short quantizedQueryAabbMin[3]; - unsigned short quantizedQueryAabbMax[3]; - - quantize(&quantizedQueryAabbMin[0], aabbMin, 0); - quantize(&quantizedQueryAabbMax[0], aabbMax, 1); - - int i; - for (i = 0; i < this->m_SubtreeHeaders.size(); i++) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax); - if (overlap != 0) - { - updateBvhNodes(meshInterface, subtree.m_rootNodeIndex, subtree.m_rootNodeIndex + subtree.m_subtreeSize, i); - - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); - } - } -} - -void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int firstNode, int endNode, int index) -{ - (void)index; - - b3Assert(m_useQuantization); - - int curNodeSubPart = -1; - - //get access info to trianglemesh data - const unsigned char* vertexbase = 0; - int numverts = 0; - PHY_ScalarType type = PHY_INTEGER; - int stride = 0; - const unsigned char* indexbase = 0; - int indexstride = 0; - int numfaces = 0; - PHY_ScalarType indicestype = PHY_INTEGER; - - b3Vector3 triangleVerts[3]; - b3Vector3 aabbMin, aabbMax; - const b3Vector3& meshScaling = meshInterface->getScaling(); - - int i; - for (i = endNode - 1; i >= firstNode; i--) - { - b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; - if (curNode.isLeafNode()) - { - //recalc aabb from triangle data - int nodeSubPart = curNode.getPartId(); - int nodeTriangleIndex = curNode.getTriangleIndex(); - if (nodeSubPart != curNodeSubPart) - { - if (curNodeSubPart >= 0) - meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); - meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart); - - curNodeSubPart = nodeSubPart; - } - //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, - - unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride); - - for (int j = 2; j >= 0; j--) - { - int graphicsindex; - switch (indicestype) { - case PHY_INTEGER: graphicsindex = gfxbase[j]; break; - case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; - case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; - default: b3Assert(0); - } - if (type == PHY_FLOAT) - { - float* graphicsbase = (float*)(vertexbase + graphicsindex * stride); - triangleVerts[j] = b3MakeVector3( - graphicsbase[0] * meshScaling.getX(), - graphicsbase[1] * meshScaling.getY(), - graphicsbase[2] * meshScaling.getZ()); - } - else - { - double* graphicsbase = (double*)(vertexbase + graphicsindex * stride); - triangleVerts[j] = b3MakeVector3(b3Scalar(graphicsbase[0] * meshScaling.getX()), b3Scalar(graphicsbase[1] * meshScaling.getY()), b3Scalar(graphicsbase[2] * meshScaling.getZ())); - } - } - - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangleVerts[0]); - aabbMax.setMax(triangleVerts[0]); - aabbMin.setMin(triangleVerts[1]); - aabbMax.setMax(triangleVerts[1]); - aabbMin.setMin(triangleVerts[2]); - aabbMax.setMax(triangleVerts[2]); - - quantize(&curNode.m_quantizedAabbMin[0], aabbMin, 0); - quantize(&curNode.m_quantizedAabbMax[0], aabbMax, 1); - } - else - { - //combine aabb from both children - - b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i + 1]; - - b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i + 2] : &m_quantizedContiguousNodes[i + 1 + leftChildNode->getEscapeIndex()]; - - { - for (int i = 0; i < 3; i++) - { - curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; - if (curNode.m_quantizedAabbMin[i] > rightChildNode->m_quantizedAabbMin[i]) - curNode.m_quantizedAabbMin[i] = rightChildNode->m_quantizedAabbMin[i]; - - curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; - if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) - curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i]; - } - } - } - } - - if (curNodeSubPart >= 0) - meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); -} - -///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' -b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) -{ - b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer, i_dataBufferSize, i_swapEndian); - - //we don't add additional data so just do a static upcast - return static_cast(bvh); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h deleted file mode 100644 index 12865529399..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h +++ /dev/null @@ -1,56 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -///Contains contributions from Disney Studio's - -#ifndef B3_OPTIMIZED_BVH_H -#define B3_OPTIMIZED_BVH_H - -#include "b3QuantizedBvh.h" - -class b3StridingMeshInterface; - -///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface. -B3_ATTRIBUTE_ALIGNED16(class) -b3OptimizedBvh : public b3QuantizedBvh -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - -protected: -public: - b3OptimizedBvh(); - - virtual ~b3OptimizedBvh(); - - void build(b3StridingMeshInterface * triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); - - void refit(b3StridingMeshInterface * triangles, const b3Vector3& aabbMin, const b3Vector3& aabbMax); - - void refitPartial(b3StridingMeshInterface * triangles, const b3Vector3& aabbMin, const b3Vector3& aabbMax); - - void updateBvhNodes(b3StridingMeshInterface * meshInterface, int firstNode, int endNode, int index); - - /// Data buffer MUST be 16 byte aligned - virtual bool serializeInPlace(void* o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const - { - return b3QuantizedBvh::serialize(o_alignedDataBuffer, i_dataBufferSize, i_swapEndian); - } - - ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' - static b3OptimizedBvh* deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); -}; - -#endif //B3_OPTIMIZED_BVH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp deleted file mode 100644 index e9a3d085bb1..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp +++ /dev/null @@ -1,1254 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3QuantizedBvh.h" - -#include "Bullet3Geometry/b3AabbUtil.h" - -#define RAYAABB2 - -b3QuantizedBvh::b3QuantizedBvh() : m_bulletVersion(B3_BULLET_VERSION), - m_useQuantization(false), - m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) - //m_traversalMode(TRAVERSAL_STACKLESS) - //m_traversalMode(TRAVERSAL_RECURSIVE) - , - m_subtreeHeaderCount(0) //PCK: add this line -{ - m_bvhAabbMin.setValue(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY); - m_bvhAabbMax.setValue(B3_INFINITY, B3_INFINITY, B3_INFINITY); -} - -void b3QuantizedBvh::buildInternal() -{ - ///assumes that caller filled in the m_quantizedLeafNodes - m_useQuantization = true; - int numLeafNodes = 0; - - if (m_useQuantization) - { - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_quantizedLeafNodes.size(); - - m_quantizedContiguousNodes.resize(2 * numLeafNodes); - } - - m_curNodeIndex = 0; - - buildTree(0, numLeafNodes); - - ///if the entire tree is small then subtree size, we need to create a header info for the tree - if (m_useQuantization && !m_SubtreeHeaders.size()) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); - subtree.m_rootNodeIndex = 0; - subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary - m_quantizedLeafNodes.clear(); - m_leafNodes.clear(); -} - -///just for debugging, to visualize the individual patches/subtrees -#ifdef DEBUG_PATCH_COLORS -b3Vector3 color[4] = - { - b3Vector3(1, 0, 0), - b3Vector3(0, 1, 0), - b3Vector3(0, 0, 1), - b3Vector3(0, 1, 1)}; -#endif //DEBUG_PATCH_COLORS - -void b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax, b3Scalar quantizationMargin) -{ - //enlarge the AABB to avoid division by zero when initializing the quantization values - b3Vector3 clampValue = b3MakeVector3(quantizationMargin, quantizationMargin, quantizationMargin); - m_bvhAabbMin = bvhAabbMin - clampValue; - m_bvhAabbMax = bvhAabbMax + clampValue; - b3Vector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; - m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0), b3Scalar(65533.0), b3Scalar(65533.0)) / aabbSize; - m_useQuantization = true; -} - -b3QuantizedBvh::~b3QuantizedBvh() -{ -} - -#ifdef DEBUG_TREE_BUILDING -int gStackDepth = 0; -int gMaxStackDepth = 0; -#endif //DEBUG_TREE_BUILDING - -void b3QuantizedBvh::buildTree(int startIndex, int endIndex) -{ -#ifdef DEBUG_TREE_BUILDING - gStackDepth++; - if (gStackDepth > gMaxStackDepth) - gMaxStackDepth = gStackDepth; -#endif //DEBUG_TREE_BUILDING - - int splitAxis, splitIndex, i; - int numIndices = endIndex - startIndex; - int curIndex = m_curNodeIndex; - - b3Assert(numIndices > 0); - - if (numIndices == 1) - { -#ifdef DEBUG_TREE_BUILDING - gStackDepth--; -#endif //DEBUG_TREE_BUILDING - - assignInternalNodeFromLeafNode(m_curNodeIndex, startIndex); - - m_curNodeIndex++; - return; - } - //calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'. - - splitAxis = calcSplittingAxis(startIndex, endIndex); - - splitIndex = sortAndCalcSplittingIndex(startIndex, endIndex, splitAxis); - - int internalNodeIndex = m_curNodeIndex; - - //set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value. - //the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values - setInternalNodeAabbMin(m_curNodeIndex, m_bvhAabbMax); //can't use b3Vector3(B3_INFINITY,B3_INFINITY,B3_INFINITY)) because of quantization - setInternalNodeAabbMax(m_curNodeIndex, m_bvhAabbMin); //can't use b3Vector3(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY)) because of quantization - - for (i = startIndex; i < endIndex; i++) - { - mergeInternalNodeAabb(m_curNodeIndex, getAabbMin(i), getAabbMax(i)); - } - - m_curNodeIndex++; - - //internalNode->m_escapeIndex; - - int leftChildNodexIndex = m_curNodeIndex; - - //build left child tree - buildTree(startIndex, splitIndex); - - int rightChildNodexIndex = m_curNodeIndex; - //build right child tree - buildTree(splitIndex, endIndex); - -#ifdef DEBUG_TREE_BUILDING - gStackDepth--; -#endif //DEBUG_TREE_BUILDING - - int escapeIndex = m_curNodeIndex - curIndex; - - if (m_useQuantization) - { - //escapeIndex is the number of nodes of this subtree - const int sizeQuantizedNode = sizeof(b3QuantizedBvhNode); - const int treeSizeInBytes = escapeIndex * sizeQuantizedNode; - if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES) - { - updateSubtreeHeaders(leftChildNodexIndex, rightChildNodexIndex); - } - } - else - { - } - - setInternalNodeEscapeIndex(internalNodeIndex, escapeIndex); -} - -void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex, int rightChildNodexIndex) -{ - b3Assert(m_useQuantization); - - b3QuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex]; - int leftSubTreeSize = leftChildNode.isLeafNode() ? 1 : leftChildNode.getEscapeIndex(); - int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast(sizeof(b3QuantizedBvhNode)); - - b3QuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex]; - int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex(); - int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast(sizeof(b3QuantizedBvhNode)); - - if (leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(leftChildNode); - subtree.m_rootNodeIndex = leftChildNodexIndex; - subtree.m_subtreeSize = leftSubTreeSize; - } - - if (rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(rightChildNode); - subtree.m_rootNodeIndex = rightChildNodexIndex; - subtree.m_subtreeSize = rightSubTreeSize; - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); -} - -int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex, int endIndex, int splitAxis) -{ - int i; - int splitIndex = startIndex; - int numIndices = endIndex - startIndex; - b3Scalar splitValue; - - b3Vector3 means = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - means += center; - } - means *= (b3Scalar(1.) / (b3Scalar)numIndices); - - splitValue = means[splitAxis]; - - //sort leafNodes so all values larger then splitValue comes first, and smaller values start from 'splitIndex'. - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - if (center[splitAxis] > splitValue) - { - //swap - swapLeafNodes(i, splitIndex); - splitIndex++; - } - } - - //if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex - //otherwise the tree-building might fail due to stack-overflows in certain cases. - //unbalanced1 is unsafe: it can cause stack overflows - //bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1))); - - //unbalanced2 should work too: always use center (perfect balanced trees) - //bool unbalanced2 = true; - - //this should be safe too: - int rangeBalancedIndices = numIndices / 3; - bool unbalanced = ((splitIndex <= (startIndex + rangeBalancedIndices)) || (splitIndex >= (endIndex - 1 - rangeBalancedIndices))); - - if (unbalanced) - { - splitIndex = startIndex + (numIndices >> 1); - } - - bool unbal = (splitIndex == startIndex) || (splitIndex == (endIndex)); - (void)unbal; - b3Assert(!unbal); - - return splitIndex; -} - -int b3QuantizedBvh::calcSplittingAxis(int startIndex, int endIndex) -{ - int i; - - b3Vector3 means = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - b3Vector3 variance = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - int numIndices = endIndex - startIndex; - - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - means += center; - } - means *= (b3Scalar(1.) / (b3Scalar)numIndices); - - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - b3Vector3 diff2 = center - means; - diff2 = diff2 * diff2; - variance += diff2; - } - variance *= (b3Scalar(1.) / ((b3Scalar)numIndices - 1)); - - return variance.maxAxis(); -} - -void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - //either choose recursive traversal (walkTree) or stackless (walkStacklessTree) - - if (m_useQuantization) - { - ///quantize query AABB - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin, aabbMin, 0); - quantizeWithClamp(quantizedQueryAabbMax, aabbMax, 1); - - switch (m_traversalMode) - { - case TRAVERSAL_STACKLESS: - walkStacklessQuantizedTree(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, 0, m_curNodeIndex); - break; - case TRAVERSAL_STACKLESS_CACHE_FRIENDLY: - walkStacklessQuantizedTreeCacheFriendly(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - break; - case TRAVERSAL_RECURSIVE: - { - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0]; - walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - } - break; - default: - //unsupported - b3Assert(0); - } - } - else - { - walkStacklessTree(nodeCallback, aabbMin, aabbMax); - } -} - -static int b3s_maxIterations = 0; - -void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - b3Assert(!m_useQuantization); - - const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; - int escapeIndex, curIndex = 0; - int walkIterations = 0; - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - while (curIndex < m_curNodeIndex) - { - //catch bugs in tree data - b3Assert(walkIterations < m_curNodeIndex); - - walkIterations++; - aabbOverlap = b3TestAabbAgainstAabb2(aabbMin, aabbMax, rootNode->m_aabbMinOrg, rootNode->m_aabbMaxOrg); - isLeafNode = rootNode->m_escapeIndex == -1; - - //PCK: unsigned instead of bool - if (isLeafNode && (aabbOverlap != 0)) - { - nodeCallback->processNode(rootNode->m_subPart, rootNode->m_triangleIndex); - } - - //PCK: unsigned instead of bool - if ((aabbOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->m_escapeIndex; - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -/* -///this was the original recursive traversal, before we optimized towards stackless traversal -void b3QuantizedBvh::walkTree(b3OptimizedBvhNode* rootNode,b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax); - if (aabbOverlap) - { - isLeafNode = (!rootNode->m_leftChild && !rootNode->m_rightChild); - if (isLeafNode) - { - nodeCallback->processNode(rootNode); - } else - { - walkTree(rootNode->m_leftChild,nodeCallback,aabbMin,aabbMax); - walkTree(rootNode->m_rightChild,nodeCallback,aabbMin,aabbMax); - } - } - -} -*/ - -void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode, b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const -{ - b3Assert(m_useQuantization); - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - //PCK: unsigned instead of bool - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, currentNode->m_quantizedAabbMin, currentNode->m_quantizedAabbMax); - isLeafNode = currentNode->isLeafNode(); - - //PCK: unsigned instead of bool - if (aabbOverlap != 0) - { - if (isLeafNode) - { - nodeCallback->processNode(currentNode->getPartId(), currentNode->getTriangleIndex()); - } - else - { - //process left and right children - const b3QuantizedBvhNode* leftChildNode = currentNode + 1; - walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - - const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode + 1 : leftChildNode + leftChildNode->getEscapeIndex(); - walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - } - } -} - -void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const -{ - b3Assert(!m_useQuantization); - - const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; - int escapeIndex, curIndex = 0; - int walkIterations = 0; - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap = 0; - unsigned rayBoxOverlap = 0; - b3Scalar lambda_max = 1.0; - - /* Quick pruning by quantized box */ - b3Vector3 rayAabbMin = raySource; - b3Vector3 rayAabbMax = raySource; - rayAabbMin.setMin(rayTarget); - rayAabbMax.setMax(rayTarget); - - /* Add box cast extents to bounding box */ - rayAabbMin += aabbMin; - rayAabbMax += aabbMax; - -#ifdef RAYAABB2 - b3Vector3 rayDir = (rayTarget - raySource); - rayDir.normalize(); - lambda_max = rayDir.dot(rayTarget - raySource); - ///what about division by zero? --> just set rayDirection[i] to 1.0 - b3Vector3 rayDirectionInverse; - rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0]; - rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1]; - rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; - unsigned int sign[3] = {rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; -#endif - - b3Vector3 bounds[2]; - - while (curIndex < m_curNodeIndex) - { - b3Scalar param = 1.0; - //catch bugs in tree data - b3Assert(walkIterations < m_curNodeIndex); - - walkIterations++; - - bounds[0] = rootNode->m_aabbMinOrg; - bounds[1] = rootNode->m_aabbMaxOrg; - /* Add box cast extents */ - bounds[0] -= aabbMax; - bounds[1] -= aabbMin; - - aabbOverlap = b3TestAabbAgainstAabb2(rayAabbMin, rayAabbMax, rootNode->m_aabbMinOrg, rootNode->m_aabbMaxOrg); - //perhaps profile if it is worth doing the aabbOverlap test first - -#ifdef RAYAABB2 - ///careful with this check: need to check division by zero (above) and fix the unQuantize method - ///thanks Joerg/hiker for the reproduction case! - ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 - rayBoxOverlap = aabbOverlap ? b3RayAabb2(raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false; - -#else - b3Vector3 normal; - rayBoxOverlap = b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); -#endif - - isLeafNode = rootNode->m_escapeIndex == -1; - - //PCK: unsigned instead of bool - if (isLeafNode && (rayBoxOverlap != 0)) - { - nodeCallback->processNode(rootNode->m_subPart, rootNode->m_triangleIndex); - } - - //PCK: unsigned instead of bool - if ((rayBoxOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->m_escapeIndex; - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const -{ - b3Assert(m_useQuantization); - - int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - (void)subTreeSize; - - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; - int escapeIndex; - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned boxBoxOverlap = 0; - unsigned rayBoxOverlap = 0; - - b3Scalar lambda_max = 1.0; - -#ifdef RAYAABB2 - b3Vector3 rayDirection = (rayTarget - raySource); - rayDirection.normalize(); - lambda_max = rayDirection.dot(rayTarget - raySource); - ///what about division by zero? --> just set rayDirection[i] to 1.0 - rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0]; - rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1]; - rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2]; - unsigned int sign[3] = {rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; -#endif - - /* Quick pruning by quantized box */ - b3Vector3 rayAabbMin = raySource; - b3Vector3 rayAabbMax = raySource; - rayAabbMin.setMin(rayTarget); - rayAabbMax.setMax(rayTarget); - - /* Add box cast extents to bounding box */ - rayAabbMin += aabbMin; - rayAabbMax += aabbMax; - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin, rayAabbMin, 0); - quantizeWithClamp(quantizedQueryAabbMax, rayAabbMax, 1); - - while (curIndex < endNodeIndex) - { -//#define VISUALLY_ANALYZE_BVH 1 -#ifdef VISUALLY_ANALYZE_BVH - //some code snippet to debugDraw aabb, to visually analyze bvh structure - static int drawPatch = 0; - //need some global access to a debugDrawer - extern b3IDebugDraw* debugDrawerPtr; - if (curIndex == drawPatch) - { - b3Vector3 aabbMin, aabbMax; - aabbMin = unQuantize(rootNode->m_quantizedAabbMin); - aabbMax = unQuantize(rootNode->m_quantizedAabbMax); - b3Vector3 color(1, 0, 0); - debugDrawerPtr->drawAabb(aabbMin, aabbMax, color); - } -#endif //VISUALLY_ANALYZE_BVH - - //catch bugs in tree data - b3Assert(walkIterations < subTreeSize); - - walkIterations++; - //PCK: unsigned instead of bool - // only interested if this is closer than any previous hit - b3Scalar param = 1.0; - rayBoxOverlap = 0; - boxBoxOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode->m_quantizedAabbMin, rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - if (boxBoxOverlap) - { - b3Vector3 bounds[2]; - bounds[0] = unQuantize(rootNode->m_quantizedAabbMin); - bounds[1] = unQuantize(rootNode->m_quantizedAabbMax); - /* Add box cast extents */ - bounds[0] -= aabbMax; - bounds[1] -= aabbMin; -#if 0 - b3Vector3 normal; - bool ra2 = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max); - bool ra = b3RayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal); - if (ra2 != ra) - { - printf("functions don't match\n"); - } -#endif -#ifdef RAYAABB2 - ///careful with this check: need to check division by zero (above) and fix the unQuantize method - ///thanks Joerg/hiker for the reproduction case! - ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 - - //B3_PROFILE("b3RayAabb2"); - rayBoxOverlap = b3RayAabb2(raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); - -#else - rayBoxOverlap = true; //b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); -#endif - } - - if (isLeafNode && rayBoxOverlap) - { - nodeCallback->processNode(rootNode->getPartId(), rootNode->getTriangleIndex()); - } - - //PCK: unsigned instead of bool - if ((rayBoxOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const -{ - b3Assert(m_useQuantization); - - int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - (void)subTreeSize; - - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; - int escapeIndex; - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - while (curIndex < endNodeIndex) - { -//#define VISUALLY_ANALYZE_BVH 1 -#ifdef VISUALLY_ANALYZE_BVH - //some code snippet to debugDraw aabb, to visually analyze bvh structure - static int drawPatch = 0; - //need some global access to a debugDrawer - extern b3IDebugDraw* debugDrawerPtr; - if (curIndex == drawPatch) - { - b3Vector3 aabbMin, aabbMax; - aabbMin = unQuantize(rootNode->m_quantizedAabbMin); - aabbMax = unQuantize(rootNode->m_quantizedAabbMax); - b3Vector3 color(1, 0, 0); - debugDrawerPtr->drawAabb(aabbMin, aabbMax, color); - } -#endif //VISUALLY_ANALYZE_BVH - - //catch bugs in tree data - b3Assert(walkIterations < subTreeSize); - - walkIterations++; - //PCK: unsigned instead of bool - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode->m_quantizedAabbMin, rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - - if (isLeafNode && aabbOverlap) - { - nodeCallback->processNode(rootNode->getPartId(), rootNode->getTriangleIndex()); - } - - //PCK: unsigned instead of bool - if ((aabbOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -//This traversal can be called from Playstation 3 SPU -void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const -{ - b3Assert(m_useQuantization); - - int i; - - for (i = 0; i < this->m_SubtreeHeaders.size(); i++) - { - const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax); - if (overlap != 0) - { - walkStacklessQuantizedTree(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, - subtree.m_rootNodeIndex, - subtree.m_rootNodeIndex + subtree.m_subtreeSize); - } - } -} - -void b3QuantizedBvh::reportRayOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const -{ - reportBoxCastOverlappingNodex(nodeCallback, raySource, rayTarget, b3MakeVector3(0, 0, 0), b3MakeVector3(0, 0, 0)); -} - -void b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - //always use stackless - - if (m_useQuantization) - { - walkStacklessQuantizedTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); - } - else - { - walkStacklessTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); - } - /* - { - //recursive traversal - b3Vector3 qaabbMin = raySource; - b3Vector3 qaabbMax = raySource; - qaabbMin.setMin(rayTarget); - qaabbMax.setMax(rayTarget); - qaabbMin += aabbMin; - qaabbMax += aabbMax; - reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax); - } - */ -} - -void b3QuantizedBvh::swapLeafNodes(int i, int splitIndex) -{ - if (m_useQuantization) - { - b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i]; - m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; - m_quantizedLeafNodes[splitIndex] = tmp; - } - else - { - b3OptimizedBvhNode tmp = m_leafNodes[i]; - m_leafNodes[i] = m_leafNodes[splitIndex]; - m_leafNodes[splitIndex] = tmp; - } -} - -void b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode, int leafNodeIndex) -{ - if (m_useQuantization) - { - m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex]; - } - else - { - m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex]; - } -} - -//PCK: include -#include - -#if 0 -//PCK: consts -static const unsigned BVH_ALIGNMENT = 16; -static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1; - -static const unsigned BVH_ALIGNMENT_BLOCKS = 2; -#endif - -unsigned int b3QuantizedBvh::getAlignmentSerializationPadding() -{ - // I changed this to 0 since the extra padding is not needed or used. - return 0; //BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT; -} - -unsigned b3QuantizedBvh::calculateSerializeBufferSize() const -{ - unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding(); - baseSize += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; - if (m_useQuantization) - { - return baseSize + m_curNodeIndex * sizeof(b3QuantizedBvhNode); - } - return baseSize + m_curNodeIndex * sizeof(b3OptimizedBvhNode); -} - -bool b3QuantizedBvh::serialize(void* o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const -{ - b3Assert(m_subtreeHeaderCount == m_SubtreeHeaders.size()); - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - /* if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) - { - ///check alignedment for buffer? - b3Assert(0); - return false; - } -*/ - - b3QuantizedBvh* targetBvh = (b3QuantizedBvh*)o_alignedDataBuffer; - - // construct the class so the virtual function table, etc will be set up - // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor - new (targetBvh) b3QuantizedBvh; - - if (i_swapEndian) - { - targetBvh->m_curNodeIndex = static_cast(b3SwapEndian(m_curNodeIndex)); - - b3SwapVector3Endian(m_bvhAabbMin, targetBvh->m_bvhAabbMin); - b3SwapVector3Endian(m_bvhAabbMax, targetBvh->m_bvhAabbMax); - b3SwapVector3Endian(m_bvhQuantization, targetBvh->m_bvhQuantization); - - targetBvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(m_traversalMode); - targetBvh->m_subtreeHeaderCount = static_cast(b3SwapEndian(m_subtreeHeaderCount)); - } - else - { - targetBvh->m_curNodeIndex = m_curNodeIndex; - targetBvh->m_bvhAabbMin = m_bvhAabbMin; - targetBvh->m_bvhAabbMax = m_bvhAabbMax; - targetBvh->m_bvhQuantization = m_bvhQuantization; - targetBvh->m_traversalMode = m_traversalMode; - targetBvh->m_subtreeHeaderCount = m_subtreeHeaderCount; - } - - targetBvh->m_useQuantization = m_useQuantization; - - unsigned char* nodeData = (unsigned char*)targetBvh; - nodeData += sizeof(b3QuantizedBvh); - - unsigned sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - int nodeCount = m_curNodeIndex; - - if (m_useQuantization) - { - targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast(b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); - } - } - else - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]; - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]; - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex; - } - } - nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(NULL, 0, 0); - } - else - { - targetBvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); - b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); - - targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast(b3SwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex)); - targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast(b3SwapEndian(m_contiguousNodes[nodeIndex].m_subPart)); - targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast(b3SwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex)); - } - } - else - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg = m_contiguousNodes[nodeIndex].m_aabbMinOrg; - targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg = m_contiguousNodes[nodeIndex].m_aabbMaxOrg; - - targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = m_contiguousNodes[nodeIndex].m_escapeIndex; - targetBvh->m_contiguousNodes[nodeIndex].m_subPart = m_contiguousNodes[nodeIndex].m_subPart; - targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex; - } - } - nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0); - } - - sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - // Now serialize the subtree headers - targetBvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, m_subtreeHeaderCount, m_subtreeHeaderCount); - if (i_swapEndian) - { - for (int i = 0; i < m_subtreeHeaderCount; i++) - { - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast(b3SwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex)); - targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast(b3SwapEndian(m_SubtreeHeaders[i].m_subtreeSize)); - } - } - else - { - for (int i = 0; i < m_subtreeHeaderCount; i++) - { - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = (m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = (m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = (m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = (m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = (m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = (m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = (m_SubtreeHeaders[i].m_rootNodeIndex); - targetBvh->m_SubtreeHeaders[i].m_subtreeSize = (m_SubtreeHeaders[i].m_subtreeSize); - - // need to clear padding in destination buffer - targetBvh->m_SubtreeHeaders[i].m_padding[0] = 0; - targetBvh->m_SubtreeHeaders[i].m_padding[1] = 0; - targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0; - } - } - nodeData += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_SubtreeHeaders.initializeFromBuffer(NULL, 0, 0); - - // this wipes the virtual function table pointer at the start of the buffer for the class - *((void**)o_alignedDataBuffer) = NULL; - - return true; -} - -b3QuantizedBvh* b3QuantizedBvh::deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) -{ - if (i_alignedDataBuffer == NULL) // || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) - { - return NULL; - } - b3QuantizedBvh* bvh = (b3QuantizedBvh*)i_alignedDataBuffer; - - if (i_swapEndian) - { - bvh->m_curNodeIndex = static_cast(b3SwapEndian(bvh->m_curNodeIndex)); - - b3UnSwapVector3Endian(bvh->m_bvhAabbMin); - b3UnSwapVector3Endian(bvh->m_bvhAabbMax); - b3UnSwapVector3Endian(bvh->m_bvhQuantization); - - bvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(bvh->m_traversalMode); - bvh->m_subtreeHeaderCount = static_cast(b3SwapEndian(bvh->m_subtreeHeaderCount)); - } - - unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize(); - b3Assert(calculatedBufSize <= i_dataBufferSize); - - if (calculatedBufSize > i_dataBufferSize) - { - return NULL; - } - - unsigned char* nodeData = (unsigned char*)bvh; - nodeData += sizeof(b3QuantizedBvh); - - unsigned sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - int nodeCount = bvh->m_curNodeIndex; - - // Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor - // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor - new (bvh) b3QuantizedBvh(*bvh, false); - - if (bvh->m_useQuantization) - { - bvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); - - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); - - bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast(b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); - } - } - nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; - } - else - { - bvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); - b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); - - bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex)); - bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart)); - bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex)); - } - } - nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; - } - - sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - // Now serialize the subtree headers - bvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, bvh->m_subtreeHeaderCount, bvh->m_subtreeHeaderCount); - if (i_swapEndian) - { - for (int i = 0; i < bvh->m_subtreeHeaderCount; i++) - { - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex)); - bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize)); - } - } - - return bvh; -} - -// Constructor that prevents b3Vector3's default constructor from being called -b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh& self, bool /* ownsMemory */) : m_bvhAabbMin(self.m_bvhAabbMin), - m_bvhAabbMax(self.m_bvhAabbMax), - m_bvhQuantization(self.m_bvhQuantization), - m_bulletVersion(B3_BULLET_VERSION) -{ -} - -void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData) -{ - m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax); - m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin); - m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization); - - m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex; - m_useQuantization = quantizedBvhFloatData.m_useQuantization != 0; - - { - int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes; - m_contiguousNodes.resize(numElem); - - if (numElem) - { - b3OptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr; - - for (int i = 0; i < numElem; i++, memPtr++) - { - m_contiguousNodes[i].m_aabbMaxOrg.deSerializeFloat(memPtr->m_aabbMaxOrg); - m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg); - m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; - m_contiguousNodes[i].m_subPart = memPtr->m_subPart; - m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; - } - } - } - - { - int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes; - m_quantizedContiguousNodes.resize(numElem); - - if (numElem) - { - b3QuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - } - } - } - - m_traversalMode = b3TraversalMode(quantizedBvhFloatData.m_traversalMode); - - { - int numElem = quantizedBvhFloatData.m_numSubtreeHeaders; - m_SubtreeHeaders.resize(numElem); - if (numElem) - { - b3BvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; - m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; - } - } - } -} - -void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData) -{ - m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax); - m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin); - m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization); - - m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex; - m_useQuantization = quantizedBvhDoubleData.m_useQuantization != 0; - - { - int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes; - m_contiguousNodes.resize(numElem); - - if (numElem) - { - b3OptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr; - - for (int i = 0; i < numElem; i++, memPtr++) - { - m_contiguousNodes[i].m_aabbMaxOrg.deSerializeDouble(memPtr->m_aabbMaxOrg); - m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg); - m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; - m_contiguousNodes[i].m_subPart = memPtr->m_subPart; - m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; - } - } - } - - { - int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes; - m_quantizedContiguousNodes.resize(numElem); - - if (numElem) - { - b3QuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - } - } - } - - m_traversalMode = b3TraversalMode(quantizedBvhDoubleData.m_traversalMode); - - { - int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders; - m_SubtreeHeaders.resize(numElem); - if (numElem) - { - b3BvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; - m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; - } - } - } -} - -///fills the dataBuffer and returns the struct name (and 0 on failure) -const char* b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const -{ - b3Assert(0); - return 0; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h deleted file mode 100644 index 13470829dc6..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h +++ /dev/null @@ -1,511 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_QUANTIZED_BVH_H -#define B3_QUANTIZED_BVH_H - -class b3Serializer; - -//#define DEBUG_CHECK_DEQUANTIZATION 1 -#ifdef DEBUG_CHECK_DEQUANTIZATION -#ifdef __SPU__ -#define printf spu_printf -#endif //__SPU__ - -#include -#include -#endif //DEBUG_CHECK_DEQUANTIZATION - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3AlignedAllocator.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3QuantizedBvhData b3QuantizedBvhDoubleData -#define b3OptimizedBvhNodeData b3OptimizedBvhNodeDoubleData -#define b3QuantizedBvhDataName "b3QuantizedBvhDoubleData" -#else -#define b3QuantizedBvhData b3QuantizedBvhFloatData -#define b3OptimizedBvhNodeData b3OptimizedBvhNodeFloatData -#define b3QuantizedBvhDataName "b3QuantizedBvhFloatData" -#endif - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" - -//http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp - -//Note: currently we have 16 bytes per quantized node -#define MAX_SUBTREE_SIZE_IN_BYTES 2048 - -// 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one -// actually) triangles each (since the sign bit is reserved -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -B3_ATTRIBUTE_ALIGNED16(struct) -b3QuantizedBvhNode : public b3QuantizedBvhNodeData -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - bool isLeafNode() const - { - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (m_escapeIndexOrTriangleIndex >= 0); - } - int getEscapeIndex() const - { - b3Assert(!isLeafNode()); - return -m_escapeIndexOrTriangleIndex; - } - int getTriangleIndex() const - { - b3Assert(isLeafNode()); - unsigned int x = 0; - unsigned int y = (~(x & 0)) << (31 - MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (m_escapeIndexOrTriangleIndex & ~(y)); - } - int getPartId() const - { - b3Assert(isLeafNode()); - // Get only the highest bits where the part index is stored - return (m_escapeIndexOrTriangleIndex >> (31 - MAX_NUM_PARTS_IN_BITS)); - } -}; - -/// b3OptimizedBvhNode contains both internal and leaf node information. -/// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes. -B3_ATTRIBUTE_ALIGNED16(struct) -b3OptimizedBvhNode -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - //32 bytes - b3Vector3 m_aabbMinOrg; - b3Vector3 m_aabbMaxOrg; - - //4 - int m_escapeIndex; - - //8 - //for child nodes - int m_subPart; - int m_triangleIndex; - - //pad the size to 64 bytes - char m_padding[20]; -}; - -///b3BvhSubtreeInfo provides info to gather a subtree of limited size -B3_ATTRIBUTE_ALIGNED16(class) -b3BvhSubtreeInfo : public b3BvhSubtreeInfoData -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3BvhSubtreeInfo() - { - //memset(&m_padding[0], 0, sizeof(m_padding)); - } - - void setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode) - { - m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0]; - m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1]; - m_quantizedAabbMin[2] = quantizedNode.m_quantizedAabbMin[2]; - m_quantizedAabbMax[0] = quantizedNode.m_quantizedAabbMax[0]; - m_quantizedAabbMax[1] = quantizedNode.m_quantizedAabbMax[1]; - m_quantizedAabbMax[2] = quantizedNode.m_quantizedAabbMax[2]; - } -}; - -class b3NodeOverlapCallback -{ -public: - virtual ~b3NodeOverlapCallback(){}; - - virtual void processNode(int subPart, int triangleIndex) = 0; -}; - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -///for code readability: -typedef b3AlignedObjectArray NodeArray; -typedef b3AlignedObjectArray QuantizedNodeArray; -typedef b3AlignedObjectArray BvhSubtreeInfoArray; - -///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU. -///It is used by the b3BvhTriangleMeshShape as midphase -///It is recommended to use quantization for better performance and lower memory requirements. -B3_ATTRIBUTE_ALIGNED16(class) -b3QuantizedBvh -{ -public: - enum b3TraversalMode - { - TRAVERSAL_STACKLESS = 0, - TRAVERSAL_STACKLESS_CACHE_FRIENDLY, - TRAVERSAL_RECURSIVE - }; - - b3Vector3 m_bvhAabbMin; - b3Vector3 m_bvhAabbMax; - b3Vector3 m_bvhQuantization; - -protected: - int m_bulletVersion; //for serialization versioning. It could also be used to detect endianess. - - int m_curNodeIndex; - //quantization data - bool m_useQuantization; - - NodeArray m_leafNodes; - NodeArray m_contiguousNodes; - QuantizedNodeArray m_quantizedLeafNodes; - QuantizedNodeArray m_quantizedContiguousNodes; - - b3TraversalMode m_traversalMode; - BvhSubtreeInfoArray m_SubtreeHeaders; - - //This is only used for serialization so we don't have to add serialization directly to b3AlignedObjectArray - mutable int m_subtreeHeaderCount; - - ///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!) - ///this might be refactored into a virtual, it is usually not calculated at run-time - void setInternalNodeAabbMin(int nodeIndex, const b3Vector3& aabbMin) - { - if (m_useQuantization) - { - quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0], aabbMin, 0); - } - else - { - m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin; - } - } - void setInternalNodeAabbMax(int nodeIndex, const b3Vector3& aabbMax) - { - if (m_useQuantization) - { - quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0], aabbMax, 1); - } - else - { - m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax; - } - } - - b3Vector3 getAabbMin(int nodeIndex) const - { - if (m_useQuantization) - { - return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMin[0]); - } - //non-quantized - return m_leafNodes[nodeIndex].m_aabbMinOrg; - } - b3Vector3 getAabbMax(int nodeIndex) const - { - if (m_useQuantization) - { - return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMax[0]); - } - //non-quantized - return m_leafNodes[nodeIndex].m_aabbMaxOrg; - } - - void setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex) - { - if (m_useQuantization) - { - m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex; - } - else - { - m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex; - } - } - - void mergeInternalNodeAabb(int nodeIndex, const b3Vector3& newAabbMin, const b3Vector3& newAabbMax) - { - if (m_useQuantization) - { - unsigned short int quantizedAabbMin[3]; - unsigned short int quantizedAabbMax[3]; - quantize(quantizedAabbMin, newAabbMin, 0); - quantize(quantizedAabbMax, newAabbMax, 1); - for (int i = 0; i < 3; i++) - { - if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] > quantizedAabbMin[i]) - m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] = quantizedAabbMin[i]; - - if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] < quantizedAabbMax[i]) - m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] = quantizedAabbMax[i]; - } - } - else - { - //non-quantized - m_contiguousNodes[nodeIndex].m_aabbMinOrg.setMin(newAabbMin); - m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax); - } - } - - void swapLeafNodes(int firstIndex, int secondIndex); - - void assignInternalNodeFromLeafNode(int internalNode, int leafNodeIndex); - -protected: - void buildTree(int startIndex, int endIndex); - - int calcSplittingAxis(int startIndex, int endIndex); - - int sortAndCalcSplittingIndex(int startIndex, int endIndex, int splitAxis); - - void walkStacklessTree(b3NodeOverlapCallback * nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - - void walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const; - void walkStacklessQuantizedTree(b3NodeOverlapCallback * nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const; - void walkStacklessTreeAgainstRay(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const; - - ///tree traversal designed for small-memory processors like PS3 SPU - void walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback * nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const; - - ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal - void walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode, b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const; - - ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal - void walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA, const b3QuantizedBvhNode* treeNodeB, b3NodeOverlapCallback* nodeCallback) const; - - void updateSubtreeHeaders(int leftChildNodexIndex, int rightChildNodexIndex); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3QuantizedBvh(); - - virtual ~b3QuantizedBvh(); - - ///***************************************** expert/internal use only ************************* - void setQuantizationValues(const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax, b3Scalar quantizationMargin = b3Scalar(1.0)); - QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } - ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized - void buildInternal(); - ///***************************************** expert/internal use only ************************* - - void reportAabbOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - void reportRayOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const; - void reportBoxCastOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - - B3_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point, int isMax) const - { - b3Assert(m_useQuantization); - - b3Assert(point.getX() <= m_bvhAabbMax.getX()); - b3Assert(point.getY() <= m_bvhAabbMax.getY()); - b3Assert(point.getZ() <= m_bvhAabbMax.getZ()); - - b3Assert(point.getX() >= m_bvhAabbMin.getX()); - b3Assert(point.getY() >= m_bvhAabbMin.getY()); - b3Assert(point.getZ() >= m_bvhAabbMin.getZ()); - - b3Vector3 v = (point - m_bvhAabbMin) * m_bvhQuantization; - ///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative - ///end-points always set the first bit, so that they are sorted properly (so that neighbouring AABBs overlap properly) - ///@todo: double-check this - if (isMax) - { - out[0] = (unsigned short)(((unsigned short)(v.getX() + b3Scalar(1.)) | 1)); - out[1] = (unsigned short)(((unsigned short)(v.getY() + b3Scalar(1.)) | 1)); - out[2] = (unsigned short)(((unsigned short)(v.getZ() + b3Scalar(1.)) | 1)); - } - else - { - out[0] = (unsigned short)(((unsigned short)(v.getX()) & 0xfffe)); - out[1] = (unsigned short)(((unsigned short)(v.getY()) & 0xfffe)); - out[2] = (unsigned short)(((unsigned short)(v.getZ()) & 0xfffe)); - } - -#ifdef DEBUG_CHECK_DEQUANTIZATION - b3Vector3 newPoint = unQuantize(out); - if (isMax) - { - if (newPoint.getX() < point.getX()) - { - printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n", newPoint.getX() - point.getX(), newPoint.getX(), point.getX()); - } - if (newPoint.getY() < point.getY()) - { - printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n", newPoint.getY() - point.getY(), newPoint.getY(), point.getY()); - } - if (newPoint.getZ() < point.getZ()) - { - printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n", newPoint.getZ() - point.getZ(), newPoint.getZ(), point.getZ()); - } - } - else - { - if (newPoint.getX() > point.getX()) - { - printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n", newPoint.getX() - point.getX(), newPoint.getX(), point.getX()); - } - if (newPoint.getY() > point.getY()) - { - printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n", newPoint.getY() - point.getY(), newPoint.getY(), point.getY()); - } - if (newPoint.getZ() > point.getZ()) - { - printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n", newPoint.getZ() - point.getZ(), newPoint.getZ(), point.getZ()); - } - } -#endif //DEBUG_CHECK_DEQUANTIZATION - } - - B3_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2, int isMax) const - { - b3Assert(m_useQuantization); - - b3Vector3 clampedPoint(point2); - clampedPoint.setMax(m_bvhAabbMin); - clampedPoint.setMin(m_bvhAabbMax); - - quantize(out, clampedPoint, isMax); - } - - B3_FORCE_INLINE b3Vector3 unQuantize(const unsigned short* vecIn) const - { - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (m_bvhQuantization.getX()), - (b3Scalar)(vecIn[1]) / (m_bvhQuantization.getY()), - (b3Scalar)(vecIn[2]) / (m_bvhQuantization.getZ())); - vecOut += m_bvhAabbMin; - return vecOut; - } - - ///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees. - void setTraversalMode(b3TraversalMode traversalMode) - { - m_traversalMode = traversalMode; - } - - B3_FORCE_INLINE QuantizedNodeArray& getQuantizedNodeArray() - { - return m_quantizedContiguousNodes; - } - - B3_FORCE_INLINE BvhSubtreeInfoArray& getSubtreeInfoArray() - { - return m_SubtreeHeaders; - } - - //////////////////////////////////////////////////////////////////// - - /////Calculate space needed to store BVH for serialization - unsigned calculateSerializeBufferSize() const; - - /// Data buffer MUST be 16 byte aligned - virtual bool serialize(void* o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const; - - ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' - static b3QuantizedBvh* deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); - - static unsigned int getAlignmentSerializationPadding(); - ////////////////////////////////////////////////////////////////////// - - virtual int calculateSerializeBufferSizeNew() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; - - virtual void deSerializeFloat(struct b3QuantizedBvhFloatData & quantizedBvhFloatData); - - virtual void deSerializeDouble(struct b3QuantizedBvhDoubleData & quantizedBvhDoubleData); - - //////////////////////////////////////////////////////////////////// - - B3_FORCE_INLINE bool isQuantized() - { - return m_useQuantization; - } - -private: - // Special "copy" constructor that allows for in-place deserialization - // Prevents b3Vector3's default constructor from being called, but doesn't inialize much else - // ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need) - b3QuantizedBvh(b3QuantizedBvh & other, bool ownsMemory); -}; - -struct b3OptimizedBvhNodeFloatData -{ - b3Vector3FloatData m_aabbMinOrg; - b3Vector3FloatData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -struct b3OptimizedBvhNodeDoubleData -{ - b3Vector3DoubleData m_aabbMinOrg; - b3Vector3DoubleData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -struct b3QuantizedBvhFloatData -{ - b3Vector3FloatData m_bvhAabbMin; - b3Vector3FloatData m_bvhAabbMax; - b3Vector3FloatData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeFloatData* m_contiguousNodesPtr; - b3QuantizedBvhNodeData* m_quantizedContiguousNodesPtr; - b3BvhSubtreeInfoData* m_subTreeInfoPtr; - int m_traversalMode; - int m_numSubtreeHeaders; -}; - -struct b3QuantizedBvhDoubleData -{ - b3Vector3DoubleData m_bvhAabbMin; - b3Vector3DoubleData m_bvhAabbMax; - b3Vector3DoubleData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeDoubleData* m_contiguousNodesPtr; - b3QuantizedBvhNodeData* m_quantizedContiguousNodesPtr; - - int m_traversalMode; - int m_numSubtreeHeaders; - b3BvhSubtreeInfoData* m_subTreeInfoPtr; -}; - -B3_FORCE_INLINE int b3QuantizedBvh::calculateSerializeBufferSizeNew() const -{ - return sizeof(b3QuantizedBvhData); -} - -#endif //B3_QUANTIZED_BVH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp deleted file mode 100644 index 6b0c941f23e..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3StridingMeshInterface.h" - -b3StridingMeshInterface::~b3StridingMeshInterface() -{ -} - -void b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - (void)aabbMin; - (void)aabbMax; - int numtotalphysicsverts = 0; - int part, graphicssubparts = getNumSubParts(); - const unsigned char* vertexbase; - const unsigned char* indexbase; - int indexstride; - PHY_ScalarType type; - PHY_ScalarType gfxindextype; - int stride, numverts, numtriangles; - int gfxindex; - b3Vector3 triangle[3]; - - b3Vector3 meshScaling = getScaling(); - - ///if the number of parts is big, the performance might drop due to the innerloop switch on indextype - for (part = 0; part < graphicssubparts; part++) - { - getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numtriangles, gfxindextype, part); - numtotalphysicsverts += numtriangles * 3; //upper bound - - ///unlike that developers want to pass in double-precision meshes in single-precision Bullet build - ///so disable this feature by default - ///see patch http://code.google.com/p/bullet/issues/detail?id=213 - - switch (type) - { - case PHY_FLOAT: - { - float* graphicsbase; - - switch (gfxindextype) - { - case PHY_INTEGER: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned int* tri_indices = (unsigned int*)(indexbase + gfxindex * indexstride); - graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_SHORT: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned short int* tri_indices = (unsigned short int*)(indexbase + gfxindex * indexstride); - graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_UCHAR: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned char* tri_indices = (unsigned char*)(indexbase + gfxindex * indexstride); - graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - default: - b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); - } - break; - } - - case PHY_DOUBLE: - { - double* graphicsbase; - - switch (gfxindextype) - { - case PHY_INTEGER: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned int* tri_indices = (unsigned int*)(indexbase + gfxindex * indexstride); - graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_SHORT: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned short int* tri_indices = (unsigned short int*)(indexbase + gfxindex * indexstride); - graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_UCHAR: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned char* tri_indices = (unsigned char*)(indexbase + gfxindex * indexstride); - graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - default: - b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); - } - break; - } - default: - b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE)); - } - - unLockReadOnlyVertexBase(part); - } -} - -void b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin, b3Vector3& aabbMax) -{ - struct AabbCalculationCallback : public b3InternalTriangleIndexCallback - { - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - - AabbCalculationCallback() - { - m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) - { - (void)partId; - (void)triangleIndex; - - m_aabbMin.setMin(triangle[0]); - m_aabbMax.setMax(triangle[0]); - m_aabbMin.setMin(triangle[1]); - m_aabbMax.setMax(triangle[1]); - m_aabbMin.setMin(triangle[2]); - m_aabbMax.setMax(triangle[2]); - } - }; - - //first calculate the total aabb for all triangles - AabbCalculationCallback aabbCallback; - aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - InternalProcessAllTriangles(&aabbCallback, aabbMin, aabbMax); - - aabbMin = aabbCallback.m_aabbMin; - aabbMax = aabbCallback.m_aabbMax; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h deleted file mode 100644 index 2b1e63be759..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h +++ /dev/null @@ -1,158 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_STRIDING_MESHINTERFACE_H -#define B3_STRIDING_MESHINTERFACE_H - -#include "Bullet3Common/b3Vector3.h" -#include "b3TriangleCallback.h" -//#include "b3ConcaveShape.h" - -enum PHY_ScalarType -{ - PHY_FLOAT, - PHY_DOUBLE, - PHY_INTEGER, - PHY_SHORT, - PHY_FIXEDPOINT88, - PHY_UCHAR -}; - -/// The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with b3BvhTriangleMeshShape and some other collision shapes. -/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips. -/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory. -B3_ATTRIBUTE_ALIGNED16(class) -b3StridingMeshInterface -{ -protected: - b3Vector3 m_scaling; - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3StridingMeshInterface() : m_scaling(b3MakeVector3(b3Scalar(1.), b3Scalar(1.), b3Scalar(1.))) - { - } - - virtual ~b3StridingMeshInterface(); - - virtual void InternalProcessAllTriangles(b3InternalTriangleIndexCallback * callback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - - ///brute force method to calculate aabb - void calculateAabbBruteForce(b3Vector3 & aabbMin, b3Vector3 & aabbMax); - - /// get read and write access to a subpart of a triangle mesh - /// this subpart has a continuous array of vertices and indices - /// in this way the mesh can be handled as chunks of memory with striding - /// very similar to OpenGL vertexarray support - /// make a call to unLockVertexBase when the read and write access is finished - virtual void getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& stride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) = 0; - - virtual void getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& stride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) const = 0; - - /// unLockVertexBase finishes the access to a subpart of the triangle mesh - /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished - virtual void unLockVertexBase(int subpart) = 0; - - virtual void unLockReadOnlyVertexBase(int subpart) const = 0; - - /// getNumSubParts returns the number of separate subparts - /// each subpart has a continuous array of vertices and indices - virtual int getNumSubParts() const = 0; - - virtual void preallocateVertices(int numverts) = 0; - virtual void preallocateIndices(int numindices) = 0; - - virtual bool hasPremadeAabb() const { return false; } - virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const - { - (void)aabbMin; - (void)aabbMax; - } - virtual void getPremadeAabb(b3Vector3 * aabbMin, b3Vector3 * aabbMax) const - { - (void)aabbMin; - (void)aabbMax; - } - - const b3Vector3& getScaling() const - { - return m_scaling; - } - void setScaling(const b3Vector3& scaling) - { - m_scaling = scaling; - } - - virtual int calculateSerializeBufferSize() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - //virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; -}; - -struct b3IntIndexData -{ - int m_value; -}; - -struct b3ShortIntIndexData -{ - short m_value; - char m_pad[2]; -}; - -struct b3ShortIntIndexTripletData -{ - short m_values[3]; - char m_pad[2]; -}; - -struct b3CharIndexTripletData -{ - unsigned char m_values[3]; - char m_pad; -}; - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3MeshPartData -{ - b3Vector3FloatData* m_vertices3f; - b3Vector3DoubleData* m_vertices3d; - - b3IntIndexData* m_indices32; - b3ShortIntIndexTripletData* m_3indices16; - b3CharIndexTripletData* m_3indices8; - - b3ShortIntIndexData* m_indices16; //backwards compatibility - - int m_numTriangles; //length of m_indices = m_numTriangles - int m_numVertices; -}; - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3StridingMeshInterfaceData -{ - b3MeshPartData* m_meshPartsPtr; - b3Vector3FloatData m_scaling; - int m_numMeshParts; - char m_padding[4]; -}; - -B3_FORCE_INLINE int b3StridingMeshInterface::calculateSerializeBufferSize() const -{ - return sizeof(b3StridingMeshInterfaceData); -} - -#endif //B3_STRIDING_MESHINTERFACE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h deleted file mode 100644 index 9ca1e229496..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h +++ /dev/null @@ -1,34 +0,0 @@ - -#ifndef B3_SUPPORT_MAPPINGS_H -#define B3_SUPPORT_MAPPINGS_H - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "b3VectorFloat4.h" - -struct b3GjkPairDetector; - -inline b3Vector3 localGetSupportVertexWithMargin(const float4& supportVec, const struct b3ConvexPolyhedronData* hull, - const b3AlignedObjectArray& verticesA, b3Scalar margin) -{ - b3Vector3 supVec = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - b3Scalar maxDot = b3Scalar(-B3_LARGE_FLOAT); - - // Here we take advantage of dot(a, b*c) = dot(a*b, c). Note: This is true mathematically, but not numerically. - if (0 < hull->m_numVertices) - { - const b3Vector3 scaled = supportVec; - int index = (int)scaled.maxDot(&verticesA[hull->m_vertexOffset], hull->m_numVertices, maxDot); - return verticesA[hull->m_vertexOffset + index]; - } - - return supVec; -} - -inline b3Vector3 localGetSupportVertexWithoutMargin(const float4& supportVec, const struct b3ConvexPolyhedronData* hull, - const b3AlignedObjectArray& verticesA) -{ - return localGetSupportVertexWithMargin(supportVec, hull, verticesA, 0.f); -} - -#endif //B3_SUPPORT_MAPPINGS_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp deleted file mode 100644 index 3908c6de896..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3TriangleCallback.h" - -b3TriangleCallback::~b3TriangleCallback() -{ -} - -b3InternalTriangleIndexCallback::~b3InternalTriangleIndexCallback() -{ -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h deleted file mode 100644 index a0fd3e7ac71..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h +++ /dev/null @@ -1,37 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRIANGLE_CALLBACK_H -#define B3_TRIANGLE_CALLBACK_H - -#include "Bullet3Common/b3Vector3.h" - -///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles. -///This callback is called by processAllTriangles for all b3ConcaveShape derived class, such as b3BvhTriangleMeshShape, b3StaticPlaneShape and b3HeightfieldTerrainShape. -class b3TriangleCallback -{ -public: - virtual ~b3TriangleCallback(); - virtual void processTriangle(b3Vector3* triangle, int partId, int triangleIndex) = 0; -}; - -class b3InternalTriangleIndexCallback -{ -public: - virtual ~b3InternalTriangleIndexCallback(); - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) = 0; -}; - -#endif //B3_TRIANGLE_CALLBACK_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp deleted file mode 100644 index 73faadbdd0f..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3TriangleIndexVertexArray.h" - -b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles, int* triangleIndexBase, int triangleIndexStride, int numVertices, b3Scalar* vertexBase, int vertexStride) - : m_hasAabb(0) -{ - b3IndexedMesh mesh; - - mesh.m_numTriangles = numTriangles; - mesh.m_triangleIndexBase = (const unsigned char*)triangleIndexBase; - mesh.m_triangleIndexStride = triangleIndexStride; - mesh.m_numVertices = numVertices; - mesh.m_vertexBase = (const unsigned char*)vertexBase; - mesh.m_vertexStride = vertexStride; - - addIndexedMesh(mesh); -} - -b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray() -{ -} - -void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart) -{ - b3Assert(subpart < getNumSubParts()); - - b3IndexedMesh& mesh = m_indexedMeshes[subpart]; - - numverts = mesh.m_numVertices; - (*vertexbase) = (unsigned char*)mesh.m_vertexBase; - - type = mesh.m_vertexType; - - vertexStride = mesh.m_vertexStride; - - numfaces = mesh.m_numTriangles; - - (*indexbase) = (unsigned char*)mesh.m_triangleIndexBase; - indexstride = mesh.m_triangleIndexStride; - indicestype = mesh.m_indexType; -} - -void b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart) const -{ - const b3IndexedMesh& mesh = m_indexedMeshes[subpart]; - - numverts = mesh.m_numVertices; - (*vertexbase) = (const unsigned char*)mesh.m_vertexBase; - - type = mesh.m_vertexType; - - vertexStride = mesh.m_vertexStride; - - numfaces = mesh.m_numTriangles; - (*indexbase) = (const unsigned char*)mesh.m_triangleIndexBase; - indexstride = mesh.m_triangleIndexStride; - indicestype = mesh.m_indexType; -} - -bool b3TriangleIndexVertexArray::hasPremadeAabb() const -{ - return (m_hasAabb == 1); -} - -void b3TriangleIndexVertexArray::setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - m_aabbMin = aabbMin; - m_aabbMax = aabbMax; - m_hasAabb = 1; // this is intentionally an int see notes in header -} - -void b3TriangleIndexVertexArray::getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax) const -{ - *aabbMin = m_aabbMin; - *aabbMax = m_aabbMax; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h deleted file mode 100644 index 57cbf03dc20..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h +++ /dev/null @@ -1,128 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRIANGLE_INDEX_VERTEX_ARRAY_H -#define B3_TRIANGLE_INDEX_VERTEX_ARRAY_H - -#include "b3StridingMeshInterface.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Scalar.h" - -///The b3IndexedMesh indexes a single vertex and index array. Multiple b3IndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh. -///Instead of the number of indices, we pass the number of triangles. -B3_ATTRIBUTE_ALIGNED16(struct) -b3IndexedMesh -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_numTriangles; - const unsigned char* m_triangleIndexBase; - // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) - int m_triangleIndexStride; - int m_numVertices; - const unsigned char* m_vertexBase; - // Size of a vertex, in bytes - int m_vertexStride; - - // The index type is set when adding an indexed mesh to the - // b3TriangleIndexVertexArray, do not set it manually - PHY_ScalarType m_indexType; - - // The vertex type has a default type similar to Bullet's precision mode (float or double) - // but can be set manually if you for example run Bullet with double precision but have - // mesh data in single precision.. - PHY_ScalarType m_vertexType; - - b3IndexedMesh() - : m_indexType(PHY_INTEGER), -#ifdef B3_USE_DOUBLE_PRECISION - m_vertexType(PHY_DOUBLE) -#else // B3_USE_DOUBLE_PRECISION - m_vertexType(PHY_FLOAT) -#endif // B3_USE_DOUBLE_PRECISION - { - } -}; - -typedef b3AlignedObjectArray IndexedMeshArray; - -///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays. -///Additional meshes can be added using addIndexedMesh -///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays. -///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray. -B3_ATTRIBUTE_ALIGNED16(class) -b3TriangleIndexVertexArray : public b3StridingMeshInterface -{ -protected: - IndexedMeshArray m_indexedMeshes; - int m_pad[2]; - mutable int m_hasAabb; // using int instead of bool to maintain alignment - mutable b3Vector3 m_aabbMin; - mutable b3Vector3 m_aabbMax; - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3TriangleIndexVertexArray() : m_hasAabb(0) - { - } - - virtual ~b3TriangleIndexVertexArray(); - - //just to be backwards compatible - b3TriangleIndexVertexArray(int numTriangles, int* triangleIndexBase, int triangleIndexStride, int numVertices, b3Scalar* vertexBase, int vertexStride); - - void addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER) - { - m_indexedMeshes.push_back(mesh); - m_indexedMeshes[m_indexedMeshes.size() - 1].m_indexType = indexType; - } - - virtual void getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0); - - virtual void getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) const; - - /// unLockVertexBase finishes the access to a subpart of the triangle mesh - /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished - virtual void unLockVertexBase(int subpart) { (void)subpart; } - - virtual void unLockReadOnlyVertexBase(int subpart) const { (void)subpart; } - - /// getNumSubParts returns the number of separate subparts - /// each subpart has a continuous array of vertices and indices - virtual int getNumSubParts() const - { - return (int)m_indexedMeshes.size(); - } - - IndexedMeshArray& getIndexedMeshArray() - { - return m_indexedMeshes; - } - - const IndexedMeshArray& getIndexedMeshArray() const - { - return m_indexedMeshes; - } - - virtual void preallocateVertices(int numverts) { (void)numverts; } - virtual void preallocateIndices(int numindices) { (void)numindices; } - - virtual bool hasPremadeAabb() const; - virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - virtual void getPremadeAabb(b3Vector3 * aabbMin, b3Vector3 * aabbMax) const; -}; - -#endif //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h deleted file mode 100644 index 5cc4b5a6262..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef B3_VECTOR_FLOAT4_H -#define B3_VECTOR_FLOAT4_H - -#include "Bullet3Common/b3Transform.h" - -//#define cross3(a,b) (a.cross(b)) -#define float4 b3Vector3 -//#define make_float4(x,y,z,w) b3Vector4(x,y,z,w) - -#endif //B3_VECTOR_FLOAT4_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp deleted file mode 100644 index c599bca2b04..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp +++ /dev/null @@ -1,574 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. - - Elsevier CDROM license agreements grants nonexclusive license to use the software - for any purpose, commercial or non-commercial as long as the following credit is included - identifying the original source of the software: - - Parts of the source are "from the book Real-Time Collision Detection by - Christer Ericson, published by Morgan Kaufmann Publishers, - (c) 2005 Elsevier Inc." - -*/ - -#include "b3VoronoiSimplexSolver.h" - -#define VERTA 0 -#define VERTB 1 -#define VERTC 2 -#define VERTD 3 - -#define B3_CATCH_DEGENERATE_TETRAHEDRON 1 -void b3VoronoiSimplexSolver::removeVertex(int index) -{ - b3Assert(m_numVertices > 0); - m_numVertices--; - m_simplexVectorW[index] = m_simplexVectorW[m_numVertices]; - m_simplexPointsP[index] = m_simplexPointsP[m_numVertices]; - m_simplexPointsQ[index] = m_simplexPointsQ[m_numVertices]; -} - -void b3VoronoiSimplexSolver::reduceVertices(const b3UsageBitfield& usedVerts) -{ - if ((numVertices() >= 4) && (!usedVerts.usedVertexD)) - removeVertex(3); - - if ((numVertices() >= 3) && (!usedVerts.usedVertexC)) - removeVertex(2); - - if ((numVertices() >= 2) && (!usedVerts.usedVertexB)) - removeVertex(1); - - if ((numVertices() >= 1) && (!usedVerts.usedVertexA)) - removeVertex(0); -} - -//clear the simplex, remove all the vertices -void b3VoronoiSimplexSolver::reset() -{ - m_cachedValidClosest = false; - m_numVertices = 0; - m_needsUpdate = true; - m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - m_cachedBC.reset(); -} - -//add a vertex -void b3VoronoiSimplexSolver::addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q) -{ - m_lastW = w; - m_needsUpdate = true; - - m_simplexVectorW[m_numVertices] = w; - m_simplexPointsP[m_numVertices] = p; - m_simplexPointsQ[m_numVertices] = q; - - m_numVertices++; -} - -bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() -{ - if (m_needsUpdate) - { - m_cachedBC.reset(); - - m_needsUpdate = false; - - switch (numVertices()) - { - case 0: - m_cachedValidClosest = false; - break; - case 1: - { - m_cachedP1 = m_simplexPointsP[0]; - m_cachedP2 = m_simplexPointsQ[0]; - m_cachedV = m_cachedP1 - m_cachedP2; //== m_simplexVectorW[0] - m_cachedBC.reset(); - m_cachedBC.setBarycentricCoordinates(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - m_cachedValidClosest = m_cachedBC.isValid(); - break; - }; - case 2: - { - //closest point origin from line segment - const b3Vector3& from = m_simplexVectorW[0]; - const b3Vector3& to = m_simplexVectorW[1]; - b3Vector3 nearest; - - b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - b3Vector3 diff = p - from; - b3Vector3 v = to - from; - b3Scalar t = v.dot(diff); - - if (t > 0) - { - b3Scalar dotVV = v.dot(v); - if (t < dotVV) - { - t /= dotVV; - diff -= t * v; - m_cachedBC.m_usedVertices.usedVertexA = true; - m_cachedBC.m_usedVertices.usedVertexB = true; - } - else - { - t = 1; - diff -= v; - //reduce to 1 point - m_cachedBC.m_usedVertices.usedVertexB = true; - } - } - else - { - t = 0; - //reduce to 1 point - m_cachedBC.m_usedVertices.usedVertexA = true; - } - m_cachedBC.setBarycentricCoordinates(1 - t, t); - nearest = from + t * v; - - m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]); - m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]); - m_cachedV = m_cachedP1 - m_cachedP2; - - reduceVertices(m_cachedBC.m_usedVertices); - - m_cachedValidClosest = m_cachedBC.isValid(); - break; - } - case 3: - { - //closest point origin from triangle - b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - - const b3Vector3& a = m_simplexVectorW[0]; - const b3Vector3& b = m_simplexVectorW[1]; - const b3Vector3& c = m_simplexVectorW[2]; - - closestPtPointTriangle(p, a, b, c, m_cachedBC); - m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2]; - - m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2]; - - m_cachedV = m_cachedP1 - m_cachedP2; - - reduceVertices(m_cachedBC.m_usedVertices); - m_cachedValidClosest = m_cachedBC.isValid(); - - break; - } - case 4: - { - b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - - const b3Vector3& a = m_simplexVectorW[0]; - const b3Vector3& b = m_simplexVectorW[1]; - const b3Vector3& c = m_simplexVectorW[2]; - const b3Vector3& d = m_simplexVectorW[3]; - - bool hasSeparation = closestPtPointTetrahedron(p, a, b, c, d, m_cachedBC); - - if (hasSeparation) - { - m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] + - m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3]; - - m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] + - m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3]; - - m_cachedV = m_cachedP1 - m_cachedP2; - reduceVertices(m_cachedBC.m_usedVertices); - } - else - { - // printf("sub distance got penetration\n"); - - if (m_cachedBC.m_degenerate) - { - m_cachedValidClosest = false; - } - else - { - m_cachedValidClosest = true; - //degenerate case == false, penetration = true + zero - m_cachedV.setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - } - break; - } - - m_cachedValidClosest = m_cachedBC.isValid(); - - //closest point origin from tetrahedron - break; - } - default: - { - m_cachedValidClosest = false; - } - }; - } - - return m_cachedValidClosest; -} - -//return/calculate the closest vertex -bool b3VoronoiSimplexSolver::closest(b3Vector3& v) -{ - bool succes = updateClosestVectorAndPoints(); - v = m_cachedV; - return succes; -} - -b3Scalar b3VoronoiSimplexSolver::maxVertex() -{ - int i, numverts = numVertices(); - b3Scalar maxV = b3Scalar(0.); - for (i = 0; i < numverts; i++) - { - b3Scalar curLen2 = m_simplexVectorW[i].length2(); - if (maxV < curLen2) - maxV = curLen2; - } - return maxV; -} - -//return the current simplex -int b3VoronoiSimplexSolver::getSimplex(b3Vector3* pBuf, b3Vector3* qBuf, b3Vector3* yBuf) const -{ - int i; - for (i = 0; i < numVertices(); i++) - { - yBuf[i] = m_simplexVectorW[i]; - pBuf[i] = m_simplexPointsP[i]; - qBuf[i] = m_simplexPointsQ[i]; - } - return numVertices(); -} - -bool b3VoronoiSimplexSolver::inSimplex(const b3Vector3& w) -{ - bool found = false; - int i, numverts = numVertices(); - //b3Scalar maxV = b3Scalar(0.); - - //w is in the current (reduced) simplex - for (i = 0; i < numverts; i++) - { -#ifdef BT_USE_EQUAL_VERTEX_THRESHOLD - if (m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold) -#else - if (m_simplexVectorW[i] == w) -#endif - found = true; - } - - //check in case lastW is already removed - if (w == m_lastW) - return true; - - return found; -} - -void b3VoronoiSimplexSolver::backup_closest(b3Vector3& v) -{ - v = m_cachedV; -} - -bool b3VoronoiSimplexSolver::emptySimplex() const -{ - return (numVertices() == 0); -} - -void b3VoronoiSimplexSolver::compute_points(b3Vector3& p1, b3Vector3& p2) -{ - updateClosestVectorAndPoints(); - p1 = m_cachedP1; - p2 = m_cachedP2; -} - -bool b3VoronoiSimplexSolver::closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, b3SubSimplexClosestResult& result) -{ - result.m_usedVertices.reset(); - - // Check if P in vertex region outside A - b3Vector3 ab = b - a; - b3Vector3 ac = c - a; - b3Vector3 ap = p - a; - b3Scalar d1 = ab.dot(ap); - b3Scalar d2 = ac.dot(ap); - if (d1 <= b3Scalar(0.0) && d2 <= b3Scalar(0.0)) - { - result.m_closestPointOnSimplex = a; - result.m_usedVertices.usedVertexA = true; - result.setBarycentricCoordinates(1, 0, 0); - return true; // a; // barycentric coordinates (1,0,0) - } - - // Check if P in vertex region outside B - b3Vector3 bp = p - b; - b3Scalar d3 = ab.dot(bp); - b3Scalar d4 = ac.dot(bp); - if (d3 >= b3Scalar(0.0) && d4 <= d3) - { - result.m_closestPointOnSimplex = b; - result.m_usedVertices.usedVertexB = true; - result.setBarycentricCoordinates(0, 1, 0); - - return true; // b; // barycentric coordinates (0,1,0) - } - // Check if P in edge region of AB, if so return projection of P onto AB - b3Scalar vc = d1 * d4 - d3 * d2; - if (vc <= b3Scalar(0.0) && d1 >= b3Scalar(0.0) && d3 <= b3Scalar(0.0)) - { - b3Scalar v = d1 / (d1 - d3); - result.m_closestPointOnSimplex = a + v * ab; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexB = true; - result.setBarycentricCoordinates(1 - v, v, 0); - return true; - //return a + v * ab; // barycentric coordinates (1-v,v,0) - } - - // Check if P in vertex region outside C - b3Vector3 cp = p - c; - b3Scalar d5 = ab.dot(cp); - b3Scalar d6 = ac.dot(cp); - if (d6 >= b3Scalar(0.0) && d5 <= d6) - { - result.m_closestPointOnSimplex = c; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(0, 0, 1); - return true; //c; // barycentric coordinates (0,0,1) - } - - // Check if P in edge region of AC, if so return projection of P onto AC - b3Scalar vb = d5 * d2 - d1 * d6; - if (vb <= b3Scalar(0.0) && d2 >= b3Scalar(0.0) && d6 <= b3Scalar(0.0)) - { - b3Scalar w = d2 / (d2 - d6); - result.m_closestPointOnSimplex = a + w * ac; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(1 - w, 0, w); - return true; - //return a + w * ac; // barycentric coordinates (1-w,0,w) - } - - // Check if P in edge region of BC, if so return projection of P onto BC - b3Scalar va = d3 * d6 - d5 * d4; - if (va <= b3Scalar(0.0) && (d4 - d3) >= b3Scalar(0.0) && (d5 - d6) >= b3Scalar(0.0)) - { - b3Scalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6)); - - result.m_closestPointOnSimplex = b + w * (c - b); - result.m_usedVertices.usedVertexB = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(0, 1 - w, w); - return true; - // return b + w * (c - b); // barycentric coordinates (0,1-w,w) - } - - // P inside face region. Compute Q through its barycentric coordinates (u,v,w) - b3Scalar denom = b3Scalar(1.0) / (va + vb + vc); - b3Scalar v = vb * denom; - b3Scalar w = vc * denom; - - result.m_closestPointOnSimplex = a + ab * v + ac * w; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexB = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(1 - v - w, v, w); - - return true; - // return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = b3Scalar(1.0) - v - w -} - -/// Test if point p and d lie on opposite sides of plane through abc -int b3VoronoiSimplexSolver::pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d) -{ - b3Vector3 normal = (b - a).cross(c - a); - - b3Scalar signp = (p - a).dot(normal); // [AP AB AC] - b3Scalar signd = (d - a).dot(normal); // [AD AB AC] - -#ifdef B3_CATCH_DEGENERATE_TETRAHEDRON -#ifdef BT_USE_DOUBLE_PRECISION - if (signd * signd < (b3Scalar(1e-8) * b3Scalar(1e-8))) - { - return -1; - } -#else - if (signd * signd < (b3Scalar(1e-4) * b3Scalar(1e-4))) - { - // printf("affine dependent/degenerate\n");// - return -1; - } -#endif - -#endif - // Points on opposite sides if expression signs are opposite - return signp * signd < b3Scalar(0.); -} - -bool b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult) -{ - b3SubSimplexClosestResult tempResult; - - // Start out assuming point inside all halfspaces, so closest to itself - finalResult.m_closestPointOnSimplex = p; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = true; - finalResult.m_usedVertices.usedVertexB = true; - finalResult.m_usedVertices.usedVertexC = true; - finalResult.m_usedVertices.usedVertexD = true; - - int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d); - int pointOutsideACD = pointOutsideOfPlane(p, a, c, d, b); - int pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c); - int pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a); - - if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0) - { - finalResult.m_degenerate = true; - return false; - } - - if (!pointOutsideABC && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC) - { - return false; - } - - b3Scalar bestSqDist = FLT_MAX; - // If point outside face abc then compute closest point on abc - if (pointOutsideABC) - { - closestPtPointTriangle(p, a, b, c, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - - b3Scalar sqDist = (q - p).dot(q - p); - // Update best closest point if (squared) distance is less than current best - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - //convert result bitmask! - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB; - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTB], - tempResult.m_barycentricCoords[VERTC], - 0); - } - } - - // Repeat test for face acd - if (pointOutsideACD) - { - closestPtPointTriangle(p, a, c, d, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - - b3Scalar sqDist = (q - p).dot(q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB; - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - 0, - tempResult.m_barycentricCoords[VERTB], - tempResult.m_barycentricCoords[VERTC]); - } - } - // Repeat test for face adb - - if (pointOutsideADB) - { - closestPtPointTriangle(p, a, d, b, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - - b3Scalar sqDist = (q - p).dot(q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC; - - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTC], - 0, - tempResult.m_barycentricCoords[VERTB]); - } - } - // Repeat test for face bdc - - if (pointOutsideBDC) - { - closestPtPointTriangle(p, b, d, c, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - b3Scalar sqDist = (q - p).dot(q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - // - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; - - finalResult.setBarycentricCoordinates( - 0, - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTC], - tempResult.m_barycentricCoords[VERTB]); - } - } - - //help! we ended up full ! - - if (finalResult.m_usedVertices.usedVertexA && - finalResult.m_usedVertices.usedVertexB && - finalResult.m_usedVertices.usedVertexC && - finalResult.m_usedVertices.usedVertexD) - { - return true; - } - - return true; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h deleted file mode 100644 index 46e36a3ae15..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h +++ /dev/null @@ -1,164 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans https://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_VORONOI_SIMPLEX_SOLVER_H -#define B3_VORONOI_SIMPLEX_SOLVER_H - -#include "Bullet3Common/b3Vector3.h" - -#define VORONOI_SIMPLEX_MAX_VERTS 5 - -///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure -//#define BT_USE_EQUAL_VERTEX_THRESHOLD -#define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f - -struct b3UsageBitfield -{ - b3UsageBitfield() - { - reset(); - } - - void reset() - { - usedVertexA = false; - usedVertexB = false; - usedVertexC = false; - usedVertexD = false; - } - unsigned short usedVertexA : 1; - unsigned short usedVertexB : 1; - unsigned short usedVertexC : 1; - unsigned short usedVertexD : 1; - unsigned short unused1 : 1; - unsigned short unused2 : 1; - unsigned short unused3 : 1; - unsigned short unused4 : 1; -}; - -struct b3SubSimplexClosestResult -{ - b3Vector3 m_closestPointOnSimplex; - //MASK for m_usedVertices - //stores the simplex vertex-usage, using the MASK, - // if m_usedVertices & MASK then the related vertex is used - b3UsageBitfield m_usedVertices; - b3Scalar m_barycentricCoords[4]; - bool m_degenerate; - - void reset() - { - m_degenerate = false; - setBarycentricCoordinates(); - m_usedVertices.reset(); - } - bool isValid() - { - bool valid = (m_barycentricCoords[0] >= b3Scalar(0.)) && - (m_barycentricCoords[1] >= b3Scalar(0.)) && - (m_barycentricCoords[2] >= b3Scalar(0.)) && - (m_barycentricCoords[3] >= b3Scalar(0.)); - - return valid; - } - void setBarycentricCoordinates(b3Scalar a = b3Scalar(0.), b3Scalar b = b3Scalar(0.), b3Scalar c = b3Scalar(0.), b3Scalar d = b3Scalar(0.)) - { - m_barycentricCoords[0] = a; - m_barycentricCoords[1] = b; - m_barycentricCoords[2] = c; - m_barycentricCoords[3] = d; - } -}; - -/// b3VoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin. -/// Can be used with GJK, as an alternative to Johnson distance algorithm. - -B3_ATTRIBUTE_ALIGNED16(class) -b3VoronoiSimplexSolver -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_numVertices; - - b3Vector3 m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS]; - b3Vector3 m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS]; - b3Vector3 m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS]; - - b3Vector3 m_cachedP1; - b3Vector3 m_cachedP2; - b3Vector3 m_cachedV; - b3Vector3 m_lastW; - - b3Scalar m_equalVertexThreshold; - bool m_cachedValidClosest; - - b3SubSimplexClosestResult m_cachedBC; - - bool m_needsUpdate; - - void removeVertex(int index); - void reduceVertices(const b3UsageBitfield& usedVerts); - bool updateClosestVectorAndPoints(); - - bool closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult); - int pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d); - bool closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, b3SubSimplexClosestResult& result); - -public: - b3VoronoiSimplexSolver() - : m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD) - { - } - void reset(); - - void addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q); - - void setEqualVertexThreshold(b3Scalar threshold) - { - m_equalVertexThreshold = threshold; - } - - b3Scalar getEqualVertexThreshold() const - { - return m_equalVertexThreshold; - } - - bool closest(b3Vector3 & v); - - b3Scalar maxVertex(); - - bool fullSimplex() const - { - return (m_numVertices == 4); - } - - int getSimplex(b3Vector3 * pBuf, b3Vector3 * qBuf, b3Vector3 * yBuf) const; - - bool inSimplex(const b3Vector3& w); - - void backup_closest(b3Vector3 & v); - - bool emptySimplex() const; - - void compute_points(b3Vector3 & p1, b3Vector3 & p2); - - int numVertices() const - { - return m_numVertices; - } -}; - -#endif //B3_VORONOI_SIMPLEX_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl deleted file mode 100644 index faa413441c5..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl +++ /dev/null @@ -1,283 +0,0 @@ -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - -typedef unsigned int u32; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///btQuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} btQuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - -int getTriangleIndex(const btQuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeaf(const btQuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int getEscapeIndex(const btQuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} btBvhSubtreeInfo; - -///keep this in sync with btCollidable.h -typedef struct -{ - int m_numChildShapes; - int blaat2; - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -int testQuantizedAabbAgainstQuantizedAabb( - const unsigned short int* aabbMin1, - const unsigned short int* aabbMax1, - const unsigned short int* aabbMin2, - const unsigned short int* aabbMax2) -{ - //int overlap = 1; - if (aabbMin1[0] > aabbMax2[0]) - return 0; - if (aabbMax1[0] < aabbMin2[0]) - return 0; - if (aabbMin1[1] > aabbMax2[1]) - return 0; - if (aabbMax1[1] < aabbMin2[1]) - return 0; - if (aabbMin1[2] > aabbMax2[2]) - return 0; - if (aabbMax1[2] < aabbMin2[2]) - return 0; - return 1; - //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap; - //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap; - //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap; - //return overlap; -} - - -void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization) -{ - float4 clampedPoint = max(point2,bvhAabbMin); - clampedPoint = min (clampedPoint, bvhAabbMax); - - float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization; - if (isMax) - { - out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1)); - out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1)); - out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1)); - } else - { - out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe)); - out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe)); - out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe)); - } - -} - - -// work-in-progress -__kernel void bvhTraversalKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global btAabbCL* aabbs, - __global int4* concavePairsOut, - __global volatile int* numConcavePairsOut, - __global const btBvhSubtreeInfo* subtreeHeadersRoot, - __global const btQuantizedBvhNode* quantizedNodesRoot, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumConcavePairsCapacity) -{ - int id = get_global_id(0); - if (id>=numPairs) - return; - - int bodyIndexA = pairs[id].x; - int bodyIndexB = pairs[id].y; - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - return; - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - if (shapeTypeB!=SHAPE_CONVEX_HULL && - shapeTypeB!=SHAPE_SPHERE && - shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS - ) - return; - - b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes]; - - float4 bvhAabbMin = bvhInfo.m_aabbMin; - float4 bvhAabbMax = bvhInfo.m_aabbMax; - float4 bvhQuantization = bvhInfo.m_quantization; - int numSubtreeHeaders = bvhInfo.m_numSubTrees; - __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset]; - __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset]; - - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization); - quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization); - - for (int i=0;im_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int isLeaf(const btQuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - " \n" - "int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes, points to the root of the subtree\n" - " int m_rootNodeIndex;\n" - " //4 bytes\n" - " int m_subtreeSize;\n" - " int m_padding[3];\n" - "} btBvhSubtreeInfo;\n" - "///keep this in sync with btCollidable.h\n" - "typedef struct\n" - "{\n" - " int m_numChildShapes;\n" - " int blaat2;\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - " \n" - "} btCollidableGpu;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "int testQuantizedAabbAgainstQuantizedAabb(\n" - " const unsigned short int* aabbMin1,\n" - " const unsigned short int* aabbMax1,\n" - " const unsigned short int* aabbMin2,\n" - " const unsigned short int* aabbMax2)\n" - "{\n" - " //int overlap = 1;\n" - " if (aabbMin1[0] > aabbMax2[0])\n" - " return 0;\n" - " if (aabbMax1[0] < aabbMin2[0])\n" - " return 0;\n" - " if (aabbMin1[1] > aabbMax2[1])\n" - " return 0;\n" - " if (aabbMax1[1] < aabbMin2[1])\n" - " return 0;\n" - " if (aabbMin1[2] > aabbMax2[2])\n" - " return 0;\n" - " if (aabbMax1[2] < aabbMin2[2])\n" - " return 0;\n" - " return 1;\n" - " //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" - " //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" - " //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" - " //return overlap;\n" - "}\n" - "void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" - "{\n" - " float4 clampedPoint = max(point2,bvhAabbMin);\n" - " clampedPoint = min (clampedPoint, bvhAabbMax);\n" - " float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" - " if (isMax)\n" - " {\n" - " out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" - " out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" - " out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" - " } else\n" - " {\n" - " out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" - " out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" - " out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" - " }\n" - "}\n" - "// work-in-progress\n" - "__kernel void bvhTraversalKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global btAabbCL* aabbs,\n" - " __global int4* concavePairsOut,\n" - " __global volatile int* numConcavePairsOut,\n" - " __global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" - " __global const btQuantizedBvhNode* quantizedNodesRoot,\n" - " __global const b3BvhInfo* bvhInfos,\n" - " int numPairs,\n" - " int maxNumConcavePairsCapacity)\n" - "{\n" - " int id = get_global_id(0);\n" - " if (id>=numPairs)\n" - " return;\n" - " \n" - " int bodyIndexA = pairs[id].x;\n" - " int bodyIndexB = pairs[id].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " //once the broadphase avoids static-static pairs, we can remove this test\n" - " if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" - " {\n" - " return;\n" - " }\n" - " \n" - " if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" - " return;\n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " \n" - " if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" - " shapeTypeB!=SHAPE_SPHERE &&\n" - " shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" - " )\n" - " return;\n" - " b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" - " float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" - " float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" - " float4 bvhQuantization = bvhInfo.m_quantization;\n" - " int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" - " __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" - " __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" - " \n" - " unsigned short int quantizedQueryAabbMin[3];\n" - " unsigned short int quantizedQueryAabbMax[3];\n" - " quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" - " quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" - " \n" - " for (int i=0;im_worldNormalOnB = -dirOut;//normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - //for (int i=0;im_worldPosB[0] = posOut;//localPoints[contactIdx[i]]; - GET_NPOINTS(*c) = 1;//nContacts; - } - } - - } -} - -typedef float4 Quaternion; -#define make_float4 (float4) - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - -inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;im_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - - -bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - float4* sep, - float* dmin) -{ - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test unit sphere directions - for (int i=0;i0) - crossje *= -1.f; - { - float dist; - bool result = true; - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max00.0f) - { - *sep = -(*sep); - } - return true; -} - - - -__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* unitSphereDirections, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global float* dmins, - int numUnitSphereDirections, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (inumUnitSphereDirections) - { - bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - } //if (hasSeparatingAxis[i]) - }//(i\n" - " *\n" - " * This file was ported from mpr.c file, part of libccd.\n" - " * The Minkoski Portal Refinement implementation was ported \n" - " * to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" - " * at http://github.com/erwincoumans/bullet3\n" - " *\n" - " * Distributed under the OSI-approved BSD License (the \"License\");\n" - " * see .\n" - " * This software is distributed WITHOUT ANY WARRANTY; without even the\n" - " * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" - " * See the License for more information.\n" - " */\n" - "#ifndef B3_MPR_PENETRATION_H\n" - "#define B3_MPR_PENETRATION_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" - "#define B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "typedef struct b3GpuFace b3GpuFace_t;\n" - "struct b3GpuFace\n" - "{\n" - " b3Float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - " int m_unusedPadding1;\n" - " int m_unusedPadding2;\n" - "};\n" - "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" - "struct b3ConvexPolyhedronData\n" - "{\n" - " b3Float4 m_localCenter;\n" - " b3Float4 m_extents;\n" - " b3Float4 mC;\n" - " b3Float4 mE;\n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "};\n" - "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_COLLIDABLE_H\n" - "#define B3_COLLIDABLE_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "enum b3ShapeTypes\n" - "{\n" - " SHAPE_HEIGHT_FIELD=1,\n" - " SHAPE_CONVEX_HULL=3,\n" - " SHAPE_PLANE=4,\n" - " SHAPE_CONCAVE_TRIMESH=5,\n" - " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" - " SHAPE_SPHERE=7,\n" - " MAX_NUM_SHAPE_TYPES,\n" - "};\n" - "typedef struct b3Collidable b3Collidable_t;\n" - "struct b3Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - "};\n" - "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" - "struct b3GpuChildShape\n" - "{\n" - " b3Float4 m_childPosition;\n" - " b3Quat m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "struct b3CompoundOverlappingPair\n" - "{\n" - " int m_bodyIndexA;\n" - " int m_bodyIndexB;\n" - "// int m_pairType;\n" - " int m_childShapeIndexA;\n" - " int m_childShapeIndexB;\n" - "};\n" - "#endif //B3_COLLIDABLE_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#define B3_MPR_SQRT sqrt\n" - "#endif\n" - "#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" - "#define B3_MPR_FABS fabs\n" - "#define B3_MPR_TOLERANCE 1E-6f\n" - "#define B3_MPR_MAX_ITERATIONS 1000\n" - "struct _b3MprSupport_t \n" - "{\n" - " b3Float4 v; //!< Support point in minkowski sum\n" - " b3Float4 v1; //!< Support point in obj1\n" - " b3Float4 v2; //!< Support point in obj2\n" - "};\n" - "typedef struct _b3MprSupport_t b3MprSupport_t;\n" - "struct _b3MprSimplex_t \n" - "{\n" - " b3MprSupport_t ps[4];\n" - " int last; //!< index of last added point\n" - "};\n" - "typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" - "inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" - "{\n" - " return &s->ps[idx];\n" - "}\n" - "inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" - "{\n" - " s->last = size - 1;\n" - "}\n" - "inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" - "{\n" - " return s->last + 1;\n" - "}\n" - "inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" - "{\n" - " // here is no check on boundaries\n" - " return &s->ps[idx];\n" - "}\n" - "inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" - "{\n" - " *d = *s;\n" - "}\n" - "inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" - "{\n" - " b3MprSupportCopy(s->ps + pos, a);\n" - "}\n" - "inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" - "{\n" - " b3MprSupport_t supp;\n" - " b3MprSupportCopy(&supp, &s->ps[pos1]);\n" - " b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" - " b3MprSupportCopy(&s->ps[pos2], &supp);\n" - "}\n" - "inline int b3MprIsZero(float val)\n" - "{\n" - " return B3_MPR_FABS(val) < FLT_EPSILON;\n" - "}\n" - "inline int b3MprEq(float _a, float _b)\n" - "{\n" - " float ab;\n" - " float a, b;\n" - " ab = B3_MPR_FABS(_a - _b);\n" - " if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" - " return 1;\n" - " a = B3_MPR_FABS(_a);\n" - " b = B3_MPR_FABS(_b);\n" - " if (b > a){\n" - " return ab < FLT_EPSILON * b;\n" - " }else{\n" - " return ab < FLT_EPSILON * a;\n" - " }\n" - "}\n" - "inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" - "{\n" - " return b3MprEq((*a).x, (*b).x)\n" - " && b3MprEq((*a).y, (*b).y)\n" - " && b3MprEq((*a).z, (*b).z);\n" - "}\n" - "inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, b3ConstArray(b3Float4) verticesA)\n" - "{\n" - " b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" - " float maxDot = -B3_LARGE_FLOAT;\n" - " if( 0 < hull->m_numVertices )\n" - " {\n" - " const b3Float4 scaled = supportVec;\n" - " int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" - " return verticesA[hull->m_vertexOffset+index];\n" - " }\n" - " return supVec;\n" - "}\n" - "B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " const b3Float4* _dir, b3Float4* outp, int logme)\n" - "{\n" - " //dir is in worldspace, move to local space\n" - " \n" - " b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" - " b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" - " \n" - " b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" - " \n" - " const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" - " \n" - " //find local support vertex\n" - " int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" - " \n" - " b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" - " __global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" - " \n" - " b3Float4 pInA;\n" - " if (logme)\n" - " {\n" - " b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" - " float maxDot = -B3_LARGE_FLOAT;\n" - " if( 0 < hull->m_numVertices )\n" - " {\n" - " const b3Float4 scaled = localDir;\n" - " int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" - " pInA = cpuVertices[hull->m_vertexOffset+index];\n" - " \n" - " }\n" - " } else\n" - " {\n" - " pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" - " }\n" - " //move vertex to world space\n" - " *outp = b3TransformPoint(pInA,pos,orn);\n" - " \n" - "}\n" - "inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " const b3Float4* _dir, b3MprSupport_t *supp)\n" - "{\n" - " b3Float4 dir;\n" - " dir = *_dir;\n" - " b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" - " dir = *_dir*-1.f;\n" - " b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" - " supp->v = supp->v1 - supp->v2;\n" - "}\n" - "inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" - "{\n" - " center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" - " center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" - " center->v = center->v1 - center->v2;\n" - "}\n" - "inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" - "{\n" - " (*v).x = x;\n" - " (*v).y = y;\n" - " (*v).z = z;\n" - " (*v).w = 0.f;\n" - "}\n" - "inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" - "{\n" - " (*v).x += (*w).x;\n" - " (*v).y += (*w).y;\n" - " (*v).z += (*w).z;\n" - "}\n" - "inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" - "{\n" - " *v = *w;\n" - "}\n" - "inline void b3MprVec3Scale(b3Float4 *d, float k)\n" - "{\n" - " *d *= k;\n" - "}\n" - "inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" - "{\n" - " float dot;\n" - " dot = b3Dot3F4(*a,*b);\n" - " return dot;\n" - "}\n" - "inline float b3MprVec3Len2(const b3Float4 *v)\n" - "{\n" - " return b3MprVec3Dot(v, v);\n" - "}\n" - "inline void b3MprVec3Normalize(b3Float4 *d)\n" - "{\n" - " float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" - " b3MprVec3Scale(d, k);\n" - "}\n" - "inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" - "{\n" - " *d = b3Cross3(*a,*b);\n" - " \n" - "}\n" - "inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" - "{\n" - " *d = *v - *w;\n" - "}\n" - "inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" - "{\n" - " b3Float4 v2v1, v3v1;\n" - " b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b3MprVec3Cross(dir, &v2v1, &v3v1);\n" - " b3MprVec3Normalize(dir);\n" - "}\n" - "inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" - " const b3Float4 *dir)\n" - "{\n" - " float dot;\n" - " dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" - " return b3MprIsZero(dot) || dot > 0.f;\n" - "}\n" - "inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" - " const b3MprSupport_t *v4,\n" - " const b3Float4 *dir)\n" - "{\n" - " float dv1, dv2, dv3, dv4;\n" - " float dot1, dot2, dot3;\n" - " // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" - " dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" - " dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" - " dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" - " dv4 = b3MprVec3Dot(&v4->v, dir);\n" - " dot1 = dv4 - dv1;\n" - " dot2 = dv4 - dv2;\n" - " dot3 = dv4 - dv3;\n" - " dot1 = B3_MPR_FMIN(dot1, dot2);\n" - " dot1 = B3_MPR_FMIN(dot1, dot3);\n" - " return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" - "}\n" - "inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal, \n" - " const b3MprSupport_t *v4,\n" - " const b3Float4 *dir)\n" - "{\n" - " float dot;\n" - " dot = b3MprVec3Dot(&v4->v, dir);\n" - " return b3MprIsZero(dot) || dot > 0.f;\n" - "}\n" - "inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" - " const b3MprSupport_t *v4)\n" - "{\n" - " float dot;\n" - " b3Float4 v4v0;\n" - " b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" - " if (dot > 0.f){\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" - " if (dot > 0.f){\n" - " b3MprSimplexSet(portal, 1, v4);\n" - " }else{\n" - " b3MprSimplexSet(portal, 3, v4);\n" - " }\n" - " }else{\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" - " if (dot > 0.f){\n" - " b3MprSimplexSet(portal, 2, v4);\n" - " }else{\n" - " b3MprSimplexSet(portal, 1, v4);\n" - " }\n" - " }\n" - "}\n" - "B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " __global int* hasSepAxis,\n" - " b3MprSimplex_t *portal)\n" - "{\n" - " b3Float4 dir, va, vb;\n" - " float dot;\n" - " int cont;\n" - " \n" - " \n" - " // vertex 0 is center of portal\n" - " b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" - " // vertex 0 is center of portal\n" - " b3MprSimplexSetSize(portal, 1);\n" - " \n" - " b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" - " // Portal's center lies on origin (0,0,0) => we know that objects\n" - " // intersect but we would need to know penetration info.\n" - " // So move center little bit...\n" - " b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" - " b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" - " }\n" - " // vertex 1 = support in direction of origin\n" - " b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Scale(&dir, -1.f);\n" - " b3MprVec3Normalize(&dir);\n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" - " b3MprSimplexSetSize(portal, 2);\n" - " // test if origin isn't outside of v1\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" - " \n" - " if (b3MprIsZero(dot) || dot < 0.f)\n" - " return -1;\n" - " // vertex 2\n" - " b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" - " if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" - " // origin lies on v1\n" - " return 1;\n" - " }else{\n" - " // origin lies on v0-v1 segment\n" - " return 2;\n" - " }\n" - " }\n" - " b3MprVec3Normalize(&dir);\n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" - " \n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" - " if (b3MprIsZero(dot) || dot < 0.f)\n" - " return -1;\n" - " b3MprSimplexSetSize(portal, 3);\n" - " // vertex 3 direction\n" - " b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Cross(&dir, &va, &vb);\n" - " b3MprVec3Normalize(&dir);\n" - " // it is better to form portal faces to be oriented \"outside\" origin\n" - " dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" - " if (dot > 0.f){\n" - " b3MprSimplexSwap(portal, 1, 2);\n" - " b3MprVec3Scale(&dir, -1.f);\n" - " }\n" - " while (b3MprSimplexSize(portal) < 4){\n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" - " \n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" - " if (b3MprIsZero(dot) || dot < 0.f)\n" - " return -1;\n" - " cont = 0;\n" - " // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" - " // continue\n" - " b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 3)->v);\n" - " dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" - " if (dot < 0.f && !b3MprIsZero(dot)){\n" - " b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" - " cont = 1;\n" - " }\n" - " if (!cont){\n" - " // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" - " // continue\n" - " b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" - " if (dot < 0.f && !b3MprIsZero(dot)){\n" - " b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" - " cont = 1;\n" - " }\n" - " }\n" - " if (cont){\n" - " b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Cross(&dir, &va, &vb);\n" - " b3MprVec3Normalize(&dir);\n" - " }else{\n" - " b3MprSimplexSetSize(portal, 4);\n" - " }\n" - " }\n" - " return 0;\n" - "}\n" - "B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " b3MprSimplex_t *portal)\n" - "{\n" - " b3Float4 dir;\n" - " b3MprSupport_t v4;\n" - " for (int i=0;iv,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" - " sum = b[0] + b[1] + b[2] + b[3];\n" - " if (b3MprIsZero(sum) || sum < 0.f){\n" - " b[0] = 0.f;\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 3)->v);\n" - " b[1] = b3MprVec3Dot(&vec, &dir);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b[2] = b3MprVec3Dot(&vec, &dir);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " b[3] = b3MprVec3Dot(&vec, &dir);\n" - " sum = b[1] + b[2] + b[3];\n" - " }\n" - " inv = 1.f / sum;\n" - " b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" - " b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" - " for (i = 0; i < 4; i++){\n" - " b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" - " b3MprVec3Scale(&vec, b[i]);\n" - " b3MprVec3Add(&p1, &vec);\n" - " b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" - " b3MprVec3Scale(&vec, b[i]);\n" - " b3MprVec3Add(&p2, &vec);\n" - " }\n" - " b3MprVec3Scale(&p1, inv);\n" - " b3MprVec3Scale(&p2, inv);\n" - " b3MprVec3Copy(pos, &p1);\n" - " b3MprVec3Add(pos, &p2);\n" - " b3MprVec3Scale(pos, 0.5);\n" - "}\n" - "inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" - "{\n" - " b3Float4 ab;\n" - " b3MprVec3Sub2(&ab, a, b);\n" - " return b3MprVec3Len2(&ab);\n" - "}\n" - "inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" - " const b3Float4 *x0,\n" - " const b3Float4 *b,\n" - " b3Float4 *witness)\n" - "{\n" - " // The computation comes from solving equation of segment:\n" - " // S(t) = x0 + t.d\n" - " // where - x0 is initial point of segment\n" - " // - d is direction of segment from x0 (|d| > 0)\n" - " // - t belongs to <0, 1> interval\n" - " // \n" - " // Than, distance from a segment to some point P can be expressed:\n" - " // D(t) = |x0 + t.d - P|^2\n" - " // which is distance from any point on segment. Minimization\n" - " // of this function brings distance from P to segment.\n" - " // Minimization of D(t) leads to simple quadratic equation that's\n" - " // solving is straightforward.\n" - " //\n" - " // Bonus of this method is witness point for free.\n" - " float dist, t;\n" - " b3Float4 d, a;\n" - " // direction of segment\n" - " b3MprVec3Sub2(&d, b, x0);\n" - " // precompute vector from P to x0\n" - " b3MprVec3Sub2(&a, x0, P);\n" - " t = -1.f * b3MprVec3Dot(&a, &d);\n" - " t /= b3MprVec3Len2(&d);\n" - " if (t < 0.f || b3MprIsZero(t)){\n" - " dist = b3MprVec3Dist2(x0, P);\n" - " if (witness)\n" - " b3MprVec3Copy(witness, x0);\n" - " }else if (t > 1.f || b3MprEq(t, 1.f)){\n" - " dist = b3MprVec3Dist2(b, P);\n" - " if (witness)\n" - " b3MprVec3Copy(witness, b);\n" - " }else{\n" - " if (witness){\n" - " b3MprVec3Copy(witness, &d);\n" - " b3MprVec3Scale(witness, t);\n" - " b3MprVec3Add(witness, x0);\n" - " dist = b3MprVec3Dist2(witness, P);\n" - " }else{\n" - " // recycling variables\n" - " b3MprVec3Scale(&d, t);\n" - " b3MprVec3Add(&d, &a);\n" - " dist = b3MprVec3Len2(&d);\n" - " }\n" - " }\n" - " return dist;\n" - "}\n" - "inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" - " const b3Float4 *x0, const b3Float4 *B,\n" - " const b3Float4 *C,\n" - " b3Float4 *witness)\n" - "{\n" - " // Computation comes from analytic expression for triangle (x0, B, C)\n" - " // T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" - " // Then equation for distance is:\n" - " // D(s, t) = | T(s, t) - P |^2\n" - " // This leads to minimization of quadratic function of two variables.\n" - " // The solution from is taken only if s is between 0 and 1, t is\n" - " // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" - " // computed.\n" - " b3Float4 d1, d2, a;\n" - " float u, v, w, p, q, r;\n" - " float s, t, dist, dist2;\n" - " b3Float4 witness2;\n" - " b3MprVec3Sub2(&d1, B, x0);\n" - " b3MprVec3Sub2(&d2, C, x0);\n" - " b3MprVec3Sub2(&a, x0, P);\n" - " u = b3MprVec3Dot(&a, &a);\n" - " v = b3MprVec3Dot(&d1, &d1);\n" - " w = b3MprVec3Dot(&d2, &d2);\n" - " p = b3MprVec3Dot(&a, &d1);\n" - " q = b3MprVec3Dot(&a, &d2);\n" - " r = b3MprVec3Dot(&d1, &d2);\n" - " s = (q * r - w * p) / (w * v - r * r);\n" - " t = (-s * r - q) / w;\n" - " if ((b3MprIsZero(s) || s > 0.f)\n" - " && (b3MprEq(s, 1.f) || s < 1.f)\n" - " && (b3MprIsZero(t) || t > 0.f)\n" - " && (b3MprEq(t, 1.f) || t < 1.f)\n" - " && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" - " if (witness){\n" - " b3MprVec3Scale(&d1, s);\n" - " b3MprVec3Scale(&d2, t);\n" - " b3MprVec3Copy(witness, x0);\n" - " b3MprVec3Add(witness, &d1);\n" - " b3MprVec3Add(witness, &d2);\n" - " dist = b3MprVec3Dist2(witness, P);\n" - " }else{\n" - " dist = s * s * v;\n" - " dist += t * t * w;\n" - " dist += 2.f * s * t * r;\n" - " dist += 2.f * s * p;\n" - " dist += 2.f * t * q;\n" - " dist += u;\n" - " }\n" - " }else{\n" - " dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" - " dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" - " if (dist2 < dist){\n" - " dist = dist2;\n" - " if (witness)\n" - " b3MprVec3Copy(witness, &witness2);\n" - " }\n" - " dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" - " if (dist2 < dist){\n" - " dist = dist2;\n" - " if (witness)\n" - " b3MprVec3Copy(witness, &witness2);\n" - " }\n" - " }\n" - " return dist;\n" - "}\n" - "B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " b3MprSimplex_t *portal,\n" - " float *depth, b3Float4 *pdir, b3Float4 *pos)\n" - "{\n" - " b3Float4 dir;\n" - " b3MprSupport_t v4;\n" - " unsigned long iterations;\n" - " b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " iterations = 1UL;\n" - " for (int i=0;i find penetration info\n" - " if (portalReachTolerance(portal, &v4, &dir)\n" - " || iterations ==B3_MPR_MAX_ITERATIONS)\n" - " {\n" - " *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" - " *depth = B3_MPR_SQRT(*depth);\n" - " \n" - " if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" - " {\n" - " \n" - " *pdir = dir;\n" - " } \n" - " b3MprVec3Normalize(pdir);\n" - " \n" - " // barycentric coordinates:\n" - " b3FindPos(portal, pos);\n" - " return;\n" - " }\n" - " b3ExpandPortal(portal, &v4);\n" - " iterations++;\n" - " }\n" - "}\n" - "B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" - "{\n" - " // Touching contact on portal's v1 - so depth is zero and direction\n" - " // is unimportant and pos can be guessed\n" - " *depth = 0.f;\n" - " b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" - " b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" - " b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" - " b3MprVec3Scale(pos, 0.5);\n" - "}\n" - "B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" - " float *depth, b3Float4 *dir, b3Float4 *pos)\n" - "{\n" - " \n" - " // Origin lies on v0-v1 segment.\n" - " // Depth is distance to v1, direction also and position must be\n" - " // computed\n" - " b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" - " b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" - " b3MprVec3Scale(pos, 0.5f);\n" - " \n" - " b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" - " *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" - " b3MprVec3Normalize(dir);\n" - "}\n" - "inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" - " b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " __global int* hasSepAxis,\n" - " float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" - "{\n" - " \n" - " b3MprSimplex_t portal;\n" - " \n" - "// if (!hasSepAxis[pairIndex])\n" - " // return -1;\n" - " \n" - " hasSepAxis[pairIndex] = 0;\n" - " int res;\n" - " // Phase 1: Portal discovery\n" - " res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" - " \n" - " \n" - " //sepAxis[pairIndex] = *pdir;//or -dir?\n" - " switch (res)\n" - " {\n" - " case 0:\n" - " {\n" - " // Phase 2: Portal refinement\n" - " \n" - " res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" - " if (res < 0)\n" - " return -1;\n" - " // Phase 3. Penetration info\n" - " b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" - " hasSepAxis[pairIndex] = 1;\n" - " sepAxis[pairIndex] = -*dirOut;\n" - " break;\n" - " }\n" - " case 1:\n" - " {\n" - " // Touching contact on portal's v1.\n" - " b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" - " break;\n" - " }\n" - " case 2:\n" - " {\n" - " \n" - " b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" - " break;\n" - " }\n" - " default:\n" - " {\n" - " hasSepAxis[pairIndex]=0;\n" - " //if (res < 0)\n" - " //{\n" - " // Origin isn't inside portal - no collision.\n" - " return -1;\n" - " //}\n" - " }\n" - " };\n" - " \n" - " return 0;\n" - "};\n" - "#endif //B3_MPR_PENETRATION_H\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" - "#ifdef cl_ext_atomic_counters_32\n" - " #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - " #define counter32_t volatile __global int*\n" - "#endif\n" - "__kernel void mprPenetrationKernel( __global int4* pairs,\n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global float4* separatingNormals,\n" - " __global int* hasSeparatingAxis,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int contactCapacity,\n" - " int numPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " if (im_worldNormalOnB = -dirOut;//normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " //for (int i=0;im_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" - " GET_NPOINTS(*c) = 1;//nContacts;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "typedef float4 Quaternion;\n" - "#define make_float4 (float4)\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, __global const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;im_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices,\n" - " __global const float4* unitSphereDirections,\n" - " int numUnitSphereDirections,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test unit sphere directions\n" - " for (int i=0;i0)\n" - " crossje *= -1.f;\n" - " {\n" - " float dist;\n" - " bool result = true;\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" - " \n" - " if(Max00.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* unitSphereDirections,\n" - " __global float4* separatingNormals,\n" - " __global int* hasSeparatingAxis,\n" - " __global float* dmins,\n" - " int numUnitSphereDirections,\n" - " int numPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " \n" - " if (inumUnitSphereDirections)\n" - " {\n" - " bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis[i] = 1;\n" - " separatingNormals[i] = sepNormal;\n" - " }\n" - " }\n" - " } //if (hasSeparatingAxis[i])\n" - " }//(im_plane.x,face->m_plane.y,face->m_plane.z,0.f); - - if (face->m_numIndices<2) - return false; - - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; - - b = v0; - - for(unsigned i=0; i != face->m_numIndices; ++i) - { - a = b; - float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; - b = vi; - ab = b-a; - ap = p-a; - v = cross3(ab,plane); - - if (dot(ap, v) > 0.f) - { - float ab_m2 = dot(ab, ab); - float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; - out[0].x = s * a.x + rt * b.x; - out[0].y = s * a.y + rt * b.y; - out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; -} - - - - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* convexVertices, - __global const int* convexIndices, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 spherePos2, - float radius, - float4 pos, - float4 quat - ) -{ - - float4 invPos; - float4 invOrn; - - trInverse(pos,quat, &invPos,&invOrn); - - float4 spherePos = transform(&spherePos2,&invPos,&invOrn); - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = (float4)(0, 0, 0, 0); - float4 hitNormalWorld = (float4)(0, 0, 0, 0); - float minDist = -1000000.f; - bool bCollide = true; - - for ( int f = 0; f < numFaces; f++ ) - { - btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; - - // set up a plane equation - float4 planeEqn; - float4 n1 = face.m_plane; - n1.w = 0.f; - planeEqn = n1; - planeEqn.w = face.m_plane.w; - - - // compute a signed distance from the vertex in cloth to the face of rigidbody. - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - // If the distance is positive, the plane is a separating plane. - if ( dist > radius ) - { - bCollide = false; - break; - } - - - if (dist>0) - { - //might hit an edge or vertex - float4 out; - float4 zeroPos = make_float4(0,0,0,0); - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist>minDist) - { - minDist = dist; - closestPnt = pntReturn; - hitNormalWorld = planeEqn; - - } - } else - { - float4 tmp = spherePos-out; - float l2 = dot(tmp,tmp); - if (l2minDist) - { - minDist = dist; - closestPnt = out; - hitNormalWorld = tmp/dist; - - } - - } else - { - bCollide = false; - break; - } - } - } else - { - if ( dist > minDist ) - { - minDist = dist; - closestPnt = pntReturn; - hitNormalWorld.xyz = planeEqn.xyz; - } - } - - } - - - - if (bCollide && minDist > -10000) - { - float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); - float4 pOnB1 = transform(&closestPnt,&pos,&quat); - - float actualDepth = minDist-radius; - if (actualDepth<=0.f) - { - - - pOnB1.w = actualDepth; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - - if (1)//dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - GET_NPOINTS(*c) = 1; - } - - } - }//if (hasCollision) - -} - - - -int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0.f); - { - - for (int i=0;im_numVertices;i++) - { - float4 vtx = convexVertices[hullB->m_vertexOffset+i]; - float curDot = dot(vtx,planeNormalInConvex); - - - if (curDot>maxDot) - { - hitVertex=i; - maxDot=curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints4) - { - numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - - if (numReducedPoints>0) - { - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - resultIndex = dstIdx; - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - //c->setFrictionCoeff(0.7); - //c->setRestituitionCoeff(0.f); - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - switch (numReducedPoints) - { - case 4: - c->m_worldPosB[3] = contactPoints[contactIdx.w]; - case 3: - c->m_worldPosB[2] = contactPoints[contactIdx.z]; - case 2: - c->m_worldPosB[1] = contactPoints[contactIdx.y]; - case 1: - c->m_worldPosB[0] = contactPoints[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = numReducedPoints; - }//if (dstIdx < numPairs) - } - - return resultIndex; -} - - -void computeContactPlaneSphere(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity) -{ - float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - float radius = collidables[collidableIndexB].m_radius; - float4 posA1 = rigidBodies[bodyIndexA].m_pos; - float4 ornA1 = rigidBodies[bodyIndexA].m_quat; - float4 posB1 = rigidBodies[bodyIndexB].m_pos; - float4 ornB1 = rigidBodies[bodyIndexB].m_quat; - - bool hasCollision = false; - float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); - float planeConstant = planeEq.w; - float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; - { - float4 invPosA;Quaternion invOrnA; - trInverse(posA1,ornA1,&invPosA,&invOrnA); - trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - } - float4 planeInConvexPos1; Quaternion planeInConvexOrn1; - { - float4 invPosB;Quaternion invOrnB; - trInverse(posB1,ornB1,&invPosB,&invOrnB); - trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); - } - float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius; - float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant; - hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold(); - if (hasCollision) - { - float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1; - float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1); - float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1); - float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance; - pOnB1.w = distance; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if (hasCollision) -} - - -__kernel void primitiveContactsKernel( __global int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numPairs, int maxContactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i=0) - pairs[pairIndex].z = contactIndex; - - return; - } - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - - float4 posA; - posA = rigidBodies[bodyIndexA].m_pos; - Quaternion ornA; - ornA = rigidBodies[bodyIndexA].m_quat; - - - int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); - - if (contactIndex>=0) - pairs[pairIndex].z = contactIndex; - - return; - } - - if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); - return; - } - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - - - computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); - - return; - } - - - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 spherePos = rigidBodies[bodyIndexA].m_pos; - float sphereRadius = collidables[collidableIndexA].m_radius; - float4 convexPos = rigidBodies[bodyIndexB].m_pos; - float4 convexOrn = rigidBodies[bodyIndexB].m_quat; - - computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - - if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = rigidBodies[bodyIndexA].m_pos; - float4 convexOrn = rigidBodies[bodyIndexA].m_quat; - - computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - return; - } - - - - - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - //sphere-sphere - float radiusA = collidables[collidableIndexA].m_radius; - float radiusB = collidables[collidableIndexB].m_radius; - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - float4 diff = posA-posB; - float len = length(diff); - - ///iff distance positive, don't generate a new contact - if ( len <= (radiusA+radiusB)) - { - ///distance (negative means penetration) - float dist = len - (radiusA+radiusB); - float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); - if (len > 0.00001) - { - normalOnSurfaceB = diff / len; - } - float4 contactPosB = posB + normalOnSurfaceB*radiusB; - contactPosB.w = dist; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_worldPosB[0] = contactPosB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if ( len <= (radiusA+radiusB)) - - return; - }//SHAPE_SPHERE SHAPE_SPHERE - - }// if (i= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - int pairIndex = i; - if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL)) - { - - computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB); - return; - } - - if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE)) - { - - computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); - return; - } - - if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE)) - { - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = posA; - float4 convexOrn = ornA; - - computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - - if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL)) - { - - float4 spherePos = rigidBodies[bodyIndexA].m_pos; - float sphereRadius = collidables[collidableIndexA].m_radius; - float4 convexPos = posB; - float4 convexOrn = ornB; - - - computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - }// if (i 0 && r2 > 0 && r3 > 0 ) - return true; - if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) - return true; - return false; - -} - - -float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) -{ - float4 diff = p - from; - float4 v = to - from; - float t = dot(v,diff); - - if (t > 0) - { - float dotVV = dot(v,v); - if (t < dotVV) - { - t /= dotVV; - diff -= t*v; - } else - { - t = 1; - diff -= v; - } - } else - { - t = 0; - } - *nearest = from + t*v; - return dot(diff,diff); -} - - -void computeContactSphereTriangle(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - const float4* triangleVertices, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 spherePos2, - float radius, - float4 pos, - float4 quat, - int faceIndex - ) -{ - - float4 invPos; - float4 invOrn; - - trInverse(pos,quat, &invPos,&invOrn); - float4 spherePos = transform(&spherePos2,&invPos,&invOrn); - int numFaces = 3; - float4 closestPnt = (float4)(0, 0, 0, 0); - float4 hitNormalWorld = (float4)(0, 0, 0, 0); - float minDist = -1000000.f; - bool bCollide = false; - - - ////////////////////////////////////// - - float4 sphereCenter; - sphereCenter = spherePos; - - const float4* vertices = triangleVertices; - float contactBreakingThreshold = 0.f;//todo? - float radiusWithThreshold = radius + contactBreakingThreshold; - float4 edge10; - edge10 = vertices[1]-vertices[0]; - edge10.w = 0.f;//is this needed? - float4 edge20; - edge20 = vertices[2]-vertices[0]; - edge20.w = 0.f;//is this needed? - float4 normal = cross3(edge10,edge20); - normal = normalize(normal); - float4 p1ToCenter; - p1ToCenter = sphereCenter - vertices[0]; - - float distanceFromPlane = dot(p1ToCenter,normal); - - if (distanceFromPlane < 0.f) - { - //triangle facing the other way - distanceFromPlane *= -1.f; - normal *= -1.f; - } - hitNormalWorld = normal; - - bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold; - - // Check for contact / intersection - bool hasContact = false; - float4 contactPoint; - if (isInsideContactPlane) - { - - if (pointInTriangle(vertices,&normal, &sphereCenter)) - { - // Inside the contact wedge - touches a point on the shell plane - hasContact = true; - contactPoint = sphereCenter - normal*distanceFromPlane; - - } else { - // Could be inside one of the contact capsules - float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold; - float4 nearestOnEdge; - int numEdges = 3; - for (int i = 0; i < numEdges; i++) - { - float4 pa =vertices[i]; - float4 pb = vertices[(i+1)%3]; - - float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge); - if (distanceSqr < contactCapsuleRadiusSqr) - { - // Yep, we're inside a capsule - hasContact = true; - contactPoint = nearestOnEdge; - - } - - } - } - } - - if (hasContact) - { - - closestPnt = contactPoint; - float4 contactToCenter = sphereCenter - contactPoint; - minDist = length(contactToCenter); - if (minDist>FLT_EPSILON) - { - hitNormalWorld = normalize(contactToCenter);//*(1./minDist); - bCollide = true; - } - - } - - - ///////////////////////////////////// - - if (bCollide && minDist > -10000) - { - - float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); - float4 pOnB1 = transform(&closestPnt,&pos,&quat); - float actualDepth = minDist-radius; - - - if (actualDepth<=0.f) - { - pOnB1.w = actualDepth; - int dstIdx; - - - float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1); - if (lenSqr>FLT_EPSILON) - { - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - - c->m_childIndexA = -1; - c->m_childIndexB = faceIndex; - - GET_NPOINTS(*c) = 1; - } - } - - } - }//if (hasCollision) - -} - - - -// work-in-progress -__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numConcavePairs, int maxContactCapacity - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE) - { - int f = concavePairs[i].z; - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - } - - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = rigidBodies[bodyIndexA].m_pos; - float4 convexOrn = rigidBodies[bodyIndexA].m_quat; - - computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, - rigidBodies,collidables, - verticesA, - globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn, f); - - return; - } -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h deleted file mode 100644 index b2e0a2dd472..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ /dev/null @@ -1,1288 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* primitiveContactsKernelsCL = - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_PLANE 4\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define SHAPE_SPHERE 7\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define max2 max\n" - "#define min2 min\n" - "typedef unsigned int u32;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "///keep this in sync with btCollidable.h\n" - "typedef struct\n" - "{\n" - " int m_numChildShapes;\n" - " float m_radius;\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - " \n" - "} btCollidableGpu;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx; \n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " \n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " \n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} btGpuFace;\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "//#define dot3F4 dot\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "void trInverse(float4 translationIn, Quaternion orientationIn,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtInvert(orientationIn);\n" - " *translationOut = qtRotate(*orientationOut, -translationIn);\n" - "}\n" - "void trMul(float4 translationA, Quaternion orientationA,\n" - " float4 translationB, Quaternion orientationB,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtMul(orientationA,orientationB);\n" - " *translationOut = transform(&translationB,&translationA,&orientationA);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "__inline float4 lerp3(const float4 a,const float4 b, float t)\n" - "{\n" - " return make_float4( a.x + (b.x - a.x) * t,\n" - " a.y + (b.y - a.y) * t,\n" - " a.z + (b.z - a.z) * t,\n" - " 0.f);\n" - "}\n" - "float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" - "{\n" - " float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" - " float dist = dot3F4(n, point) + planeEqn.w;\n" - " *closestPointOnFace = point - dist * n;\n" - " return dist;\n" - "}\n" - "inline bool IsPointInPolygon(float4 p, \n" - " const btGpuFace* face,\n" - " __global const float4* baseVertex,\n" - " __global const int* convexIndices,\n" - " float4* out)\n" - "{\n" - " float4 a;\n" - " float4 b;\n" - " float4 ab;\n" - " float4 ap;\n" - " float4 v;\n" - " float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" - " \n" - " if (face->m_numIndices<2)\n" - " return false;\n" - " \n" - " float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" - " \n" - " b = v0;\n" - " for(unsigned i=0; i != face->m_numIndices; ++i)\n" - " {\n" - " a = b;\n" - " float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" - " b = vi;\n" - " ab = b-a;\n" - " ap = p-a;\n" - " v = cross3(ab,plane);\n" - " if (dot(ap, v) > 0.f)\n" - " {\n" - " float ab_m2 = dot(ab, ab);\n" - " float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" - " if (rt <= 0.f)\n" - " {\n" - " *out = a;\n" - " }\n" - " else if (rt >= 1.f) \n" - " {\n" - " *out = b;\n" - " }\n" - " else\n" - " {\n" - " float s = 1.f - rt;\n" - " out[0].x = s * a.x + rt * b.x;\n" - " out[0].y = s * a.y + rt * b.y;\n" - " out[0].z = s * a.z + rt * b.z;\n" - " }\n" - " return false;\n" - " }\n" - " }\n" - " return true;\n" - "}\n" - "void computeContactSphereConvex(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB, \n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " __global const float4* convexVertices,\n" - " __global const int* convexIndices,\n" - " __global const btGpuFace* faces,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity,\n" - " float4 spherePos2,\n" - " float radius,\n" - " float4 pos,\n" - " float4 quat\n" - " )\n" - "{\n" - " float4 invPos;\n" - " float4 invOrn;\n" - " trInverse(pos,quat, &invPos,&invOrn);\n" - " float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" - " int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" - " int numFaces = convexShapes[shapeIndex].m_numFaces;\n" - " float4 closestPnt = (float4)(0, 0, 0, 0);\n" - " float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" - " float minDist = -1000000.f;\n" - " bool bCollide = true;\n" - " for ( int f = 0; f < numFaces; f++ )\n" - " {\n" - " btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" - " // set up a plane equation \n" - " float4 planeEqn;\n" - " float4 n1 = face.m_plane;\n" - " n1.w = 0.f;\n" - " planeEqn = n1;\n" - " planeEqn.w = face.m_plane.w;\n" - " \n" - " \n" - " // compute a signed distance from the vertex in cloth to the face of rigidbody.\n" - " float4 pntReturn;\n" - " float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" - " // If the distance is positive, the plane is a separating plane. \n" - " if ( dist > radius )\n" - " {\n" - " bCollide = false;\n" - " break;\n" - " }\n" - " if (dist>0)\n" - " {\n" - " //might hit an edge or vertex\n" - " float4 out;\n" - " float4 zeroPos = make_float4(0,0,0,0);\n" - " bool isInPoly = IsPointInPolygon(spherePos,\n" - " &face,\n" - " &convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" - " convexIndices,\n" - " &out);\n" - " if (isInPoly)\n" - " {\n" - " if (dist>minDist)\n" - " {\n" - " minDist = dist;\n" - " closestPnt = pntReturn;\n" - " hitNormalWorld = planeEqn;\n" - " \n" - " }\n" - " } else\n" - " {\n" - " float4 tmp = spherePos-out;\n" - " float l2 = dot(tmp,tmp);\n" - " if (l2minDist)\n" - " {\n" - " minDist = dist;\n" - " closestPnt = out;\n" - " hitNormalWorld = tmp/dist;\n" - " \n" - " }\n" - " \n" - " } else\n" - " {\n" - " bCollide = false;\n" - " break;\n" - " }\n" - " }\n" - " } else\n" - " {\n" - " if ( dist > minDist )\n" - " {\n" - " minDist = dist;\n" - " closestPnt = pntReturn;\n" - " hitNormalWorld.xyz = planeEqn.xyz;\n" - " }\n" - " }\n" - " \n" - " }\n" - " \n" - " if (bCollide && minDist > -10000)\n" - " {\n" - " float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" - " float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" - " \n" - " float actualDepth = minDist-radius;\n" - " if (actualDepth<=0.f)\n" - " {\n" - " \n" - " pOnB1.w = actualDepth;\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " \n" - " if (1)//dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB1;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_worldPosB[0] = pOnB1;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " } \n" - " }\n" - " }//if (hasCollision)\n" - "}\n" - " \n" - "int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" - "{\n" - " if( nPoints == 0 )\n" - " return 0;\n" - " \n" - " if (nPoints <=4)\n" - " return nPoints;\n" - " \n" - " \n" - " if (nPoints >64)\n" - " nPoints = 64;\n" - " \n" - " float4 center = make_float4(0.f);\n" - " {\n" - " \n" - " for (int i=0;im_numVertices;i++)\n" - " {\n" - " float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" - " float curDot = dot(vtx,planeNormalInConvex);\n" - " if (curDot>maxDot)\n" - " {\n" - " hitVertex=i;\n" - " maxDot=curDot;\n" - " hitVtx = vtx;\n" - " //make sure the deepest points is always included\n" - " if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" - " numPoints--;\n" - " }\n" - " if (numPoints4)\n" - " {\n" - " numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" - " }\n" - " if (numReducedPoints>0)\n" - " {\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " resultIndex = dstIdx;\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -planeNormalWorld;\n" - " //c->setFrictionCoeff(0.7);\n" - " //c->setRestituitionCoeff(0.f);\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " switch (numReducedPoints)\n" - " {\n" - " case 4:\n" - " c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" - " case 3:\n" - " c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" - " case 2:\n" - " c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" - " case 1:\n" - " c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" - " default:\n" - " {\n" - " }\n" - " };\n" - " \n" - " GET_NPOINTS(*c) = numReducedPoints;\n" - " }//if (dstIdx < numPairs)\n" - " } \n" - " return resultIndex;\n" - "}\n" - "void computeContactPlaneSphere(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB, \n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const btGpuFace* faces,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity)\n" - "{\n" - " float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" - " float radius = collidables[collidableIndexB].m_radius;\n" - " float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" - " float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" - " float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" - " float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" - " \n" - " bool hasCollision = false;\n" - " float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" - " float planeConstant = planeEq.w;\n" - " float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" - " {\n" - " float4 invPosA;Quaternion invOrnA;\n" - " trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" - " trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" - " }\n" - " float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" - " {\n" - " float4 invPosB;Quaternion invOrnB;\n" - " trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" - " trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); \n" - " }\n" - " float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" - " float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" - " float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" - " hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" - " if (hasCollision)\n" - " {\n" - " float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1;\n" - " float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" - " float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" - " float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" - " pOnB1.w = distance;\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB1;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_worldPosB[0] = pOnB1;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " }//if (dstIdx < numPairs)\n" - " }//if (hasCollision)\n" - "}\n" - "__kernel void primitiveContactsKernel( __global int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numPairs, int maxContactCapacity)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " float4 worldVertsB1[64];\n" - " float4 worldVertsB2[64];\n" - " int capacityWorldVerts = 64; \n" - " float4 localContactsOut[64];\n" - " int localContactCapacity=64;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " if (i=0)\n" - " pairs[pairIndex].z = contactIndex;\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" - " {\n" - " float4 posA;\n" - " posA = rigidBodies[bodyIndexA].m_pos;\n" - " Quaternion ornA;\n" - " ornA = rigidBodies[bodyIndexA].m_quat;\n" - " int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" - " if (contactIndex>=0)\n" - " pairs[pairIndex].z = contactIndex;\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" - " {\n" - " computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" - " return;\n" - " }\n" - " \n" - " \n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" - " {\n" - " \n" - " float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" - " float sphereRadius = collidables[collidableIndexA].m_radius;\n" - " float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" - " float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" - " computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " \n" - " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" - " float sphereRadius = collidables[collidableIndexB].m_radius;\n" - " float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" - " float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" - " computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " return;\n" - " }\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " //sphere-sphere\n" - " float radiusA = collidables[collidableIndexA].m_radius;\n" - " float radiusB = collidables[collidableIndexB].m_radius;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " float4 diff = posA-posB;\n" - " float len = length(diff);\n" - " \n" - " ///iff distance positive, don't generate a new contact\n" - " if ( len <= (radiusA+radiusB))\n" - " {\n" - " ///distance (negative means penetration)\n" - " float dist = len - (radiusA+radiusB);\n" - " float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" - " if (len > 0.00001)\n" - " {\n" - " normalOnSurfaceB = diff / len;\n" - " }\n" - " float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" - " contactPosB.w = dist;\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = normalOnSurfaceB;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_worldPosB[0] = contactPosB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " }//if (dstIdx < numPairs)\n" - " }//if ( len <= (radiusA+radiusB))\n" - " return;\n" - " }//SHAPE_SPHERE SHAPE_SPHERE\n" - " }// if (i= 0)\n" - " {\n" - " collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " posA = newPosA;\n" - " ornA = newOrnA;\n" - " } else\n" - " {\n" - " collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " }\n" - " \n" - " if (childShapeIndexB>=0)\n" - " {\n" - " collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " } else\n" - " {\n" - " collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" - " }\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " int pairIndex = i;\n" - " if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" - " {\n" - " computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" - " return;\n" - " }\n" - " if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" - " {\n" - " computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" - " return;\n" - " }\n" - " if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" - " {\n" - " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" - " float sphereRadius = collidables[collidableIndexB].m_radius;\n" - " float4 convexPos = posA;\n" - " float4 convexOrn = ornA;\n" - " \n" - " computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " \n" - " return;\n" - " }\n" - " if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" - " {\n" - " float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" - " float sphereRadius = collidables[collidableIndexA].m_radius;\n" - " float4 convexPos = posB;\n" - " float4 convexOrn = ornB;\n" - " \n" - " computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " \n" - " return;\n" - " }\n" - " }// if (i 0 && r2 > 0 && r3 > 0 )\n" - " return true;\n" - " if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" - " return true;\n" - " return false;\n" - "}\n" - "float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" - "{\n" - " float4 diff = p - from;\n" - " float4 v = to - from;\n" - " float t = dot(v,diff);\n" - " \n" - " if (t > 0) \n" - " {\n" - " float dotVV = dot(v,v);\n" - " if (t < dotVV) \n" - " {\n" - " t /= dotVV;\n" - " diff -= t*v;\n" - " } else \n" - " {\n" - " t = 1;\n" - " diff -= v;\n" - " }\n" - " } else\n" - " {\n" - " t = 0;\n" - " }\n" - " *nearest = from + t*v;\n" - " return dot(diff,diff); \n" - "}\n" - "void computeContactSphereTriangle(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB,\n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " const float4* triangleVertices,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity,\n" - " float4 spherePos2,\n" - " float radius,\n" - " float4 pos,\n" - " float4 quat,\n" - " int faceIndex\n" - " )\n" - "{\n" - " float4 invPos;\n" - " float4 invOrn;\n" - " trInverse(pos,quat, &invPos,&invOrn);\n" - " float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" - " int numFaces = 3;\n" - " float4 closestPnt = (float4)(0, 0, 0, 0);\n" - " float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" - " float minDist = -1000000.f;\n" - " bool bCollide = false;\n" - " \n" - " //////////////////////////////////////\n" - " float4 sphereCenter;\n" - " sphereCenter = spherePos;\n" - " const float4* vertices = triangleVertices;\n" - " float contactBreakingThreshold = 0.f;//todo?\n" - " float radiusWithThreshold = radius + contactBreakingThreshold;\n" - " float4 edge10;\n" - " edge10 = vertices[1]-vertices[0];\n" - " edge10.w = 0.f;//is this needed?\n" - " float4 edge20;\n" - " edge20 = vertices[2]-vertices[0];\n" - " edge20.w = 0.f;//is this needed?\n" - " float4 normal = cross3(edge10,edge20);\n" - " normal = normalize(normal);\n" - " float4 p1ToCenter;\n" - " p1ToCenter = sphereCenter - vertices[0];\n" - " \n" - " float distanceFromPlane = dot(p1ToCenter,normal);\n" - " if (distanceFromPlane < 0.f)\n" - " {\n" - " //triangle facing the other way\n" - " distanceFromPlane *= -1.f;\n" - " normal *= -1.f;\n" - " }\n" - " hitNormalWorld = normal;\n" - " bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" - " \n" - " // Check for contact / intersection\n" - " bool hasContact = false;\n" - " float4 contactPoint;\n" - " if (isInsideContactPlane) \n" - " {\n" - " \n" - " if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" - " {\n" - " // Inside the contact wedge - touches a point on the shell plane\n" - " hasContact = true;\n" - " contactPoint = sphereCenter - normal*distanceFromPlane;\n" - " \n" - " } else {\n" - " // Could be inside one of the contact capsules\n" - " float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" - " float4 nearestOnEdge;\n" - " int numEdges = 3;\n" - " for (int i = 0; i < numEdges; i++) \n" - " {\n" - " float4 pa =vertices[i];\n" - " float4 pb = vertices[(i+1)%3];\n" - " float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" - " if (distanceSqr < contactCapsuleRadiusSqr) \n" - " {\n" - " // Yep, we're inside a capsule\n" - " hasContact = true;\n" - " contactPoint = nearestOnEdge;\n" - " \n" - " }\n" - " \n" - " }\n" - " }\n" - " }\n" - " if (hasContact) \n" - " {\n" - " closestPnt = contactPoint;\n" - " float4 contactToCenter = sphereCenter - contactPoint;\n" - " minDist = length(contactToCenter);\n" - " if (minDist>FLT_EPSILON)\n" - " {\n" - " hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" - " bCollide = true;\n" - " }\n" - " \n" - " }\n" - " /////////////////////////////////////\n" - " if (bCollide && minDist > -10000)\n" - " {\n" - " \n" - " float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" - " float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" - " float actualDepth = minDist-radius;\n" - " \n" - " if (actualDepth<=0.f)\n" - " {\n" - " pOnB1.w = actualDepth;\n" - " int dstIdx;\n" - " \n" - " float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" - " if (lenSqr>FLT_EPSILON)\n" - " {\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB1;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_worldPosB[0] = pOnB1;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = faceIndex;\n" - " GET_NPOINTS(*c) = 1;\n" - " } \n" - " }\n" - " }\n" - " }//if (hasCollision)\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numConcavePairs, int maxContactCapacity\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " int pairIdx = i;\n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" - " {\n" - " int f = concavePairs[i].z;\n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " }\n" - " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" - " float sphereRadius = collidables[collidableIndexB].m_radius;\n" - " float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" - " float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" - " computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" - " rigidBodies,collidables,\n" - " verticesA,\n" - " globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn, f);\n" - " return;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl deleted file mode 100644 index a6565fd6fac..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ /dev/null @@ -1,2018 +0,0 @@ -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 - -#define B3_MAX_STACK_DEPTH 256 - - -typedef unsigned int u32; - -///keep this in sync with btCollidable.h -typedef struct -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - union - { - float m_radius; - int m_compoundBvhIndex; - }; - - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} b3QuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - - -int getTriangleIndex(const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeafNode(const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int getEscapeIndex(const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} b3BvhSubtreeInfo; - - - - - - - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -} ConvexPolyhedronCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int2.h" - - - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define make_float4 (float4) - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); - - -// float4 a1 = make_float4(a.xyz,0.f); -// float4 b1 = make_float4(b.xyz,0.f); - -// return cross(a1,b1); - -//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); - - // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); - - //return c; -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = make_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - -inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;im_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;im_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); - - if(Max01e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) - return false; - return true; -} - - - -bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;im_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - -bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* verticesA, - __global const float4* uniqueEdgesA, - __global const btGpuFace* facesA, - __global const int* indicesA, - const float4* verticesB, - const float4* uniqueEdgesB, - const btGpuFace* facesB, - const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;im_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); - - if(Max00.0f) - { - *sep = -(*sep); - } - return true; -} - - -inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, __global const float4* vertices,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1); - - if(Max0m_numFaces; - // Test normals from hullA - for(int i=0;im_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - - curPlaneTests++; - - float d; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d)) - return false; - - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - - return true; -} - - - - -bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - float4* sep, - float* dmin) -{ - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test unit sphere directions - for (int i=0;i0) - crossje *= -1.f; - { - float dist; - bool result = true; - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max00.0f) - { - *sep = -(*sep); - } - return true; -} - - -bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje*=-1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max00.0f) - { - *sep = -(*sep); - } - return true; -} - - -// work-in-progress -__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global const btGpuChildShape* gpuChildShapes, - __global volatile float4* gpuCompoundSepNormalsOut, - __global volatile int* gpuHasCompoundSepNormalsOut, - int numCompoundPairs - ) -{ - - int i = get_global_id(0); - if (i= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - gpuHasCompoundSepNormalsOut[i] = 0; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) - { - return; - } - - int hasSeparatingAxis = 5; - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - posA.w = 0.f; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1,0,0,0); - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } else//(!sepB) - { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - if (sepEE) - { - gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - }//sepEE - }//(!sepB) - }//(!sepA) - - - } - -} - - -inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) -{ - b3Float4 vecOut; - vecOut = b3MakeFloat4( - (float)(vecIn[0]) / (quantization.x), - (float)(vecIn[1]) / (quantization.y), - (float)(vecIn[2]) / (quantization.z), - 0.f); - - vecOut += bvhAabbMin; - return vecOut; -} - -inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) -{ - b3Float4 vecOut; - vecOut = b3MakeFloat4( - (float)(vecIn[0]) / (quantization.x), - (float)(vecIn[1]) / (quantization.y), - (float)(vecIn[2]) / (quantization.z), - 0.f); - - vecOut += bvhAabbMin; - return vecOut; -} - - -// work-in-progress -__kernel void findCompoundPairsKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global b3Aabb_t* aabbLocalSpace, - __global const btGpuChildShape* gpuChildShapes, - __global volatile int4* gpuCompoundPairsOut, - __global volatile int* numCompoundPairsOut, - __global const b3BvhSubtreeInfo* subtrees, - __global const b3QuantizedBvhNode* quantizedNodes, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumCompoundPairsCapacity - ) -{ - - int i = get_global_id(0); - - if (imaxStackDepth && !(isLeafA && isLeafB)) - { - //printf("Error: traversal exceeded maxStackDepth"); - continue; - } - - if(isInternalA) - { - int nodeAleftChild = node.x+1; - bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]); - int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]); - - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); - - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); - } - else - { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); - } - } - else - { - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); - } - else - { - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdxm_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; - int numVertices = polyB.m_numIndices; - if (numVertices>capacityWorldVerts) - numVertices = capacityWorldVerts; - - for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - } - - int closestFaceA=0; - { - float dmin = FLT_MAX; - for(int face=0;facem_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; - if (numVerticesA>capacityWorldVerts) - numVerticesA = capacityWorldVerts; - - for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - concaveHasSeparatingNormals[i] = 0; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& - collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut) -{ - - int ve; - float ds, de; - int numVertsOut = 0; -//double-check next test - if (numVertsIn < 2) - return 0; - - float4 firstVertex=pVtxIn[numVertsIn-1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex=pVtxIn[ve]; - - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; - - if (ds<0) - { - if (de<0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - -int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, - const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;facem_numFaces;face++) - { - const float4 Normal = make_float4( - faces[hullA->m_faceOffset+face].m_plane.x, - faces[hullA->m_faceOffset+face].m_plane.y, - faces[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0m_vertexOffset+indices[polyA.m_indexOffset+e0]]; - const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = qtRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;im_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; - const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = qtRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;im_numFaces;face++) - { - const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, - faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices, - faces, - indices,localContactsOut,localContactCapacity); - } - - return numContactsOut; -} - - -int clipHullAgainstHullLocalA(const float4 separatingNormal, - const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const b3GpuFace_t* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const b3GpuFace_t* facesB, - __global const int* indicesB, - float4* localContactsOut, - int localContactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;facem_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, - verticesA,facesA,indicesA, - verticesB,facesB,indicesB, - localContactsOut,localContactCapacity); - } - - return numContactsOut; -} - -#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; } -#define REDUCE_MIN(v, n) {int i=0;\ -for(int offset=0; offset64) - nPoints = 64; - - float4 center = make_float4(0.f); - { - - for (int i=0;i a[ie].x )? a[0].x: a[ie].x; - a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; - a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; - a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w; - } - - idx[0] = (int)a[0].x & 0xff; - idx[1] = (int)a[0].y & 0xff; - idx[2] = (int)a[0].z & 0xff; - idx[3] = (int)a[0].w & 0xff; - } - } - - { - float2 h[64]; - PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); - REDUCE_MIN( h, nPoints ); - max00 = h[0]; - } - } - - contactIdx[0] = idx[0]; - contactIdx[1] = idx[1]; - contactIdx[2] = idx[2]; - contactIdx[3] = idx[3]; - - - return 4; - } -} - - - -__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* closestPointsWorld, - __global const float4* separatingNormalsWorld, - __global const int* contactCounts, - __global const int* contactOffsets, - __global struct b3Contact4Data* restrict contactsOut, - counter32_t nContactsOut, - int contactCapacity, - int numPairs, - int pairIndex - ) -{ - int idx = get_global_id(0); - - if (idxm_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = idx; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - for (int i=0;im_worldPosB[i] = localPoints[contactIdx[i]]; - } - GET_NPOINTS(*c) = nContacts; - } - } -} - - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - -void trMul(float4 translationA, Quaternion orientationA, - float4 translationB, Quaternion orientationB, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtMul(orientationA,orientationB); - *translationOut = transform(&translationB,&translationA,&orientationA); -} - - - - -__kernel void clipHullHullKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numPairs, - int contactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i0) - { - float4 normal = -separatingNormals[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - - int mprContactIndex = pairs[pairIndex].z; - - int dstIdx = mprContactIndex; - if (dstIdx<0) - { - AppendInc( nGlobalContactsOut, dstIdx ); - } - - if (dstIdxm_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - for (int i=0;i0||(mprContactIndex<0)) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (hasSeparatingAxis[i]) - }// if (i= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -gpuCompoundSepNormalsOut[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if ((dstIdx+nReducedContacts) < maxContactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = gpuCompoundPairs[pairIndex].x; - int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;im_worldPosB[i] = pointsIn[contactIdx[i]]; - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (gpuHasCompoundSepNormalsOut[i]) - }// if (i 0.00001) - { - normalOnSurfaceB = diff / len; - } - float4 contactPosB = posB + normalOnSurfaceB*radiusB; - contactPosB.w = dist; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdx < contactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_worldPosB[0] = contactPosB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if ( len <= (radiusA+radiusB)) - }//SHAPE_SPHERE SHAPE_SPHERE - }//if (i0) - { - float4 normal = -separatingNormals[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdxm_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = concavePairsIn[pairIndex].x; - int bodyB = concavePairsIn[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;im_worldPosB[i] = pointsIn[contactIdx[i]]; - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (im_numFaces;face++) - { - const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, - faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;facem_numFaces;face++) - { - const float4 Normal = make_float4( - faces[hullA->m_faceOffset+face].m_plane.x, - faces[hullA->m_faceOffset+face].m_plane.y, - faces[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices; - for(int e0=0;e0m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - -int clipFaces(__global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - __global float4* worldVertsB2, - int capacityWorldVertsB2, - const float minDist, float maxDist, - __global int4* clippingFaces, - int pairIndex) -{ - int numContactsOut = 0; - - int closestFaceA = clippingFaces[pairIndex].x; - int closestFaceB = clippingFaces[pairIndex].y; - int numVertsInA = clippingFaces[pairIndex].z; - int numVertsInB = clippingFaces[pairIndex].w; - - int numVertsOut = 0; - - if (closestFaceA<0) - return numContactsOut; - - __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; - __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; - - - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - - for(int e0=0;e0=0) - { - - - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - - for(int e0=0;e00) - { - - __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity]; - float4 normal = -separatingNormals[i]; - - int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int mprContactIndex = pairs[pairIndex].z; - - int dstIdx = mprContactIndex; - - if (dstIdx<0) - { - AppendInc( nGlobalContactsOut, dstIdx ); - } -//#if 0 - - if (dstIdx < contactCapacity) - { - - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - - pairs[pairIndex].w = dstIdx; - - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA =-1; - c->m_childIndexB =-1; - - switch (nReducedContacts) - { - case 4: - c->m_worldPosB[3] = pointsIn[contactIdx.w]; - case 3: - c->m_worldPosB[2] = pointsIn[contactIdx.z]; - case 2: - c->m_worldPosB[1] = pointsIn[contactIdx.y]; - case 1: - if (mprContactIndex<0)//test - c->m_worldPosB[0] = pointsIn[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = nReducedContacts; - - } - - -//#endif - - }// if (numContactsOut>0) - }// if (hasSeparatingAxis[i]) - }// if (i1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" - "#define B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "typedef struct b3GpuFace b3GpuFace_t;\n" - "struct b3GpuFace\n" - "{\n" - " b3Float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - " int m_unusedPadding1;\n" - " int m_unusedPadding2;\n" - "};\n" - "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" - "struct b3ConvexPolyhedronData\n" - "{\n" - " b3Float4 m_localCenter;\n" - " b3Float4 m_extents;\n" - " b3Float4 mC;\n" - " b3Float4 mE;\n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "};\n" - "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_COLLIDABLE_H\n" - "#define B3_COLLIDABLE_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "enum b3ShapeTypes\n" - "{\n" - " SHAPE_HEIGHT_FIELD=1,\n" - " SHAPE_CONVEX_HULL=3,\n" - " SHAPE_PLANE=4,\n" - " SHAPE_CONCAVE_TRIMESH=5,\n" - " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" - " SHAPE_SPHERE=7,\n" - " MAX_NUM_SHAPE_TYPES,\n" - "};\n" - "typedef struct b3Collidable b3Collidable_t;\n" - "struct b3Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - "};\n" - "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" - "struct b3GpuChildShape\n" - "{\n" - " b3Float4 m_childPosition;\n" - " b3Quat m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "struct b3CompoundOverlappingPair\n" - "{\n" - " int m_bodyIndexA;\n" - " int m_bodyIndexB;\n" - "// int m_pairType;\n" - " int m_childShapeIndexA;\n" - " int m_childShapeIndexB;\n" - "};\n" - "#endif //B3_COLLIDABLE_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "//#define dot3F4 dot\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "__inline float4 lerp3(const float4 a,const float4 b, float t)\n" - "{\n" - " return make_float4( a.x + (b.x - a.x) * t,\n" - " a.y + (b.y - a.y) * t,\n" - " a.z + (b.z - a.z) * t,\n" - " 0.f);\n" - "}\n" - "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" - "int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" - "{\n" - " \n" - " int ve;\n" - " float ds, de;\n" - " int numVertsOut = 0;\n" - " //double-check next test\n" - " if (numVertsIn < 2)\n" - " return 0;\n" - " \n" - " float4 firstVertex=pVtxIn[numVertsIn-1];\n" - " float4 endVertex = pVtxIn[0];\n" - " \n" - " ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" - " \n" - " for (ve = 0; ve < numVertsIn; ve++)\n" - " {\n" - " endVertex=pVtxIn[ve];\n" - " de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" - " if (ds<0)\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start < 0, end < 0, so output endVertex\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " else\n" - " {\n" - " // Start < 0, end >= 0, so output intersection\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " }\n" - " }\n" - " else\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start >= 0, end < 0 so output intersection and end\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " }\n" - " firstVertex = endVertex;\n" - " ds = de;\n" - " }\n" - " return numVertsOut;\n" - "}\n" - "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" - "int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" - "{\n" - " \n" - " int ve;\n" - " float ds, de;\n" - " int numVertsOut = 0;\n" - "//double-check next test\n" - " if (numVertsIn < 2)\n" - " return 0;\n" - " float4 firstVertex=pVtxIn[numVertsIn-1];\n" - " float4 endVertex = pVtxIn[0];\n" - " \n" - " ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" - " for (ve = 0; ve < numVertsIn; ve++)\n" - " {\n" - " endVertex=pVtxIn[ve];\n" - " de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" - " if (ds<0)\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start < 0, end < 0, so output endVertex\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " else\n" - " {\n" - " // Start < 0, end >= 0, so output intersection\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " }\n" - " }\n" - " else\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start >= 0, end < 0 so output intersection and end\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " }\n" - " firstVertex = endVertex;\n" - " ds = de;\n" - " }\n" - " return numVertsOut;\n" - "}\n" - "int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, \n" - " const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" - " float4* worldVertsB2, int capacityWorldVertsB2,\n" - " const float minDist, float maxDist,\n" - " __global const float4* vertices,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " float4* contactsOut,\n" - " int contactCapacity)\n" - "{\n" - " int numContactsOut = 0;\n" - " float4* pVtxIn = worldVertsB1;\n" - " float4* pVtxOut = worldVertsB2;\n" - " \n" - " int numVertsIn = numWorldVertsB1;\n" - " int numVertsOut = 0;\n" - " int closestFaceA=-1;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;facem_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " faces[hullA->m_faceOffset+face].m_plane.x, \n" - " faces[hullA->m_faceOffset+face].m_plane.y, \n" - " faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " }\n" - " }\n" - " }\n" - " if (closestFaceA<0)\n" - " return numContactsOut;\n" - " b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " int numVerticesA = polyA.m_numIndices;\n" - " for(int e0=0;e0m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" - " const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" - " const float4 edge0 = a - b;\n" - " const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" - " float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" - " float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" - " float4 worldA1 = transform(&a,&posA,&ornA);\n" - " float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" - " \n" - " float4 planeNormalWS = planeNormalWS1;\n" - " float planeEqWS=planeEqWS1;\n" - " \n" - " //clip face\n" - " //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" - " numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" - " //btSwap(pVtxIn,pVtxOut);\n" - " float4* tmp = pVtxOut;\n" - " pVtxOut = pVtxIn;\n" - " pVtxIn = tmp;\n" - " numVertsIn = numVertsOut;\n" - " numVertsOut = 0;\n" - " }\n" - " \n" - " // only keep points that are behind the witness face\n" - " {\n" - " float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float localPlaneEq = polyA.m_plane.w;\n" - " float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" - " float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" - " for (int i=0;im_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " facesA[hullA->m_faceOffset+face].m_plane.x, \n" - " facesA[hullA->m_faceOffset+face].m_plane.y, \n" - " facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " }\n" - " }\n" - " }\n" - " if (closestFaceA<0)\n" - " return numContactsOut;\n" - " b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " int numVerticesA = polyA.m_numIndices;\n" - " for(int e0=0;e0m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" - " const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" - " const float4 edge0 = a - b;\n" - " const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" - " float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" - " float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" - " float4 worldA1 = transform(&a,&posA,&ornA);\n" - " float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" - " \n" - " float4 planeNormalWS = planeNormalWS1;\n" - " float planeEqWS=planeEqWS1;\n" - " \n" - " //clip face\n" - " //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" - " numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" - " //btSwap(pVtxIn,pVtxOut);\n" - " float4* tmp = pVtxOut;\n" - " pVtxOut = pVtxIn;\n" - " pVtxIn = tmp;\n" - " numVertsIn = numVertsOut;\n" - " numVertsOut = 0;\n" - " }\n" - " \n" - " // only keep points that are behind the witness face\n" - " {\n" - " float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float localPlaneEq = polyA.m_plane.w;\n" - " float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" - " float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" - " for (int i=0;im_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" - " faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " {\n" - " const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" - " const int numVertices = polyB.m_numIndices;\n" - " for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " if (closestFaceB>=0)\n" - " {\n" - " numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" - " posA,ornA,\n" - " worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" - " faces,\n" - " indices,localContactsOut,localContactCapacity);\n" - " }\n" - " return numContactsOut;\n" - "}\n" - "int clipHullAgainstHullLocalA(const float4 separatingNormal,\n" - " const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" - " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" - " float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " const float4* verticesA,\n" - " const b3GpuFace_t* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB,\n" - " __global const b3GpuFace_t* facesB,\n" - " __global const int* indicesB,\n" - " float4* localContactsOut,\n" - " int localContactCapacity)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " int closestFaceB=-1;\n" - " float dmax = -FLT_MAX;\n" - " {\n" - " for(int face=0;facem_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" - " facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " {\n" - " const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" - " const int numVertices = polyB.m_numIndices;\n" - " for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " if (closestFaceB>=0)\n" - " {\n" - " numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" - " posA,ornA,\n" - " worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" - " verticesA,facesA,indicesA,\n" - " verticesB,facesB,indicesB,\n" - " localContactsOut,localContactCapacity);\n" - " }\n" - " return numContactsOut;\n" - "}\n" - "#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; }\n" - "#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset64)\n" - " nPoints = 64;\n" - " \n" - " float4 center = make_float4(0.f);\n" - " {\n" - " \n" - " for (int i=0;i a[ie].x )? a[0].x: a[ie].x;\n" - " a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" - " a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" - " a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" - " }\n" - " idx[0] = (int)a[0].x & 0xff;\n" - " idx[1] = (int)a[0].y & 0xff;\n" - " idx[2] = (int)a[0].z & 0xff;\n" - " idx[3] = (int)a[0].w & 0xff;\n" - " }\n" - " }\n" - " {\n" - " float2 h[64];\n" - " PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" - " REDUCE_MIN( h, nPoints );\n" - " max00 = h[0];\n" - " }\n" - " }\n" - " contactIdx[0] = idx[0];\n" - " contactIdx[1] = idx[1];\n" - " contactIdx[2] = idx[2];\n" - " contactIdx[3] = idx[3];\n" - " return 4;\n" - " }\n" - "}\n" - "__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const float4* closestPointsWorld,\n" - " __global const float4* separatingNormalsWorld,\n" - " __global const int* contactCounts,\n" - " __global const int* contactOffsets,\n" - " __global struct b3Contact4Data* restrict contactsOut,\n" - " counter32_t nContactsOut,\n" - " int contactCapacity,\n" - " int numPairs,\n" - " int pairIndex\n" - " )\n" - "{\n" - " int idx = get_global_id(0);\n" - " \n" - " if (idxm_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = idx;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " for (int i=0;im_worldPosB[i] = localPoints[contactIdx[i]];\n" - " }\n" - " GET_NPOINTS(*c) = nContacts;\n" - " }\n" - " }\n" - "}\n" - "void trInverse(float4 translationIn, Quaternion orientationIn,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtInvert(orientationIn);\n" - " *translationOut = qtRotate(*orientationOut, -translationIn);\n" - "}\n" - "void trMul(float4 translationA, Quaternion orientationA,\n" - " float4 translationB, Quaternion orientationB,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtMul(orientationA,orientationB);\n" - " *translationOut = transform(&translationB,&translationA,&orientationA);\n" - "}\n" - "__kernel void clipHullHullKernel( __global int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " __global const float4* separatingNormals,\n" - " __global const int* hasSeparatingAxis,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numPairs,\n" - " int contactCapacity)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " float4 worldVertsB1[64];\n" - " float4 worldVertsB2[64];\n" - " int capacityWorldVerts = 64; \n" - " float4 localContactsOut[64];\n" - " int localContactCapacity=64;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " if (i0)\n" - " {\n" - " float4 normal = -separatingNormals[i];\n" - " int nPoints = numLocalContactsOut;\n" - " float4* pointsIn = localContactsOut;\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " \n" - " int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" - " \n" - " \n" - " int mprContactIndex = pairs[pairIndex].z;\n" - " int dstIdx = mprContactIndex;\n" - " if (dstIdx<0)\n" - " {\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " }\n" - " if (dstIdxm_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " for (int i=0;i0||(mprContactIndex<0))\n" - " {\n" - " c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" - " }\n" - " }\n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " }\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (hasSeparatingAxis[i])\n" - " }// if (i= 0)\n" - " {\n" - " collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " posA = newPosA;\n" - " ornA = newOrnA;\n" - " } else\n" - " {\n" - " collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " }\n" - " \n" - " if (childShapeIndexB>=0)\n" - " {\n" - " collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " } else\n" - " {\n" - " collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" - " }\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" - " &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " worldVertsB1,worldVertsB2,capacityWorldVerts,\n" - " minDist, maxDist,\n" - " vertices,faces,indices,\n" - " localContactsOut,localContactCapacity);\n" - " \n" - " if (numLocalContactsOut>0)\n" - " {\n" - " float4 normal = -gpuCompoundSepNormalsOut[i];\n" - " int nPoints = numLocalContactsOut;\n" - " float4* pointsIn = localContactsOut;\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " \n" - " int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = gpuCompoundPairs[pairIndex].x;\n" - " int bodyB = gpuCompoundPairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA = childShapeIndexA;\n" - " c->m_childIndexB = childShapeIndexB;\n" - " for (int i=0;im_worldPosB[i] = pointsIn[contactIdx[i]];\n" - " }\n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " }\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (gpuHasCompoundSepNormalsOut[i])\n" - " }// if (i 0.00001)\n" - " {\n" - " normalOnSurfaceB = diff / len;\n" - " }\n" - " float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" - " contactPosB.w = dist;\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdx < contactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_worldPosB[0] = contactPosB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " }//if (dstIdx < numPairs)\n" - " }//if ( len <= (radiusA+radiusB))\n" - " }//SHAPE_SPHERE SHAPE_SPHERE\n" - " }//if (i0)\n" - " {\n" - " float4 normal = -separatingNormals[i];\n" - " int nPoints = numLocalContactsOut;\n" - " float4* pointsIn = localContactsOut;\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " \n" - " int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdxm_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = concavePairsIn[pairIndex].x;\n" - " int bodyB = concavePairsIn[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA = childShapeIndexA;\n" - " c->m_childIndexB = childShapeIndexB;\n" - " for (int i=0;im_worldPosB[i] = pointsIn[contactIdx[i]];\n" - " }\n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " }\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (im_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" - " faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " \n" - " {\n" - " const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" - " const int numVertices = polyB.m_numIndices;\n" - " for(int e0=0;e0m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " \n" - " int closestFaceA=-1;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;facem_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " faces[hullA->m_faceOffset+face].m_plane.x,\n" - " faces[hullA->m_faceOffset+face].m_plane.y,\n" - " faces[hullA->m_faceOffset+face].m_plane.z,\n" - " 0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " worldNormalsA1[pairIndex] = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " \n" - " int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" - " for(int e0=0;e0m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" - " worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" - " }\n" - " \n" - " clippingFaces[pairIndex].x = closestFaceA;\n" - " clippingFaces[pairIndex].y = closestFaceB;\n" - " clippingFaces[pairIndex].z = numVerticesA;\n" - " clippingFaces[pairIndex].w = numWorldVertsB1;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "int clipFaces(__global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " __global float4* worldVertsB2, \n" - " int capacityWorldVertsB2,\n" - " const float minDist, float maxDist,\n" - " __global int4* clippingFaces,\n" - " int pairIndex)\n" - "{\n" - " int numContactsOut = 0;\n" - " \n" - " int closestFaceA = clippingFaces[pairIndex].x;\n" - " int closestFaceB = clippingFaces[pairIndex].y;\n" - " int numVertsInA = clippingFaces[pairIndex].z;\n" - " int numVertsInB = clippingFaces[pairIndex].w;\n" - " \n" - " int numVertsOut = 0;\n" - " \n" - " if (closestFaceA<0)\n" - " return numContactsOut;\n" - " \n" - " __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" - " __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" - " \n" - " \n" - " \n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " \n" - " for(int e0=0;e0=0)\n" - " {\n" - " \n" - " \n" - " \n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " \n" - " for(int e0=0;e00)\n" - " {\n" - " __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" - " float4 normal = -separatingNormals[i];\n" - " \n" - " int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" - " \n" - " int mprContactIndex = pairs[pairIndex].z;\n" - " int dstIdx = mprContactIndex;\n" - " if (dstIdx<0)\n" - " {\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " }\n" - "//#if 0\n" - " \n" - " if (dstIdx < contactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " pairs[pairIndex].w = dstIdx;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA =-1;\n" - " c->m_childIndexB =-1;\n" - " switch (nReducedContacts)\n" - " {\n" - " case 4:\n" - " c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" - " case 3:\n" - " c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" - " case 2:\n" - " c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" - " case 1:\n" - " if (mprContactIndex<0)//test\n" - " c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" - " default:\n" - " {\n" - " }\n" - " };\n" - " \n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " \n" - " }\n" - " \n" - " \n" - "//#endif\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (hasSeparatingAxis[i])\n" - " }// if (im_escapeIndexOrTriangleIndex&~(y)); -} - -int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeafNode(const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int getEscapeIndex(const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} b3BvhSubtreeInfo; - - - - - - - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -} ConvexPolyhedronCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int2.h" - - - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define make_float4 (float4) - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); - - -// float4 a1 = make_float4(a.xyz,0.f); -// float4 b1 = make_float4(b.xyz,0.f); - -// return cross(a1,b1); - -//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); - - // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); - - //return c; -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = make_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - -inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;im_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;im_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); - - if(Max01e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) - return false; - return true; -} - - - -bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;im_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - -bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* verticesA, - __global const float4* uniqueEdgesA, - __global const btGpuFace* facesA, - __global const int* indicesA, - const float4* verticesB, - const float4* uniqueEdgesB, - const btGpuFace* facesB, - const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;im_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); - - if(Max00.0f) - { - *sep = -(*sep); - } - return true; -} - - - -inline int findClippingFaces(const float4 separatingNormal, - const ConvexPolyhedronCL* hullA, - __global const ConvexPolyhedronCL* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=0; - float dmax = -FLT_MAX; - - { - for(int face=0;facem_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; - int numVertices = polyB.m_numIndices; - if (numVertices>capacityWorldVerts) - numVertices = capacityWorldVerts; - if (numVertices<0) - numVertices = 0; - - for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - } - - int closestFaceA=0; - { - float dmin = FLT_MAX; - for(int face=0;facem_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; - if (numVerticesA>capacityWorldVerts) - numVerticesA = capacityWorldVerts; - if (numVerticesA<0) - numVerticesA=0; - - for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - __global float* dmins, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - concaveHasSeparatingNormals[i] = 0; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& - collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i=numConcavePairs) - return; - - if (!concaveHasSeparatingNormals[i]) - return; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = dmins[i]; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - sepAxis = concaveSeparatingNormalsOut[pairIdx]; - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;im_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the triangle index is stored\n" - " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - "int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - " \n" - "int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes, points to the root of the subtree\n" - " int m_rootNodeIndex;\n" - " //4 bytes\n" - " int m_subtreeSize;\n" - " int m_padding[3];\n" - "} b3BvhSubtreeInfo;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " \n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "#ifndef B3_AABB_H\n" - "#define B3_AABB_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3Aabb b3Aabb_t;\n" - "struct b3Aabb\n" - "{\n" - " union\n" - " {\n" - " float m_min[4];\n" - " b3Float4 m_minVec;\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float m_max[4];\n" - " b3Float4 m_maxVec;\n" - " int m_signedMaxIndices[4];\n" - " };\n" - "};\n" - "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" - " b3Float4ConstArg pos,\n" - " b3QuatConstArg orn,\n" - " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" - "{\n" - " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" - " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" - " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" - " b3Mat3x3 m;\n" - " m = b3QuatGetRotationMatrix(orn);\n" - " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" - " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" - " \n" - " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" - " 0.f);\n" - " *aabbMinOut = center-extent;\n" - " *aabbMaxOut = center+extent;\n" - "}\n" - "/// conservative test for overlap between two aabbs\n" - "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" - " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" - " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" - " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "#endif //B3_AABB_H\n" - "/*\n" - "Bullet Continuous Collision Detection and Physics Library\n" - "Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "#ifndef B3_INT2_H\n" - "#define B3_INT2_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#define b3UnsignedInt2 uint2\n" - "#define b3Int2 int2\n" - "#define b3MakeInt2 (int2)\n" - "#endif //__cplusplus\n" - "#endif\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} btGpuFace;\n" - "#define make_float4 (float4)\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - " \n" - "// float4 a1 = make_float4(a.xyz,0.f);\n" - "// float4 b1 = make_float4(b.xyz,0.f);\n" - "// return cross(a1,b1);\n" - "//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " //return c;\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " v = make_float4(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;im_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, __global const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;im_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA,const float4 ornA,\n" - " const float4 posB,const float4 ornB,\n" - " float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" - "{\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" - " if(Max01e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" - " return false;\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " \n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;im_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS*=-1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* verticesA, \n" - " __global const float4* uniqueEdgesA, \n" - " __global const btGpuFace* facesA,\n" - " __global const int* indicesA,\n" - " const float4* verticesB,\n" - " const float4* uniqueEdgesB, \n" - " const btGpuFace* facesB,\n" - " const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;im_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS *= -1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test edges\n" - " for(int e0=0;e0m_numUniqueEdges;e0++)\n" - " {\n" - " const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" - " float4 edge0World = qtRotate(ornA,edge0);\n" - " for(int e1=0;e1m_numUniqueEdges;e1++)\n" - " {\n" - " const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" - " float4 edge1World = qtRotate(ornB,edge1);\n" - " float4 crossje = cross3(edge0World,edge1World);\n" - " curEdgeEdge++;\n" - " if(!IsAlmostZero(crossje))\n" - " {\n" - " crossje = normalize3(crossje);\n" - " if (dot3F4(DeltaC2,crossje)<0)\n" - " crossje *= -1.f;\n" - " float dist;\n" - " bool result = true;\n" - " {\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" - " \n" - " if(Max00.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "inline int findClippingFaces(const float4 separatingNormal,\n" - " const ConvexPolyhedronCL* hullA, \n" - " __global const ConvexPolyhedronCL* hullB,\n" - " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" - " __global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " const float4* verticesA,\n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB,\n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " __global int4* clippingFaces, int pairIndex)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " \n" - " \n" - " int closestFaceB=0;\n" - " float dmax = -FLT_MAX;\n" - " \n" - " {\n" - " for(int face=0;facem_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" - " facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " \n" - " {\n" - " const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" - " int numVertices = polyB.m_numIndices;\n" - " if (numVertices>capacityWorldVerts)\n" - " numVertices = capacityWorldVerts;\n" - " if (numVertices<0)\n" - " numVertices = 0;\n" - " \n" - " for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " }\n" - " \n" - " int closestFaceA=0;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;facem_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " facesA[hullA->m_faceOffset+face].m_plane.x,\n" - " facesA[hullA->m_faceOffset+face].m_plane.y,\n" - " facesA[hullA->m_faceOffset+face].m_plane.z,\n" - " 0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " worldNormalsA1[pairIndex] = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " \n" - " int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" - " if (numVerticesA>capacityWorldVerts)\n" - " numVerticesA = capacityWorldVerts;\n" - " if (numVerticesA<0)\n" - " numVerticesA=0;\n" - " \n" - " for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" - " worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" - " }\n" - " }\n" - " \n" - " clippingFaces[pairIndex].x = closestFaceA;\n" - " clippingFaces[pairIndex].y = closestFaceB;\n" - " clippingFaces[pairIndex].z = numVerticesA;\n" - " clippingFaces[pairIndex].w = numWorldVertsB1;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global btAabbCL* aabbs,\n" - " __global float4* concaveSeparatingNormalsOut,\n" - " __global int* concaveHasSeparatingNormals,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1GPU,\n" - " __global float4* worldNormalsAGPU,\n" - " __global float4* worldVertsB1GPU,\n" - " __global float* dmins,\n" - " int vertexFaceCapacity,\n" - " int numConcavePairs\n" - " )\n" - "{\n" - " \n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " \n" - " concaveHasSeparatingNormals[i] = 0;\n" - " \n" - " int pairIdx = i;\n" - " \n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" - " collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " concavePairs[pairIdx].w = -1;\n" - " return;\n" - " }\n" - " \n" - " \n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " int numActualConcaveConvexTests = 0;\n" - " \n" - " int f = concavePairs[i].z;\n" - " \n" - " bool overlap = false;\n" - " \n" - " ConvexPolyhedronCL convexPolyhedronA;\n" - " \n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " \n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " float4 triMinAabb, triMaxAabb;\n" - " btAabbCL triAabb;\n" - " triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" - " triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " \n" - " triAabb.m_min = min(triAabb.m_min,vert);\n" - " triAabb.m_max = max(triAabb.m_max,vert);\n" - " \n" - " }\n" - " \n" - " overlap = true;\n" - " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" - " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" - " overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" - " \n" - " if (overlap)\n" - " {\n" - " float dmin = FLT_MAX;\n" - " int hasSeparatingAxis=5;\n" - " float4 sepAxis=make_float4(1,2,3,4);\n" - " \n" - " int localCC=0;\n" - " numActualConcaveConvexTests++;\n" - " \n" - " //a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " \n" - " \n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " \n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " \n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int prevVertex = numVertices-1;\n" - " for (int i=0;i=numConcavePairs)\n" - " return;\n" - " \n" - " if (!concaveHasSeparatingNormals[i])\n" - " return;\n" - " \n" - " int pairIdx = i;\n" - " \n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " int numActualConcaveConvexTests = 0;\n" - " \n" - " int f = concavePairs[i].z;\n" - " \n" - " bool overlap = false;\n" - " \n" - " ConvexPolyhedronCL convexPolyhedronA;\n" - " \n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " \n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " float4 triMinAabb, triMaxAabb;\n" - " btAabbCL triAabb;\n" - " triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" - " triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " \n" - " triAabb.m_min = min(triAabb.m_min,vert);\n" - " triAabb.m_max = max(triAabb.m_max,vert);\n" - " \n" - " }\n" - " \n" - " overlap = true;\n" - " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" - " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" - " overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" - " \n" - " if (overlap)\n" - " {\n" - " float dmin = dmins[i];\n" - " int hasSeparatingAxis=5;\n" - " float4 sepAxis=make_float4(1,2,3,4);\n" - " sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" - " \n" - " int localCC=0;\n" - " numActualConcaveConvexTests++;\n" - " \n" - " //a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " \n" - " \n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " \n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " \n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int prevVertex = numVertices-1;\n" - " for (int i=0;im_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the triangle index is stored\n" - " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - "int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - " \n" - "int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes, points to the root of the subtree\n" - " int m_rootNodeIndex;\n" - " //4 bytes\n" - " int m_subtreeSize;\n" - " int m_padding[3];\n" - "} b3BvhSubtreeInfo;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " \n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "#ifndef B3_AABB_H\n" - "#define B3_AABB_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3Aabb b3Aabb_t;\n" - "struct b3Aabb\n" - "{\n" - " union\n" - " {\n" - " float m_min[4];\n" - " b3Float4 m_minVec;\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float m_max[4];\n" - " b3Float4 m_maxVec;\n" - " int m_signedMaxIndices[4];\n" - " };\n" - "};\n" - "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" - " b3Float4ConstArg pos,\n" - " b3QuatConstArg orn,\n" - " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" - "{\n" - " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" - " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" - " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" - " b3Mat3x3 m;\n" - " m = b3QuatGetRotationMatrix(orn);\n" - " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" - " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" - " \n" - " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" - " 0.f);\n" - " *aabbMinOut = center-extent;\n" - " *aabbMaxOut = center+extent;\n" - "}\n" - "/// conservative test for overlap between two aabbs\n" - "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" - " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" - " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" - " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "#endif //B3_AABB_H\n" - "/*\n" - "Bullet Continuous Collision Detection and Physics Library\n" - "Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "#ifndef B3_INT2_H\n" - "#define B3_INT2_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#define b3UnsignedInt2 uint2\n" - "#define b3Int2 int2\n" - "#define b3MakeInt2 (int2)\n" - "#endif //__cplusplus\n" - "#endif\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} btGpuFace;\n" - "#define make_float4 (float4)\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - " \n" - "// float4 a1 = make_float4(a.xyz,0.f);\n" - "// float4 b1 = make_float4(b.xyz,0.f);\n" - "// return cross(a1,b1);\n" - "//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " //return c;\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " v = make_float4(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;im_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, __global const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;im_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA,const float4 ornA,\n" - " const float4 posB,const float4 ornB,\n" - " float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" - "{\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" - " if(Max01e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" - " return false;\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " \n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;im_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS*=-1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* verticesA, \n" - " __global const float4* uniqueEdgesA, \n" - " __global const btGpuFace* facesA,\n" - " __global const int* indicesA,\n" - " const float4* verticesB,\n" - " const float4* uniqueEdgesB, \n" - " const btGpuFace* facesB,\n" - " const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;im_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS *= -1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test edges\n" - " for(int e0=0;e0m_numUniqueEdges;e0++)\n" - " {\n" - " const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" - " float4 edge0World = qtRotate(ornA,edge0);\n" - " for(int e1=0;e1m_numUniqueEdges;e1++)\n" - " {\n" - " const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" - " float4 edge1World = qtRotate(ornB,edge1);\n" - " float4 crossje = cross3(edge0World,edge1World);\n" - " curEdgeEdge++;\n" - " if(!IsAlmostZero(crossje))\n" - " {\n" - " crossje = normalize3(crossje);\n" - " if (dot3F4(DeltaC2,crossje)<0)\n" - " crossje *= -1.f;\n" - " float dist;\n" - " bool result = true;\n" - " {\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" - " \n" - " if(Max00.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA,const float4 ornA,\n" - " const float4 posB,const float4 ornB,\n" - " float4* sep_axis, __global const float4* vertices,float* depth)\n" - "{\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" - " if(Max0m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;im_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " \n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS*=-1.f;\n" - " \n" - " curPlaneTests++;\n" - " \n" - " float d;\n" - " if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" - " return false;\n" - " \n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " \n" - " return true;\n" - "}\n" - "bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices,\n" - " __global const float4* unitSphereDirections,\n" - " int numUnitSphereDirections,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test unit sphere directions\n" - " for (int i=0;i0)\n" - " crossje *= -1.f;\n" - " {\n" - " float dist;\n" - " bool result = true;\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" - " \n" - " if(Max00.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices, \n" - " __global const float4* uniqueEdges, \n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test edges\n" - " for(int e0=0;e0m_numUniqueEdges;e0++)\n" - " {\n" - " const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" - " float4 edge0World = qtRotate(ornA,edge0);\n" - " for(int e1=0;e1m_numUniqueEdges;e1++)\n" - " {\n" - " const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" - " float4 edge1World = qtRotate(ornB,edge1);\n" - " float4 crossje = cross3(edge0World,edge1World);\n" - " curEdgeEdge++;\n" - " if(!IsAlmostZero(crossje))\n" - " {\n" - " crossje = normalize3(crossje);\n" - " if (dot3F4(DeltaC2,crossje)<0)\n" - " crossje*=-1.f;\n" - " \n" - " float dist;\n" - " bool result = true;\n" - " {\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" - " \n" - " if(Max00.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "// work-in-progress\n" - "__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global volatile float4* gpuCompoundSepNormalsOut,\n" - " __global volatile int* gpuHasCompoundSepNormalsOut,\n" - " int numCompoundPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i= 0)\n" - " {\n" - " collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " posA = newPosA;\n" - " ornA = newOrnA;\n" - " } else\n" - " {\n" - " collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " }\n" - " \n" - " if (childShapeIndexB>=0)\n" - " {\n" - " collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " } else\n" - " {\n" - " collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" - " }\n" - " \n" - " gpuHasCompoundSepNormalsOut[i] = 0;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " \n" - " if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" - " {\n" - " return;\n" - " }\n" - " int hasSeparatingAxis = 5;\n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " posA.w = 0.f;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " float4 sepNormal = make_float4(1,0,0,0);\n" - " bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" - " hasSeparatingAxis = 4;\n" - " if (!sepA)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" - " if (!sepB)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else//(!sepB)\n" - " {\n" - " bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" - " if (sepEE)\n" - " {\n" - " gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" - " gpuHasCompoundSepNormalsOut[i] = 1;\n" - " }//sepEE\n" - " }//(!sepB)\n" - " }//(!sepA)\n" - " \n" - " \n" - " }\n" - " \n" - "}\n" - "inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" - "{\n" - " b3Float4 vecOut;\n" - " vecOut = b3MakeFloat4(\n" - " (float)(vecIn[0]) / (quantization.x),\n" - " (float)(vecIn[1]) / (quantization.y),\n" - " (float)(vecIn[2]) / (quantization.z),\n" - " 0.f);\n" - " vecOut += bvhAabbMin;\n" - " return vecOut;\n" - "}\n" - "inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" - "{\n" - " b3Float4 vecOut;\n" - " vecOut = b3MakeFloat4(\n" - " (float)(vecIn[0]) / (quantization.x),\n" - " (float)(vecIn[1]) / (quantization.y),\n" - " (float)(vecIn[2]) / (quantization.z),\n" - " 0.f);\n" - " vecOut += bvhAabbMin;\n" - " return vecOut;\n" - "}\n" - "// work-in-progress\n" - "__kernel void findCompoundPairsKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global b3Aabb_t* aabbLocalSpace,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global volatile int4* gpuCompoundPairsOut,\n" - " __global volatile int* numCompoundPairsOut,\n" - " __global const b3BvhSubtreeInfo* subtrees,\n" - " __global const b3QuantizedBvhNode* quantizedNodes,\n" - " __global const b3BvhInfo* bvhInfos,\n" - " int numPairs,\n" - " int maxNumCompoundPairsCapacity\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (imaxStackDepth && !(isLeafA && isLeafB))\n" - " {\n" - " //printf(\"Error: traversal exceeded maxStackDepth\");\n" - " continue;\n" - " }\n" - " if(isInternalA)\n" - " {\n" - " int nodeAleftChild = node.x+1;\n" - " bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" - " int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" - " if(isInternalB)\n" - " { \n" - " int nodeBleftChild = node.y+1;\n" - " bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" - " int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" - " nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" - " nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" - " nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" - " nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" - " }\n" - " else\n" - " {\n" - " nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" - " nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" - " }\n" - " }\n" - " else\n" - " {\n" - " if(isInternalB)\n" - " {\n" - " int nodeBleftChild = node.y+1;\n" - " bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" - " int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" - " nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" - " nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" - " }\n" - " else\n" - " {\n" - " int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" - " if (compoundPairIdxm_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" - " facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " \n" - " {\n" - " const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" - " int numVertices = polyB.m_numIndices;\n" - " if (numVertices>capacityWorldVerts)\n" - " numVertices = capacityWorldVerts;\n" - " \n" - " for(int e0=0;e0m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " }\n" - " \n" - " int closestFaceA=0;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;facem_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " facesA[hullA->m_faceOffset+face].m_plane.x,\n" - " facesA[hullA->m_faceOffset+face].m_plane.y,\n" - " facesA[hullA->m_faceOffset+face].m_plane.z,\n" - " 0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " worldNormalsA1[pairIndex] = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " \n" - " int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" - " if (numVerticesA>capacityWorldVerts)\n" - " numVerticesA = capacityWorldVerts;\n" - " \n" - " for(int e0=0;e0m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" - " worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" - " }\n" - " }\n" - " \n" - " clippingFaces[pairIndex].x = closestFaceA;\n" - " clippingFaces[pairIndex].y = closestFaceB;\n" - " clippingFaces[pairIndex].z = numVerticesA;\n" - " clippingFaces[pairIndex].w = numWorldVertsB1;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global btAabbCL* aabbs,\n" - " __global float4* concaveSeparatingNormalsOut,\n" - " __global int* concaveHasSeparatingNormals,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1GPU,\n" - " __global float4* worldNormalsAGPU,\n" - " __global float4* worldVertsB1GPU,\n" - " int vertexFaceCapacity,\n" - " int numConcavePairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " concaveHasSeparatingNormals[i] = 0;\n" - " int pairIdx = i;\n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" - " collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " concavePairs[pairIdx].w = -1;\n" - " return;\n" - " }\n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " int numActualConcaveConvexTests = 0;\n" - " \n" - " int f = concavePairs[i].z;\n" - " \n" - " bool overlap = false;\n" - " \n" - " ConvexPolyhedronCL convexPolyhedronA;\n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " float4 triMinAabb, triMaxAabb;\n" - " btAabbCL triAabb;\n" - " triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" - " triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " \n" - " triAabb.m_min = min(triAabb.m_min,vert); \n" - " triAabb.m_max = max(triAabb.m_max,vert); \n" - " }\n" - " overlap = true;\n" - " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" - " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" - " overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" - " \n" - " if (overlap)\n" - " {\n" - " float dmin = FLT_MAX;\n" - " int hasSeparatingAxis=5;\n" - " float4 sepAxis=make_float4(1,2,3,4);\n" - " int localCC=0;\n" - " numActualConcaveConvexTests++;\n" - " //a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int prevVertex = numVertices-1;\n" - " for (int i=0;i( device, 1, BufferBase::BUFFER_CONST ); - - m_lower = (maxSize == 0) ? 0 : new b3OpenCLArray(ctx, queue, maxSize); - m_upper = (maxSize == 0) ? 0 : new b3OpenCLArray(ctx, queue, maxSize); - - m_filler = new b3FillCL(ctx, device, queue); -} - -b3BoundSearchCL::~b3BoundSearchCL() -{ - delete m_lower; - delete m_upper; - delete m_filler; - - clReleaseKernel(m_lowerSortDataKernel); - clReleaseKernel(m_upperSortDataKernel); - clReleaseKernel(m_subtractKernel); -} - -void b3BoundSearchCL::execute(b3OpenCLArray& src, int nSrc, b3OpenCLArray& dst, int nDst, Option option) -{ - b3Int4 constBuffer; - constBuffer.x = nSrc; - constBuffer.y = nDst; - - if (option == BOUND_LOWER) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_lowerSortDataKernel, "m_lowerSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nSrc); - launcher.setConst(nDst); - - launcher.launch1D(nSrc, 64); - } - else if (option == BOUND_UPPER) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_upperSortDataKernel, "m_upperSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nSrc); - launcher.setConst(nDst); - - launcher.launch1D(nSrc, 64); - } - else if (option == COUNT) - { - b3Assert(m_lower); - b3Assert(m_upper); - b3Assert(m_lower->capacity() <= (int)nDst); - b3Assert(m_upper->capacity() <= (int)nDst); - - int zero = 0; - m_filler->execute(*m_lower, zero, nDst); - m_filler->execute(*m_upper, zero, nDst); - - execute(src, nSrc, *m_lower, nDst, BOUND_LOWER); - execute(src, nSrc, *m_upper, nDst, BOUND_UPPER); - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_upper->getBufferCL(), true), b3BufferInfoCL(m_lower->getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_subtractKernel, "m_subtractKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nSrc); - launcher.setConst(nDst); - - launcher.launch1D(nDst, 64); - } - } - else - { - b3Assert(0); - } -} - -void b3BoundSearchCL::executeHost(b3AlignedObjectArray& src, int nSrc, - b3AlignedObjectArray& dst, int nDst, Option option) -{ - for (int i = 0; i < nSrc - 1; i++) - b3Assert(src[i].m_key <= src[i + 1].m_key); - - b3SortData minData, zeroData, maxData; - minData.m_key = -1; - minData.m_value = -1; - zeroData.m_key = 0; - zeroData.m_value = 0; - maxData.m_key = nDst; - maxData.m_value = nDst; - - if (option == BOUND_LOWER) - { - for (int i = 0; i < nSrc; i++) - { - b3SortData& iData = (i == 0) ? minData : src[i - 1]; - b3SortData& jData = (i == nSrc) ? maxData : src[i]; - - if (iData.m_key != jData.m_key) - { - int k = jData.m_key; - { - dst[k] = i; - } - } - } - } - else if (option == BOUND_UPPER) - { - for (int i = 1; i < nSrc + 1; i++) - { - b3SortData& iData = src[i - 1]; - b3SortData& jData = (i == nSrc) ? maxData : src[i]; - - if (iData.m_key != jData.m_key) - { - int k = iData.m_key; - { - dst[k] = i; - } - } - } - } - else if (option == COUNT) - { - b3AlignedObjectArray lower; - lower.resize(nDst); - b3AlignedObjectArray upper; - upper.resize(nDst); - - for (int i = 0; i < nDst; i++) - { - lower[i] = upper[i] = 0; - } - - executeHost(src, nSrc, lower, nDst, BOUND_LOWER); - executeHost(src, nSrc, upper, nDst, BOUND_UPPER); - - for (int i = 0; i < nDst; i++) - { - dst[i] = upper[i] - lower[i]; - } - } - else - { - b3Assert(0); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h deleted file mode 100644 index 0d633e3d235..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#ifndef B3_BOUNDSEARCH_H -#define B3_BOUNDSEARCH_H - -#pragma once - -/*#include -#include -#include -#include -*/ - -#include "b3OpenCLArray.h" -#include "b3FillCL.h" -#include "b3RadixSort32CL.h" //for b3SortData (perhaps move it?) -class b3BoundSearchCL -{ -public: - enum Option - { - BOUND_LOWER, - BOUND_UPPER, - COUNT, - }; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - cl_kernel m_lowerSortDataKernel; - cl_kernel m_upperSortDataKernel; - cl_kernel m_subtractKernel; - - b3OpenCLArray* m_constbtOpenCLArray; - b3OpenCLArray* m_lower; - b3OpenCLArray* m_upper; - - b3FillCL* m_filler; - - b3BoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size); - - virtual ~b3BoundSearchCL(); - - // src has to be src[i].m_key <= src[i+1].m_key - void execute(b3OpenCLArray& src, int nSrc, b3OpenCLArray& dst, int nDst, Option option = BOUND_LOWER); - - void executeHost(b3AlignedObjectArray& src, int nSrc, b3AlignedObjectArray& dst, int nDst, Option option = BOUND_LOWER); -}; - -#endif //B3_BOUNDSEARCH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h deleted file mode 100644 index 35fc467b207..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h +++ /dev/null @@ -1,18 +0,0 @@ - -#ifndef B3_BUFFER_INFO_CL_H -#define B3_BUFFER_INFO_CL_H - -#include "b3OpenCLArray.h" - -struct b3BufferInfoCL -{ - //b3BufferInfoCL(){} - - // template - b3BufferInfoCL(cl_mem buff, bool isReadOnly = false) : m_clBuffer(buff), m_isReadOnly(isReadOnly) {} - - cl_mem m_clBuffer; - bool m_isReadOnly; -}; - -#endif //B3_BUFFER_INFO_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp deleted file mode 100644 index bd25bb2101b..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "b3FillCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3BufferInfoCL.h" -#include "b3LauncherCL.h" - -#define FILL_CL_PROGRAM_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl" - -#include "kernels/FillKernelsCL.h" - -b3FillCL::b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue) - : m_commandQueue(queue) -{ - const char* kernelSource = fillKernelsCL; - cl_int pErrNum; - const char* additionalMacros = ""; - - cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, FILL_CL_PROGRAM_PATH); - b3Assert(fillProg); - - m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillIntKernel); - - m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillIntKernel); - - m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillFloatKernel); - - m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillKernelInt2); -} - -b3FillCL::~b3FillCL() -{ - clReleaseKernel(m_fillKernelInt2); - clReleaseKernel(m_fillIntKernel); - clReleaseKernel(m_fillUnsignedIntKernel); - clReleaseKernel(m_fillFloatKernel); -} - -void b3FillCL::execute(b3OpenCLArray& src, const float value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3LauncherCL launcher(m_commandQueue, m_fillFloatKernel, "m_fillFloatKernel"); - launcher.setBuffer(src.getBufferCL()); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - launcher.launch1D(n); - } -} - -void b3FillCL::execute(b3OpenCLArray& src, const int value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3LauncherCL launcher(m_commandQueue, m_fillIntKernel, "m_fillIntKernel"); - launcher.setBuffer(src.getBufferCL()); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - launcher.launch1D(n); - } -} - -void b3FillCL::execute(b3OpenCLArray& src, const unsigned int value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_fillUnsignedIntKernel, "m_fillUnsignedIntKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - launcher.launch1D(n); - } -} - -void b3FillCL::executeHost(b3AlignedObjectArray& src, const b3Int2& value, int n, int offset) -{ - for (int i = 0; i < n; i++) - { - src[i + offset] = value; - } -} - -void b3FillCL::executeHost(b3AlignedObjectArray& src, const int value, int n, int offset) -{ - for (int i = 0; i < n; i++) - { - src[i + offset] = value; - } -} - -void b3FillCL::execute(b3OpenCLArray& src, const b3Int2& value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2, "m_fillKernelInt2"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - //( constBuffer ); - launcher.launch1D(n); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h deleted file mode 100644 index c92c3e51190..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef B3_FILL_CL_H -#define B3_FILL_CL_H - -#include "b3OpenCLArray.h" -#include "Bullet3Common/b3Scalar.h" - -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" - -class b3FillCL -{ - cl_command_queue m_commandQueue; - - cl_kernel m_fillKernelInt2; - cl_kernel m_fillIntKernel; - cl_kernel m_fillUnsignedIntKernel; - cl_kernel m_fillFloatKernel; - -public: - struct b3ConstData - { - union { - b3Int4 m_data; - b3UnsignedInt4 m_UnsignedData; - }; - int m_offset; - int m_n; - int m_padding[2]; - }; - -protected: -public: - b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue); - - virtual ~b3FillCL(); - - void execute(b3OpenCLArray& src, const unsigned int value, int n, int offset = 0); - - void execute(b3OpenCLArray& src, const int value, int n, int offset = 0); - - void execute(b3OpenCLArray& src, const float value, int n, int offset = 0); - - void execute(b3OpenCLArray& src, const b3Int2& value, int n, int offset = 0); - - void executeHost(b3AlignedObjectArray& src, const b3Int2& value, int n, int offset); - - void executeHost(b3AlignedObjectArray& src, const int value, int n, int offset); - - // void execute(b3OpenCLArray& src, const b3Int4& value, int n, int offset = 0); -}; - -#endif //B3_FILL_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp deleted file mode 100644 index c97d02eb45a..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp +++ /dev/null @@ -1,296 +0,0 @@ -#include "b3LauncherCL.h" - -bool gDebugLauncherCL = false; - -b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name) - : m_commandQueue(queue), - m_kernel(kernel), - m_idx(0), - m_enableSerialization(false), - m_name(name) -{ - if (gDebugLauncherCL) - { - static int counter = 0; - printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name); - } - - m_serializationSizeInBytes = sizeof(int); -} - -b3LauncherCL::~b3LauncherCL() -{ - for (int i = 0; i < m_arrays.size(); i++) - { - delete (m_arrays[i]); - } - - m_arrays.clear(); - if (gDebugLauncherCL) - { - static int counter = 0; - printf("[%d] Finished launching OpenCL kernel %s\n", counter++, m_name); - } -} - -void b3LauncherCL::setBuffer(cl_mem clBuffer) -{ - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo(kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert(err == CL_SUCCESS); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes += sizeof(b3KernelArgData); - m_serializationSizeInBytes += param_value; - } - cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &clBuffer); - b3Assert(status == CL_SUCCESS); -} - -void b3LauncherCL::setBuffers(b3BufferInfoCL* buffInfo, int n) -{ - for (int i = 0; i < n; i++) - { - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = buffInfo[i].m_clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo(kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert(err == CL_SUCCESS); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes += sizeof(b3KernelArgData); - m_serializationSizeInBytes += param_value; - } - cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer); - b3Assert(status == CL_SUCCESS); - } -} - -struct b3KernelArgDataUnaligned -{ - int m_isBuffer; - int m_argIndex; - int m_argSizeInBytes; - int m_unusedPadding; - union { - cl_mem m_clBuffer; - unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; - }; -}; -#include - -int b3LauncherCL::deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx) -{ - int index = 0; - - int numArguments = *(int*)&buf[index]; - index += sizeof(int); - - for (int i = 0; i < numArguments; i++) - { - b3KernelArgDataUnaligned* arg = (b3KernelArgDataUnaligned*)&buf[index]; - - index += sizeof(b3KernelArgData); - if (arg->m_isBuffer) - { - b3OpenCLArray* clData = new b3OpenCLArray(ctx, m_commandQueue, arg->m_argSizeInBytes); - clData->resize(arg->m_argSizeInBytes); - - clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes); - - arg->m_clBuffer = clData->getBufferCL(); - - m_arrays.push_back(clData); - - cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer); - b3Assert(status == CL_SUCCESS); - index += arg->m_argSizeInBytes; - } - else - { - cl_int status = clSetKernelArg(m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData); - b3Assert(status == CL_SUCCESS); - } - b3KernelArgData b; - memcpy(&b, arg, sizeof(b3KernelArgDataUnaligned)); - m_kernelArguments.push_back(b); - } - m_serializationSizeInBytes = index; - return index; -} - -int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx) -{ - int index = 0; - - int numArguments = *(int*)&goldBuffer[index]; - index += sizeof(int); - - if (numArguments != m_kernelArguments.size()) - { - printf("failed validation: expected %d arguments, found %d\n", numArguments, m_kernelArguments.size()); - return -1; - } - - for (int ii = 0; ii < numArguments; ii++) - { - b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index]; - - if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes) - { - printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n", ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes); - return -2; - } - - { - int expected = argGold->m_isBuffer; - int found = m_kernelArguments[ii].m_isBuffer; - - if (expected != found) - { - printf("failed validation: argument %d isBuffer expected: %d, found %d\n", ii, expected, found); - return -3; - } - } - index += sizeof(b3KernelArgData); - - if (argGold->m_isBuffer) - { - unsigned char* memBuf = (unsigned char*)malloc(m_kernelArguments[ii].m_argSizeInBytes); - unsigned char* goldBuf = &goldBuffer[index]; - for (int j = 0; j < m_kernelArguments[j].m_argSizeInBytes; j++) - { - memBuf[j] = 0xaa; - } - - cl_int status = 0; - status = clEnqueueReadBuffer(m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes, - memBuf, 0, 0, 0); - b3Assert(status == CL_SUCCESS); - clFinish(m_commandQueue); - - for (int b = 0; b < m_kernelArguments[ii].m_argSizeInBytes; b++) - { - int expected = goldBuf[b]; - int found = memBuf[b]; - if (expected != found) - { - printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -4; - } - } - - index += argGold->m_argSizeInBytes; - } - else - { - //compare content - for (int b = 0; b < m_kernelArguments[ii].m_argSizeInBytes; b++) - { - int expected = argGold->m_argData[b]; - int found = m_kernelArguments[ii].m_argData[b]; - if (expected != found) - { - printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -5; - } - } - } - } - return index; -} - -int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity) -{ - //initialize to known values - for (int i = 0; i < destBufferCapacity; i++) - destBuffer[i] = 0xec; - - assert(destBufferCapacity >= m_serializationSizeInBytes); - - //todo: use the b3Serializer for this to allow for 32/64bit, endianness etc - int numArguments = m_kernelArguments.size(); - int curBufferSize = 0; - int* dest = (int*)&destBuffer[curBufferSize]; - *dest = numArguments; - curBufferSize += sizeof(int); - - for (int i = 0; i < this->m_kernelArguments.size(); i++) - { - b3KernelArgData* arg = (b3KernelArgData*)&destBuffer[curBufferSize]; - *arg = m_kernelArguments[i]; - curBufferSize += sizeof(b3KernelArgData); - if (arg->m_isBuffer == 1) - { - //copy the OpenCL buffer content - cl_int status = 0; - status = clEnqueueReadBuffer(m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes, - &destBuffer[curBufferSize], 0, 0, 0); - b3Assert(status == CL_SUCCESS); - clFinish(m_commandQueue); - curBufferSize += arg->m_argSizeInBytes; - } - } - return curBufferSize; -} - -void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems) -{ - int num = numWorkItems; - int buffSize = getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize + sizeof(int)]; - for (int i = 0; i < buffSize + 1; i++) - { - unsigned char* ptr = (unsigned char*)&buf[i]; - *ptr = 0xff; - } - // int actualWrite = serializeArguments(buf,buffSize); - - // unsigned char* cptr = (unsigned char*)&buf[buffSize]; - // printf("buf[buffSize] = %d\n",*cptr); - - assert(buf[buffSize] == 0xff); //check for buffer overrun - int* ptr = (int*)&buf[buffSize]; - - *ptr = num; - - FILE* f = fopen(fileName, "wb"); - fwrite(buf, buffSize + sizeof(int), 1, f); - fclose(f); - - delete[] buf; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h deleted file mode 100644 index 18e9c1db2ba..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h +++ /dev/null @@ -1,128 +0,0 @@ - -#ifndef B3_LAUNCHER_CL_H -#define B3_LAUNCHER_CL_H - -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3MinMax.h" -#include "b3OpenCLArray.h" -#include - -#define B3_DEBUG_SERIALIZE_CL - -#ifdef _WIN32 -#pragma warning(disable : 4996) -#endif -#define B3_CL_MAX_ARG_SIZE 16 -B3_ATTRIBUTE_ALIGNED16(struct) -b3KernelArgData -{ - int m_isBuffer; - int m_argIndex; - int m_argSizeInBytes; - int m_unusedPadding; - union { - cl_mem m_clBuffer; - unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; - }; -}; - -class b3LauncherCL -{ - cl_command_queue m_commandQueue; - cl_kernel m_kernel; - int m_idx; - - b3AlignedObjectArray m_kernelArguments; - int m_serializationSizeInBytes; - bool m_enableSerialization; - - const char* m_name; - -public: - b3AlignedObjectArray*> m_arrays; - - b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name); - - virtual ~b3LauncherCL(); - - void setBuffer(cl_mem clBuffer); - - void setBuffers(b3BufferInfoCL* buffInfo, int n); - - int getSerializationBufferSize() const - { - return m_serializationSizeInBytes; - } - - int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx); - - inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx); - - int serializeArguments(unsigned char* destBuffer, int destBufferCapacity); - - int getNumArguments() const - { - return m_kernelArguments.size(); - } - - b3KernelArgData getArgument(int index) - { - return m_kernelArguments[index]; - } - - void serializeToFile(const char* fileName, int numWorkItems); - - template - inline void setConst(const T& consts) - { - int sz = sizeof(T); - b3Assert(sz <= B3_CL_MAX_ARG_SIZE); - - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 0; - T* destArg = (T*)kernelArg.m_argData; - *destArg = consts; - kernelArg.m_argSizeInBytes = sizeof(T); - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes += sizeof(b3KernelArgData); - } - - cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts); - b3Assert(status == CL_SUCCESS); - } - - inline void launch1D(int numThreads, int localSize = 64) - { - launch2D(numThreads, 1, localSize, 1); - } - - inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY) - { - size_t gRange[3] = {1, 1, 1}; - size_t lRange[3] = {1, 1, 1}; - lRange[0] = localSizeX; - lRange[1] = localSizeY; - gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1)); - gRange[0] *= lRange[0]; - gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1)); - gRange[1] *= lRange[1]; - - cl_int status = clEnqueueNDRangeKernel(m_commandQueue, - m_kernel, 2, NULL, gRange, lRange, 0, 0, 0); - if (status != CL_SUCCESS) - { - printf("Error: OpenCL status = %d\n", status); - } - b3Assert(status == CL_SUCCESS); - } - - void enableSerialization(bool serialize) - { - m_enableSerialization = serialize; - } -}; - -#endif //B3_LAUNCHER_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h deleted file mode 100644 index e837cceb665..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h +++ /dev/null @@ -1,300 +0,0 @@ -#ifndef B3_OPENCL_ARRAY_H -#define B3_OPENCL_ARRAY_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -template -class b3OpenCLArray -{ - size_t m_size; - size_t m_capacity; - cl_mem m_clBuffer; - - cl_context m_clContext; - cl_command_queue m_commandQueue; - - bool m_ownsMemory; - - bool m_allowGrowingCapacity; - - void deallocate() - { - if (m_clBuffer && m_ownsMemory) - { - clReleaseMemObject(m_clBuffer); - } - m_clBuffer = 0; - m_capacity = 0; - } - - b3OpenCLArray& operator=(const b3OpenCLArray& src); - - B3_FORCE_INLINE size_t allocSize(size_t size) - { - return (size ? size * 2 : 1); - } - -public: - b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity = 0, bool allowGrowingCapacity = true) - : m_size(0), m_capacity(0), m_clBuffer(0), m_clContext(ctx), m_commandQueue(queue), m_ownsMemory(true), m_allowGrowingCapacity(true) - { - if (initialCapacity) - { - reserve(initialCapacity); - } - m_allowGrowingCapacity = allowGrowingCapacity; - } - - ///this is an error-prone method with no error checking, be careful! - void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements) - { - deallocate(); - m_ownsMemory = false; - m_allowGrowingCapacity = false; - m_clBuffer = buffer; - m_size = sizeInElements; - m_capacity = sizeInElements; - } - - // we could enable this assignment, but need to make sure to avoid accidental deep copies - // b3OpenCLArray& operator=(const b3AlignedObjectArray& src) - // { - // copyFromArray(src); - // return *this; - // } - - cl_mem getBufferCL() const - { - return m_clBuffer; - } - - virtual ~b3OpenCLArray() - { - deallocate(); - m_size = 0; - m_capacity = 0; - } - - B3_FORCE_INLINE bool push_back(const T& _Val, bool waitForCompletion = true) - { - bool result = true; - size_t sz = size(); - if (sz == capacity()) - { - result = reserve(allocSize(size())); - } - copyFromHostPointer(&_Val, 1, sz, waitForCompletion); - m_size++; - return result; - } - - B3_FORCE_INLINE T forcedAt(size_t n) const - { - b3Assert(n >= 0); - b3Assert(n < capacity()); - T elem; - copyToHostPointer(&elem, 1, n, true); - return elem; - } - - B3_FORCE_INLINE T at(size_t n) const - { - b3Assert(n >= 0); - b3Assert(n < size()); - T elem; - copyToHostPointer(&elem, 1, n, true); - return elem; - } - - B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents = true) - { - bool result = true; - size_t curSize = size(); - - if (newsize < curSize) - { - //leave the OpenCL memory for now - } - else - { - if (newsize > size()) - { - result = reserve(newsize, copyOldContents); - } - - //leave new data uninitialized (init in debug mode?) - //for (size_t i=curSize;i 0); - b3Assert(numElements <= m_size); - - size_t srcOffsetBytes = sizeof(T) * firstElem; - size_t dstOffsetInBytes = sizeof(T) * dstOffsetInElems; - - status = clEnqueueCopyBuffer(m_commandQueue, m_clBuffer, destination, - srcOffsetBytes, dstOffsetInBytes, sizeof(T) * numElements, 0, 0, 0); - - b3Assert(status == CL_SUCCESS); - } - - void copyFromHost(const b3AlignedObjectArray& srcArray, bool waitForCompletion = true) - { - size_t newSize = srcArray.size(); - - bool copyOldContents = false; - resize(newSize, copyOldContents); - if (newSize) - copyFromHostPointer(&srcArray[0], newSize, 0, waitForCompletion); - } - - void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem = 0, bool waitForCompletion = true) - { - b3Assert(numElems + destFirstElem <= capacity()); - - if (numElems + destFirstElem) - { - cl_int status = 0; - size_t sizeInBytes = sizeof(T) * numElems; - status = clEnqueueWriteBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * destFirstElem, sizeInBytes, - src, 0, 0, 0); - b3Assert(status == CL_SUCCESS); - if (waitForCompletion) - clFinish(m_commandQueue); - } - else - { - b3Error("copyFromHostPointer invalid range\n"); - } - } - - void copyToHost(b3AlignedObjectArray& destArray, bool waitForCompletion = true) const - { - destArray.resize(this->size()); - if (size()) - copyToHostPointer(&destArray[0], size(), 0, waitForCompletion); - } - - void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem = 0, bool waitForCompletion = true) const - { - b3Assert(numElem + srcFirstElem <= capacity()); - - if (numElem + srcFirstElem <= capacity()) - { - cl_int status = 0; - status = clEnqueueReadBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * srcFirstElem, sizeof(T) * numElem, - destPtr, 0, 0, 0); - b3Assert(status == CL_SUCCESS); - - if (waitForCompletion) - clFinish(m_commandQueue); - } - else - { - b3Error("copyToHostPointer invalid range\n"); - } - } - - void copyFromOpenCLArray(const b3OpenCLArray& src) - { - size_t newSize = src.size(); - resize(newSize); - if (size()) - { - src.copyToCL(m_clBuffer, size()); - } - } -}; - -#endif //B3_OPENCL_ARRAY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp deleted file mode 100644 index 822b5116334..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "b3PrefixScanCL.h" -#include "b3FillCL.h" -#define B3_PREFIXSCAN_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl" - -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/PrefixScanKernelsCL.h" - -b3PrefixScanCL::b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) - : m_commandQueue(queue) -{ - const char* scanKernelSource = prefixScanKernelsCL; - cl_int pErrNum; - char* additionalMacros = 0; - - m_workBuffer = new b3OpenCLArray(ctx, queue, size); - cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, scanKernelSource, &pErrNum, additionalMacros, B3_PREFIXSCAN_PROG_PATH); - b3Assert(scanProg); - - m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_localScanKernel); - m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_blockSumKernel); - m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_propagationKernel); -} - -b3PrefixScanCL::~b3PrefixScanCL() -{ - delete m_workBuffer; - clReleaseKernel(m_localScanKernel); - clReleaseKernel(m_blockSumKernel); - clReleaseKernel(m_propagationKernel); -} - -template -T b3NextPowerOf2(T n) -{ - n -= 1; - for (int i = 0; i < sizeof(T) * 8; i++) - n = n | (n >> i); - return n + 1; -} - -void b3PrefixScanCL::execute(b3OpenCLArray& src, b3OpenCLArray& dst, int n, unsigned int* sum) -{ - // b3Assert( data->m_option == EXCLUSIVE ); - const unsigned int numBlocks = (const unsigned int)((n + BLOCK_SIZE * 2 - 1) / (BLOCK_SIZE * 2)); - - dst.resize(src.size()); - m_workBuffer->resize(src.size()); - - b3Int4 constBuffer; - constBuffer.x = n; - constBuffer.y = numBlocks; - constBuffer.z = (int)b3NextPowerOf2(numBlocks); - - b3OpenCLArray* srcNative = &src; - b3OpenCLArray* dstNative = &dst; - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(srcNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_localScanKernel, "m_localScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(numBlocks * BLOCK_SIZE, BLOCK_SIZE); - } - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_blockSumKernel, "m_blockSumKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(BLOCK_SIZE, BLOCK_SIZE); - } - - if (numBlocks > 1) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_propagationKernel, "m_propagationKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D((numBlocks - 1) * BLOCK_SIZE, BLOCK_SIZE); - } - - if (sum) - { - clFinish(m_commandQueue); - dstNative->copyToHostPointer(sum, 1, n - 1, true); - } -} - -void b3PrefixScanCL::executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, unsigned int* sum) -{ - unsigned int s = 0; - //if( data->m_option == EXCLUSIVE ) - { - for (int i = 0; i < n; i++) - { - dst[i] = s; - s += src[i]; - } - } - /*else - { - for(int i=0; i* m_workBuffer; - -public: - b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size = 0); - - virtual ~b3PrefixScanCL(); - - void execute(b3OpenCLArray& src, b3OpenCLArray& dst, int n, unsigned int* sum = 0); - void executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, unsigned int* sum = 0); -}; - -#endif //B3_PREFIX_SCAN_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp deleted file mode 100644 index 1cac97c9888..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "b3PrefixScanFloat4CL.h" -#include "b3FillCL.h" -#define B3_PREFIXSCAN_FLOAT4_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl" - -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/PrefixScanKernelsFloat4CL.h" - -b3PrefixScanFloat4CL::b3PrefixScanFloat4CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) - : m_commandQueue(queue) -{ - const char* scanKernelSource = prefixScanKernelsFloat4CL; - cl_int pErrNum; - char* additionalMacros = 0; - - m_workBuffer = new b3OpenCLArray(ctx, queue, size); - cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, scanKernelSource, &pErrNum, additionalMacros, B3_PREFIXSCAN_FLOAT4_PROG_PATH); - b3Assert(scanProg); - - m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_localScanKernel); - m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_blockSumKernel); - m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_propagationKernel); -} - -b3PrefixScanFloat4CL::~b3PrefixScanFloat4CL() -{ - delete m_workBuffer; - clReleaseKernel(m_localScanKernel); - clReleaseKernel(m_blockSumKernel); - clReleaseKernel(m_propagationKernel); -} - -template -T b3NextPowerOf2(T n) -{ - n -= 1; - for (int i = 0; i < sizeof(T) * 8; i++) - n = n | (n >> i); - return n + 1; -} - -void b3PrefixScanFloat4CL::execute(b3OpenCLArray& src, b3OpenCLArray& dst, int n, b3Vector3* sum) -{ - // b3Assert( data->m_option == EXCLUSIVE ); - const unsigned int numBlocks = (const unsigned int)((n + BLOCK_SIZE * 2 - 1) / (BLOCK_SIZE * 2)); - - dst.resize(src.size()); - m_workBuffer->resize(src.size()); - - b3Int4 constBuffer; - constBuffer.x = n; - constBuffer.y = numBlocks; - constBuffer.z = (int)b3NextPowerOf2(numBlocks); - - b3OpenCLArray* srcNative = &src; - b3OpenCLArray* dstNative = &dst; - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(srcNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_localScanKernel, "m_localScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(numBlocks * BLOCK_SIZE, BLOCK_SIZE); - } - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_blockSumKernel, "m_blockSumKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(BLOCK_SIZE, BLOCK_SIZE); - } - - if (numBlocks > 1) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_propagationKernel, "m_propagationKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D((numBlocks - 1) * BLOCK_SIZE, BLOCK_SIZE); - } - - if (sum) - { - clFinish(m_commandQueue); - dstNative->copyToHostPointer(sum, 1, n - 1, true); - } -} - -void b3PrefixScanFloat4CL::executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, b3Vector3* sum) -{ - b3Vector3 s = b3MakeVector3(0, 0, 0); - //if( data->m_option == EXCLUSIVE ) - { - for (int i = 0; i < n; i++) - { - dst[i] = s; - s += src[i]; - } - } - /*else - { - for(int i=0; i* m_workBuffer; - -public: - b3PrefixScanFloat4CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size = 0); - - virtual ~b3PrefixScanFloat4CL(); - - void execute(b3OpenCLArray& src, b3OpenCLArray& dst, int n, b3Vector3* sum = 0); - void executeHost(b3AlignedObjectArray& src, b3AlignedObjectArray& dst, int n, b3Vector3* sum); -}; - -#endif //B3_PREFIX_SCAN_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp deleted file mode 100644 index e86af6583f1..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp +++ /dev/null @@ -1,646 +0,0 @@ - -#include "b3RadixSort32CL.h" -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3PrefixScanCL.h" -#include "b3FillCL.h" - -#define RADIXSORT32_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl" - -#include "kernels/RadixSort32KernelsCL.h" - -b3RadixSort32CL::b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity) - : m_commandQueue(queue) -{ - b3OpenCLDeviceInfo info; - b3OpenCLUtils::getDeviceInfo(device, &info); - m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU) != 0; - - m_workBuffer1 = new b3OpenCLArray(ctx, queue); - m_workBuffer2 = new b3OpenCLArray(ctx, queue); - m_workBuffer3 = new b3OpenCLArray(ctx, queue); - m_workBuffer3a = new b3OpenCLArray(ctx, queue); - m_workBuffer4 = new b3OpenCLArray(ctx, queue); - m_workBuffer4a = new b3OpenCLArray(ctx, queue); - - if (initialCapacity > 0) - { - m_workBuffer1->resize(initialCapacity); - m_workBuffer3->resize(initialCapacity); - m_workBuffer3a->resize(initialCapacity); - m_workBuffer4->resize(initialCapacity); - m_workBuffer4a->resize(initialCapacity); - } - - m_scan = new b3PrefixScanCL(ctx, device, queue); - m_fill = new b3FillCL(ctx, device, queue); - - const char* additionalMacros = ""; - - cl_int pErrNum; - const char* kernelSource = radixSort32KernelsCL; - - cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, RADIXSORT32_PATH); - b3Assert(sortProg); - - m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_streamCountSortDataKernel); - - m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_streamCountKernel); - - if (m_deviceCPU) - { - m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterSortDataKernel); - m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterKernel); - } - else - { - m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterSortDataKernel); - m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterKernel); - } - - m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_prefixScanKernel); -} - -b3RadixSort32CL::~b3RadixSort32CL() -{ - delete m_scan; - delete m_fill; - delete m_workBuffer1; - delete m_workBuffer2; - delete m_workBuffer3; - delete m_workBuffer3a; - delete m_workBuffer4; - delete m_workBuffer4a; - - clReleaseKernel(m_streamCountSortDataKernel); - clReleaseKernel(m_streamCountKernel); - clReleaseKernel(m_sortAndScatterSortDataKernel); - clReleaseKernel(m_sortAndScatterKernel); - clReleaseKernel(m_prefixScanKernel); -} - -void b3RadixSort32CL::executeHost(b3AlignedObjectArray& inout, int sortBits /* = 32 */) -{ - int n = inout.size(); - const int BITS_PER_PASS = 8; - const int NUM_TABLES = (1 << BITS_PER_PASS); - - int tables[NUM_TABLES]; - int counter[NUM_TABLES]; - - b3SortData* src = &inout[0]; - b3AlignedObjectArray workbuffer; - workbuffer.resize(inout.size()); - b3SortData* dst = &workbuffer[0]; - - int count = 0; - for (int startBit = 0; startBit < sortBits; startBit += BITS_PER_PASS) - { - for (int i = 0; i < NUM_TABLES; i++) - { - tables[i] = 0; - } - - for (int i = 0; i < n; i++) - { - int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES - 1); - tables[tableIdx]++; - } -//#define TEST -#ifdef TEST - printf("histogram size=%d\n", NUM_TABLES); - for (int i = 0; i < NUM_TABLES; i++) - { - if (tables[i] != 0) - { - printf("tables[%d]=%d]\n", i, tables[i]); - } - } -#endif //TEST \ - // prefix scan - int sum = 0; - for (int i = 0; i < NUM_TABLES; i++) - { - int iData = tables[i]; - tables[i] = sum; - sum += iData; - counter[i] = 0; - } - - // distribute - for (int i = 0; i < n; i++) - { - int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES - 1); - - dst[tables[tableIdx] + counter[tableIdx]] = src[i]; - counter[tableIdx]++; - } - - b3Swap(src, dst); - count++; - } - - if (count & 1) - { - b3Assert(0); //need to copy - } -} - -void b3RadixSort32CL::executeHost(b3OpenCLArray& keyValuesInOut, int sortBits /* = 32 */) -{ - b3AlignedObjectArray inout; - keyValuesInOut.copyToHost(inout); - - executeHost(inout, sortBits); - - keyValuesInOut.copyFromHost(inout); -} - -void b3RadixSort32CL::execute(b3OpenCLArray& keysIn, b3OpenCLArray& keysOut, b3OpenCLArray& valuesIn, - b3OpenCLArray& valuesOut, int n, int sortBits) -{ -} - -//#define DEBUG_RADIXSORT -//#define DEBUG_RADIXSORT2 - -void b3RadixSort32CL::execute(b3OpenCLArray& keyValuesInOut, int sortBits /* = 32 */) -{ - int originalSize = keyValuesInOut.size(); - int workingSize = originalSize; - - int dataAlignment = DATA_ALIGNMENT; - -#ifdef DEBUG_RADIXSORT2 - b3AlignedObjectArray test2; - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } -#endif //DEBUG_RADIXSORT2 - - b3OpenCLArray* src = 0; - - if (workingSize % dataAlignment) - { - workingSize += dataAlignment - (workingSize % dataAlignment); - m_workBuffer4->copyFromOpenCLArray(keyValuesInOut); - m_workBuffer4->resize(workingSize); - b3SortData fillValue; - fillValue.m_key = 0xffffffff; - fillValue.m_value = 0xffffffff; - -#define USE_BTFILL -#ifdef USE_BTFILL - m_fill->execute((b3OpenCLArray&)*m_workBuffer4, (b3Int2&)fillValue, workingSize - originalSize, originalSize); -#else - //fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side) - - for (int i = originalSize; i < workingSize; i++) - { - m_workBuffer4->copyFromHostPointer(&fillValue, 1, i); - } -#endif //USE_BTFILL - - src = m_workBuffer4; - } - else - { - src = &keyValuesInOut; - m_workBuffer4->resize(0); - } - - b3Assert(workingSize % DATA_ALIGNMENT == 0); - int minCap = NUM_BUCKET * NUM_WGS; - - int n = workingSize; - - m_workBuffer1->resize(minCap); - m_workBuffer3->resize(workingSize); - - // ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); - b3Assert(BITS_PER_PASS == 4); - b3Assert(WG_SIZE == 64); - b3Assert((sortBits & 0x3) == 0); - - b3OpenCLArray* dst = m_workBuffer3; - - b3OpenCLArray* srcHisto = m_workBuffer1; - b3OpenCLArray* destHisto = m_workBuffer2; - - int nWGs = NUM_WGS; - b3ConstData cdata; - - { - int blockSize = ELEMENTS_PER_WORK_ITEM * WG_SIZE; //set at 256 - int nBlocks = (n + blockSize - 1) / (blockSize); - cdata.m_n = n; - cdata.m_nWGs = NUM_WGS; - cdata.m_startBit = 0; - cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1) / cdata.m_nWGs; - if (nBlocks < NUM_WGS) - { - cdata.m_nBlocksPerWG = 1; - nWGs = nBlocks; - } - } - - int count = 0; - for (int ib = 0; ib < sortBits; ib += 4) - { -#ifdef DEBUG_RADIXSORT2 - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - if (test2[i].m_key != test2[i].m_value) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } - } -#endif //DEBUG_RADIXSORT2 - - cdata.m_startBit = ib; - - if (src->size()) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel, "m_streamCountSortDataKernel"); - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - - int num = NUM_WGS * WG_SIZE; - launcher.launch1D(num, WG_SIZE); - } - -#ifdef DEBUG_RADIXSORT - b3AlignedObjectArray testHist; - srcHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n", ib, testHist.size()); - for (int i = 0; i < testHist.size(); i++) - { - if (testHist[i] != 0) - printf("testHist[%d]=%d\n", i, testHist[i]); - } -#endif //DEBUG_RADIXSORT - -//fast prefix scan is not working properly on Mac OSX yet -#ifdef __APPLE__ - bool fastScan = false; -#else - bool fastScan = !m_deviceCPU; //only use fast scan on GPU -#endif - - if (fastScan) - { // prefix scan group histogram - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_prefixScanKernel, "m_prefixScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(128, 128); - destHisto = srcHisto; - } - else - { - //unsigned int sum; //for debugging - m_scan->execute(*srcHisto, *destHisto, 1920, 0); //,&sum); - } - -#ifdef DEBUG_RADIXSORT - destHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n", ib, testHist.size()); - for (int i = 0; i < testHist.size(); i++) - { - if (testHist[i] != 0) - printf("testHist[%d]=%d\n", i, testHist[i]); - } - - for (int i = 0; i < testHist.size(); i += NUM_WGS) - { - printf("testHist[%d]=%d\n", i / NUM_WGS, testHist[i]); - } - -#endif //DEBUG_RADIXSORT - -#define USE_GPU -#ifdef USE_GPU - - if (src->size()) - { // local sort and distribute - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(destHisto->getBufferCL(), true), b3BufferInfoCL(dst->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_sortAndScatterSortDataKernel, "m_sortAndScatterSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nWGs * WG_SIZE, WG_SIZE); - } -#else - { -#define NUM_TABLES 16 -//#define SEQUENTIAL -#ifdef SEQUENTIAL - int counter2[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int tables[NUM_TABLES]; - int startBit = ib; - - destHisto->copyToHost(testHist); - b3AlignedObjectArray srcHost; - b3AlignedObjectArray dstHost; - dstHost.resize(src->size()); - - src->copyToHost(srcHost); - - for (int i = 0; i < NUM_TABLES; i++) - { - tables[i] = testHist[i * NUM_WGS]; - } - - // distribute - for (int i = 0; i < n; i++) - { - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES - 1); - - dstHost[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; - counter2[tableIdx]++; - } - -#else - - int counter2[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - int tables[NUM_TABLES]; - b3AlignedObjectArray dstHostOK; - dstHostOK.resize(src->size()); - - destHisto->copyToHost(testHist); - b3AlignedObjectArray srcHost; - src->copyToHost(srcHost); - - int blockSize = 256; - int nBlocksPerWG = cdata.m_nBlocksPerWG; - int startBit = ib; - - { - for (int i = 0; i < NUM_TABLES; i++) - { - tables[i] = testHist[i * NUM_WGS]; - } - - // distribute - for (int i = 0; i < n; i++) - { - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES - 1); - - dstHostOK[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; - counter2[tableIdx]++; - } - } - - b3AlignedObjectArray dstHost; - dstHost.resize(src->size()); - - int counter[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - for (int wgIdx = 0; wgIdx < NUM_WGS; wgIdx++) - { - int counter[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - int nBlocks = (n) / blockSize - nBlocksPerWG * wgIdx; - - for (int iblock = 0; iblock < b3Min(cdata.m_nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx = 0; lIdx < 64; lIdx++) - { - int addr = iblock * blockSize + blockSize * cdata.m_nBlocksPerWG * wgIdx + ELEMENTS_PER_WORK_ITEM * lIdx; - - // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for (int j = 0; j < ELEMENTS_PER_WORK_ITEM; j++) - { - if (addr + j < n) - { - // printf ("addr+j=%d\n", addr+j); - - int i = addr + j; - - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES - 1); - - int destIndex = testHist[tableIdx * NUM_WGS + wgIdx] + counter[tableIdx]; - - b3SortData ok = dstHostOK[destIndex]; - - if (ok.m_key != srcHost[i].m_key) - { - printf("ok.m_key = %d, srcHost[i].m_key = %d\n", ok.m_key, srcHost[i].m_key); - printf("(ok.m_value = %d, srcHost[i].m_value = %d)\n", ok.m_value, srcHost[i].m_value); - } - if (ok.m_value != srcHost[i].m_value) - { - printf("ok.m_value = %d, srcHost[i].m_value = %d\n", ok.m_value, srcHost[i].m_value); - printf("(ok.m_key = %d, srcHost[i].m_key = %d)\n", ok.m_key, srcHost[i].m_key); - } - - dstHost[destIndex] = srcHost[i]; - counter[tableIdx]++; - } - } - } - } - } - -#endif //SEQUENTIAL - - dst->copyFromHost(dstHost); - } -#endif //USE_GPU - -#ifdef DEBUG_RADIXSORT - destHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n", ib, testHist.size()); - for (int i = 0; i < testHist.size(); i++) - { - if (testHist[i] != 0) - printf("testHist[%d]=%d\n", i, testHist[i]); - } -#endif //DEBUG_RADIXSORT - b3Swap(src, dst); - b3Swap(srcHisto, destHisto); - -#ifdef DEBUG_RADIXSORT2 - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - if (test2[i].m_key != test2[i].m_value) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } - } -#endif //DEBUG_RADIXSORT2 - - count++; - } - - if (count & 1) - { - b3Assert(0); //need to copy from workbuffer to keyValuesInOut - } - - if (m_workBuffer4->size()) - { - m_workBuffer4->resize(originalSize); - keyValuesInOut.copyFromOpenCLArray(*m_workBuffer4); - } - -#ifdef DEBUG_RADIXSORT - keyValuesInOut.copyToHost(test2); - - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } -#endif -} - -void b3RadixSort32CL::execute(b3OpenCLArray& keysInOut, int sortBits /* = 32 */) -{ - int originalSize = keysInOut.size(); - int workingSize = originalSize; - - int dataAlignment = DATA_ALIGNMENT; - - b3OpenCLArray* src = 0; - - if (workingSize % dataAlignment) - { - workingSize += dataAlignment - (workingSize % dataAlignment); - m_workBuffer4a->copyFromOpenCLArray(keysInOut); - m_workBuffer4a->resize(workingSize); - unsigned int fillValue = 0xffffffff; - - m_fill->execute(*m_workBuffer4a, fillValue, workingSize - originalSize, originalSize); - - src = m_workBuffer4a; - } - else - { - src = &keysInOut; - m_workBuffer4a->resize(0); - } - - b3Assert(workingSize % DATA_ALIGNMENT == 0); - int minCap = NUM_BUCKET * NUM_WGS; - - int n = workingSize; - - m_workBuffer1->resize(minCap); - m_workBuffer3->resize(workingSize); - m_workBuffer3a->resize(workingSize); - - // ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); - b3Assert(BITS_PER_PASS == 4); - b3Assert(WG_SIZE == 64); - b3Assert((sortBits & 0x3) == 0); - - b3OpenCLArray* dst = m_workBuffer3a; - - b3OpenCLArray* srcHisto = m_workBuffer1; - b3OpenCLArray* destHisto = m_workBuffer2; - - int nWGs = NUM_WGS; - b3ConstData cdata; - - { - int blockSize = ELEMENTS_PER_WORK_ITEM * WG_SIZE; //set at 256 - int nBlocks = (n + blockSize - 1) / (blockSize); - cdata.m_n = n; - cdata.m_nWGs = NUM_WGS; - cdata.m_startBit = 0; - cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1) / cdata.m_nWGs; - if (nBlocks < NUM_WGS) - { - cdata.m_nBlocksPerWG = 1; - nWGs = nBlocks; - } - } - - int count = 0; - for (int ib = 0; ib < sortBits; ib += 4) - { - cdata.m_startBit = ib; - - if (src->size()) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_streamCountKernel, "m_streamCountKernel"); - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - - int num = NUM_WGS * WG_SIZE; - launcher.launch1D(num, WG_SIZE); - } - -//fast prefix scan is not working properly on Mac OSX yet -#ifdef __APPLE__ - bool fastScan = false; -#else - bool fastScan = !m_deviceCPU; -#endif - - if (fastScan) - { // prefix scan group histogram - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_prefixScanKernel, "m_prefixScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(128, 128); - destHisto = srcHisto; - } - else - { - //unsigned int sum; //for debugging - m_scan->execute(*srcHisto, *destHisto, 1920, 0); //,&sum); - } - - if (src->size()) - { // local sort and distribute - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(destHisto->getBufferCL(), true), b3BufferInfoCL(dst->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_sortAndScatterKernel, "m_sortAndScatterKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nWGs * WG_SIZE, WG_SIZE); - } - - b3Swap(src, dst); - b3Swap(srcHisto, destHisto); - - count++; - } - - if (count & 1) - { - b3Assert(0); //need to copy from workbuffer to keyValuesInOut - } - - if (m_workBuffer4a->size()) - { - m_workBuffer4a->resize(originalSize); - keysInOut.copyFromOpenCLArray(*m_workBuffer4a); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h deleted file mode 100644 index 69caf182d7f..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h +++ /dev/null @@ -1,84 +0,0 @@ - -#ifndef B3_RADIXSORT32_H -#define B3_RADIXSORT32_H - -#include "b3OpenCLArray.h" - -struct b3SortData -{ - union { - unsigned int m_key; - unsigned int x; - }; - - union { - unsigned int m_value; - unsigned int y; - }; -}; -#include "b3BufferInfoCL.h" - -class b3RadixSort32CL -{ - b3OpenCLArray* m_workBuffer1; - b3OpenCLArray* m_workBuffer2; - - b3OpenCLArray* m_workBuffer3; - b3OpenCLArray* m_workBuffer4; - - b3OpenCLArray* m_workBuffer3a; - b3OpenCLArray* m_workBuffer4a; - - cl_command_queue m_commandQueue; - - cl_kernel m_streamCountSortDataKernel; - cl_kernel m_streamCountKernel; - - cl_kernel m_prefixScanKernel; - cl_kernel m_sortAndScatterSortDataKernel; - cl_kernel m_sortAndScatterKernel; - - bool m_deviceCPU; - - class b3PrefixScanCL* m_scan; - class b3FillCL* m_fill; - -public: - struct b3ConstData - { - int m_n; - int m_nWGs; - int m_startBit; - int m_nBlocksPerWG; - }; - enum - { - DATA_ALIGNMENT = 256, - WG_SIZE = 64, - BLOCK_SIZE = 256, - ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE / WG_SIZE), - BITS_PER_PASS = 4, - NUM_BUCKET = (1 << BITS_PER_PASS), - // if you change this, change nPerWI in kernel as well - NUM_WGS = 20 * 6, // cypress - // NUM_WGS = 24*6, // cayman - // NUM_WGS = 32*4, // nv - }; - -private: -public: - b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity = 0); - - virtual ~b3RadixSort32CL(); - - void execute(b3OpenCLArray& keysIn, b3OpenCLArray& keysOut, b3OpenCLArray& valuesIn, - b3OpenCLArray& valuesOut, int n, int sortBits = 32); - - ///keys only - void execute(b3OpenCLArray& keysInOut, int sortBits = 32); - - void execute(b3OpenCLArray& keyValuesInOut, int sortBits = 32); - void executeHost(b3OpenCLArray& keyValuesInOut, int sortBits = 32); - void executeHost(b3AlignedObjectArray& keyValuesInOut, int sortBits = 32); -}; -#endif //B3_RADIXSORT32_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl deleted file mode 100644 index f3b4a1e8a79..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl +++ /dev/null @@ -1,106 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -typedef struct -{ - u32 m_key; - u32 m_value; -}SortData; - - - -typedef struct -{ - u32 m_nSrc; - u32 m_nDst; - u32 m_padding[2]; -} ConstBuffer; - - - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nSrc ) - { - SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1); - SortData end; end.m_key = nDst; end.m_value = nDst; - - SortData iData = (gIdx==0)? first: src[gIdx-1]; - SortData jData = (gIdx==nSrc)? end: src[gIdx]; - - if( iData.m_key != jData.m_key ) - { -// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++) - u32 k = jData.m_key; - { - dst[k] = gIdx; - } - } - } -} - - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX+1; - - if( gIdx < nSrc+1 ) - { - SortData first; first.m_key = 0; first.m_value = 0; - SortData end; end.m_key = nDst; end.m_value = nDst; - - SortData iData = src[gIdx-1]; - SortData jData = (gIdx==nSrc)? end: src[gIdx]; - - if( iData.m_key != jData.m_key ) - { - u32 k = iData.m_key; - { - dst[k] = gIdx; - } - } - } -} - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX; - - - if( gIdx < nDst ) - { - C[gIdx] = A[gIdx] - B[gIdx]; - } -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h deleted file mode 100644 index 1758dd41e3e..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h +++ /dev/null @@ -1,86 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* boundSearchKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "typedef struct\n" - "{\n" - " u32 m_key; \n" - " u32 m_value;\n" - "}SortData;\n" - "typedef struct\n" - "{\n" - " u32 m_nSrc;\n" - " u32 m_nDst;\n" - " u32 m_padding[2];\n" - "} ConstBuffer;\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "__kernel\n" - "void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, \n" - " unsigned int nSrc, unsigned int nDst)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nSrc )\n" - " {\n" - " SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);\n" - " SortData end; end.m_key = nDst; end.m_value = nDst;\n" - " SortData iData = (gIdx==0)? first: src[gIdx-1];\n" - " SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" - " if( iData.m_key != jData.m_key )\n" - " {\n" - "// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n" - " u32 k = jData.m_key;\n" - " {\n" - " dst[k] = gIdx;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "__kernel\n" - "void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, \n" - " unsigned int nSrc, unsigned int nDst)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX+1;\n" - " if( gIdx < nSrc+1 )\n" - " {\n" - " SortData first; first.m_key = 0; first.m_value = 0;\n" - " SortData end; end.m_key = nDst; end.m_value = nDst;\n" - " SortData iData = src[gIdx-1];\n" - " SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" - " if( iData.m_key != jData.m_key )\n" - " {\n" - " u32 k = iData.m_key;\n" - " {\n" - " dst[k] = gIdx;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "__kernel\n" - "void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, \n" - " unsigned int nSrc, unsigned int nDst)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nDst )\n" - " {\n" - " C[gIdx] = A[gIdx] - B[gIdx];\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl deleted file mode 100644 index 2eee5752ecf..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -typedef struct -{ - int m_n; - int m_padding[3]; -} ConstBuffer; - - - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy1F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float4 a0 = src[gIdx]; - - dst[ gIdx ] = a0; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy2F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( 2*gIdx <= cb.m_n ) - { - float4 a0 = src[gIdx*2+0]; - float4 a1 = src[gIdx*2+1]; - - dst[ gIdx*2+0 ] = a0; - dst[ gIdx*2+1 ] = a1; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy4F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( 4*gIdx <= cb.m_n ) - { - int idx0 = gIdx*4+0; - int idx1 = gIdx*4+1; - int idx2 = gIdx*4+2; - int idx3 = gIdx*4+3; - - float4 a0 = src[idx0]; - float4 a1 = src[idx1]; - float4 a2 = src[idx2]; - float4 a3 = src[idx3]; - - dst[ idx0 ] = a0; - dst[ idx1 ] = a1; - dst[ idx2 ] = a2; - dst[ idx3 ] = a3; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void CopyF1Kernel(__global float* dstF1, __global float* srcF1, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float a0 = srcF1[gIdx]; - - dstF1[ gIdx ] = a0; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float2 a0 = srcF2[gIdx]; - - dstF2[ gIdx ] = a0; - } -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h deleted file mode 100644 index 33c9279462f..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h +++ /dev/null @@ -1,131 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* copyKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "\n" - "#define make_uint4 (uint4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "\n" - "typedef struct\n" - "{\n" - " int m_n;\n" - " int m_padding[3];\n" - "} ConstBuffer;\n" - "\n" - "\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void Copy1F4Kernel(__global float4* dst, __global float4* src, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( gIdx < cb.m_n )\n" - " {\n" - " float4 a0 = src[gIdx];\n" - "\n" - " dst[ gIdx ] = a0;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void Copy2F4Kernel(__global float4* dst, __global float4* src, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( 2*gIdx <= cb.m_n )\n" - " {\n" - " float4 a0 = src[gIdx*2+0];\n" - " float4 a1 = src[gIdx*2+1];\n" - "\n" - " dst[ gIdx*2+0 ] = a0;\n" - " dst[ gIdx*2+1 ] = a1;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void Copy4F4Kernel(__global float4* dst, __global float4* src, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( 4*gIdx <= cb.m_n )\n" - " {\n" - " int idx0 = gIdx*4+0;\n" - " int idx1 = gIdx*4+1;\n" - " int idx2 = gIdx*4+2;\n" - " int idx3 = gIdx*4+3;\n" - "\n" - " float4 a0 = src[idx0];\n" - " float4 a1 = src[idx1];\n" - " float4 a2 = src[idx2];\n" - " float4 a3 = src[idx3];\n" - "\n" - " dst[ idx0 ] = a0;\n" - " dst[ idx1 ] = a1;\n" - " dst[ idx2 ] = a2;\n" - " dst[ idx3 ] = a3;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void CopyF1Kernel(__global float* dstF1, __global float* srcF1, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( gIdx < cb.m_n )\n" - " {\n" - " float a0 = srcF1[gIdx];\n" - "\n" - " dstF1[ gIdx ] = a0;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( gIdx < cb.m_n )\n" - " {\n" - " float2 a0 = srcF2[gIdx];\n" - "\n" - " dstF2[ gIdx ] = a0;\n" - " }\n" - "}\n" - "\n" - "\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl deleted file mode 100644 index 71c31075dd7..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -typedef struct -{ - union - { - int4 m_data; - uint4 m_unsignedData; - float m_floatData; - }; - int m_offset; - int m_n; - int m_padding[2]; -} ConstBuffer; - - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num_elements ) - { - dstInt[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num_elements ) - { - dstFloat[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt2[ gIdx + offset] = make_int2( value.x, value.y ); - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt4[ offset+gIdx ] = value; - } -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h deleted file mode 100644 index 983e6522706..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h +++ /dev/null @@ -1,90 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* fillKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define make_uint4 (uint4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "typedef struct\n" - "{\n" - " union\n" - " {\n" - " int4 m_data;\n" - " uint4 m_unsignedData;\n" - " float m_floatData;\n" - " };\n" - " int m_offset;\n" - " int m_n;\n" - " int m_padding[2];\n" - "} ConstBuffer;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num_elements )\n" - " {\n" - " dstInt[ offset+gIdx ] = value;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num_elements )\n" - " {\n" - " dstFloat[ offset+gIdx ] = value;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num )\n" - " {\n" - " dstInt[ offset+gIdx ] = value;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num )\n" - " {\n" - " dstInt2[ gIdx + offset] = make_int2( value.x, value.y );\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num )\n" - " {\n" - " dstInt4[ offset+gIdx ] = value;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl deleted file mode 100644 index c9da79854a2..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -// takahiro end -#define WG_SIZE 128 -#define m_numElems x -#define m_numBlocks y -#define m_numScanBlocks z - -/*typedef struct -{ - uint m_numElems; - uint m_numBlocks; - uint m_numScanBlocks; - uint m_padding[1]; -} ConstBuffer; -*/ - -float4 ScanExclusiveFloat4(__local float4* data, u32 n, int lIdx, int lSize) -{ - float4 blocksum; - int offset = 1; - for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1) - { - GROUP_LDS_BARRIER; - for(int iIdx=lIdx; iIdx>= 1; - for(int nActive=1; nActive>=1 ) - { - GROUP_LDS_BARRIER; - for( int iIdx = lIdx; iIdx>1; nActive>0; nActive>>=1, offset<<=1) - { - GROUP_LDS_BARRIER; - for(int iIdx=lIdx; iIdx>= 1; - for(int nActive=1; nActive>=1 ) - { - GROUP_LDS_BARRIER; - for( int iIdx = lIdx; iIdx>1; nActive>0; nActive>>=1, offset<<=1)\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for(int iIdx=lIdx; iIdx>= 1;\n" - " for(int nActive=1; nActive>=1 )\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for( int iIdx = lIdx; iIdx>1; nActive>0; nActive>>=1, offset<<=1)\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for(int iIdx=lIdx; iIdx>= 1;\n" - " for(int nActive=1; nActive>=1 )\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for( int iIdx = lIdx; iIdx 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; - GROUP_MEM_FENCE; - } - - sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - } -#else - if( lIdx < 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-1]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-4]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-8]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-16]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-32]; - GROUP_MEM_FENCE; - if( wgSize > 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; - GROUP_MEM_FENCE; - } - - sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - } -#endif - } - - GROUP_LDS_BARRIER; - - *totalSum = sorterSharedMemory[wgSize*2-1]; - u32 addValue = sorterSharedMemory[lIdx+wgSize-1]; - return addValue; -} - -//__attribute__((reqd_work_group_size(128,1,1))) -uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory ) -{ - u32 s4 = prefixScanVectorEx( &pData ); - u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 ); - return pData + make_uint4( rank, rank, rank, rank ); -} - - -//__attribute__((reqd_work_group_size(64,1,1))) -uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory ) -{ - u32 s4 = prefixScanVectorEx( &pData ); - u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 ); - return pData + make_uint4( rank, rank, rank, rank ); -} - -u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;} - -u32 bit8Scan(u32 v) -{ - return (v<<8) + (v<<16) + (v<<24); -} - -//=== - - - - -#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx] - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb ) -{ - __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - for(int i=0; i>startBit) & 0xf; -#if defined(NV_GPU) - MY_HISTOGRAM( localKey )++; -#else - AtomInc( MY_HISTOGRAM( localKey ) ); -#endif - } - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i>startBit) & 0xf; -#if defined(NV_GPU) - MY_HISTOGRAM( localKey )++; -#else - AtomInc( MY_HISTOGRAM( localKey ) ); -#endif - } - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask ); - uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) ); - u32 total; - prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData ); - { - uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3); - uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total ); - dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) ); - - GROUP_LDS_BARRIER; - - ldsSortData[dstAddr.x] = sortData[0]; - ldsSortData[dstAddr.y] = sortData[1]; - ldsSortData[dstAddr.z] = sortData[2]; - ldsSortData[dstAddr.w] = sortData[3]; - - GROUP_LDS_BARRIER; - - sortData[0] = ldsSortData[localAddr.x]; - sortData[1] = ldsSortData[localAddr.y]; - sortData[2] = ldsSortData[localAddr.z]; - sortData[3] = ldsSortData[localAddr.w]; - - GROUP_LDS_BARRIER; - } - } -} - -// 2 scan, 2 exchange -void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData) -{ - for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, - (sortData[1]>>(startBit+ibit)) & 0x3, - (sortData[2]>>(startBit+ibit)) & 0x3, - (sortData[3]>>(startBit+ibit)) & 0x3); - - u32 key4; - u32 sKeyPacked[4] = { 0, 0, 0, 0 }; - { - sKeyPacked[0] |= 1<<(8*b.x); - sKeyPacked[1] |= 1<<(8*b.y); - sKeyPacked[2] |= 1<<(8*b.z); - sKeyPacked[3] |= 1<<(8*b.w); - - key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; - } - - u32 rankPacked; - u32 sumPacked; - { - rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); - } - - GROUP_LDS_BARRIER; - - u32 newOffset[4] = { 0,0,0,0 }; - { - u32 sumScanned = bit8Scan( sumPacked ); - - u32 scannedKeys[4]; - scannedKeys[0] = 1<<(8*b.x); - scannedKeys[1] = 1<<(8*b.y); - scannedKeys[2] = 1<<(8*b.z); - scannedKeys[3] = 1<<(8*b.w); - { // 4 scans at once - u32 sum4 = 0; - for(int ie=0; ie<4; ie++) - { - u32 tmp = scannedKeys[ie]; - scannedKeys[ie] = sum4; - sum4 += tmp; - } - } - - { - u32 sumPlusRank = sumScanned + rankPacked; - { u32 ie = b.x; - scannedKeys[0] += sumPlusRank; - newOffset[0] = unpack4Key( scannedKeys[0], ie ); - } - { u32 ie = b.y; - scannedKeys[1] += sumPlusRank; - newOffset[1] = unpack4Key( scannedKeys[1], ie ); - } - { u32 ie = b.z; - scannedKeys[2] += sumPlusRank; - newOffset[2] = unpack4Key( scannedKeys[2], ie ); - } - { u32 ie = b.w; - scannedKeys[3] += sumPlusRank; - newOffset[3] = unpack4Key( scannedKeys[3], ie ); - } - } - } - - - GROUP_LDS_BARRIER; - - { - ldsSortData[newOffset[0]] = sortData[0]; - ldsSortData[newOffset[1]] = sortData[1]; - ldsSortData[newOffset[2]] = sortData[2]; - ldsSortData[newOffset[3]] = sortData[3]; - - GROUP_LDS_BARRIER; - - u32 dstAddr = 4*lIdx; - sortData[0] = ldsSortData[dstAddr+0]; - sortData[1] = ldsSortData[dstAddr+1]; - sortData[2] = ldsSortData[dstAddr+2]; - sortData[3] = ldsSortData[dstAddr+3]; - - GROUP_LDS_BARRIER; - } - } -} - -#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key] - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb ) -{ - __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local u32 localHistogramToCarry[NUM_BUCKET]; - __local u32 localHistogram[NUM_BUCKET*2]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int startBit = cb.m_startBit; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - if( lIdx < (NUM_BUCKET) ) - { - localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock>startBit) & 0xf; - - { // create histogram - u32 setIdx = lIdx/16; - if( lIdx < NUM_BUCKET ) - { - localHistogram[lIdx] = 0; - } - ldsSortData[lIdx] = 0; - GROUP_LDS_BARRIER; - - for(int i=0; i>(startBit+ibit)) & 0x3, - (sortData[1]>>(startBit+ibit)) & 0x3, - (sortData[2]>>(startBit+ibit)) & 0x3, - (sortData[3]>>(startBit+ibit)) & 0x3); - - u32 key4; - u32 sKeyPacked[4] = { 0, 0, 0, 0 }; - { - sKeyPacked[0] |= 1<<(8*b.x); - sKeyPacked[1] |= 1<<(8*b.y); - sKeyPacked[2] |= 1<<(8*b.z); - sKeyPacked[3] |= 1<<(8*b.w); - - key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; - } - - u32 rankPacked; - u32 sumPacked; - { - rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); - } - - GROUP_LDS_BARRIER; - - u32 newOffset[4] = { 0,0,0,0 }; - { - u32 sumScanned = bit8Scan( sumPacked ); - - u32 scannedKeys[4]; - scannedKeys[0] = 1<<(8*b.x); - scannedKeys[1] = 1<<(8*b.y); - scannedKeys[2] = 1<<(8*b.z); - scannedKeys[3] = 1<<(8*b.w); - { // 4 scans at once - u32 sum4 = 0; - for(int ie=0; ie<4; ie++) - { - u32 tmp = scannedKeys[ie]; - scannedKeys[ie] = sum4; - sum4 += tmp; - } - } - - { - u32 sumPlusRank = sumScanned + rankPacked; - { u32 ie = b.x; - scannedKeys[0] += sumPlusRank; - newOffset[0] = unpack4Key( scannedKeys[0], ie ); - } - { u32 ie = b.y; - scannedKeys[1] += sumPlusRank; - newOffset[1] = unpack4Key( scannedKeys[1], ie ); - } - { u32 ie = b.z; - scannedKeys[2] += sumPlusRank; - newOffset[2] = unpack4Key( scannedKeys[2], ie ); - } - { u32 ie = b.w; - scannedKeys[3] += sumPlusRank; - newOffset[3] = unpack4Key( scannedKeys[3], ie ); - } - } - } - - - GROUP_LDS_BARRIER; - - { - ldsSortData[newOffset[0]] = sortData[0]; - ldsSortData[newOffset[1]] = sortData[1]; - ldsSortData[newOffset[2]] = sortData[2]; - ldsSortData[newOffset[3]] = sortData[3]; - - ldsSortVal[newOffset[0]] = sortVal[0]; - ldsSortVal[newOffset[1]] = sortVal[1]; - ldsSortVal[newOffset[2]] = sortVal[2]; - ldsSortVal[newOffset[3]] = sortVal[3]; - - GROUP_LDS_BARRIER; - - u32 dstAddr = 4*lIdx; - sortData[0] = ldsSortData[dstAddr+0]; - sortData[1] = ldsSortData[dstAddr+1]; - sortData[2] = ldsSortData[dstAddr+2]; - sortData[3] = ldsSortData[dstAddr+3]; - - sortVal[0] = ldsSortVal[dstAddr+0]; - sortVal[1] = ldsSortVal[dstAddr+1]; - sortVal[2] = ldsSortVal[dstAddr+2]; - sortVal[3] = ldsSortVal[dstAddr+3]; - - GROUP_LDS_BARRIER; - } - } -} - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb) -{ - __local int ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local u32 localHistogramToCarry[NUM_BUCKET]; - __local u32 localHistogram[NUM_BUCKET*2]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int startBit = cb.m_startBit; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - if( lIdx < (NUM_BUCKET) ) - { - localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; - } - - GROUP_LDS_BARRIER; - - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock>startBit) & 0xf; - - { // create histogram - u32 setIdx = lIdx/16; - if( lIdx < NUM_BUCKET ) - { - localHistogram[lIdx] = 0; - } - ldsSortData[lIdx] = 0; - GROUP_LDS_BARRIER; - - for(int i=0; i0) - return; - - for (int c=0;c>startBit) & 0xf;//0xf = NUM_TABLES-1 - gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i]; - counter[tableIdx] ++; - } - } - } - } - -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb ) -{ - - u32 gIdx = GET_GLOBAL_IDX; - u32 realLocalIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - int counter[NUM_BUCKET]; - - if (realLocalIdx>0) - return; - - for (int c=0;c>startBit) & 0xf;//0xf = NUM_TABLES-1 - gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i]; - counter[tableIdx] ++; - } - } - } - } - -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h deleted file mode 100644 index fb4bdda3034..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h +++ /dev/null @@ -1,909 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* radixSort32KernelsCL = - "/*\n" - "Bullet Continuous Collision Detection and Physics Library\n" - "Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Author Takahiro Harada\n" - "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_uint4 (uint4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define WG_SIZE 64\n" - "#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE)\n" - "#define BITS_PER_PASS 4\n" - "#define NUM_BUCKET (1< 64 )\n" - " {\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - " sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#else\n" - " if( lIdx < 64 )\n" - " {\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-1];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; \n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-4];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-8];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-16];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-32];\n" - " GROUP_MEM_FENCE;\n" - " if( wgSize > 64 )\n" - " {\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - " sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#endif\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " *totalSum = sorterSharedMemory[wgSize*2-1];\n" - " u32 addValue = sorterSharedMemory[lIdx+wgSize-1];\n" - " return addValue;\n" - "}\n" - "//__attribute__((reqd_work_group_size(128,1,1)))\n" - "uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n" - "{\n" - " u32 s4 = prefixScanVectorEx( &pData );\n" - " u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 );\n" - " return pData + make_uint4( rank, rank, rank, rank );\n" - "}\n" - "//__attribute__((reqd_work_group_size(64,1,1)))\n" - "uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n" - "{\n" - " u32 s4 = prefixScanVectorEx( &pData );\n" - " u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 );\n" - " return pData + make_uint4( rank, rank, rank, rank );\n" - "}\n" - "u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;}\n" - "u32 bit8Scan(u32 v)\n" - "{\n" - " return (v<<8) + (v<<16) + (v<<24);\n" - "}\n" - "//===\n" - "#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb )\n" - "{\n" - " __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int startBit = cb.m_startBit;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " for(int i=0; i>startBit) & 0xf;\n" - "#if defined(NV_GPU)\n" - " MY_HISTOGRAM( localKey )++;\n" - "#else\n" - " AtomInc( MY_HISTOGRAM( localKey ) );\n" - "#endif\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " u32 sum = 0;\n" - " for(int i=0; i>startBit) & 0xf;\n" - "#if defined(NV_GPU)\n" - " MY_HISTOGRAM( localKey )++;\n" - "#else\n" - " AtomInc( MY_HISTOGRAM( localKey ) );\n" - "#endif\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " u32 sum = 0;\n" - " for(int i=0; i>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask );\n" - " uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) );\n" - " u32 total;\n" - " prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData );\n" - " {\n" - " uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3);\n" - " uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total );\n" - " dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) );\n" - " GROUP_LDS_BARRIER;\n" - " ldsSortData[dstAddr.x] = sortData[0];\n" - " ldsSortData[dstAddr.y] = sortData[1];\n" - " ldsSortData[dstAddr.z] = sortData[2];\n" - " ldsSortData[dstAddr.w] = sortData[3];\n" - " GROUP_LDS_BARRIER;\n" - " sortData[0] = ldsSortData[localAddr.x];\n" - " sortData[1] = ldsSortData[localAddr.y];\n" - " sortData[2] = ldsSortData[localAddr.z];\n" - " sortData[3] = ldsSortData[localAddr.w];\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " }\n" - "}\n" - "// 2 scan, 2 exchange\n" - "void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n" - "{\n" - " for(uint ibit=0; ibit>(startBit+ibit)) & 0x3, \n" - " (sortData[1]>>(startBit+ibit)) & 0x3, \n" - " (sortData[2]>>(startBit+ibit)) & 0x3, \n" - " (sortData[3]>>(startBit+ibit)) & 0x3);\n" - " u32 key4;\n" - " u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" - " {\n" - " sKeyPacked[0] |= 1<<(8*b.x);\n" - " sKeyPacked[1] |= 1<<(8*b.y);\n" - " sKeyPacked[2] |= 1<<(8*b.z);\n" - " sKeyPacked[3] |= 1<<(8*b.w);\n" - " key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" - " }\n" - " u32 rankPacked;\n" - " u32 sumPacked;\n" - " {\n" - " rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " u32 newOffset[4] = { 0,0,0,0 };\n" - " {\n" - " u32 sumScanned = bit8Scan( sumPacked );\n" - " u32 scannedKeys[4];\n" - " scannedKeys[0] = 1<<(8*b.x);\n" - " scannedKeys[1] = 1<<(8*b.y);\n" - " scannedKeys[2] = 1<<(8*b.z);\n" - " scannedKeys[3] = 1<<(8*b.w);\n" - " { // 4 scans at once\n" - " u32 sum4 = 0;\n" - " for(int ie=0; ie<4; ie++)\n" - " {\n" - " u32 tmp = scannedKeys[ie];\n" - " scannedKeys[ie] = sum4;\n" - " sum4 += tmp;\n" - " }\n" - " }\n" - " {\n" - " u32 sumPlusRank = sumScanned + rankPacked;\n" - " { u32 ie = b.x;\n" - " scannedKeys[0] += sumPlusRank;\n" - " newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" - " }\n" - " { u32 ie = b.y;\n" - " scannedKeys[1] += sumPlusRank;\n" - " newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" - " }\n" - " { u32 ie = b.z;\n" - " scannedKeys[2] += sumPlusRank;\n" - " newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" - " }\n" - " { u32 ie = b.w;\n" - " scannedKeys[3] += sumPlusRank;\n" - " newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " {\n" - " ldsSortData[newOffset[0]] = sortData[0];\n" - " ldsSortData[newOffset[1]] = sortData[1];\n" - " ldsSortData[newOffset[2]] = sortData[2];\n" - " ldsSortData[newOffset[3]] = sortData[3];\n" - " GROUP_LDS_BARRIER;\n" - " u32 dstAddr = 4*lIdx;\n" - " sortData[0] = ldsSortData[dstAddr+0];\n" - " sortData[1] = ldsSortData[dstAddr+1];\n" - " sortData[2] = ldsSortData[dstAddr+2];\n" - " sortData[3] = ldsSortData[dstAddr+3];\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " }\n" - "}\n" - "#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" - "{\n" - " __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" - " __local u32 localHistogramToCarry[NUM_BUCKET];\n" - " __local u32 localHistogram[NUM_BUCKET*2];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int startBit = cb.m_startBit;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " if( lIdx < (NUM_BUCKET) )\n" - " {\n" - " localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" - " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " for(int iblock=0; iblock>startBit) & 0xf;\n" - " { // create histogram\n" - " u32 setIdx = lIdx/16;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[lIdx] = 0;\n" - " }\n" - " ldsSortData[lIdx] = 0;\n" - " GROUP_LDS_BARRIER;\n" - " for(int i=0; i>(startBit+ibit)) & 0x3, \n" - " (sortData[1]>>(startBit+ibit)) & 0x3, \n" - " (sortData[2]>>(startBit+ibit)) & 0x3, \n" - " (sortData[3]>>(startBit+ibit)) & 0x3);\n" - " u32 key4;\n" - " u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" - " {\n" - " sKeyPacked[0] |= 1<<(8*b.x);\n" - " sKeyPacked[1] |= 1<<(8*b.y);\n" - " sKeyPacked[2] |= 1<<(8*b.z);\n" - " sKeyPacked[3] |= 1<<(8*b.w);\n" - " key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" - " }\n" - " u32 rankPacked;\n" - " u32 sumPacked;\n" - " {\n" - " rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " u32 newOffset[4] = { 0,0,0,0 };\n" - " {\n" - " u32 sumScanned = bit8Scan( sumPacked );\n" - " u32 scannedKeys[4];\n" - " scannedKeys[0] = 1<<(8*b.x);\n" - " scannedKeys[1] = 1<<(8*b.y);\n" - " scannedKeys[2] = 1<<(8*b.z);\n" - " scannedKeys[3] = 1<<(8*b.w);\n" - " { // 4 scans at once\n" - " u32 sum4 = 0;\n" - " for(int ie=0; ie<4; ie++)\n" - " {\n" - " u32 tmp = scannedKeys[ie];\n" - " scannedKeys[ie] = sum4;\n" - " sum4 += tmp;\n" - " }\n" - " }\n" - " {\n" - " u32 sumPlusRank = sumScanned + rankPacked;\n" - " { u32 ie = b.x;\n" - " scannedKeys[0] += sumPlusRank;\n" - " newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" - " }\n" - " { u32 ie = b.y;\n" - " scannedKeys[1] += sumPlusRank;\n" - " newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" - " }\n" - " { u32 ie = b.z;\n" - " scannedKeys[2] += sumPlusRank;\n" - " newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" - " }\n" - " { u32 ie = b.w;\n" - " scannedKeys[3] += sumPlusRank;\n" - " newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " {\n" - " ldsSortData[newOffset[0]] = sortData[0];\n" - " ldsSortData[newOffset[1]] = sortData[1];\n" - " ldsSortData[newOffset[2]] = sortData[2];\n" - " ldsSortData[newOffset[3]] = sortData[3];\n" - " ldsSortVal[newOffset[0]] = sortVal[0];\n" - " ldsSortVal[newOffset[1]] = sortVal[1];\n" - " ldsSortVal[newOffset[2]] = sortVal[2];\n" - " ldsSortVal[newOffset[3]] = sortVal[3];\n" - " GROUP_LDS_BARRIER;\n" - " u32 dstAddr = 4*lIdx;\n" - " sortData[0] = ldsSortData[dstAddr+0];\n" - " sortData[1] = ldsSortData[dstAddr+1];\n" - " sortData[2] = ldsSortData[dstAddr+2];\n" - " sortData[3] = ldsSortData[dstAddr+3];\n" - " sortVal[0] = ldsSortVal[dstAddr+0];\n" - " sortVal[1] = ldsSortVal[dstAddr+1];\n" - " sortVal[2] = ldsSortVal[dstAddr+2];\n" - " sortVal[3] = ldsSortVal[dstAddr+3];\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n" - "{\n" - " __local int ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" - " __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" - " __local u32 localHistogramToCarry[NUM_BUCKET];\n" - " __local u32 localHistogram[NUM_BUCKET*2];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int startBit = cb.m_startBit;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " if( lIdx < (NUM_BUCKET) )\n" - " {\n" - " localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" - " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " for(int iblock=0; iblock>startBit) & 0xf;\n" - " { // create histogram\n" - " u32 setIdx = lIdx/16;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[lIdx] = 0;\n" - " }\n" - " ldsSortData[lIdx] = 0;\n" - " GROUP_LDS_BARRIER;\n" - " for(int i=0; i0)\n" - " return;\n" - " \n" - " for (int c=0;c>startBit) & 0xf;//0xf = NUM_TABLES-1\n" - " gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i];\n" - " counter[tableIdx] ++;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" - "{\n" - " \n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 realLocalIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int startBit = cb.m_startBit;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " int counter[NUM_BUCKET];\n" - " \n" - " if (realLocalIdx>0)\n" - " return;\n" - " \n" - " for (int c=0;c>startBit) & 0xf;//0xf = NUM_TABLES-1\n" - " gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i];\n" - " counter[tableIdx] ++;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp deleted file mode 100644 index 6571f305482..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ /dev/null @@ -1,374 +0,0 @@ - -#include "b3GpuRaycast.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h" - -#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" - -#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" - -struct b3GpuRaycastInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_q; - cl_kernel m_raytraceKernel; - cl_kernel m_raytracePairsKernel; - cl_kernel m_findRayRigidPairIndexRanges; - - b3GpuParallelLinearBvh* m_plbvh; - b3RadixSort32CL* m_radixSorter; - b3FillCL* m_fill; - - //1 element per ray - b3OpenCLArray* m_gpuRays; - b3OpenCLArray* m_gpuHitResults; - b3OpenCLArray* m_firstRayRigidPairIndexPerRay; - b3OpenCLArray* m_numRayRigidPairsPerRay; - - //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB - b3OpenCLArray* m_gpuNumRayRigidPairs; - b3OpenCLArray* m_gpuRayRigidPairs; //x == ray index, y == rigid index - - int m_test; -}; - -b3GpuRaycast::b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q) -{ - m_data = new b3GpuRaycastInternalData; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_q = q; - m_data->m_raytraceKernel = 0; - m_data->m_raytracePairsKernel = 0; - m_data->m_findRayRigidPairIndexRanges = 0; - - m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q); - m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q); - m_data->m_fill = new b3FillCL(ctx, device, q); - - m_data->m_gpuRays = new b3OpenCLArray(ctx, q); - m_data->m_gpuHitResults = new b3OpenCLArray(ctx, q); - m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray(ctx, q); - m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray(ctx, q); - m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray(ctx, q); - m_data->m_gpuRayRigidPairs = new b3OpenCLArray(ctx, q); - - { - cl_int errNum = 0; - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, &errNum, "", B3_RAYCAST_PATH); - b3Assert(errNum == CL_SUCCESS); - m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastPairsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "findRayRigidPairIndexRanges", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - clReleaseProgram(prog); - } -} - -b3GpuRaycast::~b3GpuRaycast() -{ - clReleaseKernel(m_data->m_raytraceKernel); - clReleaseKernel(m_data->m_raytracePairsKernel); - clReleaseKernel(m_data->m_findRayRigidPairIndexRanges); - - delete m_data->m_plbvh; - delete m_data->m_radixSorter; - delete m_data->m_fill; - - delete m_data->m_gpuRays; - delete m_data->m_gpuHitResults; - delete m_data->m_firstRayRigidPairIndexPerRay; - delete m_data->m_numRayRigidPairsPerRay; - delete m_data->m_gpuNumRayRigidPairs; - delete m_data->m_gpuRayRigidPairs; - - delete m_data; -} - -bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction) -{ - b3Vector3 rs = rayFrom - spherePos; - b3Vector3 rayDir = rayTo - rayFrom; - - float A = b3Dot(rayDir, rayDir); - float B = b3Dot(rs, rayDir); - float C = b3Dot(rs, rs) - (radius * radius); - - float D = B * B - A * C; - - if (D > 0.0) - { - float t = (-B - sqrt(D)) / A; - - if ((t >= 0.0f) && (t < hitFraction)) - { - hitFraction = t; - return true; - } - } - return false; -} - -bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly, - const b3AlignedObjectArray& faces, float& hitFraction, b3Vector3& hitNormal) -{ - float exitFraction = hitFraction; - float enterFraction = -0.1f; - b3Vector3 curHitNormal = b3MakeVector3(0, 0, 0); - for (int i = 0; i < poly.m_numFaces; i++) - { - const b3GpuFace& face = faces[poly.m_faceOffset + i]; - float fromPlaneDist = b3Dot(rayFromLocal, face.m_plane) + face.m_plane.w; - float toPlaneDist = b3Dot(rayToLocal, face.m_plane) + face.m_plane.w; - if (fromPlaneDist < 0.f) - { - if (toPlaneDist >= 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist); - if (exitFraction > fraction) - { - exitFraction = fraction; - } - } - } - else - { - if (toPlaneDist < 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist); - if (enterFraction <= fraction) - { - enterFraction = fraction; - curHitNormal = face.m_plane; - curHitNormal.w = 0.f; - } - } - else - { - return false; - } - } - if (exitFraction <= enterFraction) - return false; - } - - if (enterFraction < 0.f) - return false; - - hitFraction = enterFraction; - hitNormal = curHitNormal; - return true; -} - -void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) -{ - // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); - - B3_PROFILE("castRaysHost"); - for (int r = 0; r < rays.size(); r++) - { - b3Vector3 rayFrom = rays[r].m_from; - b3Vector3 rayTo = rays[r].m_to; - float hitFraction = hitResults[r].m_hitFraction; - - int hitBodyIndex = -1; - b3Vector3 hitNormal; - - for (int b = 0; b < numBodies; b++) - { - const b3Vector3& pos = bodies[b].m_pos; - //const b3Quaternion& orn = bodies[b].m_quat; - - switch (collidables[bodies[b].m_collidableIdx].m_shapeType) - { - case SHAPE_SPHERE: - { - b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; - if (sphere_intersect(pos, radius, rayFrom, rayTo, hitFraction)) - { - hitBodyIndex = b; - b3Vector3 hitPoint; - hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction); - hitNormal = (hitPoint - bodies[b].m_pos).normalize(); - } - } - case SHAPE_CONVEX_HULL: - { - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(bodies[b].m_pos); - convexWorldTransform.setRotation(bodies[b].m_quat); - b3Transform convexWorld2Local = convexWorldTransform.inverse(); - - b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); - b3Vector3 rayToLocal = convexWorld2Local(rayTo); - - int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; - const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; - if (rayConvex(rayFromLocal, rayToLocal, poly, narrowphaseData->m_convexFaces, hitFraction, hitNormal)) - { - hitBodyIndex = b; - } - - break; - } - default: - { - static bool once = true; - if (once) - { - once = false; - b3Warning("Raytest: unsupported shape type\n"); - } - } - } - } - if (hitBodyIndex >= 0) - { - hitResults[r].m_hitFraction = hitFraction; - hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction); - hitResults[r].m_hitNormal = hitNormal; - hitResults[r].m_hitBody = hitBodyIndex; - } - } -} -///todo: add some acceleration structure (AABBs, tree etc) -void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) -{ - //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); - - B3_PROFILE("castRaysGPU"); - - { - B3_PROFILE("raycast copyFromHost"); - m_data->m_gpuRays->copyFromHost(rays); - m_data->m_gpuHitResults->copyFromHost(hitResults); - } - - int numRays = hitResults.size(); - { - m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); - m_data->m_numRayRigidPairsPerRay->resize(numRays); - - m_data->m_gpuNumRayRigidPairs->resize(1); - m_data->m_gpuRayRigidPairs->resize(numRays * 16); - } - - //run kernel - const bool USE_BRUTE_FORCE_RAYCAST = false; - if (USE_BRUTE_FORCE_RAYCAST) - { - B3_PROFILE("raycast launch1D"); - - b3LauncherCL launcher(m_data->m_q, m_data->m_raytraceKernel, "m_raytraceKernel"); - int numRays = rays.size(); - launcher.setConst(numRays); - - launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); - launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); - - launcher.setConst(numBodies); - launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); - - launcher.launch1D(numRays); - clFinish(m_data->m_q); - } - else - { - m_data->m_plbvh->build(broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU()); - - m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); - - int numRayRigidPairs = -1; - m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); - if (numRayRigidPairs > m_data->m_gpuRayRigidPairs->size()) - { - numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); - m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); - } - - m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct - - //Sort ray-rigid pairs by ray index - { - B3_PROFILE("sort ray-rigid pairs"); - m_data->m_radixSorter->execute(*reinterpret_cast*>(m_data->m_gpuRayRigidPairs)); - } - - //detect start,count of each ray pair - { - B3_PROFILE("detect ray-rigid pair index ranges"); - - { - B3_PROFILE("reset ray-rigid pair index ranges"); - - m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index - m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); - clFinish(m_data->m_q); - } - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL()), - - b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()), - b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numRayRigidPairs); - - launcher.launch1D(numRayRigidPairs); - clFinish(m_data->m_q); - } - - { - B3_PROFILE("ray-rigid intersection"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_data->m_gpuRays->getBufferCL()), - b3BufferInfoCL(m_data->m_gpuHitResults->getBufferCL()), - b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()), - b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL()), - - b3BufferInfoCL(narrowphaseData->m_bodyBufferGPU->getBufferCL()), - b3BufferInfoCL(narrowphaseData->m_collidablesGPU->getBufferCL()), - b3BufferInfoCL(narrowphaseData->m_convexFacesGPU->getBufferCL()), - b3BufferInfoCL(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()), - - b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_data->m_q); - } - } - - //copy results - { - B3_PROFILE("raycast copyToHost"); - m_data->m_gpuHitResults->copyToHost(hitResults); - } -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h deleted file mode 100644 index f1f6ffd4020..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef B3_GPU_RAYCAST_H -#define B3_GPU_RAYCAST_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -class b3GpuRaycast -{ -protected: - struct b3GpuRaycastInternalData* m_data; - -public: - b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q); - virtual ~b3GpuRaycast(); - - void castRaysHost(const b3AlignedObjectArray& raysIn, b3AlignedObjectArray& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData); - - void castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase); -}; - -#endif //B3_GPU_RAYCAST_H diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl deleted file mode 100644 index e72d96876b1..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl +++ /dev/null @@ -1,439 +0,0 @@ - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_PLANE 4 -#define SHAPE_CONCAVE_TRIMESH 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - - -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; - -typedef struct -{ - float m_hitFraction; - int m_hitResult0; - int m_hitResult1; - int m_hitResult2; - float4 m_hitPoint; - float4 m_hitNormal; -} b3RayHit; - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct Collidable -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - float m_radius; - int m_shapeType; - int m_shapeIndex; -} Collidable; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; - -} ConvexPolyhedronCL; - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} b3GpuFace; - - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline - Quaternion qtMul(Quaternion a, Quaternion b); - -__inline - Quaternion qtNormalize(Quaternion in); - - -__inline - Quaternion qtInvert(Quaternion q); - - -__inline - float dot3F4(float4 a, float4 b) -{ - float4 a1 = (float4)(a.xyz,0.f); - float4 b1 = (float4)(b.xyz,0.f); - return dot(a1, b1); -} - - -__inline - Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross( a, b ); - ans += a.w*b+b.w*a; - // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline - Quaternion qtNormalize(Quaternion in) -{ - return fast_normalize(in); - // in /= length( in ); - // return in; -} -__inline - float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(q,vcpy); - out = qtMul(out,qInv); - return out; -} - -__inline - Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline - float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - - - - - -bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset, - __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal) -{ - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - bool result = true; - - float exitFraction = hitFraction[0]; - float enterFraction = -0.3f; - float4 curHitNormal = (float4)(0,0,0,0); - for (int i=0;i= 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (exitFraction>fraction) - { - exitFraction = fraction; - } - } - } else - { - if (toPlaneDist<0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (enterFraction <= fraction) - { - enterFraction = fraction; - curHitNormal = face.m_plane; - curHitNormal.w = 0.f; - } - } else - { - result = false; - } - } - if (exitFraction <= enterFraction) - result = false; - } - - if (enterFraction < 0.f) - { - result = false; - } - - if (result) - { - hitFraction[0] = enterFraction; - hitNormal[0] = curHitNormal; - } - return result; -} - - - - - - -bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction) -{ - float4 rs = rayFrom - spherePos; - rs.w = 0.f; - float4 rayDir = rayTo-rayFrom; - rayDir.w = 0.f; - float A = dot(rayDir,rayDir); - float B = dot(rs, rayDir); - float C = dot(rs, rs) - (radius * radius); - - float D = B * B - A*C; - - if (D > 0.0f) - { - float t = (-B - sqrt(D))/A; - - if ( (t >= 0.0f) && (t < (*hitFraction)) ) - { - *hitFraction = t; - return true; - } - } - return false; -} - -float4 setInterpolate3(float4 from, float4 to, float t) -{ - float s = 1.0f - t; - float4 result; - result = s * from + t * to; - result.w = 0.f; - return result; -} - -__kernel void rayCastKernel( - int numRays, - const __global b3RayInfo* rays, - __global b3RayHit* hitResults, - const int numBodies, - __global Body* bodies, - __global Collidable* collidables, - __global const b3GpuFace* faces, - __global const ConvexPolyhedronCL* convexShapes ) -{ - - int i = get_global_id(0); - if (i>=numRays) - return; - - hitResults[i].m_hitFraction = 1.f; - - float4 rayFrom = rays[i].m_from; - float4 rayTo = rays[i].m_to; - float hitFraction = 1.f; - float4 hitPoint; - float4 hitNormal; - int hitBodyIndex= -1; - - int cachedCollidableIndex = -1; - Collidable cachedCollidable; - - for (int b=0;b=0) - { - hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction); - hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = hitPoint; - hitResults[i].m_hitNormal = normalize(hitNormal); - hitResults[i].m_hitResult0 = hitBodyIndex; - } - -} - - -__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, - __global int* out_firstRayRigidPairIndexPerRay, - __global int* out_numRayRigidPairsPerRay, - int numRayRigidPairs) -{ - int rayRigidPairIndex = get_global_id(0); - if (rayRigidPairIndex >= numRayRigidPairs) return; - - int rayIndex = rayRigidPairs[rayRigidPairIndex].x; - - atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex); - atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]); -} - -__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, - __global b3RayHit* hitResults, - __global int* firstRayRigidPairIndexPerRay, - __global int* numRayRigidPairsPerRay, - - __global Body* bodies, - __global Collidable* collidables, - __global const b3GpuFace* faces, - __global const ConvexPolyhedronCL* convexShapes, - - __global int2* rayRigidPairs, - int numRays) -{ - int i = get_global_id(0); - if (i >= numRays) return; - - float4 rayFrom = rays[i].m_from; - float4 rayTo = rays[i].m_to; - - hitResults[i].m_hitFraction = 1.f; - - float hitFraction = 1.f; - float4 hitPoint; - float4 hitNormal; - int hitBodyIndex = -1; - - // - for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair) - { - int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i]; - int b = rayRigidPairs[rayRigidPairIndex].y; - - if (hitResults[i].m_hitResult2 == b) continue; - - Body body = bodies[b]; - Collidable rigidCollidable = collidables[body.m_collidableIdx]; - - float4 pos = body.m_pos; - float4 orn = body.m_quat; - - if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL) - { - float4 invPos = (float4)(0,0,0,0); - float4 invOrn = (float4)(0,0,0,0); - float4 rayFromLocal = (float4)(0,0,0,0); - float4 rayToLocal = (float4)(0,0,0,0); - invOrn = qtInvert(orn); - invPos = qtRotate(invOrn, -pos); - rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; - rayToLocal = qtRotate( invOrn, rayTo) + invPos; - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces; - int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset; - - if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) - { - hitBodyIndex = b; - hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); - } - } - - if (rigidCollidable.m_shapeType == SHAPE_SPHERE) - { - float radius = rigidCollidable.m_radius; - - if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) - { - hitBodyIndex = b; - hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); - hitNormal = (float4) (hitPoint - bodies[b].m_pos); - } - } - } - - if (hitBodyIndex >= 0) - { - hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = hitPoint; - hitResults[i].m_hitNormal = normalize(hitNormal); - hitResults[i].m_hitResult0 = hitBodyIndex; - } - -} diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h deleted file mode 100644 index 94f6a8eb9f7..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h +++ /dev/null @@ -1,380 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* rayCastKernelCL = - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_PLANE 4\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define SHAPE_SPHERE 7\n" - "typedef struct\n" - "{\n" - " float4 m_from;\n" - " float4 m_to;\n" - "} b3RayInfo;\n" - "typedef struct\n" - "{\n" - " float m_hitFraction;\n" - " int m_hitResult0;\n" - " int m_hitResult1;\n" - " int m_hitResult2;\n" - " float4 m_hitPoint;\n" - " float4 m_hitNormal;\n" - "} b3RayHit;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " unsigned int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " float m_radius;\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - "} Collidable;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} b3GpuFace;\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - " Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - " Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - " Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - " float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = (float4)(a.xyz,0.f);\n" - " float4 b1 = (float4)(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - " Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross( a, b );\n" - " ans += a.w*b+b.w*a;\n" - " // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - " Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fast_normalize(in);\n" - " // in /= length( in );\n" - " // return in;\n" - "}\n" - "__inline\n" - " float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(q,vcpy);\n" - " out = qtMul(out,qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - " Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - " float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "void trInverse(float4 translationIn, Quaternion orientationIn,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtInvert(orientationIn);\n" - " *translationOut = qtRotate(*orientationOut, -translationIn);\n" - "}\n" - "bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n" - " __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n" - "{\n" - " rayFromLocal.w = 0.f;\n" - " rayToLocal.w = 0.f;\n" - " bool result = true;\n" - " float exitFraction = hitFraction[0];\n" - " float enterFraction = -0.3f;\n" - " float4 curHitNormal = (float4)(0,0,0,0);\n" - " for (int i=0;i= 0.f)\n" - " {\n" - " float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" - " if (exitFraction>fraction)\n" - " {\n" - " exitFraction = fraction;\n" - " }\n" - " } \n" - " } else\n" - " {\n" - " if (toPlaneDist<0.f)\n" - " {\n" - " float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" - " if (enterFraction <= fraction)\n" - " {\n" - " enterFraction = fraction;\n" - " curHitNormal = face.m_plane;\n" - " curHitNormal.w = 0.f;\n" - " }\n" - " } else\n" - " {\n" - " result = false;\n" - " }\n" - " }\n" - " if (exitFraction <= enterFraction)\n" - " result = false;\n" - " }\n" - " if (enterFraction < 0.f)\n" - " {\n" - " result = false;\n" - " }\n" - " if (result)\n" - " { \n" - " hitFraction[0] = enterFraction;\n" - " hitNormal[0] = curHitNormal;\n" - " }\n" - " return result;\n" - "}\n" - "bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n" - "{\n" - " float4 rs = rayFrom - spherePos;\n" - " rs.w = 0.f;\n" - " float4 rayDir = rayTo-rayFrom;\n" - " rayDir.w = 0.f;\n" - " float A = dot(rayDir,rayDir);\n" - " float B = dot(rs, rayDir);\n" - " float C = dot(rs, rs) - (radius * radius);\n" - " float D = B * B - A*C;\n" - " if (D > 0.0f)\n" - " {\n" - " float t = (-B - sqrt(D))/A;\n" - " if ( (t >= 0.0f) && (t < (*hitFraction)) )\n" - " {\n" - " *hitFraction = t;\n" - " return true;\n" - " }\n" - " }\n" - " return false;\n" - "}\n" - "float4 setInterpolate3(float4 from, float4 to, float t)\n" - "{\n" - " float s = 1.0f - t;\n" - " float4 result;\n" - " result = s * from + t * to;\n" - " result.w = 0.f; \n" - " return result; \n" - "}\n" - "__kernel void rayCastKernel( \n" - " int numRays, \n" - " const __global b3RayInfo* rays, \n" - " __global b3RayHit* hitResults, \n" - " const int numBodies, \n" - " __global Body* bodies,\n" - " __global Collidable* collidables,\n" - " __global const b3GpuFace* faces,\n" - " __global const ConvexPolyhedronCL* convexShapes )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numRays)\n" - " return;\n" - " hitResults[i].m_hitFraction = 1.f;\n" - " float4 rayFrom = rays[i].m_from;\n" - " float4 rayTo = rays[i].m_to;\n" - " float hitFraction = 1.f;\n" - " float4 hitPoint;\n" - " float4 hitNormal;\n" - " int hitBodyIndex= -1;\n" - " int cachedCollidableIndex = -1;\n" - " Collidable cachedCollidable;\n" - " for (int b=0;b=0)\n" - " {\n" - " hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n" - " hitResults[i].m_hitFraction = hitFraction;\n" - " hitResults[i].m_hitPoint = hitPoint;\n" - " hitResults[i].m_hitNormal = normalize(hitNormal);\n" - " hitResults[i].m_hitResult0 = hitBodyIndex;\n" - " }\n" - "}\n" - "__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n" - " __global int* out_firstRayRigidPairIndexPerRay,\n" - " __global int* out_numRayRigidPairsPerRay,\n" - " int numRayRigidPairs)\n" - "{\n" - " int rayRigidPairIndex = get_global_id(0);\n" - " if (rayRigidPairIndex >= numRayRigidPairs) return;\n" - " \n" - " int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n" - " \n" - " atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n" - " atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n" - "}\n" - "__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n" - " __global b3RayHit* hitResults, \n" - " __global int* firstRayRigidPairIndexPerRay,\n" - " __global int* numRayRigidPairsPerRay,\n" - " \n" - " __global Body* bodies,\n" - " __global Collidable* collidables,\n" - " __global const b3GpuFace* faces,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " \n" - " __global int2* rayRigidPairs,\n" - " int numRays)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i >= numRays) return;\n" - " \n" - " float4 rayFrom = rays[i].m_from;\n" - " float4 rayTo = rays[i].m_to;\n" - " \n" - " hitResults[i].m_hitFraction = 1.f;\n" - " \n" - " float hitFraction = 1.f;\n" - " float4 hitPoint;\n" - " float4 hitNormal;\n" - " int hitBodyIndex = -1;\n" - " \n" - " //\n" - " for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n" - " {\n" - " int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n" - " int b = rayRigidPairs[rayRigidPairIndex].y;\n" - " \n" - " if (hitResults[i].m_hitResult2 == b) continue;\n" - " \n" - " Body body = bodies[b];\n" - " Collidable rigidCollidable = collidables[body.m_collidableIdx];\n" - " \n" - " float4 pos = body.m_pos;\n" - " float4 orn = body.m_quat;\n" - " \n" - " if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" - " {\n" - " float4 invPos = (float4)(0,0,0,0);\n" - " float4 invOrn = (float4)(0,0,0,0);\n" - " float4 rayFromLocal = (float4)(0,0,0,0);\n" - " float4 rayToLocal = (float4)(0,0,0,0);\n" - " invOrn = qtInvert(orn);\n" - " invPos = qtRotate(invOrn, -pos);\n" - " rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" - " rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" - " rayFromLocal.w = 0.f;\n" - " rayToLocal.w = 0.f;\n" - " int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n" - " int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n" - " \n" - " if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" - " {\n" - " hitBodyIndex = b;\n" - " hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" - " }\n" - " }\n" - " \n" - " if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " float radius = rigidCollidable.m_radius;\n" - " \n" - " if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" - " {\n" - " hitBodyIndex = b;\n" - " hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" - " hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n" - " }\n" - " }\n" - " }\n" - " \n" - " if (hitBodyIndex >= 0)\n" - " {\n" - " hitResults[i].m_hitFraction = hitFraction;\n" - " hitResults[i].m_hitPoint = hitPoint;\n" - " hitResults[i].m_hitNormal = normalize(hitNormal);\n" - " hitResults[i].m_hitResult0 = hitBodyIndex;\n" - " }\n" - " \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h deleted file mode 100644 index 89c0142ab37..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h +++ /dev/null @@ -1,17 +0,0 @@ - -#ifndef B3_CONSTRAINT4_h -#define B3_CONSTRAINT4_h -#include "Bullet3Common/b3Vector3.h" - -#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" - -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuConstraint4 : public b3ContactConstraint4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - inline void setFrictionCoeff(float value) { m_linear[3] = value; } - inline float getFrictionCoeff() const { return m_linear[3]; } -}; - -#endif //B3_CONSTRAINT4_h diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp deleted file mode 100644 index a271090af4b..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3GpuGenericConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include -#include "Bullet3Common/b3Transform.h" - -void b3GpuGenericConstraint::getInfo1(unsigned int* info, const b3RigidBodyData* bodies) -{ - switch (m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - *info = 3; - break; - }; - default: - { - b3Assert(0); - } - }; -} - -void getInfo2Point2Point(b3GpuGenericConstraint* constraint, b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - b3Transform trA; - trA.setIdentity(); - trA.setOrigin(bodies[constraint->m_rbA].m_pos); - trA.setRotation(bodies[constraint->m_rbA].m_quat); - - b3Transform trB; - trB.setIdentity(); - trB.setOrigin(bodies[constraint->m_rbB].m_pos); - trB.setRotation(bodies[constraint->m_rbB].m_quat); - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip + 1] = 1; - info->m_J1linearAxis[2 * info->rowskip + 2] = 1; - - b3Vector3 a1 = trA.getBasis() * constraint->getPivotInA(); - //b3Vector3 a1a = b3QuatRotate(trA.getRotation(),constraint->getPivotInA()); - - { - b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis + 2 * info->rowskip); - b3Vector3 a1neg = -a1; - a1neg.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip + 1] = -1; - info->m_J2linearAxis[2 * info->rowskip + 2] = -1; - } - - b3Vector3 a2 = trB.getBasis() * constraint->getPivotInB(); - - { - // b3Vector3 a2n = -a2; - b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis + 2 * info->rowskip); - a2.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - // set right hand side - // b3Scalar currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp; - b3Scalar currERP = info->erp; - - b3Scalar k = info->fps * currERP; - int j; - for (j = 0; j < 3; j++) - { - info->m_constraintError[j * info->rowskip] = k * (a2[j] + trB.getOrigin()[j] - a1[j] - trA.getOrigin()[j]); - //printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]); - } -#if 0 - if(m_flags & B3_P2P_FLAGS_CFM) - { - for (j=0; j<3; j++) - { - info->cfm[j*info->rowskip] = m_cfm; - } - } -#endif - -#if 0 - b3Scalar impulseClamp = m_setting.m_impulseClamp;// - for (j=0; j<3; j++) - { - if (m_setting.m_impulseClamp > 0) - { - info->m_lowerLimit[j*info->rowskip] = -impulseClamp; - info->m_upperLimit[j*info->rowskip] = impulseClamp; - } - } - info->m_damping = m_setting.m_damping; -#endif -} - -void b3GpuGenericConstraint::getInfo2(b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - switch (m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - getInfo2Point2Point(this, info, bodies); - break; - }; - default: - { - b3Assert(0); - } - }; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h deleted file mode 100644 index 1f163ba7d5b..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_GENERIC_CONSTRAINT_H -#define B3_GPU_GENERIC_CONSTRAINT_H - -#include "Bullet3Common/b3Quaternion.h" -struct b3RigidBodyData; -enum B3_CONSTRAINT_FLAGS -{ - B3_CONSTRAINT_FLAG_ENABLED = 1, -}; - -enum b3GpuGenericConstraintType -{ - B3_GPU_POINT2POINT_CONSTRAINT_TYPE = 3, - B3_GPU_FIXED_CONSTRAINT_TYPE = 4, - // B3_HINGE_CONSTRAINT_TYPE, - // B3_CONETWIST_CONSTRAINT_TYPE, - // B3_D6_CONSTRAINT_TYPE, - // B3_SLIDER_CONSTRAINT_TYPE, - // B3_CONTACT_CONSTRAINT_TYPE, - // B3_D6_SPRING_CONSTRAINT_TYPE, - // B3_GEAR_CONSTRAINT_TYPE, - - B3_GPU_MAX_CONSTRAINT_TYPE -}; - -struct b3GpuConstraintInfo2 -{ - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). - b3Scalar fps, erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - b3Scalar *m_J1linearAxis, *m_J1angularAxis, *m_J2linearAxis, *m_J2angularAxis; - - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - b3Scalar *m_constraintError, *cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). - b3Scalar *m_lowerLimit, *m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - int* findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - b3Scalar m_damping; -}; - -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuGenericConstraint -{ - int m_constraintType; - int m_rbA; - int m_rbB; - float m_breakingImpulseThreshold; - - b3Vector3 m_pivotInA; - b3Vector3 m_pivotInB; - b3Quaternion m_relTargetAB; - - int m_flags; - int m_uid; - int m_padding[2]; - - int getRigidBodyA() const - { - return m_rbA; - } - int getRigidBodyB() const - { - return m_rbB; - } - - const b3Vector3& getPivotInA() const - { - return m_pivotInA; - } - - const b3Vector3& getPivotInB() const - { - return m_pivotInB; - } - - int isEnabled() const - { - return m_flags & B3_CONSTRAINT_FLAG_ENABLED; - } - - float getBreakingImpulseThreshold() const - { - return m_breakingImpulseThreshold; - } - - ///internal method used by the constraint solver, don't use them directly - void getInfo1(unsigned int* info, const b3RigidBodyData* bodies); - - ///internal method used by the constraint solver, don't use them directly - void getInfo2(b3GpuConstraintInfo2 * info, const b3RigidBodyData* bodies); -}; - -#endif //B3_GPU_GENERIC_CONSTRAINT_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp deleted file mode 100644 index 089fb1f6a60..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp +++ /dev/null @@ -1,1305 +0,0 @@ - -#include "b3GpuJacobiContactSolver.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 -class b3Vector3; -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h" -#include "Bullet3Common/b3Logging.h" -#include "b3GpuConstraint4.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" - -struct b3GpuJacobiSolverInternalData -{ - //btRadixSort32CL* m_sort32; - //btBoundSearchCL* m_search; - b3PrefixScanCL* m_scan; - - b3OpenCLArray* m_bodyCount; - b3OpenCLArray* m_contactConstraintOffsets; - b3OpenCLArray* m_offsetSplitBodies; - - b3OpenCLArray* m_deltaLinearVelocities; - b3OpenCLArray* m_deltaAngularVelocities; - - b3AlignedObjectArray m_deltaLinearVelocitiesCPU; - b3AlignedObjectArray m_deltaAngularVelocitiesCPU; - - b3OpenCLArray* m_contactConstraints; - - b3FillCL* m_filler; - - cl_kernel m_countBodiesKernel; - cl_kernel m_contactToConstraintSplitKernel; - cl_kernel m_clearVelocitiesKernel; - cl_kernel m_averageVelocitiesKernel; - cl_kernel m_updateBodyVelocitiesKernel; - cl_kernel m_solveContactKernel; - cl_kernel m_solveFrictionKernel; -}; - -b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : m_context(ctx), - m_device(device), - m_queue(queue) -{ - m_data = new b3GpuJacobiSolverInternalData; - m_data->m_scan = new b3PrefixScanCL(m_context, m_device, m_queue); - m_data->m_bodyCount = new b3OpenCLArray(m_context, m_queue); - m_data->m_filler = new b3FillCL(m_context, m_device, m_queue); - m_data->m_contactConstraintOffsets = new b3OpenCLArray(m_context, m_queue); - m_data->m_offsetSplitBodies = new b3OpenCLArray(m_context, m_queue); - m_data->m_contactConstraints = new b3OpenCLArray(m_context, m_queue); - m_data->m_deltaLinearVelocities = new b3OpenCLArray(m_context, m_queue); - m_data->m_deltaAngularVelocities = new b3OpenCLArray(m_context, m_queue); - - cl_int pErrNum; - const char* additionalMacros = ""; - const char* solverUtilsSource = solverUtilsCL; - { - cl_program solverUtilsProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverUtilsSource, &pErrNum, additionalMacros, SOLVER_UTILS_KERNEL_PATH); - b3Assert(solverUtilsProg); - m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_countBodiesKernel); - - m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_contactToConstraintSplitKernel); - m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_clearVelocitiesKernel); - - m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_averageVelocitiesKernel); - - m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_updateBodyVelocitiesKernel); - - m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_solveContactKernel); - - m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_solveFrictionKernel); - } -} - -b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver() -{ - clReleaseKernel(m_data->m_solveContactKernel); - clReleaseKernel(m_data->m_solveFrictionKernel); - clReleaseKernel(m_data->m_countBodiesKernel); - clReleaseKernel(m_data->m_contactToConstraintSplitKernel); - clReleaseKernel(m_data->m_averageVelocitiesKernel); - clReleaseKernel(m_data->m_updateBodyVelocitiesKernel); - clReleaseKernel(m_data->m_clearVelocitiesKernel); - - delete m_data->m_deltaLinearVelocities; - delete m_data->m_deltaAngularVelocities; - delete m_data->m_contactConstraints; - delete m_data->m_offsetSplitBodies; - delete m_data->m_contactConstraintOffsets; - delete m_data->m_bodyCount; - delete m_data->m_filler; - delete m_data->m_scan; - delete m_data; -} - -b3Vector3 make_float4(float v) -{ - return b3MakeVector3(v, v, v); -} - -b3Vector4 make_float4(float x, float y, float z, float w) -{ - return b3MakeVector4(x, y, z, w); -} - -static inline float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1, - const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1) -{ - return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1); -} - -static inline void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1, - b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1) -{ - linear = n; - angular0 = b3Cross(r0, n); - angular1 = -b3Cross(r1, n); -} - -static __inline void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) -{ - for (int ic = 0; ic < 4; ic++) - { - // dont necessary because this makes change to 0 - if (cs.m_jacCoeffInv[ic] == 0.f) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1); - - float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, - linVelARO + dLinVelA, angVelARO + dAngVelA, linVelBRO + dLinVelB, angVelBRO + dAngVelB) + - cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[ic]); - updated = b3Min(updated, maxRambdaDt[ic]); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - - if (invMassA) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - } - if (invMassB) - { - dLinVelB += linImp1; - dAngVelB += angImp1; - } - } - } -} - -void solveContact3(b3GpuConstraint4* cs, - b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for (int ic = 0; ic < 4; ic++) - { - if (cs->m_jacCoeffInv[ic] == 0.f) continue; - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr; - b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr; - setLinearAndAngular(cs->m_linear, r0, r1, linear, angular0, angular1); - - float rambdaDt = calcRelVel(cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA + *dLinVelA, *angVelA + *dAngVelA, *linVelB + *dLinVelB, *angVelB + *dAngVelB) + - cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt); - updated = b3Min(updated, maxRambdaDt); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; - - if (invMassA) - { - *dLinVelA += linImp0; - *dAngVelA += angImp0; - } - if (invMassB) - { - *dLinVelB += linImp1; - *dAngVelB += angImp1; - } - } -} - -static inline void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) -{ - b3Vector3 linVelA = linVelARO + dLinVelA; - b3Vector3 linVelB = linVelBRO + dLinVelB; - b3Vector3 angVelA = angVelARO + dAngVelA; - b3Vector3 angVelB = angVelBRO + dAngVelB; - - if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1(n, tangent[0], tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0] - center; - tangent[0] = cross3(n, r); - tangent[1] = cross3(tangent[0], n); - tangent[0] = normalize3(tangent[0]); - tangent[1] = normalize3(tangent[1]); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for (int i = 0; i < 2; i++) - { - setLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[i]); - updated = b3Min(updated, maxRambdaDt[i]); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if (invMassA) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - } - if (invMassB) - { - dLinVelB += linImp1; - dAngVelB += angImp1; - } - } - - { // angular damping for point constraint - b3Vector3 ab = (posB - posA).normalized(); - b3Vector3 ac = (center - posA).normalized(); - if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot(n, angVelA); - float angNB = b3Dot(n, angVelB); - - if (invMassA) - dAngVelA -= (angNA * 0.1f) * n; - if (invMassB) - dAngVelB -= (angNB * 0.1f) * n; - } - } -} - -float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1, - float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB) -{ - // linear0,1 are normlized - float jmj0 = invMass0; //dot3F4(linear0, linear0)*invMass0; - - float jmj1 = b3Dot(mtMul3(angular0, *invInertia0), angular0); - float jmj2 = invMass1; //dot3F4(linear1, linear1)*invMass1; - float jmj3 = b3Dot(mtMul3(angular1, *invInertia1), angular1); - return -1.f / ((jmj0 + jmj1) * countA + (jmj2 + jmj3) * countB); - // return -1.f/((jmj0+jmj1)+(jmj2+jmj3)); -} - -void setConstraint4(const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB, - b3GpuConstraint4* dstC) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f / dt; - for (int ic = 0; ic < 4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear[3] = 0.7f; //src->getFrictionCoeff() ); - for (int ic = 0; ic < 4; ic++) - { - b3Vector3 r0 = src->m_worldPosB[ic] - posA; - b3Vector3 r1 = src->m_worldPosB[ic] - posB; - - if (ic >= src->m_worldNormalOnB[3]) //npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - b3Vector3 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f; //src->getRestituitionCoeff(); - if (relVelN * relVelN < 0.004f) - { - e = 0.f; - } - - dstC->m_b[ic] = e * relVelN; - //float penetration = src->m_worldPos[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift) * positionConstraintCoeff * dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if (src->m_worldNormalOnB[3] > 0) //npoints - { // prepare friction - b3Vector3 center = make_float4(0.f); - for (int i = 0; i < src->m_worldNormalOnB[3]; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB[3]; - - b3Vector3 tangent[2]; - b3PlaneSpace1(src->m_worldNormalOnB, tangent[0], tangent[1]); - - b3Vector3 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for (int i = 0; i < 2; i++) - { - b3Vector3 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for (int i = 0; i < 4; i++) - { - if (i < src->m_worldNormalOnB[3]) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} - -void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyData* gBodies, b3InertiaData* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts, - float dt, - float positionDrift, - float positionConstraintCoeff, int gIdx, b3AlignedObjectArray& bodyCount) -{ - //int gIdx = 0;//GET_GLOBAL_IDX; - - if (gIdx < nContacts) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Vector3 posA = gBodies[aIdx].m_pos; - b3Vector3 linVelA = gBodies[aIdx].m_linVel; - b3Vector3 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld; //.m_invInertia; - - b3Vector3 posB = gBodies[bIdx].m_pos; - b3Vector3 linVelB = gBodies[bIdx].m_linVel; - b3Vector3 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld; //m_invInertia; - - b3GpuConstraint4 cs; - float countA = invMassA ? (float)(bodyCount[aIdx]) : 1; - float countB = invMassB ? (float)(bodyCount[bIdx]) : 1; - setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, countA, countB, - &cs); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -} - -void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo) -{ - B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); - - b3AlignedObjectArray bodyCount; - bodyCount.resize(numBodies); - for (int i = 0; i < numBodies; i++) - bodyCount[i] = 0; - - b3AlignedObjectArray contactConstraintOffsets; - contactConstraintOffsets.resize(numManifolds); - - for (int i = 0; i < numManifolds; i++) - { - int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; - int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; - - bool isFixedA = (pa < 0) || (pa == solverInfo.m_fixedBodyIndex); - bool isFixedB = (pb < 0) || (pb == solverInfo.m_fixedBodyIndex); - - int bodyIndexA = manifoldPtr[i].getBodyA(); - int bodyIndexB = manifoldPtr[i].getBodyB(); - - if (!isFixedA) - { - contactConstraintOffsets[i].x = bodyCount[bodyIndexA]; - bodyCount[bodyIndexA]++; - } - if (!isFixedB) - { - contactConstraintOffsets[i].y = bodyCount[bodyIndexB]; - bodyCount[bodyIndexB]++; - } - } - - b3AlignedObjectArray offsetSplitBodies; - offsetSplitBodies.resize(numBodies); - unsigned int totalNumSplitBodies; - m_data->m_scan->executeHost(bodyCount, offsetSplitBodies, numBodies, &totalNumSplitBodies); - int numlastBody = bodyCount[numBodies - 1]; - totalNumSplitBodies += numlastBody; - printf("totalNumSplitBodies = %d\n", totalNumSplitBodies); - - b3AlignedObjectArray contactConstraints; - contactConstraints.resize(numManifolds); - - for (int i = 0; i < numManifolds; i++) - { - ContactToConstraintKernel(&manifoldPtr[0], bodies, inertias, &contactConstraints[0], numManifolds, - solverInfo.m_deltaTime, - solverInfo.m_positionDrift, - solverInfo.m_positionConstraintCoeff, - i, bodyCount); - } - int maxIter = solverInfo.m_numIterations; - - b3AlignedObjectArray deltaLinearVelocities; - b3AlignedObjectArray deltaAngularVelocities; - deltaLinearVelocities.resize(totalNumSplitBodies); - deltaAngularVelocities.resize(totalNumSplitBodies); - for (unsigned int i = 0; i < totalNumSplitBodies; i++) - { - deltaLinearVelocities[i].setZero(); - deltaAngularVelocities[i].setZero(); - } - - for (int iter = 0; iter < maxIter; iter++) - { - int i = 0; - for (i = 0; i < numManifolds; i++) - { - //float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodies[aIdx]; - b3RigidBodyData& bodyB = bodies[bIdx]; - - b3Vector3 zero = b3MakeVector3(0, 0, 0); - - b3Vector3* dlvAPtr = &zero; - b3Vector3* davAPtr = &zero; - b3Vector3* dlvBPtr = &zero; - b3Vector3* davBPtr = &zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA + constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB = bodyOffsetB + constraintOffsetB; - dlvBPtr = &deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - solveContact(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr); - } - } - - //easy - for (int i = 0; i < numBodies; i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f / float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j = 0; j < count; j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor; - averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor; - } - for (int j = 0; j < count; j++) - { - deltaLinearVelocities[bodyOffset + j] = averageLinVel; - deltaAngularVelocities[bodyOffset + j] = averageAngVel; - } - } - } - } - for (int iter = 0; iter < maxIter; iter++) - { - //int i=0; - - //solve friction - - for (int i = 0; i < numManifolds; i++) - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - float sum = 0; - for (int j = 0; j < 4; j++) - { - sum += contactConstraints[i].m_appliedRambdaDt[j]; - } - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodies[aIdx]; - b3RigidBodyData& bodyB = bodies[bIdx]; - - b3Vector3 zero = b3MakeVector3(0, 0, 0); - - b3Vector3* dlvAPtr = &zero; - b3Vector3* davAPtr = &zero; - b3Vector3* dlvBPtr = &zero; - b3Vector3* davBPtr = &zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA + constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB = bodyOffsetB + constraintOffsetB; - dlvBPtr = &deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - for (int j = 0; j < 4; j++) - { - maxRambdaDt[j] = frictionCoeff * sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - solveFriction(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr); - } - - //easy - for (int i = 0; i < numBodies; i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f / float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j = 0; j < count; j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor; - averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor; - } - for (int j = 0; j < count; j++) - { - deltaLinearVelocities[bodyOffset + j] = averageLinVel; - deltaAngularVelocities[bodyOffset + j] = averageAngVel; - } - } - } - } - - //easy - for (int i = 0; i < numBodies; i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - bodies[i].m_linVel += deltaLinearVelocities[bodyOffset]; - bodies[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } -} - -void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index) -// -// -//void b3GpuJacobiContactSolver::solveGroup(b3OpenCLArray* bodies,b3OpenCLArray* inertias,b3OpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo) -{ - b3JacobiSolverInfo solverInfo; - solverInfo.m_fixedBodyIndex = static0Index; - - B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); - - //int numBodies = bodies->size(); - int numManifolds = numContacts; //manifoldPtr->size(); - - { - B3_PROFILE("resize"); - m_data->m_bodyCount->resize(numBodies); - } - - unsigned int val = 0; - b3Int2 val2; - val2.x = 0; - val2.y = 0; - - { - B3_PROFILE("m_filler"); - m_data->m_contactConstraintOffsets->resize(numManifolds); - m_data->m_filler->execute(*m_data->m_bodyCount, val, numBodies); - - m_data->m_filler->execute(*m_data->m_contactConstraintOffsets, val2, numManifolds); - } - - { - B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue, m_data->m_countBodiesKernel, "m_countBodiesKernel"); - launcher.setBuffer(contactBuf); //manifoldPtr->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setConst(numManifolds); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.launch1D(numManifolds); - } - unsigned int totalNumSplitBodies = 0; - { - B3_PROFILE("m_scan->execute"); - - m_data->m_offsetSplitBodies->resize(numBodies); - m_data->m_scan->execute(*m_data->m_bodyCount, *m_data->m_offsetSplitBodies, numBodies, &totalNumSplitBodies); - totalNumSplitBodies += m_data->m_bodyCount->at(numBodies - 1); - } - - { - B3_PROFILE("m_data->m_contactConstraints->resize"); - //int numContacts = manifoldPtr->size(); - m_data->m_contactConstraints->resize(numContacts); - } - - { - B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher(m_queue, m_data->m_contactToConstraintSplitKernel, "m_contactToConstraintSplitKernel"); - launcher.setBuffer(contactBuf); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setConst(numContacts); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.launch1D(numContacts, 64); - } - - { - B3_PROFILE("m_data->m_deltaLinearVelocities->resize"); - m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies); - m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); - } - - { - B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue, m_data->m_clearVelocitiesKernel, "m_clearVelocitiesKernel"); - launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launch.setConst(totalNumSplitBodies); - launch.launch1D(totalNumSplitBodies); - clFinish(m_queue); - } - - int maxIter = solverInfo.m_numIterations; - - for (int iter = 0; iter < maxIter; iter++) - { - { - B3_PROFILE("m_solveContactKernel"); - b3LauncherCL launcher(m_queue, m_data->m_solveContactKernel, "m_solveContactKernel"); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - { - B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher(m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - } - - { - B3_PROFILE("update body velocities"); - b3LauncherCL launcher(m_queue, m_data->m_updateBodyVelocitiesKernel, "m_updateBodyVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } -} - -#if 0 - -void b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray* bodiesGPU,b3OpenCLArray* inertiasGPU,b3OpenCLArray* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo) -{ - - b3AlignedObjectArray bodiesCPU; - bodiesGPU->copyToHost(bodiesCPU); - b3AlignedObjectArray inertiasCPU; - inertiasGPU->copyToHost(inertiasCPU); - b3AlignedObjectArray manifoldPtrCPU; - manifoldPtrGPU->copyToHost(manifoldPtrCPU); - - int numBodiesCPU = bodiesGPU->size(); - int numManifoldsCPU = manifoldPtrGPU->size(); - B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed"); - - b3AlignedObjectArray bodyCount; - bodyCount.resize(numBodiesCPU); - for (int i=0;i contactConstraintOffsets; - contactConstraintOffsets.resize(numManifoldsCPU); - - - for (int i=0;i offsetSplitBodies; - offsetSplitBodies.resize(numBodiesCPU); - unsigned int totalNumSplitBodiesCPU; - m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU); - int numlastBody = bodyCount[numBodiesCPU-1]; - totalNumSplitBodiesCPU += numlastBody; - - int numBodies = bodiesGPU->size(); - int numManifolds = manifoldPtrGPU->size(); - - m_data->m_bodyCount->resize(numBodies); - - unsigned int val=0; - b3Int2 val2; - val2.x=0; - val2.y=0; - - { - B3_PROFILE("m_filler"); - m_data->m_contactConstraintOffsets->resize(numManifolds); - m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); - - - m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); - } - - { - B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); - launcher.setBuffer(manifoldPtrGPU->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setConst(numManifolds); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.launch1D(numManifolds); - } - - unsigned int totalNumSplitBodies=0; - m_data->m_offsetSplitBodies->resize(numBodies); - m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); - totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); - - if (totalNumSplitBodies != totalNumSplitBodiesCPU) - { - printf("error in totalNumSplitBodies!\n"); - } - - int numContacts = manifoldPtrGPU->size(); - m_data->m_contactConstraints->resize(numContacts); - - - { - B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); - launcher.setBuffer(manifoldPtrGPU->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setConst(numContacts); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.launch1D( numContacts, 64 ); - clFinish(m_queue); - } - - - - b3AlignedObjectArray contactConstraints; - contactConstraints.resize(numManifoldsCPU); - - for (int i=0;i deltaLinearVelocities; - b3AlignedObjectArray deltaAngularVelocities; - deltaLinearVelocities.resize(totalNumSplitBodiesCPU); - deltaAngularVelocities.resize(totalNumSplitBodiesCPU); - for (int i=0;im_deltaLinearVelocities->resize(totalNumSplitBodies); - m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); - - - - { - B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); - launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launch.setConst(totalNumSplitBodies); - launch.launch1D(totalNumSplitBodies); - } - - - ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - m_data->m_contactConstraints->copyToHost(contactConstraints); - m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies); - m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets); - m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); - m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); - - for (int iter = 0;iterm_solveContactKernel ); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - - int i=0; - for( i=0; im_averageVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - //easy - for (int i=0;im_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); - //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); - m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); - m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); - -#if 0 - - { - B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - //solve friction - - for(int i=0; im_averageVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - //easy - for (int i=0;im_updateBodyVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - - //easy - for (int i=0;icopyFromHost(bodiesCPU); - - -} -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h deleted file mode 100644 index 8281aee05de..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h +++ /dev/null @@ -1,56 +0,0 @@ - -#ifndef B3_GPU_JACOBI_CONTACT_SOLVER_H -#define B3_GPU_JACOBI_CONTACT_SOLVER_H -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -//#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" - -//struct b3InertiaData; -//b3InertiaData - -class b3TypedConstraint; - -struct b3JacobiSolverInfo -{ - int m_fixedBodyIndex; - - float m_deltaTime; - float m_positionDrift; - float m_positionConstraintCoeff; - int m_numIterations; - - b3JacobiSolverInfo() - : m_fixedBodyIndex(0), - m_deltaTime(1. / 60.f), - m_positionDrift(0.005f), - m_positionConstraintCoeff(0.99f), - m_numIterations(7) - { - } -}; -class b3GpuJacobiContactSolver -{ -protected: - struct b3GpuJacobiSolverInternalData* m_data; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - -public: - b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); - virtual ~b3GpuJacobiContactSolver(); - - void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); - void solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, struct b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo); - //void solveGroupHost(btRigidBodyCL* bodies,b3InertiaData* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); - - //b3Scalar solveGroup(b3OpenCLArray* gpuBodies,b3OpenCLArray* gpuInertias, int numBodies,b3OpenCLArray* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - - //void solveGroup(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); - //void solveGroupMixed(btOpenCLArray* bodies,btOpenCLArray* inertias,btOpenCLArray* manifoldPtr,const btJacobiSolverInfo& solverInfo); -}; -#endif //B3_GPU_JACOBI_CONTACT_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp deleted file mode 100644 index 2e4f6c1572f..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ /dev/null @@ -1,1013 +0,0 @@ -#include "b3GpuNarrowPhase.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" - -#include "b3GpuNarrowPhaseInternalData.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h" - -b3GpuNarrowPhase::b3GpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const b3Config& config) - : m_data(0), m_planeBodyIndex(-1), m_static0Index(-1), m_context(ctx), m_device(device), m_queue(queue) -{ - m_data = new b3GpuNarrowPhaseInternalData(); - m_data->m_currentContactBuffer = 0; - - memset(m_data, 0, sizeof(b3GpuNarrowPhaseInternalData)); - - m_data->m_config = config; - - m_data->m_gpuSatCollision = new GpuSatCollision(ctx, device, queue); - - m_data->m_triangleConvexPairs = new b3OpenCLArray(m_context, m_queue, config.m_maxTriConvexPairCapacity); - - //m_data->m_convexPairsOutGPU = new b3OpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); - //m_data->m_planePairs = new b3OpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); - - m_data->m_pBufContactOutCPU = new b3AlignedObjectArray(); - m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs); - m_data->m_bodyBufferCPU = new b3AlignedObjectArray(); - m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies); - - m_data->m_inertiaBufferCPU = new b3AlignedObjectArray(); - m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies); - - m_data->m_pBufContactBuffersGPU[0] = new b3OpenCLArray(ctx, queue, config.m_maxContactCapacity, true); - m_data->m_pBufContactBuffersGPU[1] = new b3OpenCLArray(ctx, queue, config.m_maxContactCapacity, true); - - m_data->m_inertiaBufferGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies, false); - m_data->m_collidablesGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexShapes); - m_data->m_collidablesCPU.reserve(config.m_maxConvexShapes); - - m_data->m_localShapeAABBCPU = new b3AlignedObjectArray; - m_data->m_localShapeAABBGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexShapes); - - //m_data->m_solverDataGPU = adl::Solver::allocate(ctx,queue, config.m_maxBroadphasePairs,false); - m_data->m_bodyBufferGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies, false); - - m_data->m_convexFacesGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexShapes * config.m_maxFacesPerShape, false); - m_data->m_convexFaces.reserve(config.m_maxConvexShapes * config.m_maxFacesPerShape); - - m_data->m_gpuChildShapes = new b3OpenCLArray(ctx, queue, config.m_maxCompoundChildShapes, false); - - m_data->m_convexPolyhedraGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexShapes, false); - m_data->m_convexPolyhedra.reserve(config.m_maxConvexShapes); - - m_data->m_uniqueEdgesGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexUniqueEdges, true); - m_data->m_uniqueEdges.reserve(config.m_maxConvexUniqueEdges); - - m_data->m_convexVerticesGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexVertices, true); - m_data->m_convexVertices.reserve(config.m_maxConvexVertices); - - m_data->m_convexIndicesGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexIndices, true); - m_data->m_convexIndices.reserve(config.m_maxConvexIndices); - - m_data->m_worldVertsB1GPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies * config.m_maxVerticesPerFace); - m_data->m_clippingFacesOutGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies); - m_data->m_worldNormalsAGPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies); - m_data->m_worldVertsA1GPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies * config.m_maxVerticesPerFace); - m_data->m_worldVertsB2GPU = new b3OpenCLArray(ctx, queue, config.m_maxConvexBodies * config.m_maxVerticesPerFace); - - m_data->m_convexData = new b3AlignedObjectArray(); - - m_data->m_convexData->resize(config.m_maxConvexShapes); - m_data->m_convexPolyhedra.resize(config.m_maxConvexShapes); - - m_data->m_numAcceleratedShapes = 0; - m_data->m_numAcceleratedRigidBodies = 0; - - m_data->m_subTreesGPU = new b3OpenCLArray(this->m_context, this->m_queue); - m_data->m_treeNodesGPU = new b3OpenCLArray(this->m_context, this->m_queue); - m_data->m_bvhInfoGPU = new b3OpenCLArray(this->m_context, this->m_queue); - - //m_data->m_contactCGPU = new b3OpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); - //m_data->m_frictionCGPU = new b3OpenCLArray::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs); -} - -b3GpuNarrowPhase::~b3GpuNarrowPhase() -{ - delete m_data->m_gpuSatCollision; - - delete m_data->m_triangleConvexPairs; - //delete m_data->m_convexPairsOutGPU; - //delete m_data->m_planePairs; - delete m_data->m_pBufContactOutCPU; - delete m_data->m_bodyBufferCPU; - delete m_data->m_inertiaBufferCPU; - delete m_data->m_pBufContactBuffersGPU[0]; - delete m_data->m_pBufContactBuffersGPU[1]; - - delete m_data->m_inertiaBufferGPU; - delete m_data->m_collidablesGPU; - delete m_data->m_localShapeAABBCPU; - delete m_data->m_localShapeAABBGPU; - delete m_data->m_bodyBufferGPU; - delete m_data->m_convexFacesGPU; - delete m_data->m_gpuChildShapes; - delete m_data->m_convexPolyhedraGPU; - delete m_data->m_uniqueEdgesGPU; - delete m_data->m_convexVerticesGPU; - delete m_data->m_convexIndicesGPU; - delete m_data->m_worldVertsB1GPU; - delete m_data->m_clippingFacesOutGPU; - delete m_data->m_worldNormalsAGPU; - delete m_data->m_worldVertsA1GPU; - delete m_data->m_worldVertsB2GPU; - - delete m_data->m_bvhInfoGPU; - - for (int i = 0; i < m_data->m_bvhData.size(); i++) - { - delete m_data->m_bvhData[i]; - } - for (int i = 0; i < m_data->m_meshInterfaces.size(); i++) - { - delete m_data->m_meshInterfaces[i]; - } - m_data->m_meshInterfaces.clear(); - m_data->m_bvhData.clear(); - delete m_data->m_treeNodesGPU; - delete m_data->m_subTreesGPU; - - delete m_data->m_convexData; - delete m_data; -} - -int b3GpuNarrowPhase::allocateCollidable() -{ - int curSize = m_data->m_collidablesCPU.size(); - if (curSize < m_data->m_config.m_maxConvexShapes) - { - m_data->m_collidablesCPU.expand(); - return curSize; - } - else - { - b3Error("allocateCollidable out-of-range %d\n", m_data->m_config.m_maxConvexShapes); - } - return -1; -} - -int b3GpuNarrowPhase::registerSphereShape(float radius) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_SPHERE; - col.m_shapeIndex = 0; - col.m_radius = radius; - - if (col.m_shapeIndex >= 0) - { - b3SapAabb aabb; - b3Vector3 myAabbMin = b3MakeVector3(-radius, -radius, -radius); - b3Vector3 myAabbMax = b3MakeVector3(radius, radius, radius); - - aabb.m_min[0] = myAabbMin[0]; //s_convexHeightField->m_aabb.m_min.x; - aabb.m_min[1] = myAabbMin[1]; //s_convexHeightField->m_aabb.m_min.y; - aabb.m_min[2] = myAabbMin[2]; //s_convexHeightField->m_aabb.m_min.z; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; //s_convexHeightField->m_aabb.m_max.x; - aabb.m_max[1] = myAabbMax[1]; //s_convexHeightField->m_aabb.m_max.y; - aabb.m_max[2] = myAabbMax[2]; //s_convexHeightField->m_aabb.m_max.z; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - clFinish(m_queue); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerFace(const b3Vector3& faceNormal, float faceConstant) -{ - int faceOffset = m_data->m_convexFaces.size(); - b3GpuFace& face = m_data->m_convexFaces.expand(); - face.m_plane = b3MakeVector3(faceNormal.x, faceNormal.y, faceNormal.z, faceConstant); - return faceOffset; -} - -int b3GpuNarrowPhase::registerPlaneShape(const b3Vector3& planeNormal, float planeConstant) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_PLANE; - col.m_shapeIndex = registerFace(planeNormal, planeConstant); - col.m_radius = planeConstant; - - if (col.m_shapeIndex >= 0) - { - b3SapAabb aabb; - aabb.m_min[0] = -1e30f; - aabb.m_min[1] = -1e30f; - aabb.m_min[2] = -1e30f; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = 1e30f; - aabb.m_max[1] = 1e30f; - aabb.m_max[2] = 1e30f; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - clFinish(m_queue); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConvexHullShapeInternal(b3ConvexUtility* convexPtr, b3Collidable& col) -{ - m_data->m_convexData->resize(m_data->m_numAcceleratedShapes + 1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes + 1); - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size() - 1); - convex.mC = convexPtr->mC; - convex.mE = convexPtr->mE; - convex.m_extents = convexPtr->m_extents; - convex.m_localCenter = convexPtr->m_localCenter; - convex.m_radius = convexPtr->m_radius; - - convex.m_numUniqueEdges = convexPtr->m_uniqueEdges.size(); - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - m_data->m_uniqueEdges.resize(edgeOffset + convex.m_numUniqueEdges); - - //convex data here - int i; - for (i = 0; i < convexPtr->m_uniqueEdges.size(); i++) - { - m_data->m_uniqueEdges[edgeOffset + i] = convexPtr->m_uniqueEdges[i]; - } - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - convex.m_numFaces = convexPtr->m_faces.size(); - - m_data->m_convexFaces.resize(faceOffset + convex.m_numFaces); - - for (i = 0; i < convexPtr->m_faces.size(); i++) - { - m_data->m_convexFaces[convex.m_faceOffset + i].m_plane = b3MakeVector3(convexPtr->m_faces[i].m_plane[0], - convexPtr->m_faces[i].m_plane[1], - convexPtr->m_faces[i].m_plane[2], - convexPtr->m_faces[i].m_plane[3]); - - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = convexPtr->m_faces[i].m_indices.size(); - m_data->m_convexFaces[convex.m_faceOffset + i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset + i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset + numIndices); - for (int p = 0; p < numIndices; p++) - { - m_data->m_convexIndices[indexOffset + p] = convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = convexPtr->m_vertices.size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset = vertexOffset; - - m_data->m_convexVertices.resize(vertexOffset + convex.m_numVertices); - for (int i = 0; i < convexPtr->m_vertices.size(); i++) - { - m_data->m_convexVertices[vertexOffset + i] = convexPtr->m_vertices[i]; - } - - (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr; - - return m_data->m_numAcceleratedShapes++; -} - -int b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling) -{ - b3AlignedObjectArray verts; - - unsigned char* vts = (unsigned char*)vertices; - for (int i = 0; i < numVertices; i++) - { - float* vertex = (float*)&vts[i * strideInBytes]; - verts.push_back(b3MakeVector3(vertex[0] * scaling[0], vertex[1] * scaling[1], vertex[2] * scaling[2])); - } - - b3ConvexUtility* utilPtr = new b3ConvexUtility(); - bool merge = true; - if (numVertices) - { - utilPtr->initializePolyhedralFeatures(&verts[0], verts.size(), merge); - } - - int collidableIndex = registerConvexHullShape(utilPtr); - delete utilPtr; - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_CONVEX_HULL; - col.m_shapeIndex = -1; - - { - b3Vector3 localCenter = b3MakeVector3(0, 0, 0); - for (int i = 0; i < utilPtr->m_vertices.size(); i++) - localCenter += utilPtr->m_vertices[i]; - localCenter *= (1.f / utilPtr->m_vertices.size()); - utilPtr->m_localCenter = localCenter; - - col.m_shapeIndex = registerConvexHullShapeInternal(utilPtr, col); - } - - if (col.m_shapeIndex >= 0) - { - b3SapAabb aabb; - - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - for (int i = 0; i < utilPtr->m_vertices.size(); i++) - { - myAabbMin.setMin(utilPtr->m_vertices[i]); - myAabbMax.setMax(utilPtr->m_vertices[i]); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1] = myAabbMax[1]; - aabb.m_max[2] = myAabbMax[2]; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray* childShapes) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_COMPOUND_OF_CONVEX_HULLS; - col.m_shapeIndex = m_data->m_cpuChildShapes.size(); - col.m_compoundBvhIndex = m_data->m_bvhInfoCPU.size(); - - { - b3Assert(col.m_shapeIndex + childShapes->size() < m_data->m_config.m_maxCompoundChildShapes); - for (int i = 0; i < childShapes->size(); i++) - { - m_data->m_cpuChildShapes.push_back(childShapes->at(i)); - } - } - - col.m_numChildShapes = childShapes->size(); - - b3SapAabb aabbLocalSpace; - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - b3AlignedObjectArray childLocalAabbs; - childLocalAabbs.resize(childShapes->size()); - - //compute local AABB of the compound of all children - for (int i = 0; i < childShapes->size(); i++) - { - int childColIndex = childShapes->at(i).m_shapeIndex; - //b3Collidable& childCol = getCollidableCpu(childColIndex); - b3SapAabb aabbLoc = m_data->m_localShapeAABBCPU->at(childColIndex); - - b3Vector3 childLocalAabbMin = b3MakeVector3(aabbLoc.m_min[0], aabbLoc.m_min[1], aabbLoc.m_min[2]); - b3Vector3 childLocalAabbMax = b3MakeVector3(aabbLoc.m_max[0], aabbLoc.m_max[1], aabbLoc.m_max[2]); - b3Vector3 aMin, aMax; - b3Scalar margin(0.f); - b3Transform childTr; - childTr.setIdentity(); - - childTr.setOrigin(childShapes->at(i).m_childPosition); - childTr.setRotation(b3Quaternion(childShapes->at(i).m_childOrientation)); - b3TransformAabb(childLocalAabbMin, childLocalAabbMax, margin, childTr, aMin, aMax); - myAabbMin.setMin(aMin); - myAabbMax.setMax(aMax); - childLocalAabbs[i].m_min[0] = aMin[0]; - childLocalAabbs[i].m_min[1] = aMin[1]; - childLocalAabbs[i].m_min[2] = aMin[2]; - childLocalAabbs[i].m_min[3] = 0; - childLocalAabbs[i].m_max[0] = aMax[0]; - childLocalAabbs[i].m_max[1] = aMax[1]; - childLocalAabbs[i].m_max[2] = aMax[2]; - childLocalAabbs[i].m_max[3] = 0; - } - - aabbLocalSpace.m_min[0] = myAabbMin[0]; //s_convexHeightField->m_aabb.m_min.x; - aabbLocalSpace.m_min[1] = myAabbMin[1]; //s_convexHeightField->m_aabb.m_min.y; - aabbLocalSpace.m_min[2] = myAabbMin[2]; //s_convexHeightField->m_aabb.m_min.z; - aabbLocalSpace.m_minIndices[3] = 0; - - aabbLocalSpace.m_max[0] = myAabbMax[0]; //s_convexHeightField->m_aabb.m_max.x; - aabbLocalSpace.m_max[1] = myAabbMax[1]; //s_convexHeightField->m_aabb.m_max.y; - aabbLocalSpace.m_max[2] = myAabbMax[2]; //s_convexHeightField->m_aabb.m_max.z; - aabbLocalSpace.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabbLocalSpace); - - b3QuantizedBvh* bvh = new b3QuantizedBvh; - bvh->setQuantizationValues(myAabbMin, myAabbMax); - QuantizedNodeArray& nodes = bvh->getLeafNodeArray(); - int numNodes = childShapes->size(); - - for (int i = 0; i < numNodes; i++) - { - b3QuantizedBvhNode node; - b3Vector3 aabbMin, aabbMax; - aabbMin = (b3Vector3&)childLocalAabbs[i].m_min; - aabbMax = (b3Vector3&)childLocalAabbs[i].m_max; - - bvh->quantize(&node.m_quantizedAabbMin[0], aabbMin, 0); - bvh->quantize(&node.m_quantizedAabbMax[0], aabbMax, 1); - int partId = 0; - node.m_escapeIndexOrTriangleIndex = (partId << (31 - MAX_NUM_PARTS_IN_BITS)) | i; - nodes.push_back(node); - } - bvh->buildInternal(); - - int numSubTrees = bvh->getSubtreeInfoArray().size(); - - //void setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0)); - //QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } - ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized - //void buildInternal(); - - b3BvhInfo bvhInfo; - - bvhInfo.m_aabbMin = bvh->m_bvhAabbMin; - bvhInfo.m_aabbMax = bvh->m_bvhAabbMax; - bvhInfo.m_quantization = bvh->m_bvhQuantization; - bvhInfo.m_numNodes = numNodes; - bvhInfo.m_numSubTrees = numSubTrees; - bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size(); - bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size(); - - int numNewNodes = bvh->getQuantizedNodeArray().size(); - - for (int i = 0; i < numNewNodes - 1; i++) - { - if (bvh->getQuantizedNodeArray()[i].isLeafNode()) - { - int orgIndex = bvh->getQuantizedNodeArray()[i].getTriangleIndex(); - - b3Vector3 nodeMinVec = bvh->unQuantize(bvh->getQuantizedNodeArray()[i].m_quantizedAabbMin); - b3Vector3 nodeMaxVec = bvh->unQuantize(bvh->getQuantizedNodeArray()[i].m_quantizedAabbMax); - - for (int c = 0; c < 3; c++) - { - if (childLocalAabbs[orgIndex].m_min[c] < nodeMinVec[c]) - { - printf("min org (%f) and new (%f) ? at i:%d,c:%d\n", childLocalAabbs[i].m_min[c], nodeMinVec[c], i, c); - } - if (childLocalAabbs[orgIndex].m_max[c] > nodeMaxVec[c]) - { - printf("max org (%f) and new (%f) ? at i:%d,c:%d\n", childLocalAabbs[i].m_max[c], nodeMaxVec[c], i, c); - } - } - } - } - - m_data->m_bvhInfoCPU.push_back(bvhInfo); - - int numNewSubtrees = bvh->getSubtreeInfoArray().size(); - m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size() + numNewSubtrees); - for (int i = 0; i < numNewSubtrees; i++) - { - m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]); - } - int numNewTreeNodes = bvh->getQuantizedNodeArray().size(); - - for (int i = 0; i < numNewTreeNodes; i++) - { - m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]); - } - - // m_data->m_localShapeAABBGPU->push_back(aabbWS); - clFinish(m_queue); - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, const float* scaling1) -{ - b3Vector3 scaling = b3MakeVector3(scaling1[0], scaling1[1], scaling1[2]); - - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - - col.m_shapeType = SHAPE_CONCAVE_TRIMESH; - col.m_shapeIndex = registerConcaveMeshShape(vertices, indices, col, scaling); - col.m_bvhIndex = m_data->m_bvhInfoCPU.size(); - - b3SapAabb aabb; - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - for (int i = 0; i < vertices->size(); i++) - { - b3Vector3 vtx(vertices->at(i) * scaling); - myAabbMin.setMin(vtx); - myAabbMax.setMax(vtx); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1] = myAabbMax[1]; - aabb.m_max[2] = myAabbMax[2]; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - - b3OptimizedBvh* bvh = new b3OptimizedBvh(); - //void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) - - bool useQuantizedAabbCompression = true; - b3TriangleIndexVertexArray* meshInterface = new b3TriangleIndexVertexArray(); - m_data->m_meshInterfaces.push_back(meshInterface); - b3IndexedMesh mesh; - mesh.m_numTriangles = indices->size() / 3; - mesh.m_numVertices = vertices->size(); - mesh.m_vertexBase = (const unsigned char*)&vertices->at(0).x; - mesh.m_vertexStride = sizeof(b3Vector3); - mesh.m_triangleIndexStride = 3 * sizeof(int); // or sizeof(int) - mesh.m_triangleIndexBase = (const unsigned char*)&indices->at(0); - - meshInterface->addIndexedMesh(mesh); - bvh->build(meshInterface, useQuantizedAabbCompression, (b3Vector3&)aabb.m_min, (b3Vector3&)aabb.m_max); - m_data->m_bvhData.push_back(bvh); - int numNodes = bvh->getQuantizedNodeArray().size(); - //b3OpenCLArray* treeNodesGPU = new b3OpenCLArray(this->m_context,this->m_queue,numNodes); - int numSubTrees = bvh->getSubtreeInfoArray().size(); - - b3BvhInfo bvhInfo; - - bvhInfo.m_aabbMin = bvh->m_bvhAabbMin; - bvhInfo.m_aabbMax = bvh->m_bvhAabbMax; - bvhInfo.m_quantization = bvh->m_bvhQuantization; - bvhInfo.m_numNodes = numNodes; - bvhInfo.m_numSubTrees = numSubTrees; - bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size(); - bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size(); - - m_data->m_bvhInfoCPU.push_back(bvhInfo); - - int numNewSubtrees = bvh->getSubtreeInfoArray().size(); - m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size() + numNewSubtrees); - for (int i = 0; i < numNewSubtrees; i++) - { - m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]); - } - int numNewTreeNodes = bvh->getQuantizedNodeArray().size(); - - for (int i = 0; i < numNewTreeNodes; i++) - { - m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConcaveMeshShape(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, b3Collidable& col, const float* scaling1) -{ - b3Vector3 scaling = b3MakeVector3(scaling1[0], scaling1[1], scaling1[2]); - - m_data->m_convexData->resize(m_data->m_numAcceleratedShapes + 1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes + 1); - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size() - 1); - convex.mC = b3MakeVector3(0, 0, 0); - convex.mE = b3MakeVector3(0, 0, 0); - convex.m_extents = b3MakeVector3(0, 0, 0); - convex.m_localCenter = b3MakeVector3(0, 0, 0); - convex.m_radius = 0.f; - - convex.m_numUniqueEdges = 0; - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - - convex.m_numFaces = indices->size() / 3; - m_data->m_convexFaces.resize(faceOffset + convex.m_numFaces); - m_data->m_convexIndices.reserve(convex.m_numFaces * 3); - for (int i = 0; i < convex.m_numFaces; i++) - { - if (i % 256 == 0) - { - //printf("i=%d out of %d", i,convex.m_numFaces); - } - b3Vector3 vert0(vertices->at(indices->at(i * 3)) * scaling); - b3Vector3 vert1(vertices->at(indices->at(i * 3 + 1)) * scaling); - b3Vector3 vert2(vertices->at(indices->at(i * 3 + 2)) * scaling); - - b3Vector3 normal = ((vert1 - vert0).cross(vert2 - vert0)).normalize(); - b3Scalar c = -(normal.dot(vert0)); - - m_data->m_convexFaces[convex.m_faceOffset + i].m_plane = b3MakeVector4(normal.x, normal.y, normal.z, c); - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = 3; - m_data->m_convexFaces[convex.m_faceOffset + i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset + i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset + numIndices); - for (int p = 0; p < numIndices; p++) - { - int vi = indices->at(i * 3 + p); - m_data->m_convexIndices[indexOffset + p] = vi; //convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = vertices->size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset = vertexOffset; - m_data->m_convexVertices.resize(vertexOffset + convex.m_numVertices); - for (int i = 0; i < vertices->size(); i++) - { - m_data->m_convexVertices[vertexOffset + i] = vertices->at(i) * scaling; - } - - (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = 0; - - return m_data->m_numAcceleratedShapes++; -} - -cl_mem b3GpuNarrowPhase::getBodiesGpu() -{ - return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL(); -} - -const struct b3RigidBodyData* b3GpuNarrowPhase::getBodiesCpu() const -{ - return &m_data->m_bodyBufferCPU->at(0); -}; - -int b3GpuNarrowPhase::getNumBodiesGpu() const -{ - return m_data->m_bodyBufferGPU->size(); -} - -cl_mem b3GpuNarrowPhase::getBodyInertiasGpu() -{ - return (cl_mem)m_data->m_inertiaBufferGPU->getBufferCL(); -} - -int b3GpuNarrowPhase::getNumBodyInertiasGpu() const -{ - return m_data->m_inertiaBufferGPU->size(); -} - -b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -const b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) const -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -cl_mem b3GpuNarrowPhase::getCollidablesGpu() -{ - return m_data->m_collidablesGPU->getBufferCL(); -} - -const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const -{ - if (m_data->m_collidablesCPU.size()) - return &m_data->m_collidablesCPU[0]; - return 0; -} - -const struct b3SapAabb* b3GpuNarrowPhase::getLocalSpaceAabbsCpu() const -{ - if (m_data->m_localShapeAABBCPU->size()) - { - return &m_data->m_localShapeAABBCPU->at(0); - } - return 0; -} - -cl_mem b3GpuNarrowPhase::getAabbLocalSpaceBufferGpu() -{ - return m_data->m_localShapeAABBGPU->getBufferCL(); -} -int b3GpuNarrowPhase::getNumCollidablesGpu() const -{ - return m_data->m_collidablesGPU->size(); -} - -int b3GpuNarrowPhase::getNumContactsGpu() const -{ - return m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->size(); -} -cl_mem b3GpuNarrowPhase::getContactsGpu() -{ - return m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->getBufferCL(); -} - -const b3Contact4* b3GpuNarrowPhase::getContactsCPU() const -{ - m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->copyToHost(*m_data->m_pBufContactOutCPU); - return &m_data->m_pBufContactOutCPU->at(0); -} - -void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects) -{ - cl_mem aabbsLocalSpace = m_data->m_localShapeAABBGPU->getBufferCL(); - - int nContactOut = 0; - - //swap buffer - m_data->m_currentContactBuffer = 1 - m_data->m_currentContactBuffer; - - //int curSize = m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->size(); - - int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity; - int numTriConvexPairsOut = 0; - - b3OpenCLArray broadphasePairsGPU(m_context, m_queue); - broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs, numBroadphasePairs); - - b3OpenCLArray clAabbArrayWorldSpace(this->m_context, this->m_queue); - clAabbArrayWorldSpace.setFromOpenCLBuffer(aabbsWorldSpace, numObjects); - - b3OpenCLArray clAabbArrayLocalSpace(this->m_context, this->m_queue); - clAabbArrayLocalSpace.setFromOpenCLBuffer(aabbsLocalSpace, numObjects); - - m_data->m_gpuSatCollision->computeConvexConvexContactsGPUSAT( - &broadphasePairsGPU, numBroadphasePairs, - m_data->m_bodyBufferGPU, - m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer], - nContactOut, - m_data->m_pBufContactBuffersGPU[1 - m_data->m_currentContactBuffer], - m_data->m_config.m_maxContactCapacity, - m_data->m_config.m_compoundPairCapacity, - *m_data->m_convexPolyhedraGPU, - *m_data->m_convexVerticesGPU, - *m_data->m_uniqueEdgesGPU, - *m_data->m_convexFacesGPU, - *m_data->m_convexIndicesGPU, - *m_data->m_collidablesGPU, - *m_data->m_gpuChildShapes, - clAabbArrayWorldSpace, - clAabbArrayLocalSpace, - *m_data->m_worldVertsB1GPU, - *m_data->m_clippingFacesOutGPU, - *m_data->m_worldNormalsAGPU, - *m_data->m_worldVertsA1GPU, - *m_data->m_worldVertsB2GPU, - m_data->m_bvhData, - m_data->m_treeNodesGPU, - m_data->m_subTreesGPU, - m_data->m_bvhInfoGPU, - numObjects, - maxTriConvexPairCapacity, - *m_data->m_triangleConvexPairs, - numTriConvexPairsOut); - - /*b3AlignedObjectArray broadphasePairsCPU; - broadphasePairsGPU.copyToHost(broadphasePairsCPU); - printf("checking pairs\n"); - */ -} - -const b3SapAabb& b3GpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const -{ - return m_data->m_localShapeAABBCPU->at(collidableIndex); -} - -int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMinPtr, const float* aabbMaxPtr, bool writeToGpu) -{ - b3Vector3 aabbMin = b3MakeVector3(aabbMinPtr[0], aabbMinPtr[1], aabbMinPtr[2]); - b3Vector3 aabbMax = b3MakeVector3(aabbMaxPtr[0], aabbMaxPtr[1], aabbMaxPtr[2]); - - if (m_data->m_numAcceleratedRigidBodies >= (m_data->m_config.m_maxConvexBodies)) - { - b3Error("registerRigidBody: exceeding the number of rigid bodies, %d > %d \n", m_data->m_numAcceleratedRigidBodies, m_data->m_config.m_maxConvexBodies); - return -1; - } - - m_data->m_bodyBufferCPU->resize(m_data->m_numAcceleratedRigidBodies + 1); - - b3RigidBodyData& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies); - - float friction = 1.f; - float restitution = 0.f; - - body.m_frictionCoeff = friction; - body.m_restituitionCoeff = restitution; - body.m_angVel = b3MakeVector3(0, 0, 0); - body.m_linVel = b3MakeVector3(0, 0, 0); //.setZero(); - body.m_pos = b3MakeVector3(position[0], position[1], position[2]); - body.m_quat.setValue(orientation[0], orientation[1], orientation[2], orientation[3]); - body.m_collidableIdx = collidableIndex; - if (collidableIndex >= 0) - { - // body.m_shapeType = m_data->m_collidablesCPU.at(collidableIndex).m_shapeType; - } - else - { - // body.m_shapeType = CollisionShape::SHAPE_PLANE; - m_planeBodyIndex = m_data->m_numAcceleratedRigidBodies; - } - //body.m_shapeType = shapeType; - - body.m_invMass = mass ? 1.f / mass : 0.f; - - if (writeToGpu) - { - m_data->m_bodyBufferGPU->copyFromHostPointer(&body, 1, m_data->m_numAcceleratedRigidBodies); - } - - b3InertiaData& shapeInfo = m_data->m_inertiaBufferCPU->at(m_data->m_numAcceleratedRigidBodies); - - if (mass == 0.f) - { - if (m_data->m_numAcceleratedRigidBodies == 0) - m_static0Index = 0; - - shapeInfo.m_initInvInertia.setValue(0, 0, 0, 0, 0, 0, 0, 0, 0); - shapeInfo.m_invInertiaWorld.setValue(0, 0, 0, 0, 0, 0, 0, 0, 0); - } - else - { - b3Assert(body.m_collidableIdx >= 0); - - //approximate using the aabb of the shape - - //Aabb aabb = (*m_data->m_shapePointers)[shapeIndex]->m_aabb; - b3Vector3 halfExtents = (aabbMax - aabbMin); //*0.5f;//fake larger inertia makes demos more stable ;-) - - b3Vector3 localInertia; - - float lx = 2.f * halfExtents[0]; - float ly = 2.f * halfExtents[1]; - float lz = 2.f * halfExtents[2]; - - localInertia.setValue((mass / 12.0f) * (ly * ly + lz * lz), - (mass / 12.0f) * (lx * lx + lz * lz), - (mass / 12.0f) * (lx * lx + ly * ly)); - - b3Vector3 invLocalInertia; - invLocalInertia[0] = 1.f / localInertia[0]; - invLocalInertia[1] = 1.f / localInertia[1]; - invLocalInertia[2] = 1.f / localInertia[2]; - invLocalInertia[3] = 0.f; - - shapeInfo.m_initInvInertia.setValue( - invLocalInertia[0], 0, 0, - 0, invLocalInertia[1], 0, - 0, 0, invLocalInertia[2]); - - b3Matrix3x3 m(body.m_quat); - - shapeInfo.m_invInertiaWorld = m.scaled(invLocalInertia) * m.transpose(); - } - - if (writeToGpu) - m_data->m_inertiaBufferGPU->copyFromHostPointer(&shapeInfo, 1, m_data->m_numAcceleratedRigidBodies); - - return m_data->m_numAcceleratedRigidBodies++; -} - -int b3GpuNarrowPhase::getNumRigidBodies() const -{ - return m_data->m_numAcceleratedRigidBodies; -} - -void b3GpuNarrowPhase::writeAllBodiesToGpu() -{ - if (m_data->m_localShapeAABBCPU->size()) - { - m_data->m_localShapeAABBGPU->copyFromHost(*m_data->m_localShapeAABBCPU); - } - - m_data->m_gpuChildShapes->copyFromHost(m_data->m_cpuChildShapes); - m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces); - m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra); - m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges); - m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices); - m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices); - m_data->m_bvhInfoGPU->copyFromHost(m_data->m_bvhInfoCPU); - m_data->m_treeNodesGPU->copyFromHost(m_data->m_treeNodesCPU); - m_data->m_subTreesGPU->copyFromHost(m_data->m_subTreesCPU); - - m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); - m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); - - if (m_data->m_numAcceleratedRigidBodies) - { - m_data->m_bodyBufferGPU->copyFromHostPointer(&m_data->m_bodyBufferCPU->at(0), m_data->m_numAcceleratedRigidBodies); - m_data->m_inertiaBufferGPU->copyFromHostPointer(&m_data->m_inertiaBufferCPU->at(0), m_data->m_numAcceleratedRigidBodies); - } - if (m_data->m_collidablesCPU.size()) - { - m_data->m_collidablesGPU->copyFromHost(m_data->m_collidablesCPU); - } -} - -void b3GpuNarrowPhase::reset() -{ - m_data->m_numAcceleratedShapes = 0; - m_data->m_numAcceleratedRigidBodies = 0; - this->m_static0Index = -1; - m_data->m_uniqueEdges.resize(0); - m_data->m_convexVertices.resize(0); - m_data->m_convexPolyhedra.resize(0); - m_data->m_convexIndices.resize(0); - m_data->m_cpuChildShapes.resize(0); - m_data->m_convexFaces.resize(0); - m_data->m_collidablesCPU.resize(0); - m_data->m_localShapeAABBCPU->resize(0); - m_data->m_bvhData.resize(0); - m_data->m_treeNodesCPU.resize(0); - m_data->m_subTreesCPU.resize(0); - m_data->m_bvhInfoCPU.resize(0); -} - -void b3GpuNarrowPhase::readbackAllBodiesToCpu() -{ - m_data->m_bodyBufferGPU->copyToHostPointer(&m_data->m_bodyBufferCPU->at(0), m_data->m_numAcceleratedRigidBodies); -} - -void b3GpuNarrowPhase::setObjectTransformCpu(float* position, float* orientation, int bodyIndex) -{ - if (bodyIndex >= 0 && bodyIndex < m_data->m_bodyBufferCPU->size()) - { - m_data->m_bodyBufferCPU->at(bodyIndex).m_pos = b3MakeVector3(position[0], position[1], position[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.setValue(orientation[0], orientation[1], orientation[2], orientation[3]); - } - else - { - b3Warning("setObjectVelocityCpu out of range.\n"); - } -} -void b3GpuNarrowPhase::setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex) -{ - if (bodyIndex >= 0 && bodyIndex < m_data->m_bodyBufferCPU->size()) - { - m_data->m_bodyBufferCPU->at(bodyIndex).m_linVel = b3MakeVector3(linVel[0], linVel[1], linVel[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_angVel = b3MakeVector3(angVel[0], angVel[1], angVel[2]); - } - else - { - b3Warning("setObjectVelocityCpu out of range.\n"); - } -} - -bool b3GpuNarrowPhase::getObjectTransformFromCpu(float* position, float* orientation, int bodyIndex) const -{ - if (bodyIndex >= 0 && bodyIndex < m_data->m_bodyBufferCPU->size()) - { - position[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.x; - position[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.y; - position[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.z; - position[3] = 1.f; //or 1 - - orientation[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.x; - orientation[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.y; - orientation[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.z; - orientation[3] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.w; - return true; - } - - b3Warning("getObjectTransformFromCpu out of range.\n"); - return false; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h deleted file mode 100644 index 21a68de3433..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef B3_GPU_NARROWPHASE_H -#define B3_GPU_NARROWPHASE_H - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -class b3GpuNarrowPhase -{ -protected: - struct b3GpuNarrowPhaseInternalData* m_data; - int m_acceleratedCompanionShapeIndex; - int m_planeBodyIndex; - int m_static0Index; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr, b3Collidable& col); - int registerConcaveMeshShape(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, b3Collidable& col, const float* scaling); - -public: - b3GpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct b3Config& config); - - virtual ~b3GpuNarrowPhase(void); - - int registerSphereShape(float radius); - int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - int registerCompoundShape(b3AlignedObjectArray* childShapes); - int registerFace(const b3Vector3& faceNormal, float faceConstant); - - int registerConcaveMesh(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, const float* scaling); - - //do they need to be merged? - - int registerConvexHullShape(b3ConvexUtility* utilPtr); - int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - - int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax, bool writeToGpu); - void setObjectTransform(const float* position, const float* orientation, int bodyIndex); - - void writeAllBodiesToGpu(); - void reset(); - void readbackAllBodiesToCpu(); - bool getObjectTransformFromCpu(float* position, float* orientation, int bodyIndex) const; - - void setObjectTransformCpu(float* position, float* orientation, int bodyIndex); - void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex); - - virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects); - - cl_mem getBodiesGpu(); - const struct b3RigidBodyData* getBodiesCpu() const; - //struct b3RigidBodyData* getBodiesCpu(); - - int getNumBodiesGpu() const; - - cl_mem getBodyInertiasGpu(); - int getNumBodyInertiasGpu() const; - - cl_mem getCollidablesGpu(); - const struct b3Collidable* getCollidablesCpu() const; - int getNumCollidablesGpu() const; - - const struct b3SapAabb* getLocalSpaceAabbsCpu() const; - - const struct b3Contact4* getContactsCPU() const; - - cl_mem getContactsGpu(); - int getNumContactsGpu() const; - - cl_mem getAabbLocalSpaceBufferGpu(); - - int getNumRigidBodies() const; - - int allocateCollidable(); - - int getStatic0Index() const - { - return m_static0Index; - } - b3Collidable& getCollidableCpu(int collidableIndex); - const b3Collidable& getCollidableCpu(int collidableIndex) const; - - const b3GpuNarrowPhaseInternalData* getInternalData() const - { - return m_data; - } - - b3GpuNarrowPhaseInternalData* getInternalData() - { - return m_data; - } - - const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const; -}; - -#endif //B3_GPU_NARROWPHASE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h deleted file mode 100644 index 716a5ea0fc8..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h +++ /dev/null @@ -1,89 +0,0 @@ - -#ifndef B3_GPU_NARROWPHASE_INTERNAL_DATA_H -#define B3_GPU_NARROWPHASE_INTERNAL_DATA_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" - -#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Common/shared/b3Int2.h" - -class b3ConvexUtility; - -struct b3GpuNarrowPhaseInternalData -{ - b3AlignedObjectArray* m_convexData; - - b3AlignedObjectArray m_convexPolyhedra; - b3AlignedObjectArray m_uniqueEdges; - b3AlignedObjectArray m_convexVertices; - b3AlignedObjectArray m_convexIndices; - - b3OpenCLArray* m_convexPolyhedraGPU; - b3OpenCLArray* m_uniqueEdgesGPU; - b3OpenCLArray* m_convexVerticesGPU; - b3OpenCLArray* m_convexIndicesGPU; - - b3OpenCLArray* m_worldVertsB1GPU; - b3OpenCLArray* m_clippingFacesOutGPU; - b3OpenCLArray* m_worldNormalsAGPU; - b3OpenCLArray* m_worldVertsA1GPU; - b3OpenCLArray* m_worldVertsB2GPU; - - b3AlignedObjectArray m_cpuChildShapes; - b3OpenCLArray* m_gpuChildShapes; - - b3AlignedObjectArray m_convexFaces; - b3OpenCLArray* m_convexFacesGPU; - - struct GpuSatCollision* m_gpuSatCollision; - - b3OpenCLArray* m_triangleConvexPairs; - - b3OpenCLArray* m_pBufContactBuffersGPU[2]; - int m_currentContactBuffer; - b3AlignedObjectArray* m_pBufContactOutCPU; - - b3AlignedObjectArray* m_bodyBufferCPU; - b3OpenCLArray* m_bodyBufferGPU; - - b3AlignedObjectArray* m_inertiaBufferCPU; - b3OpenCLArray* m_inertiaBufferGPU; - - int m_numAcceleratedShapes; - int m_numAcceleratedRigidBodies; - - b3AlignedObjectArray m_collidablesCPU; - b3OpenCLArray* m_collidablesGPU; - - b3OpenCLArray* m_localShapeAABBGPU; - b3AlignedObjectArray* m_localShapeAABBCPU; - - b3AlignedObjectArray m_bvhData; - b3AlignedObjectArray m_meshInterfaces; - - b3AlignedObjectArray m_treeNodesCPU; - b3AlignedObjectArray m_subTreesCPU; - - b3AlignedObjectArray m_bvhInfoCPU; - b3OpenCLArray* m_bvhInfoGPU; - - b3OpenCLArray* m_treeNodesGPU; - b3OpenCLArray* m_subTreesGPU; - - b3Config m_config; -}; - -#endif //B3_GPU_NARROWPHASE_INTERNAL_DATA_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp deleted file mode 100644 index bd9d6bb04b4..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp +++ /dev/null @@ -1,1068 +0,0 @@ - -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -bool useGpuInitSolverBodies = true; -bool useGpuInfo1 = true; -bool useGpuInfo2 = true; -bool useGpuSolveJointConstraintRows = true; -bool useGpuWriteBackVelocities = true; -bool gpuBreakConstraints = true; - -#include "b3GpuPgsConstraintSolver.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include -#include "Bullet3Common/b3AlignedObjectArray.h" -#include //for memset -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" - -#include "Bullet3OpenCL/RigidBody/kernels/jointSolver.h" //solveConstraintRowsCL -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" - -#define B3_JOINT_SOLVER_PATH "src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl" - -struct b3GpuPgsJacobiSolverInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3PrefixScanCL* m_prefixScan; - - cl_kernel m_solveJointConstraintRowsKernels; - cl_kernel m_initSolverBodiesKernel; - cl_kernel m_getInfo1Kernel; - cl_kernel m_initBatchConstraintsKernel; - cl_kernel m_getInfo2Kernel; - cl_kernel m_writeBackVelocitiesKernel; - cl_kernel m_breakViolatedConstraintsKernel; - - b3OpenCLArray* m_gpuConstraintRowOffsets; - - b3OpenCLArray* m_gpuSolverBodies; - b3OpenCLArray* m_gpuBatchConstraints; - b3OpenCLArray* m_gpuConstraintRows; - b3OpenCLArray* m_gpuConstraintInfo1; - - // b3AlignedObjectArray m_cpuSolverBodies; - b3AlignedObjectArray m_cpuBatchConstraints; - b3AlignedObjectArray m_cpuConstraintRows; - b3AlignedObjectArray m_cpuConstraintInfo1; - b3AlignedObjectArray m_cpuConstraintRowOffsets; - - b3AlignedObjectArray m_cpuBodies; - b3AlignedObjectArray m_cpuInertias; - - b3AlignedObjectArray m_cpuConstraints; - - b3AlignedObjectArray m_batchSizes; -}; - -/* -static b3Transform getWorldTransform(b3RigidBodyData* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -} - -static const b3Matrix3x3& getInvInertiaTensorWorld(b3InertiaData* inertia) -{ - return inertia->m_invInertiaWorld; -} - -*/ - -static const b3Vector3& getLinearVelocity(b3RigidBodyData* rb) -{ - return rb->m_linVel; -} - -static const b3Vector3& getAngularVelocity(b3RigidBodyData* rb) -{ - return rb->m_angVel; -} - -b3Vector3 getVelocityInLocalPoint(b3RigidBodyData* rb, const b3Vector3& rel_pos) -{ - //we also calculate lin/ang velocity for kinematic objects - return getLinearVelocity(rb) + getAngularVelocity(rb).cross(rel_pos); -} - -b3GpuPgsConstraintSolver::b3GpuPgsConstraintSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, bool usePgs) -{ - m_usePgs = usePgs; - m_gpuData = new b3GpuPgsJacobiSolverInternalData(); - m_gpuData->m_context = ctx; - m_gpuData->m_device = device; - m_gpuData->m_queue = queue; - - m_gpuData->m_prefixScan = new b3PrefixScanCL(ctx, device, queue); - - m_gpuData->m_gpuConstraintRowOffsets = new b3OpenCLArray(m_gpuData->m_context, m_gpuData->m_queue); - - m_gpuData->m_gpuSolverBodies = new b3OpenCLArray(m_gpuData->m_context, m_gpuData->m_queue); - m_gpuData->m_gpuBatchConstraints = new b3OpenCLArray(m_gpuData->m_context, m_gpuData->m_queue); - m_gpuData->m_gpuConstraintRows = new b3OpenCLArray(m_gpuData->m_context, m_gpuData->m_queue); - m_gpuData->m_gpuConstraintInfo1 = new b3OpenCLArray(m_gpuData->m_context, m_gpuData->m_queue); - cl_int errNum = 0; - - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, &errNum, "", B3_JOINT_SOLVER_PATH); - //cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,0,&errNum,"",B3_JOINT_SOLVER_PATH,true); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "solveJointConstraintRows", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_initSolverBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "initSolverBodies", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_getInfo1Kernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "getInfo1Kernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_initBatchConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "initBatchConstraintsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_getInfo2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "getInfo2Kernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_writeBackVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "writeBackVelocitiesKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_breakViolatedConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "breakViolatedConstraintsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - - clReleaseProgram(prog); - } -} - -b3GpuPgsConstraintSolver::~b3GpuPgsConstraintSolver() -{ - clReleaseKernel(m_gpuData->m_solveJointConstraintRowsKernels); - clReleaseKernel(m_gpuData->m_initSolverBodiesKernel); - clReleaseKernel(m_gpuData->m_getInfo1Kernel); - clReleaseKernel(m_gpuData->m_initBatchConstraintsKernel); - clReleaseKernel(m_gpuData->m_getInfo2Kernel); - clReleaseKernel(m_gpuData->m_writeBackVelocitiesKernel); - clReleaseKernel(m_gpuData->m_breakViolatedConstraintsKernel); - - delete m_gpuData->m_prefixScan; - delete m_gpuData->m_gpuConstraintRowOffsets; - delete m_gpuData->m_gpuSolverBodies; - delete m_gpuData->m_gpuBatchConstraints; - delete m_gpuData->m_gpuConstraintRows; - delete m_gpuData->m_gpuConstraintInfo1; - - delete m_gpuData; -} - -struct b3BatchConstraint -{ - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - int m_originalConstraintIndex; - int m_batchId; -}; - -static b3AlignedObjectArray batchConstraints; - -void b3GpuPgsConstraintSolver::recomputeBatches() -{ - m_gpuData->m_batchSizes.clear(); -} - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("GPU solveGroupCacheFriendlySetup"); - batchConstraints.resize(numConstraints); - m_gpuData->m_gpuBatchConstraints->resize(numConstraints); - m_staticIdx = -1; - m_maxOverrideNumSolverIterations = 0; - - /* m_gpuData->m_gpuBodies->resize(numBodies); - m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies); - - b3OpenCLArray gpuInertias(m_gpuData->m_context,m_gpuData->m_queue); - gpuInertias.resize(numBodies); - gpuInertias.copyFromHostPointer(inertias,numBodies); - */ - - m_gpuData->m_gpuSolverBodies->resize(numBodies); - - m_tmpSolverBodyPool.resize(numBodies); - { - if (useGpuInitSolverBodies) - { - B3_PROFILE("m_initSolverBodiesKernel"); - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initSolverBodiesKernel, "m_initSolverBodiesKernel"); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_gpuData->m_queue); - - // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - } - else - { - gpuBodies->copyToHost(m_gpuData->m_cpuBodies); - for (int i = 0; i < numBodies; i++) - { - b3RigidBodyData& body = m_gpuData->m_cpuBodies[i]; - b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i]; - initSolverBody(i, &solverBody, &body); - solverBody.m_originalBodyIndex = i; - } - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - } - } - - // int totalBodies = 0; - int totalNumRows = 0; - //b3RigidBody* rb0=0,*rb1=0; - //if (1) - { - { - // int i; - - m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); - - // b3OpenCLArray gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); - - if (useGpuInfo1) - { - B3_PROFILE("info1 and init batchConstraint"); - - m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); - - if (1) - { - B3_PROFILE("getInfo1Kernel"); - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo1Kernel, "m_getInfo1Kernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - } - - if (m_gpuData->m_batchSizes.size() == 0) - { - B3_PROFILE("initBatchConstraintsKernel"); - - m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints); - unsigned int total = 0; - m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1, *m_gpuData->m_gpuConstraintRowOffsets, numConstraints, &total); - unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints - 1); - totalNumRows = total + lastElem; - - { - B3_PROFILE("init batch constraints"); - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initBatchConstraintsKernel, "m_initBatchConstraintsKernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - } - //assume the batching happens on CPU, so copy the data - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - } - } - else - { - totalNumRows = 0; - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - //calculate the total number of contraint rows - for (int i = 0; i < numConstraints; i++) - { - unsigned int& info1 = m_tmpConstraintSizesPool[i]; - // unsigned int info1; - if (m_gpuData->m_cpuConstraints[i].isEnabled()) - { - m_gpuData->m_cpuConstraints[i].getInfo1(&info1, &m_gpuData->m_cpuBodies[0]); - } - else - { - info1 = 0; - } - - totalNumRows += info1; - } - - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - } - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); - m_gpuData->m_gpuConstraintRows->resize(totalNumRows); - - // b3GpuConstraintArray verify; - - if (useGpuInfo2) - { - { - B3_PROFILE("getInfo2Kernel"); - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo2Kernel, "m_getInfo2Kernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setBuffer(gpuInertias->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setConst(infoGlobal.m_timeStep); - launcher.setConst(infoGlobal.m_erp); - launcher.setConst(infoGlobal.m_globalCfm); - launcher.setConst(infoGlobal.m_damping); - launcher.setConst(infoGlobal.m_numIterations); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - - if (m_gpuData->m_batchSizes.size() == 0) - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - //m_gpuData->m_gpuConstraintRows->copyToHost(verify); - //m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); - } - } - else - { - gpuInertias->copyToHost(m_gpuData->m_cpuInertias); - - ///setup the b3SolverConstraints - - for (int i = 0; i < numConstraints; i++) - { - const int& info1 = m_tmpConstraintSizesPool[i]; - - if (info1) - { - int constraintIndex = batchConstraints[i].m_originalConstraintIndex; - int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex]; - - b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset]; - b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i]; - - b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[constraint.getRigidBodyA()]; - //b3RigidBody& rbA = constraint.getRigidBodyA(); - // b3RigidBody& rbB = constraint.getRigidBodyB(); - b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[constraint.getRigidBodyB()]; - - int solverBodyIdA = constraint.getRigidBodyA(); //getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias); - int solverBodyIdB = constraint.getRigidBodyB(); //getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias); - - b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; - b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; - - if (rbA.m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } - else - { - if (!solverBodyIdA) - m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } - - if (rbB.m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } - else - { - if (!solverBodyIdB) - m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } - - int overrideNumSolverIterations = 0; //constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; - if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations) - m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - int j; - for (j = 0; j < info1; j++) - { - memset(¤tConstraintRow[j], 0, sizeof(b3GpuSolverConstraint)); - currentConstraintRow[j].m_angularComponentA.setValue(0, 0, 0); - currentConstraintRow[j].m_angularComponentB.setValue(0, 0, 0); - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_cfm = 0.f; - currentConstraintRow[j].m_contactNormal.setValue(0, 0, 0); - currentConstraintRow[j].m_friction = 0.f; - currentConstraintRow[j].m_frictionIndex = 0; - currentConstraintRow[j].m_jacDiagABInv = 0.f; - currentConstraintRow[j].m_lowerLimit = 0.f; - currentConstraintRow[j].m_upperLimit = 0.f; - - currentConstraintRow[j].m_originalContactPoint = 0; - currentConstraintRow[j].m_overrideNumSolverIterations = 0; - currentConstraintRow[j].m_relpos1CrossNormal.setValue(0, 0, 0); - currentConstraintRow[j].m_relpos2CrossNormal.setValue(0, 0, 0); - currentConstraintRow[j].m_rhs = 0.f; - currentConstraintRow[j].m_rhsPenetration = 0.f; - currentConstraintRow[j].m_solverBodyIdA = 0; - currentConstraintRow[j].m_solverBodyIdB = 0; - - currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; - currentConstraintRow[j].m_upperLimit = B3_INFINITY; - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; - currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; - currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; - } - - bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - - b3GpuConstraintInfo2 info2; - info2.fps = 1.f / infoGlobal.m_timeStep; - info2.erp = infoGlobal.m_erp; - info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; - info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxis = 0; - info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3GpuSolverConstraint) / sizeof(b3Scalar); //check this - ///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar - b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3GpuSolverConstraint)); - info2.m_constraintError = ¤tConstraintRow->m_rhs; - currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; - info2.m_damping = infoGlobal.m_damping; - info2.cfm = ¤tConstraintRow->m_cfm; - info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; - info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; - info2.m_numIterations = infoGlobal.m_numIterations; - m_gpuData->m_cpuConstraints[i].getInfo2(&info2, &m_gpuData->m_cpuBodies[0]); - - ///finalize the constraint setup - for (j = 0; j < info1; j++) - { - b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j]; - - if (solverConstraint.m_upperLimit >= m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) - { - solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); - } - - if (solverConstraint.m_lowerLimit <= -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) - { - solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); - } - - // solverConstraint.m_originalContactPoint = constraint; - - b3Matrix3x3& invInertiaWorldA = m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld; - { - //b3Vector3 angularFactorA(1,1,1); - const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; - solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1; //*angularFactorA; - } - - b3Matrix3x3& invInertiaWorldB = m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld; - { - const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; - solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2; //*constraint.getRigidBodyB().getAngularFactor(); - } - - { - //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal - //because it gets multiplied iMJlB - b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass; - b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal; - b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass; //sign of normal? - b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal; - - b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); - sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); - sum += iMJlB.dot(solverConstraint.m_contactNormal); - sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); - b3Scalar fsum = b3Fabs(sum); - b3Assert(fsum > B3_EPSILON); - solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f; - } - - ///fix rhs - ///todo: add force/torque accelerators - { - b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); - - rel_vel = vel1Dotn + vel2Dotn; - - b3Scalar restitution = 0.f; - b3Scalar positionalError = solverConstraint.m_rhs; //already filled in by getConstraintInfo2 - b3Scalar velocityError = restitution - rel_vel * info2.m_damping; - b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv; - b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv; - solverConstraint.m_rhs = penetrationImpulse + velocityImpulse; - solverConstraint.m_appliedImpulse = 0.f; - } - } - } - } - - m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); - m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - - if (m_gpuData->m_batchSizes.size() == 0) - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - else - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - - } //end useGpuInfo2 - } - -#ifdef B3_SUPPORT_CONTACT_CONSTRAINTS - { - int i; - - for (i = 0; i < numManifolds; i++) - { - b3Contact4& manifold = manifoldPtr[i]; - convertContact(bodies, inertias, &manifold, infoGlobal); - } - } -#endif //B3_SUPPORT_CONTACT_CONSTRAINTS - } - - // b3ContactSolverInfo info = infoGlobal; - - // int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); - // int numConstraintPool = m_tmpSolverContactConstraintPool.size(); - // int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); - - return 0.f; -} - -///a straight copy from GPU/OpenCL kernel, for debugging -__inline void internalApplyImpulse(b3GpuSolverBody* body, const b3Vector3& linearComponent, const b3Vector3& angularComponent, float impulseMagnitude) -{ - body->m_deltaLinearVelocity += linearComponent * impulseMagnitude * body->m_linearFactor; - body->m_deltaAngularVelocity += angularComponent * (impulseMagnitude * body->m_angularFactor); -} - -void resolveSingleConstraintRowGeneric2(b3GpuSolverBody* body1, b3GpuSolverBody* body2, b3GpuSolverConstraint* c) -{ - float deltaImpulse = c->m_rhs - b3Scalar(c->m_appliedImpulse) * c->m_cfm; - float deltaVel1Dotn = b3Dot(c->m_contactNormal, body1->m_deltaLinearVelocity) + b3Dot(c->m_relpos1CrossNormal, body1->m_deltaAngularVelocity); - float deltaVel2Dotn = -b3Dot(c->m_contactNormal, body2->m_deltaLinearVelocity) + b3Dot(c->m_relpos2CrossNormal, body2->m_deltaAngularVelocity); - - deltaImpulse -= deltaVel1Dotn * c->m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn * c->m_jacDiagABInv; - - float sum = b3Scalar(c->m_appliedImpulse) + deltaImpulse; - if (sum < c->m_lowerLimit) - { - deltaImpulse = c->m_lowerLimit - b3Scalar(c->m_appliedImpulse); - c->m_appliedImpulse = c->m_lowerLimit; - } - else if (sum > c->m_upperLimit) - { - deltaImpulse = c->m_upperLimit - b3Scalar(c->m_appliedImpulse); - c->m_appliedImpulse = c->m_upperLimit; - } - else - { - c->m_appliedImpulse = sum; - } - - internalApplyImpulse(body1, c->m_contactNormal * body1->m_invMass, c->m_angularComponentA, deltaImpulse); - internalApplyImpulse(body2, -c->m_contactNormal * body2->m_invMass, c->m_angularComponentB, deltaImpulse); -} - -void b3GpuPgsConstraintSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyData* rb) -{ - solverBody->m_deltaLinearVelocity.setValue(0.f, 0.f, 0.f); - solverBody->m_deltaAngularVelocity.setValue(0.f, 0.f, 0.f); - solverBody->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - solverBody->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - - b3Assert(rb); - // solverBody->m_worldTransform = getWorldTransform(rb); - solverBody->internalSetInvMass(b3MakeVector3(rb->m_invMass, rb->m_invMass, rb->m_invMass)); - solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor = b3MakeVector3(1, 1, 1); - solverBody->m_linearFactor = b3MakeVector3(1, 1, 1); - solverBody->m_linearVelocity = getLinearVelocity(rb); - solverBody->m_angularVelocity = getAngularVelocity(rb); -} - -void b3GpuPgsConstraintSolver::averageVelocities() -{ -} - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - //only create the batches once. - //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated - B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); - - bool createBatches = m_gpuData->m_batchSizes.size() == 0; - { - if (createBatches) - { - m_gpuData->m_batchSizes.resize(0); - - { - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - - B3_PROFILE("batch joints"); - b3Assert(batchConstraints.size() == numConstraints); - int simdWidth = numConstraints + 1; - int numBodies = m_tmpSolverBodyPool.size(); - sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies); - - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - } - } - else - { - /*b3AlignedObjectArray cpuCheckBatches; - m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); - b3Assert(cpuCheckBatches.size()==batchConstraints.size()); - printf(".\n"); - */ - //>copyFromHost(batchConstraints); - } - int maxIterations = infoGlobal.m_numIterations; - - bool useBatching = true; - - if (useBatching) - { - if (!useGpuSolveJointConstraintRows) - { - B3_PROFILE("copy to host"); - m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); - m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); - m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); - gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); - } - - for (int iteration = 0; iteration < maxIterations; iteration++) - { - int batchOffset = 0; - int constraintOffset = 0; - int numBatches = m_gpuData->m_batchSizes.size(); - for (int bb = 0; bb < numBatches; bb++) - { - int numConstraintsInBatch = m_gpuData->m_batchSizes[bb]; - - if (useGpuSolveJointConstraintRows) - { - B3_PROFILE("solveJointConstraintRowsKernels"); - - /* - __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, - __global b3BatchConstraint* batchConstraints, - __global b3SolverConstraint* rows, - __global unsigned int* numConstraintRowsInfo1, - __global unsigned int* rowOffsets, - __global b3GpuGenericConstraint* constraints, - int batchOffset, - int numConstraintsInBatch*/ - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels"); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(gpuConstraints1->getBufferCL()); //to detect disabled constraints - launcher.setConst(batchOffset); - launcher.setConst(numConstraintsInBatch); - - launcher.launch1D(numConstraintsInBatch); - } - else //useGpu - { - for (int b = 0; b < numConstraintsInBatch; b++) - { - const b3BatchConstraint& c = batchConstraints[batchOffset + b]; - /*printf("-----------\n"); - printf("bb=%d\n",bb); - printf("c.batchId = %d\n", c.m_batchId); - */ - b3Assert(c.m_batchId == bb); - b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex]; - if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED) - { - int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; - int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; - - for (int jj = 0; jj < numConstraintRows; jj++) - { - // - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj]; - //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); - } - } - } - } //useGpu - batchOffset += numConstraintsInBatch; - constraintOffset += numConstraintsInBatch; - } - } //for (int iteration... - - if (!useGpuSolveJointConstraintRows) - { - { - B3_PROFILE("copy from host"); - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); - } - - //B3_PROFILE("copy to host"); - //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - } - //int sz = sizeof(b3GpuSolverBody); - //printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz); - } - else - { - for (int iteration = 0; iteration < maxIterations; iteration++) - { - int numJoints = m_tmpSolverNonContactConstraintPool.size(); - for (int j = 0; j < numJoints; j++) - { - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j]; - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); - } - - if (!m_usePgs) - { - averageVelocities(); - } - } - } - } - clFinish(m_gpuData->m_queue); - return 0.f; -} - -static b3AlignedObjectArray bodyUsed; -static b3AlignedObjectArray curUsed; - -inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3(b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies) -{ - //int sz = sizeof(b3BatchConstraint); - - B3_PROFILE("sortConstraintByBatch3"); - - static int maxSwaps = 0; - int numSwaps = 0; - - curUsed.resize(2 * simdWidth); - - static int maxNumConstraints = 0; - if (maxNumConstraints < numConstraints) - { - maxNumConstraints = numConstraints; - //printf("maxNumConstraints = %d\n",maxNumConstraints ); - } - - int numUsedArray = numBodies / 32 + 1; - bodyUsed.resize(numUsedArray); - - for (int q = 0; q < numUsedArray; q++) - bodyUsed[q] = 0; - - int curBodyUsed = 0; - - int numIter = 0; - -#if defined(_DEBUG) - for (int i = 0; i < numConstraints; i++) - cs[i].m_batchId = -1; -#endif - - int numValidConstraints = 0; - // int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - - while (numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - // clear flag - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - - curBodyUsed = 0; - - for (int i = numValidConstraints; i < numConstraints; i++) - { - int idx = i; - b3Assert(idx < numConstraints); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31)); - } - if (!aUnavailable) - if (!bIsStatic) - { - bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31)); - } - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - { - bodyUsed[bodyA / 32] |= (1 << (bodyA & 31)); - curUsed[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - bodyUsed[bodyB / 32] |= (1 << (bodyB & 31)); - curUsed[curBodyUsed++] = bodyB; - } - - cs[idx].m_batchId = batchIdx; - - if (i != numValidConstraints) - { - b3Swap(cs[i], cs[numValidConstraints]); - numSwaps++; - } - - numValidConstraints++; - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - nCurrentBatch = 0; - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - curBodyUsed = 0; - } - } - } - } - m_gpuData->m_batchSizes.push_back(nCurrentBatch); - batchIdx++; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < numConstraints; i++) - { - b3Assert(cs[i].m_batchId != -1); - } -#endif - - if (maxSwaps < numSwaps) - { - maxSwaps = numSwaps; - //printf("maxSwaps = %d\n", maxSwaps); - } - - return batchIdx; -} - -/// b3PgsJacobiSolver Sequentially applies impulses -b3Scalar b3GpuPgsConstraintSolver::solveGroup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, - int numBodies, b3OpenCLArray* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveJoints"); - //you need to provide at least some bodies - - solveGroupCacheFriendlySetup(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); - - solveGroupCacheFriendlyIterations(gpuConstraints, numConstraints, infoGlobal); - - solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); - - return 0.f; -} - -void b3GpuPgsConstraintSolver::solveJoints(int numBodies, b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, - int numConstraints, b3OpenCLArray* gpuConstraints) -{ - b3ContactSolverInfo infoGlobal; - infoGlobal.m_splitImpulse = false; - infoGlobal.m_timeStep = 1.f / 60.f; - infoGlobal.m_numIterations = 4; //4; - // infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS|B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION; - //infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS; - infoGlobal.m_solverMode |= B3_SOLVER_USE_2_FRICTION_DIRECTIONS; - - //if (infoGlobal.m_solverMode & B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS) - //if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION)) - - solveGroup(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); -} - -//b3AlignedObjectArray testBodies; - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroupCacheFriendlyFinish"); - // int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - // int i,j; - - { - if (gpuBreakConstraints) - { - B3_PROFILE("breakViolatedConstraintsKernel"); - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_breakViolatedConstraintsKernel, "m_breakViolatedConstraintsKernel"); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - } - else - { - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - m_gpuData->m_gpuBatchConstraints->copyToHost(m_gpuData->m_cpuBatchConstraints); - m_gpuData->m_gpuConstraintRows->copyToHost(m_gpuData->m_cpuConstraintRows); - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); - m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); - - for (int cid = 0; cid < numConstraints; cid++) - { - int originalConstraintIndex = batchConstraints[cid].m_originalConstraintIndex; - int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[originalConstraintIndex]; - int numRows = m_gpuData->m_cpuConstraintInfo1[originalConstraintIndex]; - if (numRows) - { - // printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold); - for (int i = 0; i < numRows; i++) - { - int rowIndex = constraintRowOffset + i; - int orgConstraintIndex = m_gpuData->m_cpuConstraintRows[rowIndex].m_originalConstraintIndex; - float breakingThreshold = m_gpuData->m_cpuConstraints[orgConstraintIndex].m_breakingImpulseThreshold; - // printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse); - if (b3Fabs(m_gpuData->m_cpuConstraintRows[rowIndex].m_appliedImpulse) >= breakingThreshold) - { - m_gpuData->m_cpuConstraints[orgConstraintIndex].m_flags = 0; //&= ~B3_CONSTRAINT_FLAG_ENABLED; - } - } - } - } - - gpuConstraints->copyFromHost(m_gpuData->m_cpuConstraints); - } - } - - { - if (useGpuWriteBackVelocities) - { - B3_PROFILE("GPU write back velocities and transforms"); - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_writeBackVelocitiesKernel, "m_writeBackVelocitiesKernel"); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_gpuData->m_queue); - // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - // m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies); - //m_gpuData->m_gpuBodies->copyToHost(testBodies); - } - else - { - B3_PROFILE("CPU write back velocities and transforms"); - - m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - gpuBodies->copyToHost(m_gpuData->m_cpuBodies); - for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) - { - int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; - //printf("bodyIndex=%d\n",bodyIndex); - b3Assert(i == bodyIndex); - - b3RigidBodyData* body = &m_gpuData->m_cpuBodies[bodyIndex]; - if (body->m_invMass) - { - if (infoGlobal.m_splitImpulse) - m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp); - else - m_tmpSolverBodyPool[i].writebackVelocity(); - - if (m_usePgs) - { - body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity; - body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity; - } - else - { - b3Assert(0); - } - /* - if (infoGlobal.m_splitImpulse) - { - body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin(); - b3Quaternion orn; - orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation(); - body->m_quat = orn; - } - */ - } - } //for - - gpuBodies->copyFromHost(m_gpuData->m_cpuBodies); - } - } - - clFinish(m_gpuData->m_queue); - - m_tmpSolverContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0); - - m_tmpSolverBodyPool.resizeNoInitialize(0); - return 0.f; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h deleted file mode 100644 index 00bc544f022..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_PGS_CONSTRAINT_SOLVER_H -#define B3_GPU_PGS_CONSTRAINT_SOLVER_H - -struct b3Contact4; -struct b3ContactPoint; - -class b3Dispatcher; - -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include "Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h" -#include "b3GpuSolverBody.h" -#include "b3GpuSolverConstraint.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -struct b3RigidBodyData; -struct b3InertiaData; - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "b3GpuGenericConstraint.h" - -class b3GpuPgsConstraintSolver -{ -protected: - int m_staticIdx; - struct b3GpuPgsJacobiSolverInternalData* m_gpuData; - -protected: - b3AlignedObjectArray m_tmpSolverBodyPool; - b3GpuConstraintArray m_tmpSolverContactConstraintPool; - b3GpuConstraintArray m_tmpSolverNonContactConstraintPool; - b3GpuConstraintArray m_tmpSolverContactFrictionConstraintPool; - b3GpuConstraintArray m_tmpSolverContactRollingFrictionConstraintPool; - - b3AlignedObjectArray m_tmpConstraintSizesPool; - - bool m_usePgs; - void averageVelocities(); - - int m_maxOverrideNumSolverIterations; - - int m_numSplitImpulseRecoveries; - - // int getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies,b3InertiaData* inertias); - void initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyData* rb); - -public: - b3GpuPgsConstraintSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, bool usePgs); - virtual ~b3GpuPgsConstraintSolver(); - - virtual b3Scalar solveGroupCacheFriendlyIterations(b3OpenCLArray* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal); - virtual b3Scalar solveGroupCacheFriendlySetup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - b3Scalar solveGroupCacheFriendlyFinish(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - - b3Scalar solveGroup(b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, int numBodies, b3OpenCLArray* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - void solveJoints(int numBodies, b3OpenCLArray* gpuBodies, b3OpenCLArray* gpuInertias, - int numConstraints, b3OpenCLArray* gpuConstraints); - - int sortConstraintByBatch3(struct b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies); - void recomputeBatches(); -}; - -#endif //B3_GPU_PGS_CONSTRAINT_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp deleted file mode 100644 index e3d235a4fdc..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp +++ /dev/null @@ -1,1529 +0,0 @@ - -bool gUseLargeBatches = false; -bool gCpuBatchContacts = false; -bool gCpuSolveConstraint = false; -bool gCpuRadixSort = false; -bool gCpuSetSortData = false; -bool gCpuSortContactsDeterminism = false; -bool gUseCpuCopyConstraints = false; -bool gUseScanHost = false; -bool gReorderContactsOnCpu = false; - -bool optionalSortContactsDeterminism = true; - -#include "b3GpuPgsContactSolver.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "b3Solver.h" - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - -struct b3GpuBatchingPgsSolverInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - int m_pairCapacity; - int m_nIterations; - - b3OpenCLArray* m_contactCGPU; - b3OpenCLArray* m_numConstraints; - b3OpenCLArray* m_offsets; - - b3Solver* m_solverGPU; - - cl_kernel m_batchingKernel; - cl_kernel m_batchingKernelNew; - cl_kernel m_solveContactKernel; - cl_kernel m_solveSingleContactKernel; - cl_kernel m_solveSingleFrictionKernel; - cl_kernel m_solveFrictionKernel; - cl_kernel m_contactToConstraintKernel; - cl_kernel m_setSortDataKernel; - cl_kernel m_reorderContactKernel; - cl_kernel m_copyConstraintKernel; - - cl_kernel m_setDeterminismSortDataBodyAKernel; - cl_kernel m_setDeterminismSortDataBodyBKernel; - cl_kernel m_setDeterminismSortDataChildShapeAKernel; - cl_kernel m_setDeterminismSortDataChildShapeBKernel; - - class b3RadixSort32CL* m_sort32; - class b3BoundSearchCL* m_search; - class b3PrefixScanCL* m_scan; - - b3OpenCLArray* m_sortDataBuffer; - b3OpenCLArray* m_contactBuffer; - - b3OpenCLArray* m_bodyBufferGPU; - b3OpenCLArray* m_inertiaBufferGPU; - b3OpenCLArray* m_pBufContactOutGPU; - - b3OpenCLArray* m_pBufContactOutGPUCopy; - b3OpenCLArray* m_contactKeyValues; - - b3AlignedObjectArray m_idxBuffer; - b3AlignedObjectArray m_sortData; - b3AlignedObjectArray m_old; - - b3AlignedObjectArray m_batchSizes; - b3OpenCLArray* m_batchSizesGpu; -}; - -b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx, cl_device_id device, cl_command_queue q, int pairCapacity) -{ - m_debugOutput = 0; - m_data = new b3GpuBatchingPgsSolverInternalData; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_queue = q; - m_data->m_pairCapacity = pairCapacity; - m_data->m_nIterations = 4; - m_data->m_batchSizesGpu = new b3OpenCLArray(ctx, q); - m_data->m_bodyBufferGPU = new b3OpenCLArray(ctx, q); - m_data->m_inertiaBufferGPU = new b3OpenCLArray(ctx, q); - m_data->m_pBufContactOutGPU = new b3OpenCLArray(ctx, q); - - m_data->m_pBufContactOutGPUCopy = new b3OpenCLArray(ctx, q); - m_data->m_contactKeyValues = new b3OpenCLArray(ctx, q); - - m_data->m_solverGPU = new b3Solver(ctx, device, q, 512 * 1024); - - m_data->m_sort32 = new b3RadixSort32CL(ctx, device, m_data->m_queue); - m_data->m_scan = new b3PrefixScanCL(ctx, device, m_data->m_queue, B3_SOLVER_N_CELLS); - m_data->m_search = new b3BoundSearchCL(ctx, device, m_data->m_queue, B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512); - - m_data->m_sortDataBuffer = new b3OpenCLArray(ctx, m_data->m_queue, sortSize); - m_data->m_contactBuffer = new b3OpenCLArray(ctx, m_data->m_queue); - - m_data->m_numConstraints = new b3OpenCLArray(ctx, m_data->m_queue, B3_SOLVER_N_CELLS); - m_data->m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_data->m_contactCGPU = new b3OpenCLArray(ctx, q, pairCapacity); - - m_data->m_offsets = new b3OpenCLArray(ctx, m_data->m_queue, B3_SOLVER_N_CELLS); - m_data->m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; - //const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - { - cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - - b3Assert(solverSetup2Prog); - - cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros); - b3Assert(m_data->m_solveFrictionKernel); - - m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_data->m_solveContactKernel); - - m_data->m_solveSingleContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "solveSingleContactKernel", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_data->m_solveSingleContactKernel); - - m_data->m_solveSingleFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "solveSingleFrictionKernel", &pErrNum, solveFrictionProg, additionalMacros); - b3Assert(m_data->m_solveSingleFrictionKernel); - - m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros); - b3Assert(m_data->m_contactToConstraintKernel); - - m_data->m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setSortDataKernel); - - m_data->m_setDeterminismSortDataBodyAKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyA", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataBodyAKernel); - - m_data->m_setDeterminismSortDataBodyBKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyB", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataBodyBKernel); - - m_data->m_setDeterminismSortDataChildShapeAKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeA", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataChildShapeAKernel); - - m_data->m_setDeterminismSortDataChildShapeBKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeB", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataChildShapeBKernel); - - m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_reorderContactKernel); - - m_data->m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_copyConstraintKernel); - } - - { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH); - b3Assert(batchingProg); - - m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros); - b3Assert(m_data->m_batchingKernel); - } - - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros); - b3Assert(m_data->m_batchingKernelNew); - } -} - -b3GpuPgsContactSolver::~b3GpuPgsContactSolver() -{ - delete m_data->m_batchSizesGpu; - delete m_data->m_bodyBufferGPU; - delete m_data->m_inertiaBufferGPU; - delete m_data->m_pBufContactOutGPU; - delete m_data->m_pBufContactOutGPUCopy; - delete m_data->m_contactKeyValues; - - delete m_data->m_contactCGPU; - delete m_data->m_numConstraints; - delete m_data->m_offsets; - delete m_data->m_sortDataBuffer; - delete m_data->m_contactBuffer; - - delete m_data->m_sort32; - delete m_data->m_scan; - delete m_data->m_search; - delete m_data->m_solverGPU; - - clReleaseKernel(m_data->m_batchingKernel); - clReleaseKernel(m_data->m_batchingKernelNew); - clReleaseKernel(m_data->m_solveSingleContactKernel); - clReleaseKernel(m_data->m_solveSingleFrictionKernel); - clReleaseKernel(m_data->m_solveContactKernel); - clReleaseKernel(m_data->m_solveFrictionKernel); - - clReleaseKernel(m_data->m_contactToConstraintKernel); - clReleaseKernel(m_data->m_setSortDataKernel); - clReleaseKernel(m_data->m_reorderContactKernel); - clReleaseKernel(m_data->m_copyConstraintKernel); - - clReleaseKernel(m_data->m_setDeterminismSortDataBodyAKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataBodyBKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeAKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeBKernel); - - delete m_data; -} - -struct b3ConstraintCfg -{ - b3ConstraintCfg(float dt = 0.f) : m_positionDrift(0.005f), m_positionConstraintCoeff(0.2f), m_dt(dt), m_staticIdx(0) {} - - float m_positionDrift; - float m_positionConstraintCoeff; - float m_dt; - bool m_enableParallelSolve; - float m_batchCellSize; - int m_staticIdx; -}; - -void b3GpuPgsContactSolver::solveContactConstraintBatchSizes(const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray* batchSizes) //const b3OpenCLArray* gpuBatchSizes) -{ - B3_PROFILE("solveContactConstraintBatchSizes"); - int numBatches = batchSizes->size() / B3_MAX_NUM_BATCHES; - for (int iter = 0; iter < numIterations; iter++) - { - for (int cellId = 0; cellId < numBatches; cellId++) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = batchSizes->at(cellId * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveSingleContactKernel, "m_solveSingleContactKernel"); - launcher.setBuffer(bodyBuf->getBufferCL()); - launcher.setBuffer(shapeBuf->getBufferCL()); - launcher.setBuffer(constraint->getBufferCL()); - launcher.setConst(cellId); - launcher.setConst(offset); - launcher.setConst(numInBatch); - launcher.launch1D(numInBatch); - offset += numInBatch; - } - } - } - } - - for (int iter = 0; iter < numIterations; iter++) - { - for (int cellId = 0; cellId < numBatches; cellId++) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = batchSizes->at(cellId * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveSingleFrictionKernel, "m_solveSingleFrictionKernel"); - launcher.setBuffer(bodyBuf->getBufferCL()); - launcher.setBuffer(shapeBuf->getBufferCL()); - launcher.setBuffer(constraint->getBufferCL()); - launcher.setConst(cellId); - launcher.setConst(offset); - launcher.setConst(numInBatch); - launcher.launch1D(numInBatch); - offset += numInBatch; - } - } - } - } -} - -void b3GpuPgsContactSolver::solveContactConstraint(const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray* batchSizes) //,const b3OpenCLArray* gpuBatchSizes) -{ - //sort the contacts - - b3Int4 cdata = b3MakeInt4(n, 0, 0, 0); - { - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches; //250; - - int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray gpuDebugInfo(data->m_device, numWorkItems); -#endif - - { - B3_PROFILE("m_batchSolveKernel iterations"); - for (int iter = 0; iter < numIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { -#ifdef DEBUG_ME - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - - cdata.z = ib; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveContactKernel, "m_solveContactKernel"); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setBuffer(m_data->m_solverGPU->m_batchSizes.getBufferCL()); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - launcher.launch1D(numWorkItems, 64); - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D(num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - for (int i = 0; i < numWorkItems; i++) - { - if (debugInfo[i].m_valInt2 > 0) - { - printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - } - } - - clFinish(m_data->m_queue); - } - - cdata.x = 1; - bool applyFriction = true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for (int iter = 0; iter < numIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setBuffer(m_data->m_solverGPU->m_batchSizes.getBufferCL()); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - - launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64); - } - } - clFinish(m_data->m_queue); - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } -} - -static bool sortfnc(const b3SortData& a, const b3SortData& b) -{ - return (a.m_key < b.m_key); -} - -static bool b3ContactCmp(const b3Contact4& p, const b3Contact4& q) -{ - return ((p.m_bodyAPtrAndSignBit < q.m_bodyAPtrAndSignBit) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit < q.m_bodyBPtrAndSignBit)) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit == q.m_bodyBPtrAndSignBit) && p.m_childIndexA < q.m_childIndexA) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit == q.m_bodyBPtrAndSignBit) && p.m_childIndexA < q.m_childIndexA) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit == q.m_bodyBPtrAndSignBit) && p.m_childIndexA == q.m_childIndexA && p.m_childIndexB < q.m_childIndexB)); -} - -#define USE_SPATIAL_BATCHING 1 -#define USE_4x4_GRID 1 - -#ifndef USE_SPATIAL_BATCHING -static const int gridTable4x4[] = - { - 0, 1, 17, 16, - 1, 2, 18, 19, - 17, 18, 32, 3, - 16, 19, 3, 34}; -static const int gridTable8x8[] = - { - 0, 2, 3, 16, 17, 18, 19, 1, - 66, 64, 80, 67, 82, 81, 65, 83, - 131, 144, 128, 130, 147, 129, 145, 146, - 208, 195, 194, 192, 193, 211, 210, 209, - 21, 22, 23, 5, 4, 6, 7, 20, - 86, 85, 69, 87, 70, 68, 84, 71, - 151, 133, 149, 150, 135, 148, 132, 134, - 197, 27, 214, 213, 212, 199, 198, 196 - -}; - -#endif - -void SetSortDataCPU(b3Contact4* gContact, b3RigidBodyData* gBodies, b3SortData* gSortDataOut, int nContacts, float scale, const b3Int4& nSplit, int staticIdx) -{ - for (int gIdx = 0; gIdx < nContacts; gIdx++) - { - if (gIdx < nContacts) - { - int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit; - int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit; - - int aIdx = abs(aPtrAndSignBit); - int bIdx = abs(bPtrAndSignBit); - - bool aStatic = (aPtrAndSignBit < 0) || (aPtrAndSignBit == staticIdx); - -#if USE_SPATIAL_BATCHING - int idx = (aStatic) ? bIdx : aIdx; - b3Vector3 p = gBodies[idx].m_pos; - int xIdx = (int)((p.x - ((p.x < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.x - 1); - int yIdx = (int)((p.y - ((p.y < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.y - 1); - int zIdx = (int)((p.z - ((p.z < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.z - 1); - - int newIndex = (xIdx + yIdx * nSplit.x + zIdx * nSplit.x * nSplit.y); - -#else //USE_SPATIAL_BATCHING - bool bStatic = (bPtrAndSignBit < 0) || (bPtrAndSignBit == staticIdx); - -#if USE_4x4_GRID - int aa = aIdx & 3; - int bb = bIdx & 3; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb * 4; - int newIndex = gridTable4x4[gridIndex]; -#else //USE_4x4_GRID - int aa = aIdx & 7; - int bb = bIdx & 7; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb * 8; - int newIndex = gridTable8x8[gridIndex]; -#endif //USE_4x4_GRID -#endif //USE_SPATIAL_BATCHING - - gSortDataOut[gIdx].x = newIndex; - gSortDataOut[gIdx].y = gIdx; - } - else - { - gSortDataOut[gIdx].x = 0xffffffff; - } - } -} - -void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) -{ - B3_PROFILE("solveContacts"); - m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf, numBodies); - m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf, numBodies); - m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf, numContacts); - - if (optionalSortContactsDeterminism) - { - if (!gCpuSortContactsDeterminism) - { - B3_PROFILE("GPU Sort contact constraints (determinism)"); - - m_data->m_pBufContactOutGPUCopy->resize(numContacts); - m_data->m_contactKeyValues->resize(numContacts); - - m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(), numContacts, 0, 0); - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel, "m_setDeterminismSortDataChildShapeBKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel, "m_setDeterminismSortDataChildShapeAKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel, "m_setDeterminismSortDataBodyBKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel, "m_setDeterminismSortDataBodyAKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - - { - B3_PROFILE("gpu reorderContactKernel (determinism)"); - - b3Int4 cdata; - cdata.x = numContacts; - - //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) - // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_reorderContactKernel, "m_reorderContactKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(cdata); - launcher.launch1D(numContacts, 64); - } - } - else - { - B3_PROFILE("CPU Sort contact constraints (determinism)"); - b3AlignedObjectArray cpuConstraints; - m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); - bool sort = true; - if (sort) - { - cpuConstraints.quickSort(b3ContactCmp); - - for (int i = 0; i < cpuConstraints.size(); i++) - { - cpuConstraints[i].m_batchIdx = i; - } - } - m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); - if (m_debugOutput == 100) - { - for (int i = 0; i < cpuConstraints.size(); i++) - { - printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n", i, cpuConstraints[i].m_bodyAPtrAndSignBit, cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); - } - } - - m_debugOutput++; - } - } - - int nContactOut = m_data->m_pBufContactOutGPU->size(); - - bool useSolver = true; - - if (useSolver) - { - float dt = 1. / 60.; - b3ConstraintCfg csCfg(dt); - csCfg.m_enableParallelSolve = true; - csCfg.m_batchCellSize = 6; - csCfg.m_staticIdx = static0Index; - - b3OpenCLArray* bodyBuf = m_data->m_bodyBufferGPU; - - void* additionalData = 0; //m_data->m_frictionCGPU; - const b3OpenCLArray* shapeBuf = m_data->m_inertiaBufferGPU; - b3OpenCLArray* contactConstraintOut = m_data->m_contactCGPU; - int nContacts = nContactOut; - - int maxNumBatches = 0; - - if (!gUseLargeBatches) - { - if (m_data->m_solverGPU->m_contactBuffer2) - { - m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); - } - - if (m_data->m_solverGPU->m_contactBuffer2 == 0) - { - m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray(m_data->m_context, m_data->m_queue, nContacts); - m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); - } - - //clFinish(m_data->m_queue); - - { - B3_PROFILE("batching"); - //@todo: just reserve it, without copy of original contact (unless we use warmstarting) - - //const b3OpenCLArray* bodyNative = bodyBuf; - - { - //b3OpenCLArray* bodyNative = b3OpenCLArrayUtils::map( data->m_device, bodyBuf ); - //b3OpenCLArray* contactNative = b3OpenCLArrayUtils::map( data->m_device, contactsIn ); - - const int sortAlignment = 512; // todo. get this out of sort - if (csCfg.m_enableParallelSolve) - { - int sortSize = B3NEXTMULTIPLEOF(nContacts, sortAlignment); - - b3OpenCLArray* countsNative = m_data->m_solverGPU->m_numConstraints; - b3OpenCLArray* offsetsNative = m_data->m_solverGPU->m_offsets; - - if (!gCpuSetSortData) - { // 2. set cell idx - B3_PROFILE("GPU set cell idx"); - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - b3Int4 m_nSplit; - }; - - b3Assert(sortSize % 64 == 0); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = csCfg.m_staticIdx; - cdata.m_scale = 1.f / csCfg.m_batchCellSize; - cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; - cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; - cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; - - m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); - - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_data->m_pBufContactOutGPU->getBufferCL()), b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(m_data->m_solverGPU->m_sortDataBuffer->getBufferCL())}; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel, "m_setSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_scale); - launcher.setConst(cdata.m_nSplit); - launcher.setConst(cdata.m_staticIdx); - - launcher.launch1D(sortSize, 64); - } - else - { - m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); - b3AlignedObjectArray sortDataCPU; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); - - b3AlignedObjectArray contactCPU; - m_data->m_pBufContactOutGPU->copyToHost(contactCPU); - b3AlignedObjectArray bodiesCPU; - bodyBuf->copyToHost(bodiesCPU); - float scale = 1.f / csCfg.m_batchCellSize; - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts, scale, nSplit, csCfg.m_staticIdx); - - m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); - } - - if (!gCpuRadixSort) - { // 3. sort by cell idx - B3_PROFILE("gpuRadixSort"); - //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - //int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - //adl::RadixSort::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); - //adl::RadixSort32::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); - b3OpenCLArray& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); - this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); - } - else - { - b3OpenCLArray& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); - b3AlignedObjectArray hostValues; - keyValuesInOut.copyToHost(hostValues); - hostValues.quickSort(sortfnc); - keyValuesInOut.copyFromHost(hostValues); - } - - if (gUseScanHost) - { - // 4. find entries - B3_PROFILE("cpuBoundSearch"); - b3AlignedObjectArray countsHost; - countsNative->copyToHost(countsHost); - - b3AlignedObjectArray sortDataHost; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); - - //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); - m_data->m_solverGPU->m_search->executeHost(sortDataHost, nContacts, countsHost, B3_SOLVER_N_CELLS, b3BoundSearchCL::COUNT); - - countsNative->copyFromHost(countsHost); - - //adl::BoundSearch::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, - // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); - - //unsigned int sum; - //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); - b3AlignedObjectArray offsetsHost; - offsetsHost.resize(offsetsNative->size()); - - m_data->m_solverGPU->m_scan->executeHost(countsHost, offsetsHost, B3_SOLVER_N_CELLS); //,&sum ); - offsetsNative->copyFromHost(offsetsHost); - - //printf("sum = %d\n",sum); - } - else - { - // 4. find entries - B3_PROFILE("gpuBoundSearch"); - m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_CELLS, b3BoundSearchCL::COUNT); - m_data->m_solverGPU->m_scan->execute(*countsNative, *offsetsNative, B3_SOLVER_N_CELLS); //,&sum ); - } - - if (nContacts) - { // 5. sort constraints by cellIdx - if (gReorderContactsOnCpu) - { - B3_PROFILE("cpu m_reorderContactKernel"); - b3AlignedObjectArray sortDataHost; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); - b3AlignedObjectArray inContacts; - b3AlignedObjectArray outContacts; - m_data->m_pBufContactOutGPU->copyToHost(inContacts); - outContacts.resize(inContacts.size()); - for (int i = 0; i < nContacts; i++) - { - int srcIdx = sortDataHost[i].y; - outContacts[i] = inContacts[srcIdx]; - } - m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); - - /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" - "{\n" - " int nContacts = cb.x;\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int srcIdx = sortData[gIdx].y;\n" - " out[gIdx] = in[srcIdx];\n" - " }\n" - "}\n" - */ - } - else - { - B3_PROFILE("gpu m_reorderContactKernel"); - - b3Int4 cdata; - cdata.x = nContacts; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_data->m_pBufContactOutGPU->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_contactBuffer2->getBufferCL()), b3BufferInfoCL(m_data->m_solverGPU->m_sortDataBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_reorderContactKernel, "m_reorderContactKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nContacts, 64); - } - } - } - } - - //clFinish(m_data->m_queue); - - // { - // b3AlignedObjectArray histogram; - // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); - // printf(",,,\n"); - // } - - if (nContacts) - { - if (gUseCpuCopyConstraints) - { - for (int i = 0; i < nContacts; i++) - { - m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); - // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); - // m_data->m_pBufContactOutGPU->getBufferCL() - } - } - else - { - B3_PROFILE("gpu m_copyConstraintKernel"); - b3Int4 cdata; - cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_data->m_solverGPU->m_contactBuffer2->getBufferCL()), - b3BufferInfoCL(m_data->m_pBufContactOutGPU->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel, "m_copyConstraintKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nContacts, 64); - //we use the clFinish for proper benchmark/profile - clFinish(m_data->m_queue); - } - } - - // bool compareGPU = false; - if (nContacts) - { - if (!gCpuBatchContacts) - { - B3_PROFILE("gpu batchContacts"); - maxNumBatches = 250; //250; - m_data->m_solverGPU->batchContacts(m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx); - clFinish(m_data->m_queue); - } - else - { - B3_PROFILE("cpu batchContacts"); - static b3AlignedObjectArray cpuContacts; - b3OpenCLArray* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - { - B3_PROFILE("copyToHost"); - contactsIn->copyToHost(cpuContacts); - } - b3OpenCLArray* countsNative = m_data->m_solverGPU->m_numConstraints; - b3OpenCLArray* offsetsNative = m_data->m_solverGPU->m_offsets; - - b3AlignedObjectArray nNativeHost; - b3AlignedObjectArray offsetsNativeHost; - - { - B3_PROFILE("countsNative/offsetsNative copyToHost"); - countsNative->copyToHost(nNativeHost); - offsetsNative->copyToHost(offsetsNativeHost); - } - - int numNonzeroGrid = 0; - - if (gUseLargeBatches) - { - m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); - int totalNumConstraints = cpuContacts.size(); - //int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3(&cpuContacts[0], totalNumConstraints, totalNumConstraints + 1, csCfg.m_staticIdx, numBodies, &m_data->m_batchSizes[0]); // on GPU - maxNumBatches = b3Max(numBatches, maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches > globalMaxBatch) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n", maxNumBatches); - } - } - else - { - m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS * B3_MAX_NUM_BATCHES); - B3_PROFILE("cpu batch grid"); - for (int i = 0; i < B3_SOLVER_N_CELLS; i++) - { - int n = (nNativeHost)[i]; - int offset = (offsetsNativeHost)[i]; - if (n) - { - numNonzeroGrid++; - int simdWidth = numBodies + 1; //-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3(&cpuContacts[0] + offset, n, simdWidth, csCfg.m_staticIdx, numBodies, &m_data->m_batchSizes[i * B3_MAX_NUM_BATCHES]); // on GPU - maxNumBatches = b3Max(numBatches, maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches > globalMaxBatch) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n", maxNumBatches); - } - //we use the clFinish for proper benchmark/profile - } - } - //clFinish(m_data->m_queue); - } - { - B3_PROFILE("m_contactBuffer->copyFromHost"); - m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray&)cpuContacts); - } - } - } - } - } - - //printf("maxNumBatches = %d\n", maxNumBatches); - - if (gUseLargeBatches) - { - if (nContacts) - { - B3_PROFILE("cpu batchContacts"); - static b3AlignedObjectArray cpuContacts; - // b3OpenCLArray* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - { - B3_PROFILE("copyToHost"); - m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); - } - // b3OpenCLArray* countsNative = m_data->m_solverGPU->m_numConstraints; - // b3OpenCLArray* offsetsNative = m_data->m_solverGPU->m_offsets; - - // int numNonzeroGrid=0; - - { - m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); - int totalNumConstraints = cpuContacts.size(); - // int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3(&cpuContacts[0], totalNumConstraints, totalNumConstraints + 1, csCfg.m_staticIdx, numBodies, &m_data->m_batchSizes[0]); // on GPU - maxNumBatches = b3Max(numBatches, maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches > globalMaxBatch) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n", maxNumBatches); - } - } - { - B3_PROFILE("m_contactBuffer->copyFromHost"); - m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray&)cpuContacts); - } - } - } - - if (nContacts) - { - B3_PROFILE("gpu convertToConstraints"); - m_data->m_solverGPU->convertToConstraints(bodyBuf, - shapeBuf, m_data->m_solverGPU->m_contactBuffer2, - contactConstraintOut, - additionalData, nContacts, - (b3SolverBase::ConstraintCfg&)csCfg); - clFinish(m_data->m_queue); - } - - if (1) - { - int numIter = 4; - - m_data->m_solverGPU->m_nIterations = numIter; //10 - if (!gCpuSolveConstraint) - { - B3_PROFILE("GPU solveContactConstraint"); - - /*m_data->m_solverGPU->solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU,0, - nContactOut , - maxNumBatches); - */ - - //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); - - if (gUseLargeBatches) - { - solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU, 0, - nContactOut, - maxNumBatches, numIter, &m_data->m_batchSizes); - } - else - { - solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU, 0, - nContactOut, - maxNumBatches, numIter, &m_data->m_batchSizes); //m_data->m_batchSizesGpu); - } - } - else - { - B3_PROFILE("Host solveContactConstraint"); - - m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU, 0, nContactOut, maxNumBatches, &m_data->m_batchSizes); - } - } - -#if 0 - if (0) - { - B3_PROFILE("read body velocities back to CPU"); - //read body updated linear/angular velocities back to CPU - m_data->m_bodyBufferGPU->read( - m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); - adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); - } -#endif - } -} - -void b3GpuPgsContactSolver::batchContacts(b3OpenCLArray* contacts, int nContacts, b3OpenCLArray* n, b3OpenCLArray* offsets, int staticIdx) -{ -} - -b3AlignedObjectArray idxBuffer; -b3AlignedObjectArray sortData; -b3AlignedObjectArray old; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies) -{ - B3_PROFILE("sortConstraintByBatch"); - int numIter = 0; - - sortData.resize(n); - idxBuffer.resize(n); - old.resize(n); - - unsigned int* idxSrc = &idxBuffer[0]; - unsigned int* idxDst = &idxBuffer[0]; - int nIdxSrc, nIdxDst; - - const int N_FLG = 256; - const int FLG_MASK = N_FLG - 1; - unsigned int flg[N_FLG / 32]; -#if defined(_DEBUG) - for (int i = 0; i < n; i++) - cs[i].getBatchIdx() = -1; -#endif - for (int i = 0; i < n; i++) - idxSrc[i] = i; - nIdxSrc = n; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - while (nIdxSrc) - { - numIter++; - nIdxDst = 0; - int nCurrentBatch = 0; - - // clear flag - for (int i = 0; i < N_FLG / 32; i++) flg[i] = 0; - - for (int i = 0; i < nIdxSrc; i++) - { - int idx = idxSrc[i]; - - b3Assert(idx < n); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - - int aIdx = bodyA & FLG_MASK; - int bIdx = bodyB & FLG_MASK; - - unsigned int aUnavailable = flg[aIdx / 32] & (1 << (aIdx & 31)); - unsigned int bUnavailable = flg[bIdx / 32] & (1 << (bIdx & 31)); - - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - - //use inv_mass! - aUnavailable = !aIsStatic ? aUnavailable : 0; // - bUnavailable = !bIsStatic ? bUnavailable : 0; - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - flg[aIdx / 32] |= (1 << (aIdx & 31)); - if (!bIsStatic) - flg[bIdx / 32] |= (1 << (bIdx & 31)); - - cs[idx].getBatchIdx() = batchIdx; - sortData[idx].m_key = batchIdx; - sortData[idx].m_value = idx; - - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - nCurrentBatch = 0; - for (int i = 0; i < N_FLG / 32; i++) flg[i] = 0; - } - } - } - else - { - idxDst[nIdxDst++] = idx; - } - } - b3Swap(idxSrc, idxDst); - b3Swap(nIdxSrc, nIdxDst); - batchIdx++; - } - } - { - B3_PROFILE("quickSort"); - sortData.quickSort(sortfnc); - } - - { - B3_PROFILE("reorder"); - // reorder - - memcpy(&old[0], cs, sizeof(b3Contact4) * n); - for (int i = 0; i < n; i++) - { - int idx = sortData[i].m_value; - cs[i] = old[idx]; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < n; i++) - { - b3Assert(cs[i].getBatchIdx() != -1); - } -#endif - return batchIdx; -} - -b3AlignedObjectArray bodyUsed2; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch2(b3Contact4* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies) -{ - B3_PROFILE("sortConstraintByBatch2"); - - bodyUsed2.resize(2 * simdWidth); - - for (int q = 0; q < 2 * simdWidth; q++) - bodyUsed2[q] = 0; - - int curBodyUsed = 0; - - int numIter = 0; - - m_data->m_sortData.resize(numConstraints); - m_data->m_idxBuffer.resize(numConstraints); - m_data->m_old.resize(numConstraints); - - unsigned int* idxSrc = &m_data->m_idxBuffer[0]; - -#if defined(_DEBUG) - for (int i = 0; i < numConstraints; i++) - cs[i].getBatchIdx() = -1; -#endif - for (int i = 0; i < numConstraints; i++) - idxSrc[i] = i; - - int numValidConstraints = 0; - // int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - - while (numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - // clear flag - for (int i = 0; i < curBodyUsed; i++) - bodyUsed2[i] = 0; - curBodyUsed = 0; - - for (int i = numValidConstraints; i < numConstraints; i++) - { - int idx = idxSrc[i]; - b3Assert(idx < numConstraints); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - for (int j = 0; j < curBodyUsed; j++) - { - if (bodyA == bodyUsed2[j]) - { - aUnavailable = 1; - break; - } - } - } - if (!aUnavailable) - if (!bIsStatic) - { - for (int j = 0; j < curBodyUsed; j++) - { - if (bodyB == bodyUsed2[j]) - { - bUnavailable = 1; - break; - } - } - } - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - { - bodyUsed2[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - bodyUsed2[curBodyUsed++] = bodyB; - } - - cs[idx].getBatchIdx() = batchIdx; - m_data->m_sortData[idx].m_key = batchIdx; - m_data->m_sortData[idx].m_value = idx; - - if (i != numValidConstraints) - { - b3Swap(idxSrc[i], idxSrc[numValidConstraints]); - } - - numValidConstraints++; - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - nCurrentBatch = 0; - for (int i = 0; i < curBodyUsed; i++) - bodyUsed2[i] = 0; - - curBodyUsed = 0; - } - } - } - } - - batchIdx++; - } - } - { - B3_PROFILE("quickSort"); - //m_data->m_sortData.quickSort(sortfnc); - } - - { - B3_PROFILE("reorder"); - // reorder - - memcpy(&m_data->m_old[0], cs, sizeof(b3Contact4) * numConstraints); - - for (int i = 0; i < numConstraints; i++) - { - b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]); - int idx = m_data->m_sortData[idxSrc[i]].m_value; - cs[i] = m_data->m_old[idx]; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < numConstraints; i++) - { - b3Assert(cs[i].getBatchIdx() != -1); - } -#endif - - return batchIdx; -} - -b3AlignedObjectArray bodyUsed; -b3AlignedObjectArray curUsed; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch3(b3Contact4* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies, int* batchSizes) -{ - B3_PROFILE("sortConstraintByBatch3"); - - static int maxSwaps = 0; - int numSwaps = 0; - - curUsed.resize(2 * simdWidth); - - static int maxNumConstraints = 0; - if (maxNumConstraints < numConstraints) - { - maxNumConstraints = numConstraints; - //printf("maxNumConstraints = %d\n",maxNumConstraints ); - } - - int numUsedArray = numBodies / 32 + 1; - bodyUsed.resize(numUsedArray); - - for (int q = 0; q < numUsedArray; q++) - bodyUsed[q] = 0; - - int curBodyUsed = 0; - - int numIter = 0; - - m_data->m_sortData.resize(0); - m_data->m_idxBuffer.resize(0); - m_data->m_old.resize(0); - -#if defined(_DEBUG) - for (int i = 0; i < numConstraints; i++) - cs[i].getBatchIdx() = -1; -#endif - - int numValidConstraints = 0; - // int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - - while (numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - batchSizes[batchIdx] = 0; - - // clear flag - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - - curBodyUsed = 0; - - for (int i = numValidConstraints; i < numConstraints; i++) - { - int idx = i; - b3Assert(idx < numConstraints); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31)); - } - if (!aUnavailable) - if (!bIsStatic) - { - bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31)); - } - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - { - bodyUsed[bodyA / 32] |= (1 << (bodyA & 31)); - curUsed[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - bodyUsed[bodyB / 32] |= (1 << (bodyB & 31)); - curUsed[curBodyUsed++] = bodyB; - } - - cs[idx].getBatchIdx() = batchIdx; - - if (i != numValidConstraints) - { - b3Swap(cs[i], cs[numValidConstraints]); - numSwaps++; - } - - numValidConstraints++; - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - batchSizes[batchIdx] += simdWidth; - nCurrentBatch = 0; - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - curBodyUsed = 0; - } - } - } - } - - if (batchIdx >= B3_MAX_NUM_BATCHES) - { - b3Error("batchIdx>=B3_MAX_NUM_BATCHES"); - b3Assert(0); - break; - } - - batchSizes[batchIdx] += nCurrentBatch; - - batchIdx++; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < numConstraints; i++) - { - b3Assert(cs[i].getBatchIdx() != -1); - } -#endif - - batchSizes[batchIdx] = 0; - - if (maxSwaps < numSwaps) - { - maxSwaps = numSwaps; - //printf("maxSwaps = %d\n", maxSwaps); - } - - return batchIdx; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h deleted file mode 100644 index 6ab7502af3d..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h +++ /dev/null @@ -1,37 +0,0 @@ - -#ifndef B3_GPU_BATCHING_PGS_SOLVER_H -#define B3_GPU_BATCHING_PGS_SOLVER_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "b3GpuConstraint4.h" - -class b3GpuPgsContactSolver -{ -protected: - int m_debugOutput; - - struct b3GpuBatchingPgsSolverInternalData* m_data; - - void batchContacts(b3OpenCLArray* contacts, int nContacts, b3OpenCLArray* n, b3OpenCLArray* offsets, int staticIdx); - - inline int sortConstraintByBatch(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies); - inline int sortConstraintByBatch2(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies); - inline int sortConstraintByBatch3(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies, int* batchSizes); - - void solveContactConstraintBatchSizes(const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray* batchSizes); //const b3OpenCLArray* gpuBatchSizes); - - void solveContactConstraint(const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray* batchSizes); //const b3OpenCLArray* gpuBatchSizes); - -public: - b3GpuPgsContactSolver(cl_context ctx, cl_device_id device, cl_command_queue q, int pairCapacity); - virtual ~b3GpuPgsContactSolver(); - - void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); -}; - -#endif //B3_GPU_BATCHING_PGS_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp deleted file mode 100644 index fef33ad1cdb..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ /dev/null @@ -1,677 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3GpuRigidBodyPipeline.h" -#include "b3GpuRigidBodyPipelineInternalData.h" -#include "kernels/integrateKernel.h" -#include "kernels/updateAabbsKernel.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3GpuNarrowPhase.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" -#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" - -//#define TEST_OTHER_GPU_SOLVER - -#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" -#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" - -bool useBullet2CpuSolver = true; - -//choice of contact solver -bool gUseJacobi = false; -bool gUseDbvt = false; -bool gDumpContactStats = false; -bool gCalcWorldSpaceAabbOnCpu = false; -bool gUseCalculateOverlappingPairsHost = false; -bool gIntegrateOnCpu = false; -bool gClearPairsOnGpu = true; - -#define TEST_OTHER_GPU_SOLVER 1 -#ifdef TEST_OTHER_GPU_SOLVER -#include "b3GpuJacobiContactSolver.h" -#endif //TEST_OTHER_GPU_SOLVER - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h" - -#include "b3GpuPgsContactSolver.h" -#include "b3Solver.h" - -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3OpenCL/Raycast/b3GpuRaycast.h" - -#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" -#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" - -b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config) -{ - m_data = new b3GpuRigidBodyPipelineInternalData; - m_data->m_constraintUid = 0; - m_data->m_config = config; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_queue = q; - - m_data->m_solver = new b3PgsJacobiSolver(true); //new b3PgsJacobiSolver(true); - m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx, device, q, true); //new b3PgsJacobiSolver(true); - - m_data->m_allAabbsGPU = new b3OpenCLArray(ctx, q, config.m_maxConvexBodies); - m_data->m_overlappingPairsGPU = new b3OpenCLArray(ctx, q, config.m_maxBroadphasePairs); - - m_data->m_gpuConstraints = new b3OpenCLArray(ctx, q); -#ifdef TEST_OTHER_GPU_SOLVER - m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx, device, q, config.m_maxBroadphasePairs); -#endif // TEST_OTHER_GPU_SOLVER - - m_data->m_solver2 = new b3GpuPgsContactSolver(ctx, device, q, config.m_maxBroadphasePairs); - - m_data->m_raycaster = new b3GpuRaycast(ctx, device, q); - - m_data->m_broadphaseDbvt = broadphaseDbvt; - m_data->m_broadphaseSap = broadphaseSap; - m_data->m_narrowphase = narrowphase; - m_data->m_gravity.setValue(0.f, -9.8f, 0.f); - - cl_int errNum = 0; - - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, integrateKernelCL, &errNum, "", B3_RIGIDBODY_INTEGRATE_PATH); - b3Assert(errNum == CL_SUCCESS); - m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, integrateKernelCL, "integrateTransformsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - clReleaseProgram(prog); - } - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, &errNum, "", B3_RIGIDBODY_UPDATEAABB_PATH); - b3Assert(errNum == CL_SUCCESS); - m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "initializeGpuAabbsFull", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - - m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "clearOverlappingPairsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - - clReleaseProgram(prog); - } -} - -b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline() -{ - if (m_data->m_integrateTransformsKernel) - clReleaseKernel(m_data->m_integrateTransformsKernel); - - if (m_data->m_updateAabbsKernel) - clReleaseKernel(m_data->m_updateAabbsKernel); - - if (m_data->m_clearOverlappingPairsKernel) - clReleaseKernel(m_data->m_clearOverlappingPairsKernel); - delete m_data->m_raycaster; - delete m_data->m_solver; - delete m_data->m_allAabbsGPU; - delete m_data->m_gpuConstraints; - delete m_data->m_overlappingPairsGPU; - -#ifdef TEST_OTHER_GPU_SOLVER - delete m_data->m_solver3; -#endif //TEST_OTHER_GPU_SOLVER - - delete m_data->m_solver2; - - delete m_data; -} - -void b3GpuRigidBodyPipeline::reset() -{ - m_data->m_gpuConstraints->resize(0); - m_data->m_cpuConstraints.resize(0); - m_data->m_allAabbsGPU->resize(0); - m_data->m_allAabbsCPU.resize(0); -} - -void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint) -{ - m_data->m_joints.push_back(constraint); -} - -void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint) -{ - m_data->m_joints.remove(constraint); -} - -void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid) -{ - m_data->m_gpuSolver->recomputeBatches(); - //slow linear search - m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); - //remove - for (int i = 0; i < m_data->m_cpuConstraints.size(); i++) - { - if (m_data->m_cpuConstraints[i].m_uid == uid) - { - //m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]); - m_data->m_cpuConstraints.swap(i, m_data->m_cpuConstraints.size() - 1); - m_data->m_cpuConstraints.pop_back(); - - break; - } - } - - if (m_data->m_cpuConstraints.size()) - { - m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints); - } - else - { - m_data->m_gpuConstraints->resize(0); - } -} -int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold) -{ - m_data->m_gpuSolver->recomputeBatches(); - b3GpuGenericConstraint c; - c.m_uid = m_data->m_constraintUid; - m_data->m_constraintUid++; - c.m_flags = B3_CONSTRAINT_FLAG_ENABLED; - c.m_rbA = bodyA; - c.m_rbB = bodyB; - c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]); - c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]); - c.m_breakingImpulseThreshold = breakingThreshold; - c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE; - m_data->m_cpuConstraints.push_back(c); - return c.m_uid; -} -int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold) -{ - m_data->m_gpuSolver->recomputeBatches(); - b3GpuGenericConstraint c; - c.m_uid = m_data->m_constraintUid; - m_data->m_constraintUid++; - c.m_flags = B3_CONSTRAINT_FLAG_ENABLED; - c.m_rbA = bodyA; - c.m_rbB = bodyB; - c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]); - c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]); - c.m_relTargetAB.setValue(relTargetAB[0], relTargetAB[1], relTargetAB[2], relTargetAB[3]); - c.m_breakingImpulseThreshold = breakingThreshold; - c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE; - - m_data->m_cpuConstraints.push_back(c); - return c.m_uid; -} - -void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) -{ - //update worldspace AABBs from local AABB/worldtransform - { - B3_PROFILE("setupGpuAabbs"); - setupGpuAabbsFull(); - } - - int numPairs = 0; - - //compute overlapping pairs - { - if (gUseDbvt) - { - { - B3_PROFILE("setAabb"); - m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU); - for (int i = 0; i < m_data->m_allAabbsCPU.size(); i++) - { - b3Vector3 aabbMin = b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0], m_data->m_allAabbsCPU[i].m_min[1], m_data->m_allAabbsCPU[i].m_min[2]); - b3Vector3 aabbMax = b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0], m_data->m_allAabbsCPU[i].m_max[1], m_data->m_allAabbsCPU[i].m_max[2]); - m_data->m_broadphaseDbvt->setAabb(i, aabbMin, aabbMax, 0); - } - } - - { - B3_PROFILE("calculateOverlappingPairs"); - m_data->m_broadphaseDbvt->calculateOverlappingPairs(); - } - numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); - } - else - { - if (gUseCalculateOverlappingPairsHost) - { - m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); - } - else - { - m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs); - } - numPairs = m_data->m_broadphaseSap->getNumOverlap(); - } - } - - //compute contact points - // printf("numPairs=%d\n",numPairs); - - int numContacts = 0; - - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - - if (numPairs) - { - cl_mem pairs = 0; - cl_mem aabbsWS = 0; - if (gUseDbvt) - { - B3_PROFILE("m_overlappingPairsGPU->copyFromHost"); - m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); - pairs = m_data->m_overlappingPairsGPU->getBufferCL(); - aabbsWS = m_data->m_allAabbsGPU->getBufferCL(); - } - else - { - pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer(); - aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS(); - } - - m_data->m_overlappingPairsGPU->resize(numPairs); - - //mark the contacts for each pair as 'unused' - if (numPairs) - { - b3OpenCLArray gpuPairs(this->m_data->m_context, m_data->m_queue); - gpuPairs.setFromOpenCLBuffer(pairs, numPairs); - - if (gClearPairsOnGpu) - { - //b3AlignedObjectArray hostPairs;//just for debugging - //gpuPairs.copyToHost(hostPairs); - - b3LauncherCL launcher(m_data->m_queue, m_data->m_clearOverlappingPairsKernel, "clearOverlappingPairsKernel"); - launcher.setBuffer(pairs); - launcher.setConst(numPairs); - launcher.launch1D(numPairs); - - //gpuPairs.copyToHost(hostPairs); - } - else - { - b3AlignedObjectArray hostPairs; - gpuPairs.copyToHost(hostPairs); - - for (int i = 0; i < hostPairs.size(); i++) - { - hostPairs[i].z = 0xffffffff; - } - - gpuPairs.copyFromHost(hostPairs); - } - } - - m_data->m_narrowphase->computeContacts(pairs, numPairs, aabbsWS, numBodies); - numContacts = m_data->m_narrowphase->getNumContactsGpu(); - - if (gUseDbvt) - { - ///store the cached information (contact locations in the 'z' component) - B3_PROFILE("m_overlappingPairsGPU->copyToHost"); - m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); - } - if (gDumpContactStats && numContacts) - { - m_data->m_narrowphase->getContactsGpu(); - - printf("numContacts = %d\n", numContacts); - - int totalPoints = 0; - const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU(); - - for (int i = 0; i < numContacts; i++) - { - totalPoints += contacts->getNPoints(); - } - printf("totalPoints=%d\n", totalPoints); - } - } - - //convert contact points to contact constraints - - //solve constraints - - b3OpenCLArray gpuBodies(m_data->m_context, m_data->m_queue, 0, true); - gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(), m_data->m_narrowphase->getNumRigidBodies()); - b3OpenCLArray gpuInertias(m_data->m_context, m_data->m_queue, 0, true); - gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(), m_data->m_narrowphase->getNumRigidBodies()); - b3OpenCLArray gpuContacts(m_data->m_context, m_data->m_queue, 0, true); - gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(), m_data->m_narrowphase->getNumContactsGpu()); - - int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size(); - if (useBullet2CpuSolver && numJoints) - { - // b3AlignedObjectArray hostContacts; - //gpuContacts.copyToHost(hostContacts); - { - bool useGpu = m_data->m_joints.size() == 0; - - // b3Contact4* contacts = numContacts? &hostContacts[0]: 0; - //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints); - if (useGpu) - { - m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(), &gpuBodies, &gpuInertias, numJoints, m_data->m_gpuConstraints); - } - else - { - b3AlignedObjectArray hostBodies; - gpuBodies.copyToHost(hostBodies); - b3AlignedObjectArray hostInertias; - gpuInertias.copyToHost(hostInertias); - - b3TypedConstraint** joints = numJoints ? &m_data->m_joints[0] : 0; - m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(), &hostBodies[0], &hostInertias[0], 0, 0, numJoints, joints); - gpuBodies.copyFromHost(hostBodies); - } - } - } - - if (numContacts) - { -#ifdef TEST_OTHER_GPU_SOLVER - - if (gUseJacobi) - { - bool useGpu = true; - if (useGpu) - { - bool forceHost = false; - if (forceHost) - { - b3AlignedObjectArray hostBodies; - b3AlignedObjectArray hostInertias; - b3AlignedObjectArray hostContacts; - - { - B3_PROFILE("copyToHost"); - gpuBodies.copyToHost(hostBodies); - gpuInertias.copyToHost(hostInertias); - gpuContacts.copyToHost(hostContacts); - } - - { - b3JacobiSolverInfo solverInfo; - m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(), &hostContacts[0], hostContacts.size(), solverInfo); - } - { - B3_PROFILE("copyFromHost"); - gpuBodies.copyFromHost(hostBodies); - } - } - else - - { - int static0Index = m_data->m_narrowphase->getStatic0Index(); - b3JacobiSolverInfo solverInfo; - //m_data->m_solver3->solveContacts( >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo); - //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); - m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index); - } - } - else - { - b3AlignedObjectArray hostBodies; - gpuBodies.copyToHost(hostBodies); - b3AlignedObjectArray hostInertias; - gpuInertias.copyToHost(hostInertias); - b3AlignedObjectArray hostContacts; - gpuContacts.copyToHost(hostContacts); - { - //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); - } - gpuBodies.copyFromHost(hostBodies); - } - } - else -#endif //TEST_OTHER_GPU_SOLVER - { - int static0Index = m_data->m_narrowphase->getStatic0Index(); - m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index); - - //m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL()); - - /*m_data->m_solver3->solveContactConstraintHost( - (b3OpenCLArray*)&gpuBodies, - (b3OpenCLArray*)&gpuInertias, - (b3OpenCLArray*) &gpuContacts, - 0,numContacts,256); - */ - } - } - - integrate(deltaTime); -} - -void b3GpuRigidBodyPipeline::integrate(float timeStep) -{ - //integrate - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - float angularDamp = 0.99f; - - if (gIntegrateOnCpu) - { - if (numBodies) - { - b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData(); - npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU); - - b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0); - - for (int nodeID = 0; nodeID < numBodies; nodeID++) - { - integrateSingleTransform(bodies, nodeID, timeStep, angularDamp, m_data->m_gravity); - } - npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU); - } - } - else - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_integrateTransformsKernel, "m_integrateTransformsKernel"); - launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); - - launcher.setConst(numBodies); - launcher.setConst(timeStep); - launcher.setConst(angularDamp); - launcher.setConst(m_data->m_gravity); - launcher.launch1D(numBodies); - } -} - -void b3GpuRigidBodyPipeline::setupGpuAabbsFull() -{ - cl_int ciErrNum = 0; - - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - if (!numBodies) - return; - - if (gCalcWorldSpaceAabbOnCpu) - { - if (numBodies) - { - if (gUseDbvt) - { - m_data->m_allAabbsCPU.resize(numBodies); - m_data->m_narrowphase->readbackAllBodiesToCpu(); - for (int i = 0; i < numBodies; i++) - { - b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_allAabbsCPU[0]); - } - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - } - else - { - m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies); - m_data->m_narrowphase->readbackAllBodiesToCpu(); - for (int i = 0; i < numBodies; i++) - { - b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_broadphaseSap->getAllAabbsCPU()[0]); - } - m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU()); - //m_data->m_broadphaseSap->writeAabbsToGpu(); - } - } - } - else - { - //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) - b3LauncherCL launcher(m_data->m_queue, m_data->m_updateAabbsKernel, "m_updateAabbsKernel"); - launcher.setConst(numBodies); - cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); - launcher.setBuffer(bodies); - cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu(); - launcher.setBuffer(collidables); - cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu(); - launcher.setBuffer(localAabbs); - - cl_mem worldAabbs = 0; - if (gUseDbvt) - { - worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); - } - else - { - worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS(); - } - launcher.setBuffer(worldAabbs); - launcher.launch1D(numBodies); - - oclCHECKERROR(ciErrNum, CL_SUCCESS); - } - - /* - b3AlignedObjectArray aabbs; - m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs); - - printf("numAabbs = %d\n", aabbs.size()); - - for (int i=0;im_narrowphase->getBodiesGpu(); -} - -int b3GpuRigidBodyPipeline::getNumBodies() const -{ - return m_data->m_narrowphase->getNumRigidBodies(); -} - -void b3GpuRigidBodyPipeline::setGravity(const float* grav) -{ - m_data->m_gravity.setValue(grav[0], grav[1], grav[2]); -} - -void b3GpuRigidBodyPipeline::copyConstraintsToHost() -{ - m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); -} - -void b3GpuRigidBodyPipeline::writeAllInstancesToGpu() -{ - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints); -} - -int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu) -{ - b3Vector3 aabbMin = b3MakeVector3(0, 0, 0), aabbMax = b3MakeVector3(0, 0, 0); - - if (collidableIndex >= 0) - { - b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex); - b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]); - b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]); - - b3Scalar margin = 0.01f; - b3Transform t; - t.setIdentity(); - t.setOrigin(b3MakeVector3(position[0], position[1], position[2])); - t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3])); - b3TransformAabb(localAabbMin, localAabbMax, margin, t, aabbMin, aabbMax); - } - else - { - b3Error("registerPhysicsInstance using invalid collidableIndex\n"); - return -1; - } - - bool writeToGpu = false; - int bodyIndex = m_data->m_narrowphase->getNumRigidBodies(); - bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex, mass, position, orientation, &aabbMin.getX(), &aabbMax.getX(), writeToGpu); - - if (bodyIndex >= 0) - { - if (gUseDbvt) - { - m_data->m_broadphaseDbvt->createProxy(aabbMin, aabbMax, bodyIndex, 0, 1, 1); - b3SapAabb aabb; - for (int i = 0; i < 3; i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - aabb.m_minIndices[3] = bodyIndex; - } - m_data->m_allAabbsCPU.push_back(aabb); - if (writeInstanceToGpu) - { - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - } - } - else - { - if (mass) - { - m_data->m_broadphaseSap->createProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher); - } - else - { - m_data->m_broadphaseSap->createLargeProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher); - } - } - } - - /* - if (mass>0.f) - m_numDynamicPhysicsInstances++; - - m_numPhysicsInstances++; - */ - - return bodyIndex; -} - -void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults) -{ - this->m_data->m_raycaster->castRays(rays, hitResults, - getNumBodies(), this->m_data->m_narrowphase->getBodiesCpu(), - m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), - m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h deleted file mode 100644 index 0e5c6fec123..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h +++ /dev/null @@ -1,70 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_RIGIDBODY_PIPELINE_H -#define B3_GPU_RIGIDBODY_PIPELINE_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -class b3GpuRigidBodyPipeline -{ -protected: - struct b3GpuRigidBodyPipelineInternalData* m_data; - - int allocateCollidable(); - -public: - b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config); - virtual ~b3GpuRigidBodyPipeline(); - - void stepSimulation(float deltaTime); - void integrate(float timeStep); - void setupGpuAabbsFull(); - - int registerConvexPolyhedron(class b3ConvexUtility* convex); - - //int registerConvexPolyhedron(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - //int registerSphereShape(float radius); - //int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - //int registerConcaveMesh(b3AlignedObjectArray* vertices, b3AlignedObjectArray* indices, const float* scaling); - //int registerCompoundShape(b3AlignedObjectArray* childShapes); - - int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData, bool writeInstanceToGpu); - //if you passed "writeInstanceToGpu" false in the registerPhysicsInstance method (for performance) you need to call writeAllInstancesToGpu after all instances are registered - void writeAllInstancesToGpu(); - void copyConstraintsToHost(); - void setGravity(const float* grav); - void reset(); - - int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold); - int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold); - void removeConstraintByUid(int uid); - - void addConstraint(class b3TypedConstraint* constraint); - void removeConstraint(b3TypedConstraint* constraint); - - void castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults); - - cl_mem getBodyBuffer(); - - int getNumBodies() const; -}; - -#endif //B3_GPU_RIGIDBODY_PIPELINE_H \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h deleted file mode 100644 index e0a26fda17c..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H -#define B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - -#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h" -#include "Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h" - -struct b3GpuRigidBodyPipelineInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - cl_kernel m_integrateTransformsKernel; - cl_kernel m_updateAabbsKernel; - cl_kernel m_clearOverlappingPairsKernel; - - class b3PgsJacobiSolver* m_solver; - - class b3GpuPgsConstraintSolver* m_gpuSolver; - - class b3GpuPgsContactSolver* m_solver2; - class b3GpuJacobiContactSolver* m_solver3; - class b3GpuRaycast* m_raycaster; - - class b3GpuBroadphaseInterface* m_broadphaseSap; - - struct b3DynamicBvhBroadphase* m_broadphaseDbvt; - b3OpenCLArray* m_allAabbsGPU; - b3AlignedObjectArray m_allAabbsCPU; - b3OpenCLArray* m_overlappingPairsGPU; - - b3OpenCLArray* m_gpuConstraints; - b3AlignedObjectArray m_cpuConstraints; - - b3AlignedObjectArray m_joints; - int m_constraintUid; - class b3GpuNarrowPhase* m_narrowphase; - b3Vector3 m_gravity; - - b3Config m_config; -}; - -#endif //B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h deleted file mode 100644 index db815d9b31d..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_SOLVER_BODY_H -#define B3_GPU_SOLVER_BODY_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3TransformUtil.h" - -///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision -#ifdef B3_USE_SSE -#define USE_SIMD 1 -#endif // - -///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance. -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuSolverBody -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - // b3Transform m_worldTransformUnused; - b3Vector3 m_deltaLinearVelocity; - b3Vector3 m_deltaAngularVelocity; - b3Vector3 m_angularFactor; - b3Vector3 m_linearFactor; - b3Vector3 m_invMass; - b3Vector3 m_pushVelocity; - b3Vector3 m_turnVelocity; - b3Vector3 m_linearVelocity; - b3Vector3 m_angularVelocity; - - union { - void* m_originalBody; - int m_originalBodyIndex; - }; - - int padding[3]; - - /* - void setWorldTransform(const b3Transform& worldTransform) - { - m_worldTransform = worldTransform; - } - - const b3Transform& getWorldTransform() const - { - return m_worldTransform; - } - */ - B3_FORCE_INLINE void getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const - { - if (m_originalBody) - velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos); - else - velocity.setValue(0, 0, 0); - } - - B3_FORCE_INLINE void getAngularVelocity(b3Vector3 & angVel) const - { - if (m_originalBody) - angVel = m_angularVelocity + m_deltaAngularVelocity; - else - angVel.setValue(0, 0, 0); - } - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_pushVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_turnVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - const b3Vector3& getDeltaLinearVelocity() const - { - return m_deltaLinearVelocity; - } - - const b3Vector3& getDeltaAngularVelocity() const - { - return m_deltaAngularVelocity; - } - - const b3Vector3& getPushVelocity() const - { - return m_pushVelocity; - } - - const b3Vector3& getTurnVelocity() const - { - return m_turnVelocity; - } - - //////////////////////////////////////////////// - ///some internal methods, don't use them - - b3Vector3& internalGetDeltaLinearVelocity() - { - return m_deltaLinearVelocity; - } - - b3Vector3& internalGetDeltaAngularVelocity() - { - return m_deltaAngularVelocity; - } - - const b3Vector3& internalGetAngularFactor() const - { - return m_angularFactor; - } - - const b3Vector3& internalGetInvMass() const - { - return m_invMass; - } - - void internalSetInvMass(const b3Vector3& invMass) - { - m_invMass = invMass; - } - - b3Vector3& internalGetPushVelocity() - { - return m_pushVelocity; - } - - b3Vector3& internalGetTurnVelocity() - { - return m_turnVelocity; - } - - B3_FORCE_INLINE void internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const - { - velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos); - } - - B3_FORCE_INLINE void internalGetAngularVelocity(b3Vector3 & angVel) const - { - angVel = m_angularVelocity + m_deltaAngularVelocity; - } - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude) - { - //if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - void writebackVelocity() - { - //if (m_originalBody>=0) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //m_originalBody->setCompanionId(-1); - } - } - - void writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp) - { - (void)timeStep; - if (m_originalBody) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //correct the position/orientation based on push/turn recovery - b3Transform newTransform; - if (m_pushVelocity[0] != 0.f || m_pushVelocity[1] != 0 || m_pushVelocity[2] != 0 || m_turnVelocity[0] != 0.f || m_turnVelocity[1] != 0 || m_turnVelocity[2] != 0) - { - // b3Quaternion orn = m_worldTransform.getRotation(); - // b3TransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform); - // m_worldTransform = newTransform; - } - //m_worldTransform.setRotation(orn); - //m_originalBody->setCompanionId(-1); - } - } -}; - -#endif //B3_SOLVER_BODY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h deleted file mode 100644 index 7d9eea243ab..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2013 Erwin Coumans http://github.com/erwincoumans/bullet3 - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GPU_SOLVER_CONSTRAINT_H -#define B3_GPU_SOLVER_CONSTRAINT_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" -//#include "b3JacobianEntry.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -//#define NO_FRICTION_TANGENTIALS 1 - -///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints. -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuSolverConstraint -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3Vector3 m_relpos1CrossNormal; - b3Vector3 m_contactNormal; - - b3Vector3 m_relpos2CrossNormal; - //b3Vector3 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - b3Vector3 m_angularComponentA; - b3Vector3 m_angularComponentB; - - mutable b3Scalar m_appliedPushImpulse; - mutable b3Scalar m_appliedImpulse; - int m_padding1; - int m_padding2; - b3Scalar m_friction; - b3Scalar m_jacDiagABInv; - b3Scalar m_rhs; - b3Scalar m_cfm; - - b3Scalar m_lowerLimit; - b3Scalar m_upperLimit; - b3Scalar m_rhsPenetration; - union { - void* m_originalContactPoint; - int m_originalConstraintIndex; - b3Scalar m_unusedPadding4; - }; - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - - enum b3SolverConstraintType - { - B3_SOLVER_CONTACT_1D = 0, - B3_SOLVER_FRICTION_1D - }; -}; - -typedef b3AlignedObjectArray b3GpuConstraintArray; - -#endif //B3_GPU_SOLVER_CONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp deleted file mode 100644 index ccf67da1a82..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ /dev/null @@ -1,1128 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#include "b3Solver.h" - -///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments -bool useNewBatchingKernel = true; -bool gConvertConstraintOnCpu = false; - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" - -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" - -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Common/b3Vector3.h" - -struct SolverDebugInfo -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - int m_valInt4; - int m_valInt5; - int m_valInt6; - int m_valInt7; - - int m_valInt8; - int m_valInt9; - int m_valInt10; - int m_valInt11; - - int m_valInt12; - int m_valInt13; - int m_valInt14; - int m_valInt15; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -}; - -class SolverDeviceInl -{ -public: - struct ParallelSolveData - { - b3OpenCLArray* m_numConstraints; - b3OpenCLArray* m_offsets; - }; -}; - -b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : m_context(ctx), - m_device(device), - m_queue(queue), - m_batchSizes(ctx, queue), - m_nIterations(4) -{ - m_sort32 = new b3RadixSort32CL(ctx, device, queue); - m_scan = new b3PrefixScanCL(ctx, device, queue, B3_SOLVER_N_CELLS); - m_search = new b3BoundSearchCL(ctx, device, queue, B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512); - - m_sortDataBuffer = new b3OpenCLArray(ctx, queue, sortSize); - m_contactBuffer2 = new b3OpenCLArray(ctx, queue); - - m_numConstraints = new b3OpenCLArray(ctx, queue, B3_SOLVER_N_CELLS); - m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_offsets = new b3OpenCLArray(ctx, queue, B3_SOLVER_N_CELLS); - m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; - // const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - { - cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - b3Assert(solverSetup2Prog); - - cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros); - b3Assert(m_solveFrictionKernel); - - m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_solveContactKernel); - - m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros); - b3Assert(m_contactToConstraintKernel); - - m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_setSortDataKernel); - - m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_reorderContactKernel); - - m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_copyConstraintKernel); - } - - { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH); - //cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true); - b3Assert(batchingProg); - - m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros); - b3Assert(m_batchingKernel); - } - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros); - //m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros ); - b3Assert(m_batchingKernelNew); - } -} - -b3Solver::~b3Solver() -{ - delete m_offsets; - delete m_numConstraints; - delete m_sortDataBuffer; - delete m_contactBuffer2; - - delete m_sort32; - delete m_scan; - delete m_search; - - clReleaseKernel(m_batchingKernel); - clReleaseKernel(m_batchingKernelNew); - - clReleaseKernel(m_solveContactKernel); - clReleaseKernel(m_solveFrictionKernel); - - clReleaseKernel(m_contactToConstraintKernel); - clReleaseKernel(m_setSortDataKernel); - clReleaseKernel(m_reorderContactKernel); - clReleaseKernel(m_copyConstraintKernel); -} - -template -static __inline void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - b3Vector3 dLinVelA; - dLinVelA.setZero(); - b3Vector3 dAngVelA; - dAngVelA.setZero(); - b3Vector3 dLinVelB; - dLinVelB.setZero(); - b3Vector3 dAngVelB; - dAngVelB.setZero(); - - for (int ic = 0; ic < 4; ic++) - { - // dont necessary because this makes change to 0 - if (cs.m_jacCoeffInv[ic] == 0.f) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, &linear, &angular0, &angular1); - - float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB) + - cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[ic]); - updated = b3Min(updated, maxRambdaDt[ic]); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if (JACOBI) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - else - { - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - } - } - - if (JACOBI) - { - linVelA += dLinVelA; - angVelA += dAngVelA; - linVelB += dLinVelB; - angVelB += dAngVelB; - } -} - -static __inline void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1(n, tangent[0], tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0] - center; - tangent[0] = cross3(n, r); - tangent[1] = cross3(tangent[0], n); - tangent[0] = normalize3(tangent[0]); - tangent[1] = normalize3(tangent[1]); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for (int i = 0; i < 2; i++) - { - setLinearAndAngular(tangent[i], r0, r1, &linear, &angular0, &angular1); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[i]); - updated = b3Min(updated, maxRambdaDt[i]); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - - { // angular damping for point constraint - b3Vector3 ab = (posB - posA).normalized(); - b3Vector3 ac = (center - posA).normalized(); - if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot(n, angVelA); - float angNB = b3Dot(n, angVelB); - - angVelA -= (angNA * 0.1f) * n; - angVelB -= (angNB * 0.1f) * n; - } - } -} -/* - b3AlignedObjectArray& m_bodies; - b3AlignedObjectArray& m_shapes; - b3AlignedObjectArray& m_constraints; - b3AlignedObjectArray* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; - */ - -struct SolveTask // : public ThreadPool::Task -{ - SolveTask(b3AlignedObjectArray& bodies, b3AlignedObjectArray& shapes, b3AlignedObjectArray& constraints, - int start, int nConstraints, int maxNumBatches, b3AlignedObjectArray* wgUsedBodies, int curWgidx, b3AlignedObjectArray* batchSizes, int cellIndex) - : m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_batchSizes(batchSizes), m_cellIndex(cellIndex), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches) - { - } - - unsigned short int getType() { return 0; } - - void run(int tIdx) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = m_batchSizes->at(m_cellIndex * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - for (int jj = 0; jj < numInBatch; jj++) - { - int i = m_start + offset + jj; - int batchId = m_constraints[i].m_batchIdx; - b3Assert(batchId == ii); - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; - // int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if (!m_solveFriction) - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - solveContact(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt); - } - else - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - float sum = 0; - for (int j = 0; j < 4; j++) - { - sum += m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for (int j = 0; j < 4; j++) - { - maxRambdaDt[j] = frictionCoeff * sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt); - } - } - offset += numInBatch; - } - /* for (int bb=0;bb=0; ic--) - for(int ic=0; ic( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - } - else - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - - } - } - } - */ - } - - b3AlignedObjectArray& m_bodies; - b3AlignedObjectArray& m_shapes; - b3AlignedObjectArray& m_constraints; - b3AlignedObjectArray* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; -}; - -void b3Solver::solveContactConstraintHost(b3OpenCLArray* bodyBuf, b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches, b3AlignedObjectArray* batchSizes) -{ -#if 0 - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; - for (int z=0;z<4;z++) - { - for (int y=0;y<4;y++) - { - for (int x=0;x<4;x++) - { - int newIndex = (x+y*nSplitX+z*nSplitX*nSplitY); - // printf("newIndex=%d\n",newIndex); - - int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - // printf("newIndex=%d\n",newIndex); - } - } - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int cellBatch=0;cellBatch>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - - /*int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - */ - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - // printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - } - } - } -#endif - - b3AlignedObjectArray bodyNative; - bodyBuf->copyToHost(bodyNative); - b3AlignedObjectArray shapeNative; - shapeBuf->copyToHost(shapeNative); - b3AlignedObjectArray constraintNative; - constraint->copyToHost(constraintNative); - - b3AlignedObjectArray numConstraintsHost; - m_numConstraints->copyToHost(numConstraintsHost); - - //printf("------------------------\n"); - b3AlignedObjectArray offsetsHost; - m_offsets->copyToHost(offsetsHost); - static int frame = 0; - bool useBatches = true; - if (useBatches) - { - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - //printf("cell Batch %d\n",cellBatch); - b3AlignedObjectArray usedBodies[B3_SOLVER_N_CELLS]; - for (int i = 0; i < B3_SOLVER_N_CELLS; i++) - { - usedBodies[i].resize(0); - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); - int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); - int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - - if (numConstraintsHost[cellIdx] == 0) - continue; - - //printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - //printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]); - if (zIdx) - { - //printf("?\n"); - } - - if (iter == 0) - { - //printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx); - //printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]); - } - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, usedBodies, wgIdx, batchSizes, cellIdx); - task.m_solveFriction = false; - task.run(0); - } - } - } - - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); - int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); - int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - - if (numConstraintsHost[cellIdx] == 0) - continue; - - //printf("yIdx=%d\n",yIdx); - - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, 0, 0, batchSizes, cellIdx); - task.m_solveFriction = true; - task.run(0); - } - } - } - } - else - { - for (int iter = 0; iter < m_nIterations; iter++) - { - SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); - task.m_solveFriction = false; - task.run(0); - } - - for (int iter = 0; iter < m_nIterations; iter++) - { - SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); - task.m_solveFriction = true; - task.run(0); - } - } - - bodyBuf->copyFromHost(bodyNative); - shapeBuf->copyFromHost(shapeNative); - constraint->copyFromHost(constraintNative); - frame++; -} - -void checkConstraintBatch(const b3OpenCLArray* bodyBuf, - const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, - b3OpenCLArray* m_numConstraints, - b3OpenCLArray* m_offsets, - int batchId) -{ - // b3BufferInfoCL( m_numConstraints->getBufferCL() ), - // b3BufferInfoCL( m_offsets->getBufferCL() ) - - int cellBatch = batchId; - const int nn = B3_SOLVER_N_CELLS; - // int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; - - b3AlignedObjectArray gN; - m_numConstraints->copyToHost(gN); - b3AlignedObjectArray gOffsets; - m_offsets->copyToHost(gOffsets); - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - // int bIdx = batchId; - - b3AlignedObjectArray cpuConstraints; - constraint->copyToHost(cpuConstraints); - - printf("batch = %d\n", batchId); - - int numWorkgroups = nn / B3_SOLVER_N_BATCHES; - b3AlignedObjectArray usedBodies; - - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - printf("wgIdx = %d ", wgIdx); - - int zIdx = (wgIdx / ((nSplitX * nSplitY)) / 2) * 2 + ((cellBatch & 4) >> 2); - int remain = wgIdx % ((nSplitX * nSplitY)); - int yIdx = (remain % (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain / (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - printf("cellIdx=%d\n", cellIdx); - if (gN[cellIdx] == 0) - continue; - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - for (int c = start; c < end; c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - //printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB); - if (usedBodies.findLinearSearch(constraint.m_bodyA) < usedBodies.size()) - { - printf("error?\n"); - } - if (usedBodies.findLinearSearch(constraint.m_bodyB) < usedBodies.size()) - { - printf("error?\n"); - } - } - - for (int c = start; c < end; c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - usedBodies.push_back(constraint.m_bodyA); - usedBodies.push_back(constraint.m_bodyB); - } - } -} - -static bool verify = false; - -void b3Solver::solveContactConstraint(const b3OpenCLArray* bodyBuf, const b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches) -{ - b3Int4 cdata = b3MakeInt4(n, 0, 0, 0); - { - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches; //250; - - int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray gpuDebugInfo(data->m_device, numWorkItems); -#endif - - { - B3_PROFILE("m_batchSolveKernel iterations"); - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - if (verify) - { - checkConstraintBatch(bodyBuf, shapeBuf, constraint, m_numConstraints, m_offsets, ib); - } - -#ifdef DEBUG_ME - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - - cdata.z = ib; - - b3LauncherCL launcher(m_queue, m_solveContactKernel, "m_solveContactKernel"); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - launcher.launch1D(numWorkItems, 64); - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D(num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - for (int i = 0; i < numWorkItems; i++) - { - if (debugInfo[i].m_valInt2 > 0) - { - printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - } - } - - clFinish(m_queue); - } - - cdata.x = 1; - bool applyFriction = true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher(m_queue, m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - - launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64); - } - } - clFinish(m_queue); - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } -} - -void b3Solver::convertToConstraints(const b3OpenCLArray* bodyBuf, - const b3OpenCLArray* shapeBuf, - b3OpenCLArray* contactsIn, b3OpenCLArray* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg) -{ - // b3OpenCLArray* constraintNative =0; - contactCOut->resize(nContacts); - struct CB - { - int m_nContacts; - float m_dt; - float m_positionDrift; - float m_positionConstraintCoeff; - }; - - { - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_dt = cfg.m_dt; - cdata.m_positionDrift = cfg.m_positionDrift; - cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - - if (gConvertConstraintOnCpu) - { - b3AlignedObjectArray gBodies; - bodyBuf->copyToHost(gBodies); - - b3AlignedObjectArray gContact; - contactsIn->copyToHost(gContact); - - b3AlignedObjectArray gShapes; - shapeBuf->copyToHost(gShapes); - - b3AlignedObjectArray gConstraintOut; - gConstraintOut.resize(nContacts); - - B3_PROFILE("cpu contactToConstraintKernel"); - for (int gIdx = 0; gIdx < nContacts; gIdx++) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Float4 posA = gBodies[aIdx].m_pos; - b3Float4 linVelA = gBodies[aIdx].m_linVel; - b3Float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; - - b3Float4 posB = gBodies[bIdx].m_pos; - b3Float4 linVelB = gBodies[bIdx].m_linVel; - b3Float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - - b3ContactConstraint4_t cs; - - setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff, - &cs); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = (b3GpuConstraint4&)cs; - } - - contactCOut->copyFromHost(gConstraintOut); - } - else - { - B3_PROFILE("gpu m_contactToConstraintKernel"); - - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(contactsIn->getBufferCL()), b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(contactCOut->getBufferCL())}; - b3LauncherCL launcher(m_queue, m_contactToConstraintKernel, "m_contactToConstraintKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata ); - - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_dt); - launcher.setConst(cdata.m_positionDrift); - launcher.setConst(cdata.m_positionConstraintCoeff); - - launcher.launch1D(nContacts, 64); - clFinish(m_queue); - } - } -} - -/* -void b3Solver::sortContacts( const b3OpenCLArray* bodyBuf, - b3OpenCLArray* contactsIn, void* additionalData, - int nContacts, const b3Solver::ConstraintCfg& cfg ) -{ - - - - const int sortAlignment = 512; // todo. get this out of sort - if( cfg.m_enableParallelSolve ) - { - - - int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment ); - - b3OpenCLArray* countsNative = m_numConstraints;//BufferUtils::map( data->m_device, &countsHost ); - b3OpenCLArray* offsetsNative = m_offsets;//BufferUtils::map( data->m_device, &offsetsHost ); - - { // 2. set cell idx - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; - }; - - b3Assert( sortSize%64 == 0 ); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = cfg.m_staticIdx; - cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent); - cdata.m_nSplit = B3_SOLVER_N_SPLIT; - - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_setSortDataKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( sortSize, 64 ); - } - - { // 3. sort by cell idx - int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - m_sort32->execute(*m_sortDataBuffer,sortSize); - } - { // 4. find entries - m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, b3BoundSearchCL::COUNT); - - m_scan->execute( *countsNative, *offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT ); - } - - { // 5. sort constraints by cellIdx - // todo. preallocate this -// b3Assert( contactsIn->getType() == TYPE_HOST ); -// b3OpenCLArray* out = BufferUtils::map( data->m_device, contactsIn ); // copying contacts to this buffer - - { - - - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_reorderContactKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - } -// BufferUtils::unmap( out, contactsIn, nContacts ); - } - } - - -} - -*/ -void b3Solver::batchContacts(b3OpenCLArray* contacts, int nContacts, b3OpenCLArray* nNative, b3OpenCLArray* offsetsNative, int staticIdx) -{ - int numWorkItems = 64 * B3_SOLVER_N_CELLS; - { - B3_PROFILE("batch generation"); - - b3Int4 cdata; - cdata.x = nContacts; - cdata.y = 0; - cdata.z = staticIdx; - -#ifdef BATCH_DEBUG - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray gpuDebugInfo(data->m_device, numWorkItems); - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - -#if 0 - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( contacts->getBufferCL() ), - b3BufferInfoCL( m_contactBuffer2->getBufferCL()), - b3BufferInfoCL( nNative->getBufferCL() ), - b3BufferInfoCL( offsetsNative->getBufferCL() ), -#ifdef BATCH_DEBUG - , b3BufferInfoCL(&gpuDebugInfo) -#endif - }; -#endif - - { - m_batchSizes.resize(nNative->size()); - B3_PROFILE("batchingKernel"); - //b3LauncherCL launcher( m_queue, m_batchingKernel); - cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; - - b3LauncherCL launcher(m_queue, k, "*batchingKernel"); - if (!useNewBatchingKernel) - { - launcher.setBuffer(contacts->getBufferCL()); - } - launcher.setBuffer(m_contactBuffer2->getBufferCL()); - launcher.setBuffer(nNative->getBufferCL()); - launcher.setBuffer(offsetsNative->getBufferCL()); - - launcher.setBuffer(m_batchSizes.getBufferCL()); - - //launcher.setConst( cdata ); - launcher.setConst(staticIdx); - - launcher.launch1D(numWorkItems, 64); - //clFinish(m_queue); - //b3AlignedObjectArray batchSizesCPU; - //m_batchSizes.copyToHost(batchSizesCPU); - //printf(".\n"); - } - -#ifdef BATCH_DEBUG - aaaa - b3Contact4* hostContacts = new b3Contact4[nContacts]; - m_contactBuffer->read(hostContacts, nContacts); - clFinish(m_queue); - - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - - for (int i = 0; i < numWorkItems; i++) - { - if (debugInfo[i].m_valInt1 > 0) - { - printf("catch\n"); - } - if (debugInfo[i].m_valInt2 > 0) - { - printf("catch22\n"); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("catch666\n"); - } - - if (debugInfo[i].m_valInt4 > 0) - { - printf("catch777\n"); - } - } - delete[] debugInfo; -#endif //BATCH_DEBUG - } - - // copy buffer to buffer - //b3Assert(m_contactBuffer->size()==nContacts); - //contacts->copyFromOpenCLArray( *m_contactBuffer); - //clFinish(m_queue);//needed? -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.h deleted file mode 100644 index ee63531d78d..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#ifndef __ADL_SOLVER_H -#define __ADL_SOLVER_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "b3GpuConstraint4.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" - -#define B3NEXTMULTIPLEOF(num, alignment) (((num) / (alignment) + (((num) % (alignment) == 0) ? 0 : 1)) * (alignment)) - -enum -{ - B3_SOLVER_N_SPLIT_X = 8, //16,//4, - B3_SOLVER_N_SPLIT_Y = 4, //16,//4, - B3_SOLVER_N_SPLIT_Z = 8, //, - B3_SOLVER_N_CELLS = B3_SOLVER_N_SPLIT_X * B3_SOLVER_N_SPLIT_Y * B3_SOLVER_N_SPLIT_Z, - B3_SOLVER_N_BATCHES = 8, //4,//8,//4, - B3_MAX_NUM_BATCHES = 128, -}; - -class b3SolverBase -{ -public: - struct ConstraintCfg - { - ConstraintCfg(float dt = 0.f) : m_positionDrift(0.005f), m_positionConstraintCoeff(0.2f), m_dt(dt), m_staticIdx(-1) {} - - float m_positionDrift; - float m_positionConstraintCoeff; - float m_dt; - bool m_enableParallelSolve; - float m_batchCellSize; - int m_staticIdx; - }; -}; - -class b3Solver : public b3SolverBase -{ -public: - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3OpenCLArray* m_numConstraints; - b3OpenCLArray* m_offsets; - b3OpenCLArray m_batchSizes; - - int m_nIterations; - cl_kernel m_batchingKernel; - cl_kernel m_batchingKernelNew; - cl_kernel m_solveContactKernel; - cl_kernel m_solveFrictionKernel; - cl_kernel m_contactToConstraintKernel; - cl_kernel m_setSortDataKernel; - cl_kernel m_reorderContactKernel; - cl_kernel m_copyConstraintKernel; - - class b3RadixSort32CL* m_sort32; - class b3BoundSearchCL* m_search; - class b3PrefixScanCL* m_scan; - - b3OpenCLArray* m_sortDataBuffer; - b3OpenCLArray* m_contactBuffer2; - - enum - { - DYNAMIC_CONTACT_ALLOCATION_THRESHOLD = 2000000, - }; - - b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); - - virtual ~b3Solver(); - - void solveContactConstraint(const b3OpenCLArray* bodyBuf, const b3OpenCLArray* inertiaBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches); - - void solveContactConstraintHost(b3OpenCLArray* bodyBuf, b3OpenCLArray* shapeBuf, - b3OpenCLArray* constraint, void* additionalData, int n, int maxNumBatches, b3AlignedObjectArray* batchSizes); - - void convertToConstraints(const b3OpenCLArray* bodyBuf, - const b3OpenCLArray* shapeBuf, - b3OpenCLArray* contactsIn, b3OpenCLArray* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg); - - void batchContacts(b3OpenCLArray* contacts, int nContacts, b3OpenCLArray* n, b3OpenCLArray* offsets, int staticIdx); -}; - -#endif //__ADL_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl deleted file mode 100644 index 3b891b863d4..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl +++ /dev/null @@ -1,353 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -#define WG_SIZE 64 - - - - - -typedef struct -{ - int m_n; - int m_start; - int m_staticIdx; - int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_a; - int m_b; - u32 m_idx; -}Elem; - -#define STACK_SIZE (WG_SIZE*10) -//#define STACK_SIZE (WG_SIZE) -#define RING_SIZE 1024 -#define RING_SIZE_MASK (RING_SIZE-1) -#define CHECK_SIZE (WG_SIZE) - - -#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd) -#define RING_END ldsTmp - -u32 readBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - return buff[bufIdx] & (1<> bitIdx)&1) == 0; -} - -// batching on the GPU -__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut, - __global const u32* gN, __global const u32* gStart, __global int* batchSizes, - int m_staticIdx ) -{ - __local u32 ldsStackIdx[STACK_SIZE]; - __local u32 ldsStackEnd; - __local Elem ldsRingElem[RING_SIZE]; - __local u32 ldsRingEnd; - __local u32 ldsTmp; - __local u32 ldsCheckBuffer[CHECK_SIZE]; - __local u32 ldsFixedBuffer[CHECK_SIZE]; - __local u32 ldsGEnd; - __local u32 ldsDstEnd; - - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - - const int m_n = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - - if( lIdx == 0 ) - { - ldsRingEnd = 0; - ldsGEnd = 0; - ldsStackEnd = 0; - ldsDstEnd = m_start; - } - - - -// while(1) -//was 250 - int ie=0; - int maxBatch = 0; - for(ie=0; ie<50; ie++) - { - ldsFixedBuffer[lIdx] = 0; - - for(int giter=0; giter<4; giter++) - { - int ringCap = GET_RING_CAPACITY; - - // 1. fill ring - if( ldsGEnd < m_n ) - { - while( ringCap > WG_SIZE ) - { - if( ldsGEnd >= m_n ) break; - if( lIdx < ringCap - WG_SIZE ) - { - int srcIdx; - AtomInc1( ldsGEnd, srcIdx ); - if( srcIdx < m_n ) - { - int dstIdx; - AtomInc1( ldsRingEnd, dstIdx ); - - int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit; - int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit; - ldsRingElem[dstIdx].m_a = (a>b)? b:a; - ldsRingElem[dstIdx].m_b = (a>b)? a:b; - ldsRingElem[dstIdx].m_idx = srcIdx; - } - } - ringCap = GET_RING_CAPACITY; - } - } - - GROUP_LDS_BARRIER; - - // 2. fill stack - __local Elem* dst = ldsRingElem; - if( lIdx == 0 ) RING_END = 0; - - int srcIdx=lIdx; - int end = ldsRingEnd; - - { - for(int ii=0; ii1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "#define WG_SIZE 64\n" - "typedef struct \n" - "{\n" - " int m_n;\n" - " int m_start;\n" - " int m_staticIdx;\n" - " int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct \n" - "{\n" - " int m_a;\n" - " int m_b;\n" - " u32 m_idx;\n" - "}Elem;\n" - "#define STACK_SIZE (WG_SIZE*10)\n" - "//#define STACK_SIZE (WG_SIZE)\n" - "#define RING_SIZE 1024\n" - "#define RING_SIZE_MASK (RING_SIZE-1)\n" - "#define CHECK_SIZE (WG_SIZE)\n" - "#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n" - "#define RING_END ldsTmp\n" - "u32 readBuf(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - " return buff[bufIdx] & (1<> bitIdx)&1) == 0;\n" - "}\n" - "// batching on the GPU\n" - "__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,\n" - " __global const u32* gN, __global const u32* gStart, __global int* batchSizes, \n" - " int m_staticIdx )\n" - "{\n" - " __local u32 ldsStackIdx[STACK_SIZE];\n" - " __local u32 ldsStackEnd;\n" - " __local Elem ldsRingElem[RING_SIZE];\n" - " __local u32 ldsRingEnd;\n" - " __local u32 ldsTmp;\n" - " __local u32 ldsCheckBuffer[CHECK_SIZE];\n" - " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" - " __local u32 ldsGEnd;\n" - " __local u32 ldsDstEnd;\n" - " int wgIdx = GET_GROUP_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " \n" - " const int m_n = gN[wgIdx];\n" - " const int m_start = gStart[wgIdx];\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsRingEnd = 0;\n" - " ldsGEnd = 0;\n" - " ldsStackEnd = 0;\n" - " ldsDstEnd = m_start;\n" - " }\n" - " \n" - " \n" - " \n" - "// while(1)\n" - "//was 250\n" - " int ie=0;\n" - " int maxBatch = 0;\n" - " for(ie=0; ie<50; ie++)\n" - " {\n" - " ldsFixedBuffer[lIdx] = 0;\n" - " for(int giter=0; giter<4; giter++)\n" - " {\n" - " int ringCap = GET_RING_CAPACITY;\n" - " \n" - " // 1. fill ring\n" - " if( ldsGEnd < m_n )\n" - " {\n" - " while( ringCap > WG_SIZE )\n" - " {\n" - " if( ldsGEnd >= m_n ) break;\n" - " if( lIdx < ringCap - WG_SIZE )\n" - " {\n" - " int srcIdx;\n" - " AtomInc1( ldsGEnd, srcIdx );\n" - " if( srcIdx < m_n )\n" - " {\n" - " int dstIdx;\n" - " AtomInc1( ldsRingEnd, dstIdx );\n" - " \n" - " int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n" - " int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n" - " ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" - " ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" - " ldsRingElem[dstIdx].m_idx = srcIdx;\n" - " }\n" - " }\n" - " ringCap = GET_RING_CAPACITY;\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " // 2. fill stack\n" - " __local Elem* dst = ldsRingElem;\n" - " if( lIdx == 0 ) RING_END = 0;\n" - " int srcIdx=lIdx;\n" - " int end = ldsRingEnd;\n" - " {\n" - " for(int ii=0; ii> bitIdx)&1) == 0; -} - - -// batching on the GPU -__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx ) -{ - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - const int numConstraints = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - b3Contact4Data_t tmp; - - __local u32 ldsFixedBuffer[CHECK_SIZE]; - - - - - - if( lIdx == 0 ) - { - - - __global struct b3Contact4Data* cs = &gConstraints[m_start]; - - - int numValidConstraints = 0; - int batchIdx = 0; - - while( numValidConstraints < numConstraints) - { - int nCurrentBatch = 0; - // clear flag - - for(int i=0; i1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "#define SIMD_WIDTH 64\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "#define WG_SIZE 64\n" - "typedef struct \n" - "{\n" - " int m_n;\n" - " int m_start;\n" - " int m_staticIdx;\n" - " int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct \n" - "{\n" - " int m_a;\n" - " int m_b;\n" - " u32 m_idx;\n" - "}Elem;\n" - "// batching on the GPU\n" - "__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" - "{\n" - " int wgIdx = GET_GROUP_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " \n" - " const int m_n = gN[wgIdx];\n" - " const int m_start = gStart[wgIdx];\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " for (int i=0;i> bitIdx)&1) == 0;\n" - "}\n" - "// batching on the GPU\n" - "__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx )\n" - "{\n" - " int wgIdx = GET_GROUP_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " const int numConstraints = gN[wgIdx];\n" - " const int m_start = gStart[wgIdx];\n" - " b3Contact4Data_t tmp;\n" - " \n" - " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " \n" - " \n" - " __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n" - " \n" - " \n" - " int numValidConstraints = 0;\n" - " int batchIdx = 0;\n" - " while( numValidConstraints < numConstraints)\n" - " {\n" - " int nCurrentBatch = 0;\n" - " // clear flag\n" - " \n" - " for(int i=0; i1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" - "{\n" - " \n" - " if (bodies[nodeID].m_invMass != 0.f)\n" - " {\n" - " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" - " //angular velocity\n" - " {\n" - " b3Float4 axis;\n" - " //add some hardcoded angular damping\n" - " bodies[nodeID].m_angVel.x *= angularDamping;\n" - " bodies[nodeID].m_angVel.y *= angularDamping;\n" - " bodies[nodeID].m_angVel.z *= angularDamping;\n" - " \n" - " b3Float4 angvel = bodies[nodeID].m_angVel;\n" - " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" - " \n" - " //limit the angular motion\n" - " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" - " {\n" - " fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" - " }\n" - " if(fAngle < 0.001f)\n" - " {\n" - " // use Taylor's expansions of sync function\n" - " axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" - " }\n" - " else\n" - " {\n" - " // sync(fAngle) = sin(c*fAngle)/t\n" - " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" - " }\n" - " \n" - " b3Quat dorn;\n" - " dorn.x = axis.x;\n" - " dorn.y = axis.y;\n" - " dorn.z = axis.z;\n" - " dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" - " b3Quat orn0 = bodies[nodeID].m_quat;\n" - " b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" - " predictedOrn = b3QuatNormalized(predictedOrn);\n" - " bodies[nodeID].m_quat=predictedOrn;\n" - " }\n" - " //linear velocity \n" - " bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n" - " \n" - " //apply gravity\n" - " bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" - " \n" - " }\n" - " \n" - "}\n" - "inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" - "{\n" - " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" - " \n" - " if( (body->m_invMass != 0.f))\n" - " {\n" - " //angular velocity\n" - " {\n" - " b3Float4 axis;\n" - " //add some hardcoded angular damping\n" - " body->m_angVel.x *= angularDamping;\n" - " body->m_angVel.y *= angularDamping;\n" - " body->m_angVel.z *= angularDamping;\n" - " \n" - " b3Float4 angvel = body->m_angVel;\n" - " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" - " //limit the angular motion\n" - " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" - " {\n" - " fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" - " }\n" - " if(fAngle < 0.001f)\n" - " {\n" - " // use Taylor's expansions of sync function\n" - " axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" - " }\n" - " else\n" - " {\n" - " // sync(fAngle) = sin(c*fAngle)/t\n" - " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" - " }\n" - " b3Quat dorn;\n" - " dorn.x = axis.x;\n" - " dorn.y = axis.y;\n" - " dorn.z = axis.z;\n" - " dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" - " b3Quat orn0 = body->m_quat;\n" - " b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" - " predictedOrn = b3QuatNormalized(predictedOrn);\n" - " body->m_quat=predictedOrn;\n" - " }\n" - " //apply gravity\n" - " body->m_linVel += gravityAcceleration * timeStep;\n" - " //linear velocity \n" - " body->m_pos += body->m_linVel * timeStep;\n" - " \n" - " }\n" - " \n" - "}\n" - "__kernel void \n" - " integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" - "{\n" - " int nodeID = get_global_id(0);\n" - " \n" - " if( nodeID < numNodes)\n" - " {\n" - " integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl deleted file mode 100644 index 7f5dabe274d..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl +++ /dev/null @@ -1,877 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define B3_CONSTRAINT_FLAG_ENABLED 1 - -#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3 -#define B3_GPU_FIXED_CONSTRAINT_TYPE 4 - -#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails -#define B3_INFINITY 1e30f - -#define mymake_float4 (float4) - - -__inline float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - -typedef float4 Quaternion; - - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - - - -typedef struct -{ - Matrix3x3 m_invInertiaWorld; - Matrix3x3 m_initInvInertia; -} BodyInertia; - - -typedef struct -{ - Matrix3x3 m_basis;//orientation - float4 m_origin;//transform -}b3Transform; - -typedef struct -{ -// b3Transform m_worldTransformUnused; - float4 m_deltaLinearVelocity; - float4 m_deltaAngularVelocity; - float4 m_angularFactor; - float4 m_linearFactor; - float4 m_invMass; - float4 m_pushVelocity; - float4 m_turnVelocity; - float4 m_linearVelocity; - float4 m_angularVelocity; - - union - { - void* m_originalBody; - int m_originalBodyIndex; - }; - int padding[3]; - -} b3GpuSolverBody; - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} b3RigidBodyCL; - -typedef struct -{ - - float4 m_relpos1CrossNormal; - float4 m_contactNormal; - - float4 m_relpos2CrossNormal; - //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - float4 m_angularComponentA; - float4 m_angularComponentB; - - float m_appliedPushImpulse; - float m_appliedImpulse; - int m_padding1; - int m_padding2; - float m_friction; - float m_jacDiagABInv; - float m_rhs; - float m_cfm; - - float m_lowerLimit; - float m_upperLimit; - float m_rhsPenetration; - int m_originalConstraint; - - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - -} b3SolverConstraint; - -typedef struct -{ - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - int m_originalConstraintIndex; - int m_batchId; -} b3BatchConstraint; - - - - - - -typedef struct -{ - int m_constraintType; - int m_rbA; - int m_rbB; - float m_breakingImpulseThreshold; - - float4 m_pivotInA; - float4 m_pivotInB; - Quaternion m_relTargetAB; - - int m_flags; - int m_padding[3]; -} b3GpuGenericConstraint; - - -/*b3Transform getWorldTransform(b3RigidBodyCL* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -}*/ - - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = mymake_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - - -__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude) -{ - body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor; - body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor); -} - - -void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c) -{ - float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm; - float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity); - float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity); - - deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv; - - float sum = c->m_appliedImpulse + deltaImpulse; - if (sum < c->m_lowerLimit) - { - deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse; - c->m_appliedImpulse = c->m_lowerLimit; - } - else if (sum > c->m_upperLimit) - { - deltaImpulse = c->m_upperLimit-c->m_appliedImpulse; - c->m_appliedImpulse = c->m_upperLimit; - } - else - { - c->m_appliedImpulse = sum; - } - - internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse); - internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse); - -} - -__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, - __global b3BatchConstraint* batchConstraints, - __global b3SolverConstraint* rows, - __global unsigned int* numConstraintRowsInfo1, - __global unsigned int* rowOffsets, - __global b3GpuGenericConstraint* constraints, - int batchOffset, - int numConstraintsInBatch - ) -{ - int b = get_global_id(0); - if (b>=numConstraintsInBatch) - return; - - __global b3BatchConstraint* c = &batchConstraints[b+batchOffset]; - int originalConstraintIndex = c->m_originalConstraintIndex; - if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED) - { - int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex]; - int rowOffset = rowOffsets[originalConstraintIndex]; - for (int jj=0;jjm_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint); - } - } -}; - -__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies) -{ - int i = get_global_id(0); - if (i>=numBodies) - return; - - __global b3GpuSolverBody* solverBody = &solverBodies[i]; - __global b3RigidBodyCL* bodyCL = &bodiesCL[i]; - - solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f); - solverBody->m_originalBodyIndex = i; - solverBody->m_angularFactor = (float4)(1,1,1,0); - solverBody->m_linearFactor = (float4) (1,1,1,0); - solverBody->m_linearVelocity = bodyCL->m_linVel; - solverBody->m_angularVelocity = bodyCL->m_angVel; -} - -__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints) -{ - int cid = get_global_id(0); - if (cid>=numConstraints) - return; - int numRows = numConstraintRows[cid]; - if (numRows) - { - for (int i=0;i= breakingThreshold) - { - constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED; - } - } - } -} - - - -__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints) -{ - int i = get_global_id(0); - if (i>=numConstraints) - return; - - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - switch (constraint->m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - infos[i] = 3; - break; - } - case B3_GPU_FIXED_CONSTRAINT_TYPE: - { - infos[i] = 6; - break; - } - default: - { - } - } -} - -__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, - __global b3BatchConstraint* batchConstraints, - __global b3GpuGenericConstraint* constraints, - __global b3RigidBodyCL* bodies, - int numConstraints) -{ - int i = get_global_id(0); - if (i>=numConstraints) - return; - - int rbA = constraints[i].m_rbA; - int rbB = constraints[i].m_rbB; - - batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA; - batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB; - batchConstraints[i].m_batchId = -1; - batchConstraints[i].m_originalConstraintIndex = i; - -} - - - - -typedef struct -{ - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). - float fps,erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - union - { - __global float4* m_J1linearAxisFloat4; - __global float* m_J1linearAxis; - }; - union - { - __global float4* m_J1angularAxisFloat4; - __global float* m_J1angularAxis; - - }; - union - { - __global float4* m_J2linearAxisFloat4; - __global float* m_J2linearAxis; - }; - union - { - __global float4* m_J2angularAxisFloat4; - __global float* m_J2angularAxis; - }; - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - __global float* m_constraintError; - __global float* cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). - __global float* m_lowerLimit; - __global float* m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - __global int *findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - float m_damping; -} b3GpuConstraintInfo2; - - -void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2) -{ - *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f); - *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f); - *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f); -} - - -void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies) -{ - float4 posA = bodies[constraint->m_rbA].m_pos; - Quaternion rotA = bodies[constraint->m_rbA].m_quat; - - float4 posB = bodies[constraint->m_rbB].m_pos; - Quaternion rotB = bodies[constraint->m_rbB].m_quat; - - - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip+1] = 1; - info->m_J1linearAxis[2*info->rowskip+2] = 1; - - float4 a1 = qtRotate(rotA,constraint->m_pivotInA); - - { - __global float4* angular0 = (__global float4*)(info->m_J1angularAxis); - __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip); - __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip); - float4 a1neg = -a1; - getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2); - } - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip+1] = -1; - info->m_J2linearAxis[2*info->rowskip+2] = -1; - } - - float4 a2 = qtRotate(rotB,constraint->m_pivotInB); - - { - // float4 a2n = -a2; - __global float4* angular0 = (__global float4*)(info->m_J2angularAxis); - __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip); - __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip); - getSkewSymmetricMatrix(a2,angular0,angular1,angular2); - } - - // set right hand side -// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp; - float currERP = info->erp; - - float k = info->fps * currERP; - int j; - float4 result = a2 + posB - a1 - posA; - float* resultPtr = &result; - - for (j=0; j<3; j++) - { - info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]); - } -} - -Quaternion nearest( Quaternion first, Quaternion qd) -{ - Quaternion diff,sum; - diff = first- qd; - sum = first + qd; - - if( dot(diff,diff) < dot(sum,sum) ) - return qd; - return (-qd); -} - -float b3Acos(float x) -{ - if (x<-1) - x=-1; - if (x>1) - x=1; - return acos(x); -} - -float getAngle(Quaternion orn) -{ - if (orn.w>=1.f) - orn.w=1.f; - float s = 2.f * b3Acos(orn.w); - return s; -} - -void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle) -{ - Quaternion orn1 = nearest(orn0,orn1a); - - Quaternion dorn = qtMul(orn1,qtInvert(orn0)); - *angle = getAngle(dorn); - *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f); - - //check for axis length - float len = dot3F4(*axis,*axis); - if (len < FLT_EPSILON*FLT_EPSILON) - *axis = (float4)(1,0,0,0); - else - *axis /= sqrt(len); -} - - - -void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row) -{ - Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat; - Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat; - - int s = info->rowskip; - int start_index = start_row * s; - - // 3 rows to make body rotations equal - info->m_J1angularAxis[start_index] = 1; - info->m_J1angularAxis[start_index + s + 1] = 1; - info->m_J1angularAxis[start_index + s*2+2] = 1; - if ( info->m_J2angularAxis) - { - info->m_J2angularAxis[start_index] = -1; - info->m_J2angularAxis[start_index + s+1] = -1; - info->m_J2angularAxis[start_index + s*2+2] = -1; - } - - float currERP = info->erp; - float k = info->fps * currERP; - float4 diff; - float angle; - float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB)); - - calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle); - diff*=-angle; - - float* resultPtr = &diff; - - for (int j=0; j<3; j++) - { - info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j]; - } - - -} - - -__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies) -{ - int i = get_global_id(0); - if (i>=numBodies) - return; - - if (bodies[i].m_invMass) - { -// if (length(solverBodies[i].m_deltaLinearVelocity)=numConstraints) - return; - - //for now, always initialize the batch info - int info1 = infos[i]; - - __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]]; - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA]; - __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB]; - - int solverBodyIdA = constraint->m_rbA; - int solverBodyIdB = constraint->m_rbB; - - __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA]; - __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB]; - - - if (rbA->m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } else - { -// if (!solverBodyIdA) -// m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } - - if (rbB->m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } else - { -// if (!solverBodyIdB) -// m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } - - if (info1) - { - int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; -// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations) - // m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - - int j; - for ( j=0;jm_deltaLinearVelocity = (float4)(0,0,0,0); - bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0); - bodyAPtr->m_pushVelocity = (float4)(0,0,0,0); - bodyAPtr->m_turnVelocity = (float4)(0,0,0,0); - bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0); - bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0); - bodyBPtr->m_pushVelocity = (float4)(0,0,0,0); - bodyBPtr->m_turnVelocity = (float4)(0,0,0,0); - - int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this - - - - - b3GpuConstraintInfo2 info2; - info2.fps = 1.f/timeStep; - info2.erp = globalErp; - info2.m_J1linearAxisFloat4 = ¤tConstraintRow->m_contactNormal; - info2.m_J1angularAxisFloat4 = ¤tConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxisFloat4 = 0; - info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this - - ///the size of b3SolverConstraint needs be a multiple of float -// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint)); - info2.m_constraintError = ¤tConstraintRow->m_rhs; - currentConstraintRow->m_cfm = globalCfm; - info2.m_damping = globalDamping; - info2.cfm = ¤tConstraintRow->m_cfm; - info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; - info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; - info2.m_numIterations = globalNumIterations; - - switch (constraint->m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - getInfo2Point2Point(constraint,&info2,bodies); - break; - } - case B3_GPU_FIXED_CONSTRAINT_TYPE: - { - getInfo2Point2Point(constraint,&info2,bodies); - - getInfo2FixedOrientation(constraint,&info2,bodies,3); - - break; - } - - default: - { - } - } - - ///finalize the constraint setup - for ( j=0;jm_upperLimit>=constraint->m_breakingImpulseThreshold) - { - solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold; - } - - if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold) - { - solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold; - } - -// solverConstraint->m_originalContactPoint = constraint; - - Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld; - { - - //float4 angularFactorA(1,1,1); - float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal; - solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA; - } - - Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld; - { - - float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal; - solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor(); - } - - { - //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal - //because it gets multiplied iMJlB - float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass; - float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA); - float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal? - float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB); - - float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal); - sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal); - sum += dot3F4(iMJlB,solverConstraint->m_contactNormal); - sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal); - float fsum = fabs(sum); - if (fsum>FLT_EPSILON) - { - solverConstraint->m_jacDiagABInv = 1.f/sum; - } else - { - solverConstraint->m_jacDiagABInv = 0.f; - } - } - - - ///fix rhs - ///todo: add force/torque accelerators - { - float rel_vel; - float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel); - float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel); - - rel_vel = vel1Dotn+vel2Dotn; - - float restitution = 0.f; - float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2 - float velocityError = restitution - rel_vel * info2.m_damping; - float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv; - float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv; - solverConstraint->m_rhs = penetrationImpulse+velocityImpulse; - solverConstraint->m_appliedImpulse = 0.f; - - } - } - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h deleted file mode 100644 index c94b55851e1..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h +++ /dev/null @@ -1,720 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveConstraintRowsCL = - "/*\n" - "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#define B3_CONSTRAINT_FLAG_ENABLED 1\n" - "#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n" - "#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n" - "#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n" - "#define B3_INFINITY 1e30f\n" - "#define mymake_float4 (float4)\n" - "__inline float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = mymake_float4(a.xyz,0.f);\n" - " float4 b1 = mymake_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "typedef float4 Quaternion;\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertiaWorld;\n" - " Matrix3x3 m_initInvInertia;\n" - "} BodyInertia;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_basis;//orientation\n" - " float4 m_origin;//transform\n" - "}b3Transform;\n" - "typedef struct\n" - "{\n" - "// b3Transform m_worldTransformUnused;\n" - " float4 m_deltaLinearVelocity;\n" - " float4 m_deltaAngularVelocity;\n" - " float4 m_angularFactor;\n" - " float4 m_linearFactor;\n" - " float4 m_invMass;\n" - " float4 m_pushVelocity;\n" - " float4 m_turnVelocity;\n" - " float4 m_linearVelocity;\n" - " float4 m_angularVelocity;\n" - " union \n" - " {\n" - " void* m_originalBody;\n" - " int m_originalBodyIndex;\n" - " };\n" - " int padding[3];\n" - "} b3GpuSolverBody;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " unsigned int m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} b3RigidBodyCL;\n" - "typedef struct\n" - "{\n" - " float4 m_relpos1CrossNormal;\n" - " float4 m_contactNormal;\n" - " float4 m_relpos2CrossNormal;\n" - " //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n" - " float4 m_angularComponentA;\n" - " float4 m_angularComponentB;\n" - " \n" - " float m_appliedPushImpulse;\n" - " float m_appliedImpulse;\n" - " int m_padding1;\n" - " int m_padding2;\n" - " float m_friction;\n" - " float m_jacDiagABInv;\n" - " float m_rhs;\n" - " float m_cfm;\n" - " \n" - " float m_lowerLimit;\n" - " float m_upperLimit;\n" - " float m_rhsPenetration;\n" - " int m_originalConstraint;\n" - " int m_overrideNumSolverIterations;\n" - " int m_frictionIndex;\n" - " int m_solverBodyIdA;\n" - " int m_solverBodyIdB;\n" - "} b3SolverConstraint;\n" - "typedef struct \n" - "{\n" - " int m_bodyAPtrAndSignBit;\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_originalConstraintIndex;\n" - " int m_batchId;\n" - "} b3BatchConstraint;\n" - "typedef struct \n" - "{\n" - " int m_constraintType;\n" - " int m_rbA;\n" - " int m_rbB;\n" - " float m_breakingImpulseThreshold;\n" - " float4 m_pivotInA;\n" - " float4 m_pivotInB;\n" - " Quaternion m_relTargetAB;\n" - " int m_flags;\n" - " int m_padding[3];\n" - "} b3GpuGenericConstraint;\n" - "/*b3Transform getWorldTransform(b3RigidBodyCL* rb)\n" - "{\n" - " b3Transform newTrans;\n" - " newTrans.setOrigin(rb->m_pos);\n" - " newTrans.setRotation(rb->m_quat);\n" - " return newTrans;\n" - "}*/\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " v = mymake_float4(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n" - "{\n" - " body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n" - " body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n" - "}\n" - "void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n" - "{\n" - " float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n" - " float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n" - " float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n" - " deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n" - " deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n" - " float sum = c->m_appliedImpulse + deltaImpulse;\n" - " if (sum < c->m_lowerLimit)\n" - " {\n" - " deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse;\n" - " c->m_appliedImpulse = c->m_lowerLimit;\n" - " }\n" - " else if (sum > c->m_upperLimit) \n" - " {\n" - " deltaImpulse = c->m_upperLimit-c->m_appliedImpulse;\n" - " c->m_appliedImpulse = c->m_upperLimit;\n" - " }\n" - " else\n" - " {\n" - " c->m_appliedImpulse = sum;\n" - " }\n" - " internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n" - " internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n" - "}\n" - "__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" - " __global b3BatchConstraint* batchConstraints,\n" - " __global b3SolverConstraint* rows,\n" - " __global unsigned int* numConstraintRowsInfo1, \n" - " __global unsigned int* rowOffsets,\n" - " __global b3GpuGenericConstraint* constraints,\n" - " int batchOffset,\n" - " int numConstraintsInBatch\n" - " )\n" - "{\n" - " int b = get_global_id(0);\n" - " if (b>=numConstraintsInBatch)\n" - " return;\n" - " __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" - " int originalConstraintIndex = c->m_originalConstraintIndex;\n" - " if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" - " {\n" - " int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];\n" - " int rowOffset = rowOffsets[originalConstraintIndex];\n" - " for (int jj=0;jjm_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" - " }\n" - " }\n" - "};\n" - "__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numBodies)\n" - " return;\n" - " __global b3GpuSolverBody* solverBody = &solverBodies[i];\n" - " __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n" - " solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f);\n" - " solverBody->m_originalBodyIndex = i;\n" - " solverBody->m_angularFactor = (float4)(1,1,1,0);\n" - " solverBody->m_linearFactor = (float4) (1,1,1,0);\n" - " solverBody->m_linearVelocity = bodyCL->m_linVel;\n" - " solverBody->m_angularVelocity = bodyCL->m_angVel;\n" - "}\n" - "__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" - "{\n" - " int cid = get_global_id(0);\n" - " if (cid>=numConstraints)\n" - " return;\n" - " int numRows = numConstraintRows[cid];\n" - " if (numRows)\n" - " {\n" - " for (int i=0;i= breakingThreshold)\n" - " {\n" - " constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConstraints)\n" - " return;\n" - " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" - " switch (constraint->m_constraintType)\n" - " {\n" - " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" - " {\n" - " infos[i] = 3;\n" - " break;\n" - " }\n" - " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" - " {\n" - " infos[i] = 6;\n" - " break;\n" - " }\n" - " default:\n" - " {\n" - " }\n" - " }\n" - "}\n" - "__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" - " __global b3BatchConstraint* batchConstraints, \n" - " __global b3GpuGenericConstraint* constraints,\n" - " __global b3RigidBodyCL* bodies,\n" - " int numConstraints)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConstraints)\n" - " return;\n" - " int rbA = constraints[i].m_rbA;\n" - " int rbB = constraints[i].m_rbB;\n" - " batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA;\n" - " batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB;\n" - " batchConstraints[i].m_batchId = -1;\n" - " batchConstraints[i].m_originalConstraintIndex = i;\n" - "}\n" - "typedef struct\n" - "{\n" - " // integrator parameters: frames per second (1/stepsize), default error\n" - " // reduction parameter (0..1).\n" - " float fps,erp;\n" - " // for the first and second body, pointers to two (linear and angular)\n" - " // n*3 jacobian sub matrices, stored by rows. these matrices will have\n" - " // been initialized to 0 on entry. if the second body is zero then the\n" - " // J2xx pointers may be 0.\n" - " union \n" - " {\n" - " __global float4* m_J1linearAxisFloat4;\n" - " __global float* m_J1linearAxis;\n" - " };\n" - " union\n" - " {\n" - " __global float4* m_J1angularAxisFloat4;\n" - " __global float* m_J1angularAxis;\n" - " };\n" - " union\n" - " {\n" - " __global float4* m_J2linearAxisFloat4;\n" - " __global float* m_J2linearAxis;\n" - " };\n" - " union\n" - " {\n" - " __global float4* m_J2angularAxisFloat4;\n" - " __global float* m_J2angularAxis;\n" - " };\n" - " // elements to jump from one row to the next in J's\n" - " int rowskip;\n" - " // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n" - " // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n" - " // set to a constant value (typically very small or zero) value on entry.\n" - " __global float* m_constraintError;\n" - " __global float* cfm;\n" - " // lo and hi limits for variables (set to -/+ infinity on entry).\n" - " __global float* m_lowerLimit;\n" - " __global float* m_upperLimit;\n" - " // findex vector for variables. see the LCP solver interface for a\n" - " // description of what this does. this is set to -1 on entry.\n" - " // note that the returned indexes are relative to the first index of\n" - " // the constraint.\n" - " __global int *findex;\n" - " // number of solver iterations\n" - " int m_numIterations;\n" - " //damping of the velocity\n" - " float m_damping;\n" - "} b3GpuConstraintInfo2;\n" - "void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n" - "{\n" - " *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n" - " *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f);\n" - " *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n" - "}\n" - "void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n" - "{\n" - " float4 posA = bodies[constraint->m_rbA].m_pos;\n" - " Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n" - " float4 posB = bodies[constraint->m_rbB].m_pos;\n" - " Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n" - " // anchor points in global coordinates with respect to body PORs.\n" - " \n" - " // set jacobian\n" - " info->m_J1linearAxis[0] = 1;\n" - " info->m_J1linearAxis[info->rowskip+1] = 1;\n" - " info->m_J1linearAxis[2*info->rowskip+2] = 1;\n" - " float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n" - " {\n" - " __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n" - " __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n" - " __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip);\n" - " float4 a1neg = -a1;\n" - " getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2);\n" - " }\n" - " if (info->m_J2linearAxis)\n" - " {\n" - " info->m_J2linearAxis[0] = -1;\n" - " info->m_J2linearAxis[info->rowskip+1] = -1;\n" - " info->m_J2linearAxis[2*info->rowskip+2] = -1;\n" - " }\n" - " \n" - " float4 a2 = qtRotate(rotB,constraint->m_pivotInB);\n" - " \n" - " {\n" - " // float4 a2n = -a2;\n" - " __global float4* angular0 = (__global float4*)(info->m_J2angularAxis);\n" - " __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip);\n" - " __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip);\n" - " getSkewSymmetricMatrix(a2,angular0,angular1,angular2);\n" - " }\n" - " \n" - " // set right hand side\n" - "// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;\n" - " float currERP = info->erp;\n" - " float k = info->fps * currERP;\n" - " int j;\n" - " float4 result = a2 + posB - a1 - posA;\n" - " float* resultPtr = &result;\n" - " for (j=0; j<3; j++)\n" - " {\n" - " info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n" - " }\n" - "}\n" - "Quaternion nearest( Quaternion first, Quaternion qd)\n" - "{\n" - " Quaternion diff,sum;\n" - " diff = first- qd;\n" - " sum = first + qd;\n" - " \n" - " if( dot(diff,diff) < dot(sum,sum) )\n" - " return qd;\n" - " return (-qd);\n" - "}\n" - "float b3Acos(float x) \n" - "{ \n" - " if (x<-1) \n" - " x=-1; \n" - " if (x>1) \n" - " x=1;\n" - " return acos(x); \n" - "}\n" - "float getAngle(Quaternion orn)\n" - "{\n" - " if (orn.w>=1.f)\n" - " orn.w=1.f;\n" - " float s = 2.f * b3Acos(orn.w);\n" - " return s;\n" - "}\n" - "void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" - "{\n" - " Quaternion orn1 = nearest(orn0,orn1a);\n" - " \n" - " Quaternion dorn = qtMul(orn1,qtInvert(orn0));\n" - " *angle = getAngle(dorn);\n" - " *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);\n" - " \n" - " //check for axis length\n" - " float len = dot3F4(*axis,*axis);\n" - " if (len < FLT_EPSILON*FLT_EPSILON)\n" - " *axis = (float4)(1,0,0,0);\n" - " else\n" - " *axis /= sqrt(len);\n" - "}\n" - "void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" - "{\n" - " Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n" - " Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat;\n" - " int s = info->rowskip;\n" - " int start_index = start_row * s;\n" - " // 3 rows to make body rotations equal\n" - " info->m_J1angularAxis[start_index] = 1;\n" - " info->m_J1angularAxis[start_index + s + 1] = 1;\n" - " info->m_J1angularAxis[start_index + s*2+2] = 1;\n" - " if ( info->m_J2angularAxis)\n" - " {\n" - " info->m_J2angularAxis[start_index] = -1;\n" - " info->m_J2angularAxis[start_index + s+1] = -1;\n" - " info->m_J2angularAxis[start_index + s*2+2] = -1;\n" - " }\n" - " \n" - " float currERP = info->erp;\n" - " float k = info->fps * currERP;\n" - " float4 diff;\n" - " float angle;\n" - " float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));\n" - " \n" - " calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);\n" - " diff*=-angle;\n" - " \n" - " float* resultPtr = &diff;\n" - " \n" - " for (int j=0; j<3; j++)\n" - " {\n" - " info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" - " }\n" - " \n" - "}\n" - "__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numBodies)\n" - " return;\n" - " if (bodies[i].m_invMass)\n" - " {\n" - "// if (length(solverBodies[i].m_deltaLinearVelocity)=numConstraints)\n" - " return;\n" - " \n" - " //for now, always initialize the batch info\n" - " int info1 = infos[i];\n" - " \n" - " __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" - " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" - " __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" - " __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" - " int solverBodyIdA = constraint->m_rbA;\n" - " int solverBodyIdB = constraint->m_rbB;\n" - " __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" - " __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" - " if (rbA->m_invMass)\n" - " {\n" - " batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" - " } else\n" - " {\n" - "// if (!solverBodyIdA)\n" - "// m_staticIdx = 0;\n" - " batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" - " }\n" - " if (rbB->m_invMass)\n" - " {\n" - " batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" - " } else\n" - " {\n" - "// if (!solverBodyIdB)\n" - "// m_staticIdx = 0;\n" - " batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" - " }\n" - " if (info1)\n" - " {\n" - " int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" - "// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" - " // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" - " int j;\n" - " for ( j=0;jm_deltaLinearVelocity = (float4)(0,0,0,0);\n" - " bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" - " bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n" - " bodyAPtr->m_turnVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n" - " int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" - " \n" - " b3GpuConstraintInfo2 info2;\n" - " info2.fps = 1.f/timeStep;\n" - " info2.erp = globalErp;\n" - " info2.m_J1linearAxisFloat4 = ¤tConstraintRow->m_contactNormal;\n" - " info2.m_J1angularAxisFloat4 = ¤tConstraintRow->m_relpos1CrossNormal;\n" - " info2.m_J2linearAxisFloat4 = 0;\n" - " info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal;\n" - " info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" - " ///the size of b3SolverConstraint needs be a multiple of float\n" - "// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n" - " info2.m_constraintError = ¤tConstraintRow->m_rhs;\n" - " currentConstraintRow->m_cfm = globalCfm;\n" - " info2.m_damping = globalDamping;\n" - " info2.cfm = ¤tConstraintRow->m_cfm;\n" - " info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;\n" - " info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;\n" - " info2.m_numIterations = globalNumIterations;\n" - " switch (constraint->m_constraintType)\n" - " {\n" - " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" - " {\n" - " getInfo2Point2Point(constraint,&info2,bodies);\n" - " break;\n" - " }\n" - " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" - " {\n" - " getInfo2Point2Point(constraint,&info2,bodies);\n" - " getInfo2FixedOrientation(constraint,&info2,bodies,3);\n" - " break;\n" - " }\n" - " default:\n" - " {\n" - " }\n" - " }\n" - " ///finalize the constraint setup\n" - " for ( j=0;jm_upperLimit>=constraint->m_breakingImpulseThreshold)\n" - " {\n" - " solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n" - " }\n" - " if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n" - " {\n" - " solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n" - " }\n" - "// solverConstraint->m_originalContactPoint = constraint;\n" - " \n" - " Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n" - " {\n" - " //float4 angularFactorA(1,1,1);\n" - " float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n" - " solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n" - " }\n" - " \n" - " Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n" - " {\n" - " float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n" - " solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n" - " }\n" - " {\n" - " //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal\n" - " //because it gets multiplied iMJlB\n" - " float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass;\n" - " float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n" - " float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n" - " float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n" - " float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n" - " sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n" - " sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n" - " sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal);\n" - " float fsum = fabs(sum);\n" - " if (fsum>FLT_EPSILON)\n" - " {\n" - " solverConstraint->m_jacDiagABInv = 1.f/sum;\n" - " } else\n" - " {\n" - " solverConstraint->m_jacDiagABInv = 0.f;\n" - " }\n" - " }\n" - " ///fix rhs\n" - " ///todo: add force/torque accelerators\n" - " {\n" - " float rel_vel;\n" - " float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n" - " float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n" - " rel_vel = vel1Dotn+vel2Dotn;\n" - " float restitution = 0.f;\n" - " float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n" - " float velocityError = restitution - rel_vel * info2.m_damping;\n" - " float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv;\n" - " float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n" - " solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n" - " solverConstraint->m_appliedImpulse = 0.f;\n" - " }\n" - " }\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.cl deleted file mode 100644 index 5c4d62e4ec9..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.cl +++ /dev/null @@ -1,501 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define mymake_float4 (float4) -//#define make_float2 (float2) -//#define make_uint4 (uint4) -//#define make_int4 (int4) -//#define make_uint2 (uint2) -//#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// - - - - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = mymake_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - - - - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - - - - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1); - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = mymake_float4(-n.xyz,0.f); - *angular0 = -cross3(r0, n); - *angular1 = cross3(r1, n); -} - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ); - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1); - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/(jmj0+jmj1+jmj2+jmj3); -} - - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB); - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - float4 angular0, angular1, linear; - float4 r0 = cs->m_worldPos[ic] - posA; - float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt ); - updated = min2( updated, maxRambdaDt ); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - *linVelA += linImp0; - *angVelA += angImp0; - *linVelB += linImp1; - *angVelB += angImp1; - } -} - -void btPlaneSpace1 (const float4* n, float4* p, float4* q); - void btPlaneSpace1 (const float4* n, float4* p, float4* q) -{ - if (fabs(n[0].z) > 0.70710678f) { - // choose p in y-z plane - float a = n[0].y*n[0].y + n[0].z*n[0].z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n[0].z*k; - p[0].z = n[0].y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n[0].x*p[0].z; - q[0].z = n[0].x*p[0].y; - } - else { - // choose p in x-y plane - float a = n[0].x*n[0].x + n[0].y*n[0].y; - float k = 1.f/sqrt(a); - p[0].x = -n[0].y*k; - p[0].y = n[0].x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n[0].z*p[0].y; - q[0].y = n[0].z*p[0].x; - q[0].z = a*k; - } -} - -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs); -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) -{ - //float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, - posB, &linVelB, &angVelB, invMassB, invInertiaB ); - - if (gBodies[aIdx].m_invMass) - { - gBodies[aIdx].m_linVel = linVelA; - gBodies[aIdx].m_angVel = angVelA; - } else - { - gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0); - - } - if (gBodies[bIdx].m_invMass) - { - gBodies[bIdx].m_linVel = linVelB; - gBodies[bIdx].m_angVel = angVelB; - } else - { - gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0); - - } - -} - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void BatchSolveKernelContact(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - __global int* gN, - __global int* gOffsets, - __global int* batchSizes, - int maxBatch1, - int cellBatch, - int4 nSplit - ) -{ - //__local int ldsBatchIdx[WG_SIZE+1]; - __local int ldsCurBatch; - __local int ldsNextBatch; - __local int ldsStart; - - int lIdx = GET_LOCAL_IDX; - int wgIdx = GET_GROUP_IDX; - -// int gIdx = GET_GLOBAL_IDX; -// debugInfo[gIdx].m_valInt0 = gIdx; - //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; - - - - - int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplit.x*nSplit.y)/4)); - int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y); - - //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1); - //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1); - //int cellIdx = xIdx+yIdx*nSplit; - - if( gN[cellIdx] == 0 ) - return; - - int maxBatch = batchSizes[cellIdx]; - - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - - - - if( lIdx == 0 ) - { - ldsCurBatch = 0; - ldsNextBatch = 0; - ldsStart = start; - } - - - GROUP_LDS_BARRIER; - - int idx=ldsStart+lIdx; - while (ldsCurBatch < maxBatch) - { - for(; idxm_jacCoeffInv[ic] == 0.f ) continue;\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = cs->m_worldPos[ic] - posA;\n" - " float4 r1 = cs->m_worldPos[ic] - posB;\n" - " setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" - " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" - " *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" - " rambdaDt *= cs->m_jacCoeffInv[ic];\n" - " {\n" - " float prevSum = cs->m_appliedRambdaDt[ic];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt );\n" - " updated = min2( updated, maxRambdaDt );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_appliedRambdaDt[ic] = updated;\n" - " }\n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " *linVelA += linImp0;\n" - " *angVelA += angImp0;\n" - " *linVelB += linImp1;\n" - " *angVelB += angImp1;\n" - " }\n" - "}\n" - "void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" - " void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" - "{\n" - " if (fabs(n[0].z) > 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n[0].z*k;\n" - " p[0].z = n[0].y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n[0].x*p[0].z;\n" - " q[0].z = n[0].x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n[0].y*k;\n" - " p[0].y = n[0].x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n[0].z*p[0].y;\n" - " q[0].y = n[0].z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" - "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" - "{\n" - " //float frictionCoeff = ldsCs[0].m_linear.w;\n" - " int aIdx = ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - " posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" - " if (gBodies[aIdx].m_invMass)\n" - " {\n" - " gBodies[aIdx].m_linVel = linVelA;\n" - " gBodies[aIdx].m_angVel = angVelA;\n" - " } else\n" - " {\n" - " gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " \n" - " }\n" - " if (gBodies[bIdx].m_invMass)\n" - " {\n" - " gBodies[bIdx].m_linVel = linVelB;\n" - " gBodies[bIdx].m_angVel = angVelB;\n" - " } else\n" - " {\n" - " gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " \n" - " }\n" - "}\n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void BatchSolveKernelContact(__global Body* gBodies,\n" - " __global Shape* gShapes,\n" - " __global Constraint4* gConstraints,\n" - " __global int* gN,\n" - " __global int* gOffsets,\n" - " __global int* batchSizes,\n" - " int maxBatch1,\n" - " int cellBatch,\n" - " int4 nSplit\n" - " )\n" - "{\n" - " //__local int ldsBatchIdx[WG_SIZE+1];\n" - " __local int ldsCurBatch;\n" - " __local int ldsNextBatch;\n" - " __local int ldsStart;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " int wgIdx = GET_GROUP_IDX;\n" - "// int gIdx = GET_GLOBAL_IDX;\n" - "// debugInfo[gIdx].m_valInt0 = gIdx;\n" - " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" - " \n" - " \n" - " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" - " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" - " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" - " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" - " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" - " //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" - " //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" - " //int cellIdx = xIdx+yIdx*nSplit;\n" - " \n" - " if( gN[cellIdx] == 0 ) \n" - " return;\n" - " int maxBatch = batchSizes[cellIdx];\n" - " \n" - " \n" - " const int start = gOffsets[cellIdx];\n" - " const int end = start + gN[cellIdx];\n" - " \n" - " \n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsCurBatch = 0;\n" - " ldsNextBatch = 0;\n" - " ldsStart = start;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " int idx=ldsStart+lIdx;\n" - " while (ldsCurBatch < maxBatch)\n" - " {\n" - " for(; idx 0.70710678f) { - // choose p in y-z plane - float a = n[0].y*n[0].y + n[0].z*n[0].z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n[0].z*k; - p[0].z = n[0].y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n[0].x*p[0].z; - q[0].z = n[0].x*p[0].y; - } - else { - // choose p in x-y plane - float a = n[0].x*n[0].x + n[0].y*n[0].y; - float k = 1.f/sqrt(a); - p[0].x = -n[0].y*k; - p[0].y = n[0].x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n[0].z*p[0].y; - q[0].y = n[0].z*p[0].x; - q[0].z = a*k; - } -} - - -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs); -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) -{ - float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=ldsCs[0].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - -// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, -// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); - - - { - - __global Constraint4* cs = ldsCs; - - if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; - const float4 center = cs->m_center; - - float4 n = -cs->m_linear; - - float4 tangent[2]; - btPlaneSpace1(&n,&tangent[0],&tangent[1]); - float4 angular0, angular1, linear; - float4 r0 = center - posA; - float4 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ); - rambdaDt *= cs->m_fJacCoeffInv[i]; - - { - float prevSum = cs->m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt[i] ); - updated = min2( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs->m_fAppliedRambdaDt[i] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - { // angular damping for point constraint - float4 ab = normalize3( posB - posA ); - float4 ac = normalize3( center - posA ); - if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = dot3F4( n, angVelA ); - float angNB = dot3F4( n, angVelB ); - - angVelA -= (angNA*0.1f)*n; - angVelB -= (angNB*0.1f)*n; - } - } - } - - - - } - - if (gBodies[aIdx].m_invMass) - { - gBodies[aIdx].m_linVel = linVelA; - gBodies[aIdx].m_angVel = angVelA; - } else - { - gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0); - } - if (gBodies[bIdx].m_invMass) - { - gBodies[bIdx].m_linVel = linVelB; - gBodies[bIdx].m_angVel = angVelB; - } else - { - gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0); - } - - -} - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void BatchSolveKernelFriction(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - __global int* gN, - __global int* gOffsets, - __global int* batchSizes, - int maxBatch1, - int cellBatch, - int4 nSplit - ) -{ - //__local int ldsBatchIdx[WG_SIZE+1]; - __local int ldsCurBatch; - __local int ldsNextBatch; - __local int ldsStart; - - int lIdx = GET_LOCAL_IDX; - int wgIdx = GET_GROUP_IDX; - -// int gIdx = GET_GLOBAL_IDX; -// debugInfo[gIdx].m_valInt0 = gIdx; - //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; - - - int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplit.x*nSplit.y)/4)); - int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y); - - - if( gN[cellIdx] == 0 ) - return; - - int maxBatch = batchSizes[cellIdx]; - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - - if( lIdx == 0 ) - { - ldsCurBatch = 0; - ldsNextBatch = 0; - ldsStart = start; - } - - - GROUP_LDS_BARRIER; - - int idx=ldsStart+lIdx; - while (ldsCurBatch < maxBatch) - { - for(; idx 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n[0].z*k;\n" - " p[0].z = n[0].y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n[0].x*p[0].z;\n" - " q[0].z = n[0].x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n[0].y*k;\n" - " p[0].y = n[0].x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n[0].z*p[0].y;\n" - " q[0].y = n[0].z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" - "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" - "{\n" - " float frictionCoeff = ldsCs[0].m_linear.w;\n" - " int aIdx = ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " \n" - " {\n" - " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" - " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" - " float sum = 0;\n" - " for(int j=0; j<4; j++)\n" - " {\n" - " sum +=ldsCs[0].m_appliedRambdaDt[j];\n" - " }\n" - " frictionCoeff = 0.7f;\n" - " for(int j=0; j<4; j++)\n" - " {\n" - " maxRambdaDt[j] = frictionCoeff*sum;\n" - " minRambdaDt[j] = -maxRambdaDt[j];\n" - " }\n" - " \n" - "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" - " \n" - " \n" - " {\n" - " \n" - " __global Constraint4* cs = ldsCs;\n" - " \n" - " if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" - " const float4 center = cs->m_center;\n" - " \n" - " float4 n = -cs->m_linear;\n" - " \n" - " float4 tangent[2];\n" - " btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = center - posA;\n" - " float4 r1 = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" - " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA, angVelA, linVelB, angVelB );\n" - " rambdaDt *= cs->m_fJacCoeffInv[i];\n" - " \n" - " {\n" - " float prevSum = cs->m_fAppliedRambdaDt[i];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt[i] );\n" - " updated = min2( updated, maxRambdaDt[i] );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_fAppliedRambdaDt[i] = updated;\n" - " }\n" - " \n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " \n" - " linVelA += linImp0;\n" - " angVelA += angImp0;\n" - " linVelB += linImp1;\n" - " angVelB += angImp1;\n" - " }\n" - " { // angular damping for point constraint\n" - " float4 ab = normalize3( posB - posA );\n" - " float4 ac = normalize3( center - posA );\n" - " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" - " {\n" - " float angNA = dot3F4( n, angVelA );\n" - " float angNB = dot3F4( n, angVelB );\n" - " \n" - " angVelA -= (angNA*0.1f)*n;\n" - " angVelB -= (angNB*0.1f)*n;\n" - " }\n" - " }\n" - " }\n" - " \n" - " \n" - " }\n" - " if (gBodies[aIdx].m_invMass)\n" - " {\n" - " gBodies[aIdx].m_linVel = linVelA;\n" - " gBodies[aIdx].m_angVel = angVelA;\n" - " } else\n" - " {\n" - " gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " }\n" - " if (gBodies[bIdx].m_invMass)\n" - " {\n" - " gBodies[bIdx].m_linVel = linVelB;\n" - " gBodies[bIdx].m_angVel = angVelB;\n" - " } else\n" - " {\n" - " gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " }\n" - " \n" - "}\n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void BatchSolveKernelFriction(__global Body* gBodies,\n" - " __global Shape* gShapes,\n" - " __global Constraint4* gConstraints,\n" - " __global int* gN,\n" - " __global int* gOffsets,\n" - " __global int* batchSizes,\n" - " int maxBatch1,\n" - " int cellBatch,\n" - " int4 nSplit\n" - " )\n" - "{\n" - " //__local int ldsBatchIdx[WG_SIZE+1];\n" - " __local int ldsCurBatch;\n" - " __local int ldsNextBatch;\n" - " __local int ldsStart;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " int wgIdx = GET_GROUP_IDX;\n" - "// int gIdx = GET_GLOBAL_IDX;\n" - "// debugInfo[gIdx].m_valInt0 = gIdx;\n" - " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" - " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" - " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" - " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" - " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" - " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" - " \n" - " if( gN[cellIdx] == 0 ) \n" - " return;\n" - " int maxBatch = batchSizes[cellIdx];\n" - " const int start = gOffsets[cellIdx];\n" - " const int end = start + gN[cellIdx];\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsCurBatch = 0;\n" - " ldsNextBatch = 0;\n" - " ldsStart = start;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " int idx=ldsStart+lIdx;\n" - " while (ldsCurBatch < maxBatch)\n" - " {\n" - " for(; idx1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#ifndef B3_CONTACT_CONSTRAINT5_H\n" - "#define B3_CONTACT_CONSTRAINT5_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n" - "struct b3ContactConstraint4\n" - "{\n" - " b3Float4 m_linear;//normal?\n" - " b3Float4 m_worldPos[4];\n" - " b3Float4 m_center; // friction\n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; // friction\n" - " float m_fAppliedRambdaDt[2]; // friction\n" - " unsigned int m_bodyA;\n" - " unsigned int m_bodyB;\n" - " int m_batchIdx;\n" - " unsigned int m_paddings;\n" - "};\n" - "//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n" - "inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n" - "{\n" - " return constraint->m_linear.w; \n" - "}\n" - "#endif //B3_CONTACT_CONSTRAINT5_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n" - " void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n" - "{\n" - " if (b3Fabs(n.z) > 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n.y*n.y + n.z*n.z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n.z*k;\n" - " p[0].z = n.y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n.x*p[0].z;\n" - " q[0].z = n.x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n.x*n.x + n.y*n.y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n.y*k;\n" - " p[0].y = n.x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n.z*p[0].y;\n" - " q[0].y = n.z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - " \n" - "void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n" - "{\n" - " *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n" - " *angular0 = b3Cross3(r0, n);\n" - " *angular1 = -b3Cross3(r1, n);\n" - "}\n" - "float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n" - " b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n" - "{\n" - " return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n" - "}\n" - "float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n" - " float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n" - "{\n" - " // linear0,1 are normlized\n" - " float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n" - " float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" - " float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n" - " float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" - " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" - "}\n" - "void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n" - " b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n" - " __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" - " b3ContactConstraint4_t* dstC )\n" - "{\n" - " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" - " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" - " float dtInv = 1.f/dt;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" - " dstC->m_linear = src->m_worldNormalOnB;\n" - " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " b3Float4 r0 = src->m_worldPosB[ic] - posA;\n" - " b3Float4 r1 = src->m_worldPosB[ic] - posB;\n" - " if( ic >= src->m_worldNormalOnB.w )//npoints\n" - " {\n" - " dstC->m_jacCoeffInv[ic] = 0.f;\n" - " continue;\n" - " }\n" - " float relVelN;\n" - " {\n" - " b3Float4 linear, angular0, angular1;\n" - " setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" - " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" - " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA, angVelA, linVelB, angVelB);\n" - " float e = 0.f;//src->getRestituitionCoeff();\n" - " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" - " dstC->m_b[ic] = e*relVelN;\n" - " //float penetration = src->m_worldPosB[ic].w;\n" - " dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " }\n" - " if( src->m_worldNormalOnB.w > 0 )//npoints\n" - " { // prepare friction\n" - " b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" - " for(int i=0; im_worldNormalOnB.w; i++) \n" - " center += src->m_worldPosB[i];\n" - " center /= (float)src->m_worldNormalOnB.w;\n" - " b3Float4 tangent[2];\n" - " b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" - " \n" - " b3Float4 r[2];\n" - " r[0] = center - posA;\n" - " r[1] = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " b3Float4 linear, angular0, angular1;\n" - " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" - " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" - " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" - " }\n" - " dstC->m_center = center;\n" - " }\n" - " for(int i=0; i<4; i++)\n" - " {\n" - " if( im_worldNormalOnB.w )\n" - " {\n" - " dstC->m_worldPos[i] = src->m_worldPosB[i];\n" - " }\n" - " else\n" - " {\n" - " dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" - " }\n" - " }\n" - "}\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float fastSqrtf(float f2)\n" - "{\n" - " return native_sqrt(f2);\n" - "// return sqrt(f2);\n" - "}\n" - "__inline\n" - "float fastRSqrt(float f2)\n" - "{\n" - " return native_rsqrt(f2);\n" - "}\n" - "__inline\n" - "float fastLength4(float4 v)\n" - "{\n" - " return fast_length(v);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float sqrtf(float a)\n" - "{\n" - "// return sqrt(a);\n" - " return native_sqrt(a);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float length3(const float4 a)\n" - "{\n" - " return sqrtf(dot3F4(a,a));\n" - "}\n" - "__inline\n" - "float dot4(const float4 a, const float4 b)\n" - "{\n" - " return dot( a, b );\n" - "}\n" - "// for height\n" - "__inline\n" - "float dot3w1(const float4 point, const float4 eqn)\n" - "{\n" - " return dot3F4(point,eqn) + eqn.w;\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 normalize4(const float4 a)\n" - "{\n" - " float length = sqrtf(dot4(a, a));\n" - " return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" - "{\n" - " float4 eqn;\n" - " float4 ab = b-a;\n" - " float4 ac = c-a;\n" - " eqn = normalize3( cross3(ab, ac) );\n" - " eqn.w = -dot3F4(eqn,a);\n" - " return eqn;\n" - "}\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " int m_nConstraints;\n" - " int m_start;\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct\n" - "{\n" - " int m_solveFriction;\n" - " int m_maxBatch; // long batch really kills the performance\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBufferBatchSolve;\n" - " \n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "typedef struct\n" - "{\n" - " int m_nContacts;\n" - " float m_dt;\n" - " float m_positionDrift;\n" - " float m_positionConstraintCoeff;\n" - "} ConstBufferCTC;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n" - "int nContacts,\n" - "float dt,\n" - "float positionDrift,\n" - "float positionConstraintCoeff\n" - ")\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nContacts )\n" - " {\n" - " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" - " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n" - " b3ContactConstraint4_t cs;\n" - " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" - " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" - " &cs );\n" - " \n" - " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" - " gConstraintOut[gIdx] = cs;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl deleted file mode 100644 index 3dc48d43502..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl +++ /dev/null @@ -1,613 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - - - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -// others -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb ) -{ - int nContacts = cb.x; - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int srcIdx = sortData[gIdx].y; - out[gIdx] = in[srcIdx]; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sd; - sd.x = contactsIn[gIdx].m_childIndexB; - sd.y = gIdx; - sortDataOut[gIdx] = sd; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_childIndexA; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - - - - -typedef struct -{ - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; -} ConstBufferSSD; - - -__constant const int gridTable4x4[] = -{ - 0,1,17,16, - 1,2,18,19, - 17,18,32,3, - 16,19,3,34 -}; - -__constant const int gridTable8x8[] = -{ - 0, 2, 3, 16, 17, 18, 19, 1, - 66, 64, 80, 67, 82, 81, 65, 83, - 131,144,128,130,147,129,145,146, - 208,195,194,192,193,211,210,209, - 21, 22, 23, 5, 4, 6, 7, 20, - 86, 85, 69, 87, 70, 68, 84, 71, - 151,133,149,150,135,148,132,134, - 197,27,214,213,212,199,198,196 - -}; - - - - -#define USE_SPATIAL_BATCHING 1 -#define USE_4x4_GRID 1 - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, -int nContacts,float scale,int4 nSplit,int staticIdx) - -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit; - int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit; - - int aIdx = abs(aPtrAndSignBit ); - int bIdx = abs(bPtrAndSignBit); - - bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx); - bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx); - -#if USE_SPATIAL_BATCHING - int idx = (aStatic)? bIdx: aIdx; - float4 p = gBodies[idx].m_pos; - int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1); - int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1); - int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1); - int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y); - -#else//USE_SPATIAL_BATCHING - #if USE_4x4_GRID - int aa = aIdx&3; - int bb = bIdx&3; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*4; - int newIndex = gridTable4x4[gridIndex]; - #else//USE_4x4_GRID - int aa = aIdx&7; - int bb = bIdx&7; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*8; - int newIndex = gridTable8x8[gridIndex]; - #endif//USE_4x4_GRID -#endif//USE_SPATIAL_BATCHING - - - gSortDataOut[gIdx].x = newIndex; - gSortDataOut[gIdx].y = gIdx; - } - else - { - gSortDataOut[gIdx].x = 0xffffffff; - } -} - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb ) -{ - int gIdx = GET_GLOBAL_IDX; - if( gIdx < cb.x ) - { - gOut[gIdx] = gIn[gIdx]; - } -} - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h deleted file mode 100644 index 1e6e3579b68..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ /dev/null @@ -1,600 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetup2CL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float fastSqrtf(float f2)\n" - "{\n" - " return native_sqrt(f2);\n" - "// return sqrt(f2);\n" - "}\n" - "__inline\n" - "float fastRSqrt(float f2)\n" - "{\n" - " return native_rsqrt(f2);\n" - "}\n" - "__inline\n" - "float fastLength4(float4 v)\n" - "{\n" - " return fast_length(v);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float sqrtf(float a)\n" - "{\n" - "// return sqrt(a);\n" - " return native_sqrt(a);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float length3(const float4 a)\n" - "{\n" - " return sqrtf(dot3F4(a,a));\n" - "}\n" - "__inline\n" - "float dot4(const float4 a, const float4 b)\n" - "{\n" - " return dot( a, b );\n" - "}\n" - "// for height\n" - "__inline\n" - "float dot3w1(const float4 point, const float4 eqn)\n" - "{\n" - " return dot3F4(point,eqn) + eqn.w;\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 normalize4(const float4 a)\n" - "{\n" - " float length = sqrtf(dot4(a, a));\n" - " return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" - "{\n" - " float4 eqn;\n" - " float4 ab = b-a;\n" - " float4 ac = c-a;\n" - " eqn = normalize3( cross3(ab, ac) );\n" - " eqn.w = -dot3F4(eqn,a);\n" - " return eqn;\n" - "}\n" - "///////////////////////////////////////\n" - "// Matrix3x3\n" - "///////////////////////////////////////\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "Matrix3x3 mtZero();\n" - "__inline\n" - "Matrix3x3 mtIdentity();\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m);\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "Matrix3x3 mtZero()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(0.f);\n" - " m.m_row[1] = (float4)(0.f);\n" - " m.m_row[2] = (float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtIdentity()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(1,0,0,0);\n" - " m.m_row[1] = (float4)(0,1,0,0);\n" - " m.m_row[2] = (float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m)\n" - "{\n" - " Matrix3x3 out;\n" - " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" - "{\n" - " Matrix3x3 transB;\n" - " transB = mtTranspose( b );\n" - " Matrix3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertia;\n" - " Matrix3x3 m_initInvInertia;\n" - "} Shape;\n" - "typedef struct\n" - "{\n" - " float4 m_linear;\n" - " float4 m_worldPos[4];\n" - " float4 m_center; \n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; \n" - " float m_fAppliedRambdaDt[2]; \n" - " u32 m_bodyA;\n" - " u32 m_bodyB;\n" - " int m_batchIdx;\n" - " u32 m_paddings[1];\n" - "} Constraint4;\n" - "typedef struct\n" - "{\n" - " int m_nConstraints;\n" - " int m_start;\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct\n" - "{\n" - " int m_solveFriction;\n" - " int m_maxBatch; // long batch really kills the performance\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBufferBatchSolve;\n" - " \n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "// others\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" - "{\n" - " int nContacts = cb.x;\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int srcIdx = sortData[gIdx].y;\n" - " out[gIdx] = in[srcIdx];\n" - " }\n" - "}\n" - "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sd;\n" - " sd.x = contactsIn[gIdx].m_childIndexB;\n" - " sd.y = gIdx;\n" - " sortDataOut[gIdx] = sd;\n" - " }\n" - "}\n" - "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sdIn;\n" - " sdIn = sortDataInOut[gIdx];\n" - " int2 sdOut;\n" - " sdOut.x = contactsIn[sdIn.y].m_childIndexA;\n" - " sdOut.y = sdIn.y;\n" - " sortDataInOut[gIdx] = sdOut;\n" - " }\n" - "}\n" - "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sdIn;\n" - " sdIn = sortDataInOut[gIdx];\n" - " int2 sdOut;\n" - " sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;\n" - " sdOut.y = sdIn.y;\n" - " sortDataInOut[gIdx] = sdOut;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sdIn;\n" - " sdIn = sortDataInOut[gIdx];\n" - " int2 sdOut;\n" - " sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;\n" - " sdOut.y = sdIn.y;\n" - " sortDataInOut[gIdx] = sdOut;\n" - " }\n" - "}\n" - "typedef struct\n" - "{\n" - " int m_nContacts;\n" - " int m_staticIdx;\n" - " float m_scale;\n" - " int m_nSplit;\n" - "} ConstBufferSSD;\n" - "__constant const int gridTable4x4[] = \n" - "{\n" - " 0,1,17,16,\n" - " 1,2,18,19,\n" - " 17,18,32,3,\n" - " 16,19,3,34\n" - "};\n" - "__constant const int gridTable8x8[] = \n" - "{\n" - " 0, 2, 3, 16, 17, 18, 19, 1,\n" - " 66, 64, 80, 67, 82, 81, 65, 83,\n" - " 131,144,128,130,147,129,145,146,\n" - " 208,195,194,192,193,211,210,209,\n" - " 21, 22, 23, 5, 4, 6, 7, 20,\n" - " 86, 85, 69, 87, 70, 68, 84, 71,\n" - " 151,133,149,150,135,148,132,134,\n" - " 197,27,214,213,212,199,198,196\n" - " \n" - "};\n" - "#define USE_SPATIAL_BATCHING 1\n" - "#define USE_4x4_GRID 1\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" - "int nContacts,float scale,int4 nSplit,int staticIdx)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nContacts )\n" - " {\n" - " int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n" - " int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n" - " int aIdx = abs(aPtrAndSignBit );\n" - " int bIdx = abs(bPtrAndSignBit);\n" - " bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n" - " bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n" - "#if USE_SPATIAL_BATCHING \n" - " int idx = (aStatic)? bIdx: aIdx;\n" - " float4 p = gBodies[idx].m_pos;\n" - " int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);\n" - " int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);\n" - " int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);\n" - " int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);\n" - " \n" - "#else//USE_SPATIAL_BATCHING\n" - " #if USE_4x4_GRID\n" - " int aa = aIdx&3;\n" - " int bb = bIdx&3;\n" - " if (aStatic)\n" - " aa = bb;\n" - " if (bStatic)\n" - " bb = aa;\n" - " int gridIndex = aa + bb*4;\n" - " int newIndex = gridTable4x4[gridIndex];\n" - " #else//USE_4x4_GRID\n" - " int aa = aIdx&7;\n" - " int bb = bIdx&7;\n" - " if (aStatic)\n" - " aa = bb;\n" - " if (bStatic)\n" - " bb = aa;\n" - " int gridIndex = aa + bb*8;\n" - " int newIndex = gridTable8x8[gridIndex];\n" - " #endif//USE_4x4_GRID\n" - "#endif//USE_SPATIAL_BATCHING\n" - " gSortDataOut[gIdx].x = newIndex;\n" - " gSortDataOut[gIdx].y = gIdx;\n" - " }\n" - " else\n" - " {\n" - " gSortDataOut[gIdx].x = 0xffffffff;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < cb.x )\n" - " {\n" - " gOut[gIdx] = gIn[gIdx];\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl deleted file mode 100644 index a21a08c3b4e..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl +++ /dev/null @@ -1,968 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a1, float4 b1) -{ - - float4 a=make_float4(a1.xyz,0.f); - float4 b=make_float4(b1.xyz,0.f); - //float4 a=a1; - //float4 b=b1; - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - - - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - int m_batchIdx; - u32 m_paddings; -} Constraint4; - - - - - - -__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) -{ - int i = GET_GLOBAL_IDX; - - if( i < numContactManifolds) - { - int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; - bool isFixedA = (pa <0) || (pa == fixedBodyIndex); - int bodyIndexA = abs(pa); - if (!isFixedA) - { - AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x); - } - barrier(CLK_GLOBAL_MEM_FENCE); - int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; - bool isFixedB = (pb <0) || (pb == fixedBodyIndex); - int bodyIndexB = abs(pb); - if (!isFixedB) - { - AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y); - } - } -} - -__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies) -{ - int i = GET_GLOBAL_IDX; - - if( i < numSplitBodies) - { - linearVelocities[i] = make_float4(0); - angularVelocities[i] = make_float4(0); - } -} - - -__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, -__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) -{ - int i = GET_GLOBAL_IDX; - if (i 0.70710678f) { - // choose p in y-z plane - float a = n.y*n.y + n.z*n.z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n.z*k; - p[0].z = n.y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n.x*p[0].z; - q[0].z = n.x*p[0].y; - } - else { - // choose p in x-y plane - float a = n.x*n.x + n.y*n.y; - float k = 1.f/sqrt(a); - p[0].x = -n.y*k; - p[0].y = n.x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n.z*p[0].y; - q[0].y = n.z*p[0].x; - q[0].z = a*k; - } -} - - - - - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB, - float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - float4 angular0, angular1, linear; - float4 r0 = cs->m_worldPos[ic] - posA; - float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); - - - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt ); - updated = min2( updated, maxRambdaDt ); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - - if (invMassA) - { - *dLinVelA += linImp0; - *dAngVelA += angImp0; - } - if (invMassB) - { - *dLinVelB += linImp1; - *dAngVelB += angImp1; - } - } -} - - -// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - - -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, -__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, -__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) -{ - - //float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - float4 dLinVelA = make_float4(0,0,0,0); - float4 dAngVelA = make_float4(0,0,0,0); - float4 dLinVelB = make_float4(0,0,0,0); - float4 dAngVelB = make_float4(0,0,0,0); - - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[0].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - - if (invMassA) - { - dLinVelA = deltaLinearVelocities[splitIndexA]; - dAngVelA = deltaAngularVelocities[splitIndexA]; - } - - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[0].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - - if (invMassB) - { - dLinVelB = deltaLinearVelocities[splitIndexB]; - dAngVelB = deltaAngularVelocities[splitIndexB]; - } - - solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, - posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB); - - if (invMassA) - { - deltaLinearVelocities[splitIndexA] = dLinVelA; - deltaAngularVelocities[splitIndexA] = dAngVelA; - } - if (invMassB) - { - deltaLinearVelocities[splitIndexB] = dLinVelB; - deltaAngularVelocities[splitIndexB] = dAngVelB; - } - -} - - -__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , -__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, -float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds -) -{ - int i = GET_GLOBAL_IDX; - if (im_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; - const float4 center = cs->m_center; - - float4 n = -cs->m_linear; - - float4 tangent[2]; - btPlaneSpace1(n,&tangent[0],&tangent[1]); - float4 angular0, angular1, linear; - float4 r0 = center - posA; - float4 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB ); - rambdaDt *= cs->m_fJacCoeffInv[i]; - - { - float prevSum = cs->m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt[i] ); - updated = min2( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs->m_fAppliedRambdaDt[i] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - { // angular damping for point constraint - float4 ab = normalize3( posB - posA ); - float4 ac = normalize3( center - posA ); - if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = dot3F4( n, angVelA ); - float angNB = dot3F4( n, angVelB ); - - dAngVelA -= (angNA*0.1f)*n; - dAngVelB -= (angNB*0.1f)*n; - } - } - } - - - - } - - if (invMassA) - { - deltaLinearVelocities[splitIndexA] = dLinVelA; - deltaAngularVelocities[splitIndexA] = dAngVelA; - } - if (invMassB) - { - deltaLinearVelocities[splitIndexB] = dLinVelB; - deltaAngularVelocities[splitIndexB] = dAngVelB; - } - - -} - - -__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , - __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, - float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds -) -{ - int i = GET_GLOBAL_IDX; - if (im_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f/dt; - for(int ic=0; ic<4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() ); - for(int ic=0; ic<4; ic++) - { - float4 r0 = src->m_worldPosB[ic] - posA; - float4 r1 = src->m_worldPosB[ic] - posB; - - if( ic >= src->m_worldNormalOnB.w )//npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - float4 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f;//src->getRestituitionCoeff(); - if( relVelN*relVelN < 0.004f ) e = 0.f; - - dstC->m_b[ic] = e*relVelN; - //float penetration = src->m_worldPosB[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if( src->m_worldNormalOnB.w > 0 )//npoints - { // prepare friction - float4 center = make_float4(0.f); - for(int i=0; im_worldNormalOnB.w; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB.w; - - float4 tangent[2]; - btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]); - - float4 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for(int i=0; i<2; i++) - { - float4 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for(int i=0; i<4; i++) - { - if( im_worldNormalOnB.w ) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, -__global const unsigned int* bodyCount, -int nContacts, -float dt, -float positionDrift, -float positionConstraintCoeff -) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - Constraint4 cs; - - float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1; - float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1; - - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, - &cs ); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h deleted file mode 100644 index f4d98d99415..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ /dev/null @@ -1,908 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverUtilsCL = - "/*\n" - "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float fastSqrtf(float f2)\n" - "{\n" - " return native_sqrt(f2);\n" - "// return sqrt(f2);\n" - "}\n" - "__inline\n" - "float fastRSqrt(float f2)\n" - "{\n" - " return native_rsqrt(f2);\n" - "}\n" - "__inline\n" - "float fastLength4(float4 v)\n" - "{\n" - " return fast_length(v);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float sqrtf(float a)\n" - "{\n" - "// return sqrt(a);\n" - " return native_sqrt(a);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a1, float4 b1)\n" - "{\n" - " float4 a=make_float4(a1.xyz,0.f);\n" - " float4 b=make_float4(b1.xyz,0.f);\n" - " //float4 a=a1;\n" - " //float4 b=b1;\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float length3(const float4 a)\n" - "{\n" - " return sqrtf(dot3F4(a,a));\n" - "}\n" - "__inline\n" - "float dot4(const float4 a, const float4 b)\n" - "{\n" - " return dot( a, b );\n" - "}\n" - "// for height\n" - "__inline\n" - "float dot3w1(const float4 point, const float4 eqn)\n" - "{\n" - " return dot3F4(point,eqn) + eqn.w;\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 normalize4(const float4 a)\n" - "{\n" - " float length = sqrtf(dot4(a, a));\n" - " return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" - "{\n" - " float4 eqn;\n" - " float4 ab = b-a;\n" - " float4 ac = c-a;\n" - " eqn = normalize3( cross3(ab, ac) );\n" - " eqn.w = -dot3F4(eqn,a);\n" - " return eqn;\n" - "}\n" - "///////////////////////////////////////\n" - "// Matrix3x3\n" - "///////////////////////////////////////\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "Matrix3x3 mtZero();\n" - "__inline\n" - "Matrix3x3 mtIdentity();\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m);\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "Matrix3x3 mtZero()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(0.f);\n" - " m.m_row[1] = (float4)(0.f);\n" - " m.m_row[2] = (float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtIdentity()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(1,0,0,0);\n" - " m.m_row[1] = (float4)(0,1,0,0);\n" - " m.m_row[2] = (float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m)\n" - "{\n" - " Matrix3x3 out;\n" - " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" - "{\n" - " Matrix3x3 transB;\n" - " transB = mtTranspose( b );\n" - " Matrix3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertia;\n" - " Matrix3x3 m_initInvInertia;\n" - "} Shape;\n" - "typedef struct\n" - "{\n" - " float4 m_linear;\n" - " float4 m_worldPos[4];\n" - " float4 m_center; \n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; \n" - " float m_fAppliedRambdaDt[2]; \n" - " u32 m_bodyA;\n" - " u32 m_bodyB;\n" - " int m_batchIdx;\n" - " u32 m_paddings;\n" - "} Constraint4;\n" - "__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " \n" - " if( i < numContactManifolds)\n" - " {\n" - " int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;\n" - " bool isFixedA = (pa <0) || (pa == fixedBodyIndex);\n" - " int bodyIndexA = abs(pa);\n" - " if (!isFixedA)\n" - " {\n" - " AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);\n" - " }\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - " int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;\n" - " bool isFixedB = (pb <0) || (pb == fixedBodyIndex);\n" - " int bodyIndexB = abs(pb);\n" - " if (!isFixedB)\n" - " {\n" - " AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);\n" - " } \n" - " }\n" - "}\n" - "__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " \n" - " if( i < numSplitBodies)\n" - " {\n" - " linearVelocities[i] = make_float4(0);\n" - " angularVelocities[i] = make_float4(0);\n" - " }\n" - "}\n" - "__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" - "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (i 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n.y*n.y + n.z*n.z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n.z*k;\n" - " p[0].z = n.y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n.x*p[0].z;\n" - " q[0].z = n.x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n.x*n.x + n.y*n.y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n.y*k;\n" - " p[0].y = n.x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n.z*p[0].y;\n" - " q[0].y = n.z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - "void solveContact(__global Constraint4* cs,\n" - " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" - " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n" - " float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)\n" - "{\n" - " float minRambdaDt = 0;\n" - " float maxRambdaDt = FLT_MAX;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = cs->m_worldPos[ic] - posA;\n" - " float4 r1 = cs->m_worldPos[ic] - posB;\n" - " setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" - " \n" - " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" - " *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" - " rambdaDt *= cs->m_jacCoeffInv[ic];\n" - " \n" - " {\n" - " float prevSum = cs->m_appliedRambdaDt[ic];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt );\n" - " updated = min2( updated, maxRambdaDt );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_appliedRambdaDt[ic] = updated;\n" - " }\n" - " \n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " \n" - " if (invMassA)\n" - " {\n" - " *dLinVelA += linImp0;\n" - " *dAngVelA += angImp0;\n" - " }\n" - " if (invMassB)\n" - " {\n" - " *dLinVelB += linImp1;\n" - " *dAngVelB += angImp1;\n" - " }\n" - " }\n" - "}\n" - "// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" - "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" - "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" - "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" - "{\n" - " //float frictionCoeff = ldsCs[0].m_linear.w;\n" - " int aIdx = ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " \n" - " float4 dLinVelA = make_float4(0,0,0,0);\n" - " float4 dAngVelA = make_float4(0,0,0,0);\n" - " float4 dLinVelB = make_float4(0,0,0,0);\n" - " float4 dAngVelB = make_float4(0,0,0,0);\n" - " \n" - " int bodyOffsetA = offsetSplitBodies[aIdx];\n" - " int constraintOffsetA = contactConstraintOffsets[0].x;\n" - " int splitIndexA = bodyOffsetA+constraintOffsetA;\n" - " \n" - " if (invMassA)\n" - " {\n" - " dLinVelA = deltaLinearVelocities[splitIndexA];\n" - " dAngVelA = deltaAngularVelocities[splitIndexA];\n" - " }\n" - " int bodyOffsetB = offsetSplitBodies[bIdx];\n" - " int constraintOffsetB = contactConstraintOffsets[0].y;\n" - " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" - " if (invMassB)\n" - " {\n" - " dLinVelB = deltaLinearVelocities[splitIndexB];\n" - " dAngVelB = deltaAngularVelocities[splitIndexB];\n" - " }\n" - " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - " posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n" - " if (invMassA)\n" - " {\n" - " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" - " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" - " } \n" - " if (invMassB)\n" - " {\n" - " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" - " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" - " }\n" - "}\n" - "__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" - "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" - "float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" - ")\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (im_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" - " const float4 center = cs->m_center;\n" - " \n" - " float4 n = -cs->m_linear;\n" - " \n" - " float4 tangent[2];\n" - " btPlaneSpace1(n,&tangent[0],&tangent[1]);\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = center - posA;\n" - " float4 r1 = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" - " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );\n" - " rambdaDt *= cs->m_fJacCoeffInv[i];\n" - " \n" - " {\n" - " float prevSum = cs->m_fAppliedRambdaDt[i];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt[i] );\n" - " updated = min2( updated, maxRambdaDt[i] );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_fAppliedRambdaDt[i] = updated;\n" - " }\n" - " \n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " \n" - " dLinVelA += linImp0;\n" - " dAngVelA += angImp0;\n" - " dLinVelB += linImp1;\n" - " dAngVelB += angImp1;\n" - " }\n" - " { // angular damping for point constraint\n" - " float4 ab = normalize3( posB - posA );\n" - " float4 ac = normalize3( center - posA );\n" - " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" - " {\n" - " float angNA = dot3F4( n, angVelA );\n" - " float angNB = dot3F4( n, angVelB );\n" - " \n" - " dAngVelA -= (angNA*0.1f)*n;\n" - " dAngVelB -= (angNB*0.1f)*n;\n" - " }\n" - " }\n" - " }\n" - " \n" - " \n" - " }\n" - " if (invMassA)\n" - " {\n" - " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" - " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" - " } \n" - " if (invMassB)\n" - " {\n" - " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" - " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" - " }\n" - " \n" - "}\n" - "__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" - " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" - " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" - " float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" - ")\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (im_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" - " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" - " float dtInv = 1.f/dt;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" - " dstC->m_linear = src->m_worldNormalOnB;\n" - " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " float4 r0 = src->m_worldPosB[ic] - posA;\n" - " float4 r1 = src->m_worldPosB[ic] - posB;\n" - " if( ic >= src->m_worldNormalOnB.w )//npoints\n" - " {\n" - " dstC->m_jacCoeffInv[ic] = 0.f;\n" - " continue;\n" - " }\n" - " float relVelN;\n" - " {\n" - " float4 linear, angular0, angular1;\n" - " setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" - " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" - " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA, angVelA, linVelB, angVelB);\n" - " float e = 0.f;//src->getRestituitionCoeff();\n" - " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" - " dstC->m_b[ic] = e*relVelN;\n" - " //float penetration = src->m_worldPosB[ic].w;\n" - " dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " }\n" - " if( src->m_worldNormalOnB.w > 0 )//npoints\n" - " { // prepare friction\n" - " float4 center = make_float4(0.f);\n" - " for(int i=0; im_worldNormalOnB.w; i++) \n" - " center += src->m_worldPosB[i];\n" - " center /= (float)src->m_worldNormalOnB.w;\n" - " float4 tangent[2];\n" - " btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" - " \n" - " float4 r[2];\n" - " r[0] = center - posA;\n" - " r[1] = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " float4 linear, angular0, angular1;\n" - " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" - " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" - " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" - " }\n" - " dstC->m_center = center;\n" - " }\n" - " for(int i=0; i<4; i++)\n" - " {\n" - " if( im_worldNormalOnB.w )\n" - " {\n" - " dstC->m_worldPos[i] = src->m_worldPosB[i];\n" - " }\n" - " else\n" - " {\n" - " dstC->m_worldPos[i] = make_float4(0.f);\n" - " }\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" - "__global const unsigned int* bodyCount,\n" - "int nContacts,\n" - "float dt,\n" - "float positionDrift,\n" - "float positionConstraintCoeff\n" - ")\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nContacts )\n" - " {\n" - " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" - " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " Constraint4 cs;\n" - " float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;\n" - " float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1;\n" - " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" - " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" - " &cs );\n" - " \n" - " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" - " gConstraintOut[gIdx] = cs;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl deleted file mode 100644 index ba8ba735d05..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl +++ /dev/null @@ -1,22 +0,0 @@ - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" - - -__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB) -{ - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB); - } -} - -__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs) -{ - int pairId = get_global_id(0); - if( pairId< numPairs ) - { - pairs[pairId].z = 0xffffffff; - } -} \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h deleted file mode 100644 index bb949b20275..00000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ /dev/null @@ -1,482 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* updateAabbsKernelCL = - "#ifndef B3_UPDATE_AABBS_H\n" - "#define B3_UPDATE_AABBS_H\n" - "#ifndef B3_AABB_H\n" - "#define B3_AABB_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3Aabb b3Aabb_t;\n" - "struct b3Aabb\n" - "{\n" - " union\n" - " {\n" - " float m_min[4];\n" - " b3Float4 m_minVec;\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float m_max[4];\n" - " b3Float4 m_maxVec;\n" - " int m_signedMaxIndices[4];\n" - " };\n" - "};\n" - "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" - " b3Float4ConstArg pos,\n" - " b3QuatConstArg orn,\n" - " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" - "{\n" - " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" - " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" - " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" - " b3Mat3x3 m;\n" - " m = b3QuatGetRotationMatrix(orn);\n" - " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" - " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" - " \n" - " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" - " 0.f);\n" - " *aabbMinOut = center-extent;\n" - " *aabbMaxOut = center+extent;\n" - "}\n" - "/// conservative test for overlap between two aabbs\n" - "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" - " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" - " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" - " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "#endif //B3_AABB_H\n" - "#ifndef B3_COLLIDABLE_H\n" - "#define B3_COLLIDABLE_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "enum b3ShapeTypes\n" - "{\n" - " SHAPE_HEIGHT_FIELD=1,\n" - " SHAPE_CONVEX_HULL=3,\n" - " SHAPE_PLANE=4,\n" - " SHAPE_CONCAVE_TRIMESH=5,\n" - " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" - " SHAPE_SPHERE=7,\n" - " MAX_NUM_SHAPE_TYPES,\n" - "};\n" - "typedef struct b3Collidable b3Collidable_t;\n" - "struct b3Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " int m_shapeType;\n" - " union\n" - " {\n" - " int m_shapeIndex;\n" - " float m_height;\n" - " };\n" - "};\n" - "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" - "struct b3GpuChildShape\n" - "{\n" - " b3Float4 m_childPosition;\n" - " b3Quat m_childOrientation;\n" - " union\n" - " {\n" - " int m_shapeIndex;//used for SHAPE_COMPOUND_OF_CONVEX_HULLS\n" - " int m_capsuleAxis;\n" - " };\n" - " union \n" - " {\n" - " float m_radius;//used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES\n" - " int m_numChildShapes;//used for compound shape\n" - " };\n" - " union \n" - " {\n" - " float m_height;//used for childshape of SHAPE_COMPOUND_OF_CAPSULES\n" - " int m_collidableShapeIndex;\n" - " };\n" - " int m_shapeType;\n" - "};\n" - "struct b3CompoundOverlappingPair\n" - "{\n" - " int m_bodyIndexA;\n" - " int m_bodyIndexB;\n" - "// int m_pairType;\n" - " int m_childShapeIndexA;\n" - " int m_childShapeIndexB;\n" - "};\n" - "#endif //B3_COLLIDABLE_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n" - "{\n" - " __global const b3RigidBodyData_t* body = &bodies[bodyId];\n" - " b3Float4 position = body->m_pos;\n" - " b3Quat orientation = body->m_quat;\n" - " \n" - " int collidableIndex = body->m_collidableIdx;\n" - " int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" - " \n" - " if (shapeIndex>=0)\n" - " {\n" - " \n" - " b3Aabb_t localAabb = localShapeAABB[collidableIndex];\n" - " b3Aabb_t worldAabb;\n" - " \n" - " b3Float4 aabbAMinOut,aabbAMaxOut; \n" - " float margin = 0.f;\n" - " b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);\n" - " \n" - " worldAabb.m_minVec =aabbAMinOut;\n" - " worldAabb.m_minIndices[3] = bodyId;\n" - " worldAabb.m_maxVec = aabbAMaxOut;\n" - " worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 0 : 1;\n" - " worldAabbs[bodyId] = worldAabb;\n" - " }\n" - "}\n" - "#endif //B3_UPDATE_AABBS_H\n" - "__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB)\n" - "{\n" - " int nodeID = get_global_id(0);\n" - " if( nodeID < numNodes )\n" - " {\n" - " b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB);\n" - " }\n" - "}\n" - "__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs)\n" - "{\n" - " int pairId = get_global_id(0);\n" - " if( pairId< numPairs )\n" - " {\n" - " pairs[pairId].z = 0xffffffff;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/autogenerated/bullet2.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/autogenerated/bullet2.h deleted file mode 100644 index eaa27dfe8f7..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/autogenerated/bullet2.h +++ /dev/null @@ -1,987 +0,0 @@ -/* Copyright (C) 2011 Erwin Coumans & Charlie C -* -* This software is provided 'as-is', without any express or implied -* warranty. In no event will the authors be held liable for any damages -* arising from the use of this software. -* -* Permission is granted to anyone to use this software for any purpose, -* including commercial applications, and to alter it and redistribute it -* freely, subject to the following restrictions: -* -* 1. The origin of this software must not be misrepresented; you must not -* claim that you wrote the original software. If you use this software -* in a product, an acknowledgment in the product documentation would be -* appreciated but is not required. -* 2. Altered source versions must be plainly marked as such, and must not be -* misrepresented as being the original software. -* 3. This notice may not be removed or altered from any source distribution. -*/ -// Auto generated from Bullet/Extras/HeaderGenerator/bulletGenerate.py -#ifndef __BULLET2_H__ -#define __BULLET2_H__ -namespace Bullet3SerializeBullet2 -{ -// put an empty struct in the case -typedef struct bInvalidHandle -{ - int unused; -} bInvalidHandle; - -class PointerArray; -class b3PhysicsSystem; -class ListBase; -class b3Vector3FloatData; -class b3Vector3DoubleData; -class b3Matrix3x3FloatData; -class b3Matrix3x3DoubleData; -class b3TransformFloatData; -class b3TransformDoubleData; -class b3BvhSubtreeInfoData; -class b3OptimizedBvhNodeFloatData; -class b3OptimizedBvhNodeDoubleData; -class b3QuantizedBvhNodeData; -class b3QuantizedBvhFloatData; -class b3QuantizedBvhDoubleData; -class b3CollisionShapeData; -class b3StaticPlaneShapeData; -class b3ConvexInternalShapeData; -class b3PositionAndRadius; -class b3MultiSphereShapeData; -class b3IntIndexData; -class b3ShortIntIndexData; -class b3ShortIntIndexTripletData; -class b3CharIndexTripletData; -class b3MeshPartData; -class b3StridingMeshInterfaceData; -class b3TriangleMeshShapeData; -class b3ScaledTriangleMeshShapeData; -class b3CompoundShapeChildData; -class b3CompoundShapeData; -class b3CylinderShapeData; -class b3CapsuleShapeData; -class b3TriangleInfoData; -class b3TriangleInfoMapData; -class b3GImpactMeshShapeData; -class b3ConvexHullShapeData; -class b3CollisionObjectDoubleData; -class b3CollisionObjectFloatData; -class b3DynamicsWorldDoubleData; -class b3DynamicsWorldFloatData; -class b3RigidBodyFloatData; -class b3RigidBodyDoubleData; -class b3ConstraintInfo1; -class b3TypedConstraintData; -class b3Point2PointConstraintFloatData; -class b3Point2PointConstraintDoubleData; -class b3HingeConstraintDoubleData; -class b3HingeConstraintFloatData; -class b3ConeTwistConstraintData; -class b3Generic6DofConstraintData; -class b3Generic6DofSpringConstraintData; -class b3SliderConstraintData; -class b3ContactSolverInfoDoubleData; -class b3ContactSolverInfoFloatData; -class SoftBodyMaterialData; -class SoftBodyNodeData; -class SoftBodyLinkData; -class SoftBodyFaceData; -class SoftBodyTetraData; -class SoftRigidAnchorData; -class SoftBodyConfigData; -class SoftBodyPoseData; -class SoftBodyClusterData; -class b3SoftBodyJointData; -class b3SoftBodyFloatData; -// -------------------------------------------------- // -class PointerArray -{ -public: - int m_size; - int m_capacity; - void *m_data; -}; - -// -------------------------------------------------- // -class b3PhysicsSystem -{ -public: - PointerArray m_collisionShapes; - PointerArray m_collisionObjects; - PointerArray m_constraints; -}; - -// -------------------------------------------------- // -class ListBase -{ -public: - void *first; - void *last; -}; - -// -------------------------------------------------- // -class b3Vector3FloatData -{ -public: - float m_floats[4]; -}; - -// -------------------------------------------------- // -class b3Vector3DoubleData -{ -public: - double m_floats[4]; -}; - -// -------------------------------------------------- // -class b3Matrix3x3FloatData -{ -public: - b3Vector3FloatData m_el[3]; -}; - -// -------------------------------------------------- // -class b3Matrix3x3DoubleData -{ -public: - b3Vector3DoubleData m_el[3]; -}; - -// -------------------------------------------------- // -class b3TransformFloatData -{ -public: - b3Matrix3x3FloatData m_basis; - b3Vector3FloatData m_origin; -}; - -// -------------------------------------------------- // -class b3TransformDoubleData -{ -public: - b3Matrix3x3DoubleData m_basis; - b3Vector3DoubleData m_origin; -}; - -// -------------------------------------------------- // -class b3BvhSubtreeInfoData -{ -public: - int m_rootNodeIndex; - int m_subtreeSize; - short m_quantizedAabbMin[3]; - short m_quantizedAabbMax[3]; -}; - -// -------------------------------------------------- // -class b3OptimizedBvhNodeFloatData -{ -public: - b3Vector3FloatData m_aabbMinOrg; - b3Vector3FloatData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -// -------------------------------------------------- // -class b3OptimizedBvhNodeDoubleData -{ -public: - b3Vector3DoubleData m_aabbMinOrg; - b3Vector3DoubleData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -// -------------------------------------------------- // -class b3QuantizedBvhNodeData -{ -public: - short m_quantizedAabbMin[3]; - short m_quantizedAabbMax[3]; - int m_escapeIndexOrTriangleIndex; -}; - -// -------------------------------------------------- // -class b3QuantizedBvhFloatData -{ -public: - b3Vector3FloatData m_bvhAabbMin; - b3Vector3FloatData m_bvhAabbMax; - b3Vector3FloatData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeFloatData *m_contiguousNodesPtr; - b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr; - b3BvhSubtreeInfoData *m_subTreeInfoPtr; - int m_traversalMode; - int m_numSubtreeHeaders; -}; - -// -------------------------------------------------- // -class b3QuantizedBvhDoubleData -{ -public: - b3Vector3DoubleData m_bvhAabbMin; - b3Vector3DoubleData m_bvhAabbMax; - b3Vector3DoubleData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeDoubleData *m_contiguousNodesPtr; - b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr; - int m_traversalMode; - int m_numSubtreeHeaders; - b3BvhSubtreeInfoData *m_subTreeInfoPtr; -}; - -// -------------------------------------------------- // -class b3CollisionShapeData -{ -public: - char *m_name; - int m_shapeType; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3StaticPlaneShapeData -{ -public: - b3CollisionShapeData m_collisionShapeData; - b3Vector3FloatData m_localScaling; - b3Vector3FloatData m_planeNormal; - float m_planeConstant; - char m_pad[4]; -}; - -// -------------------------------------------------- // -class b3ConvexInternalShapeData -{ -public: - b3CollisionShapeData m_collisionShapeData; - b3Vector3FloatData m_localScaling; - b3Vector3FloatData m_implicitShapeDimensions; - float m_collisionMargin; - int m_padding; -}; - -// -------------------------------------------------- // -class b3PositionAndRadius -{ -public: - b3Vector3FloatData m_pos; - float m_radius; -}; - -// -------------------------------------------------- // -class b3MultiSphereShapeData -{ -public: - b3ConvexInternalShapeData m_convexInternalShapeData; - b3PositionAndRadius *m_localPositionArrayPtr; - int m_localPositionArraySize; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3IntIndexData -{ -public: - int m_value; -}; - -// -------------------------------------------------- // -class b3ShortIntIndexData -{ -public: - short m_value; - char m_pad[2]; -}; - -// -------------------------------------------------- // -class b3ShortIntIndexTripletData -{ -public: - short m_values[3]; - char m_pad[2]; -}; - -// -------------------------------------------------- // -class b3CharIndexTripletData -{ -public: - char m_values[3]; - char m_pad; -}; - -// -------------------------------------------------- // -class b3MeshPartData -{ -public: - b3Vector3FloatData *m_vertices3f; - b3Vector3DoubleData *m_vertices3d; - b3IntIndexData *m_indices32; - b3ShortIntIndexTripletData *m_3indices16; - b3CharIndexTripletData *m_3indices8; - b3ShortIntIndexData *m_indices16; - int m_numTriangles; - int m_numVertices; -}; - -// -------------------------------------------------- // -class b3StridingMeshInterfaceData -{ -public: - b3MeshPartData *m_meshPartsPtr; - b3Vector3FloatData m_scaling; - int m_numMeshParts; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3TriangleMeshShapeData -{ -public: - b3CollisionShapeData m_collisionShapeData; - b3StridingMeshInterfaceData m_meshInterface; - b3QuantizedBvhFloatData *m_quantizedFloatBvh; - b3QuantizedBvhDoubleData *m_quantizedDoubleBvh; - b3TriangleInfoMapData *m_triangleInfoMap; - float m_collisionMargin; - char m_pad3[4]; -}; - -// -------------------------------------------------- // -class b3ScaledTriangleMeshShapeData -{ -public: - b3TriangleMeshShapeData m_trimeshShapeData; - b3Vector3FloatData m_localScaling; -}; - -// -------------------------------------------------- // -class b3CompoundShapeChildData -{ -public: - b3TransformFloatData m_transform; - b3CollisionShapeData *m_childShape; - int m_childShapeType; - float m_childMargin; -}; - -// -------------------------------------------------- // -class b3CompoundShapeData -{ -public: - b3CollisionShapeData m_collisionShapeData; - b3CompoundShapeChildData *m_childShapePtr; - int m_numChildShapes; - float m_collisionMargin; -}; - -// -------------------------------------------------- // -class b3CylinderShapeData -{ -public: - b3ConvexInternalShapeData m_convexInternalShapeData; - int m_upAxis; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3CapsuleShapeData -{ -public: - b3ConvexInternalShapeData m_convexInternalShapeData; - int m_upAxis; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3TriangleInfoData -{ -public: - int m_flags; - float m_edgeV0V1Angle; - float m_edgeV1V2Angle; - float m_edgeV2V0Angle; -}; - -// -------------------------------------------------- // -class b3TriangleInfoMapData -{ -public: - int *m_hashTablePtr; - int *m_nextPtr; - b3TriangleInfoData *m_valueArrayPtr; - int *m_keyArrayPtr; - float m_convexEpsilon; - float m_planarEpsilon; - float m_equalVertexThreshold; - float m_edgeDistanceThreshold; - float m_zeroAreaThreshold; - int m_nextSize; - int m_hashTableSize; - int m_numValues; - int m_numKeys; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3GImpactMeshShapeData -{ -public: - b3CollisionShapeData m_collisionShapeData; - b3StridingMeshInterfaceData m_meshInterface; - b3Vector3FloatData m_localScaling; - float m_collisionMargin; - int m_gimpactSubType; -}; - -// -------------------------------------------------- // -class b3ConvexHullShapeData -{ -public: - b3ConvexInternalShapeData m_convexInternalShapeData; - b3Vector3FloatData *m_unscaledPointsFloatPtr; - b3Vector3DoubleData *m_unscaledPointsDoublePtr; - int m_numUnscaledPoints; - char m_padding3[4]; -}; - -// -------------------------------------------------- // -class b3CollisionObjectDoubleData -{ -public: - void *m_broadphaseHandle; - void *m_collisionShape; - b3CollisionShapeData *m_rootCollisionShape; - char *m_name; - b3TransformDoubleData m_worldTransform; - b3TransformDoubleData m_interpolationWorldTransform; - b3Vector3DoubleData m_interpolationLinearVelocity; - b3Vector3DoubleData m_interpolationAngularVelocity; - b3Vector3DoubleData m_anisotropicFriction; - double m_contactProcessingThreshold; - double m_deactivationTime; - double m_friction; - double m_rollingFriction; - double m_restitution; - double m_hitFraction; - double m_ccdSweptSphereRadius; - double m_ccdMotionThreshold; - int m_hasAnisotropicFriction; - int m_collisionFlags; - int m_islandTag1; - int m_companionId; - int m_activationState1; - int m_internalType; - int m_checkCollideWith; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3CollisionObjectFloatData -{ -public: - void *m_broadphaseHandle; - void *m_collisionShape; - b3CollisionShapeData *m_rootCollisionShape; - char *m_name; - b3TransformFloatData m_worldTransform; - b3TransformFloatData m_interpolationWorldTransform; - b3Vector3FloatData m_interpolationLinearVelocity; - b3Vector3FloatData m_interpolationAngularVelocity; - b3Vector3FloatData m_anisotropicFriction; - float m_contactProcessingThreshold; - float m_deactivationTime; - float m_friction; - float m_rollingFriction; - float m_restitution; - float m_hitFraction; - float m_ccdSweptSphereRadius; - float m_ccdMotionThreshold; - int m_hasAnisotropicFriction; - int m_collisionFlags; - int m_islandTag1; - int m_companionId; - int m_activationState1; - int m_internalType; - int m_checkCollideWith; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3RigidBodyFloatData -{ -public: - b3CollisionObjectFloatData m_collisionObjectData; - b3Matrix3x3FloatData m_invInertiaTensorWorld; - b3Vector3FloatData m_linearVelocity; - b3Vector3FloatData m_angularVelocity; - b3Vector3FloatData m_angularFactor; - b3Vector3FloatData m_linearFactor; - b3Vector3FloatData m_gravity; - b3Vector3FloatData m_gravity_acceleration; - b3Vector3FloatData m_invInertiaLocal; - b3Vector3FloatData m_totalForce; - b3Vector3FloatData m_totalTorque; - float m_inverseMass; - float m_linearDamping; - float m_angularDamping; - float m_additionalDampingFactor; - float m_additionalLinearDampingThresholdSqr; - float m_additionalAngularDampingThresholdSqr; - float m_additionalAngularDampingFactor; - float m_linearSleepingThreshold; - float m_angularSleepingThreshold; - int m_additionalDamping; -}; - -// -------------------------------------------------- // -class b3RigidBodyDoubleData -{ -public: - b3CollisionObjectDoubleData m_collisionObjectData; - b3Matrix3x3DoubleData m_invInertiaTensorWorld; - b3Vector3DoubleData m_linearVelocity; - b3Vector3DoubleData m_angularVelocity; - b3Vector3DoubleData m_angularFactor; - b3Vector3DoubleData m_linearFactor; - b3Vector3DoubleData m_gravity; - b3Vector3DoubleData m_gravity_acceleration; - b3Vector3DoubleData m_invInertiaLocal; - b3Vector3DoubleData m_totalForce; - b3Vector3DoubleData m_totalTorque; - double m_inverseMass; - double m_linearDamping; - double m_angularDamping; - double m_additionalDampingFactor; - double m_additionalLinearDampingThresholdSqr; - double m_additionalAngularDampingThresholdSqr; - double m_additionalAngularDampingFactor; - double m_linearSleepingThreshold; - double m_angularSleepingThreshold; - int m_additionalDamping; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3ConstraintInfo1 -{ -public: - int m_numConstraintRows; - int nub; -}; - -// -------------------------------------------------- // -class b3TypedConstraintData -{ -public: - bInvalidHandle *m_rbA; - bInvalidHandle *m_rbB; - char *m_name; - int m_objectType; - int m_userConstraintType; - int m_userConstraintId; - int m_needsFeedback; - float m_appliedImpulse; - float m_dbgDrawSize; - int m_disableCollisionsBetweenLinkedBodies; - int m_overrideNumSolverIterations; - float m_breakingImpulseThreshold; - int m_isEnabled; -}; - -// -------------------------------------------------- // -class b3Point2PointConstraintFloatData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3Vector3FloatData m_pivotInA; - b3Vector3FloatData m_pivotInB; -}; - -// -------------------------------------------------- // -class b3Point2PointConstraintDoubleData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3Vector3DoubleData m_pivotInA; - b3Vector3DoubleData m_pivotInB; -}; - -// -------------------------------------------------- // -class b3HingeConstraintDoubleData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3TransformDoubleData m_rbAFrame; - b3TransformDoubleData m_rbBFrame; - int m_useReferenceFrameA; - int m_angularOnly; - int m_enableAngularMotor; - float m_motorTargetVelocity; - float m_maxMotorImpulse; - float m_lowerLimit; - float m_upperLimit; - float m_limitSoftness; - float m_biasFactor; - float m_relaxationFactor; -}; - -// -------------------------------------------------- // -class b3HingeConstraintFloatData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3TransformFloatData m_rbAFrame; - b3TransformFloatData m_rbBFrame; - int m_useReferenceFrameA; - int m_angularOnly; - int m_enableAngularMotor; - float m_motorTargetVelocity; - float m_maxMotorImpulse; - float m_lowerLimit; - float m_upperLimit; - float m_limitSoftness; - float m_biasFactor; - float m_relaxationFactor; -}; - -// -------------------------------------------------- // -class b3ConeTwistConstraintData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3TransformFloatData m_rbAFrame; - b3TransformFloatData m_rbBFrame; - float m_swingSpan1; - float m_swingSpan2; - float m_twistSpan; - float m_limitSoftness; - float m_biasFactor; - float m_relaxationFactor; - float m_damping; - char m_pad[4]; -}; - -// -------------------------------------------------- // -class b3Generic6DofConstraintData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3TransformFloatData m_rbAFrame; - b3TransformFloatData m_rbBFrame; - b3Vector3FloatData m_linearUpperLimit; - b3Vector3FloatData m_linearLowerLimit; - b3Vector3FloatData m_angularUpperLimit; - b3Vector3FloatData m_angularLowerLimit; - int m_useLinearReferenceFrameA; - int m_useOffsetForConstraintFrame; -}; - -// -------------------------------------------------- // -class b3Generic6DofSpringConstraintData -{ -public: - b3Generic6DofConstraintData m_6dofData; - int m_springEnabled[6]; - float m_equilibriumPoint[6]; - float m_springStiffness[6]; - float m_springDamping[6]; -}; - -// -------------------------------------------------- // -class b3SliderConstraintData -{ -public: - b3TypedConstraintData m_typeConstraintData; - b3TransformFloatData m_rbAFrame; - b3TransformFloatData m_rbBFrame; - float m_linearUpperLimit; - float m_linearLowerLimit; - float m_angularUpperLimit; - float m_angularLowerLimit; - int m_useLinearReferenceFrameA; - int m_useOffsetForConstraintFrame; -}; - -// -------------------------------------------------- // -class b3ContactSolverInfoDoubleData -{ -public: - double m_tau; - double m_damping; - double m_friction; - double m_timeStep; - double m_restitution; - double m_maxErrorReduction; - double m_sor; - double m_erp; - double m_erp2; - double m_globalCfm; - double m_splitImpulsePenetrationThreshold; - double m_splitImpulseTurnErp; - double m_linearSlop; - double m_warmstartingFactor; - double m_maxGyroscopicForce; - double m_singleAxisRollingFrictionThreshold; - int m_numIterations; - int m_solverMode; - int m_restingContactRestitutionThreshold; - int m_minimumSolverBatchSize; - int m_splitImpulse; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3ContactSolverInfoFloatData -{ -public: - float m_tau; - float m_damping; - float m_friction; - float m_timeStep; - float m_restitution; - float m_maxErrorReduction; - float m_sor; - float m_erp; - float m_erp2; - float m_globalCfm; - float m_splitImpulsePenetrationThreshold; - float m_splitImpulseTurnErp; - float m_linearSlop; - float m_warmstartingFactor; - float m_maxGyroscopicForce; - float m_singleAxisRollingFrictionThreshold; - int m_numIterations; - int m_solverMode; - int m_restingContactRestitutionThreshold; - int m_minimumSolverBatchSize; - int m_splitImpulse; - char m_padding[4]; -}; - -// -------------------------------------------------- // -class b3DynamicsWorldDoubleData -{ -public: - b3ContactSolverInfoDoubleData m_solverInfo; - b3Vector3DoubleData m_gravity; -}; - -// -------------------------------------------------- // -class b3DynamicsWorldFloatData -{ -public: - b3ContactSolverInfoFloatData m_solverInfo; - b3Vector3FloatData m_gravity; -}; - -// -------------------------------------------------- // -class SoftBodyMaterialData -{ -public: - float m_linearStiffness; - float m_angularStiffness; - float m_volumeStiffness; - int m_flags; -}; - -// -------------------------------------------------- // -class SoftBodyNodeData -{ -public: - SoftBodyMaterialData *m_material; - b3Vector3FloatData m_position; - b3Vector3FloatData m_previousPosition; - b3Vector3FloatData m_velocity; - b3Vector3FloatData m_accumulatedForce; - b3Vector3FloatData m_normal; - float m_inverseMass; - float m_area; - int m_attach; - int m_pad; -}; - -// -------------------------------------------------- // -class SoftBodyLinkData -{ -public: - SoftBodyMaterialData *m_material; - int m_nodeIndices[2]; - float m_restLength; - int m_bbending; -}; - -// -------------------------------------------------- // -class SoftBodyFaceData -{ -public: - b3Vector3FloatData m_normal; - SoftBodyMaterialData *m_material; - int m_nodeIndices[3]; - float m_restArea; -}; - -// -------------------------------------------------- // -class SoftBodyTetraData -{ -public: - b3Vector3FloatData m_c0[4]; - SoftBodyMaterialData *m_material; - int m_nodeIndices[4]; - float m_restVolume; - float m_c1; - float m_c2; - int m_pad; -}; - -// -------------------------------------------------- // -class SoftRigidAnchorData -{ -public: - b3Matrix3x3FloatData m_c0; - b3Vector3FloatData m_c1; - b3Vector3FloatData m_localFrame; - bInvalidHandle *m_rigidBody; - int m_nodeIndex; - float m_c2; -}; - -// -------------------------------------------------- // -class SoftBodyConfigData -{ -public: - int m_aeroModel; - float m_baumgarte; - float m_damping; - float m_drag; - float m_lift; - float m_pressure; - float m_volume; - float m_dynamicFriction; - float m_poseMatch; - float m_rigidContactHardness; - float m_kineticContactHardness; - float m_softContactHardness; - float m_anchorHardness; - float m_softRigidClusterHardness; - float m_softKineticClusterHardness; - float m_softSoftClusterHardness; - float m_softRigidClusterImpulseSplit; - float m_softKineticClusterImpulseSplit; - float m_softSoftClusterImpulseSplit; - float m_maxVolume; - float m_timeScale; - int m_velocityIterations; - int m_positionIterations; - int m_driftIterations; - int m_clusterIterations; - int m_collisionFlags; -}; - -// -------------------------------------------------- // -class SoftBodyPoseData -{ -public: - b3Matrix3x3FloatData m_rot; - b3Matrix3x3FloatData m_scale; - b3Matrix3x3FloatData m_aqq; - b3Vector3FloatData m_com; - b3Vector3FloatData *m_positions; - float *m_weights; - int m_numPositions; - int m_numWeigts; - int m_bvolume; - int m_bframe; - float m_restVolume; - int m_pad; -}; - -// -------------------------------------------------- // -class SoftBodyClusterData -{ -public: - b3TransformFloatData m_framexform; - b3Matrix3x3FloatData m_locii; - b3Matrix3x3FloatData m_invwi; - b3Vector3FloatData m_com; - b3Vector3FloatData m_vimpulses[2]; - b3Vector3FloatData m_dimpulses[2]; - b3Vector3FloatData m_lv; - b3Vector3FloatData m_av; - b3Vector3FloatData *m_framerefs; - int *m_nodeIndices; - float *m_masses; - int m_numFrameRefs; - int m_numNodes; - int m_numMasses; - float m_idmass; - float m_imass; - int m_nvimpulses; - int m_ndimpulses; - float m_ndamping; - float m_ldamping; - float m_adamping; - float m_matching; - float m_maxSelfCollisionImpulse; - float m_selfCollisionImpulseFactor; - int m_containsAnchor; - int m_collide; - int m_clusterIndex; -}; - -// -------------------------------------------------- // -class b3SoftBodyJointData -{ -public: - void *m_bodyA; - void *m_bodyB; - b3Vector3FloatData m_refs[2]; - float m_cfm; - float m_erp; - float m_split; - int m_delete; - b3Vector3FloatData m_relPosition[2]; - int m_bodyAtype; - int m_bodyBtype; - int m_jointType; - int m_pad; -}; - -// -------------------------------------------------- // -class b3SoftBodyFloatData -{ -public: - b3CollisionObjectFloatData m_collisionObjectData; - SoftBodyPoseData *m_pose; - SoftBodyMaterialData **m_materials; - SoftBodyNodeData *m_nodes; - SoftBodyLinkData *m_links; - SoftBodyFaceData *m_faces; - SoftBodyTetraData *m_tetrahedra; - SoftRigidAnchorData *m_anchors; - SoftBodyClusterData *m_clusters; - b3SoftBodyJointData *m_joints; - int m_numMaterials; - int m_numNodes; - int m_numLinks; - int m_numFaces; - int m_numTetrahedra; - int m_numAnchors; - int m_numClusters; - int m_numJoints; - SoftBodyConfigData m_config; -}; - -} // namespace Bullet3SerializeBullet2 -#endif //__BULLET2_H__ \ No newline at end of file diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3BulletFile.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3BulletFile.cpp deleted file mode 100644 index d2a71636708..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3BulletFile.cpp +++ /dev/null @@ -1,400 +0,0 @@ -/* -bParse -Copyright (c) 2006-2010 Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3BulletFile.h" -#include "b3Defines.h" -#include "b3DNA.h" - -#if !defined(__CELLOS_LV2__) && !defined(__MWERKS__) -#include -#endif -#include - -// 32 && 64 bit versions -#ifdef B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES -#ifdef _WIN64 -extern char b3s_bulletDNAstr64[]; -extern int b3s_bulletDNAlen64; -#else -extern char b3s_bulletDNAstr[]; -extern int b3s_bulletDNAlen; -#endif //_WIN64 -#else //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - -extern char b3s_bulletDNAstr64[]; -extern int b3s_bulletDNAlen64; -extern char b3s_bulletDNAstr[]; -extern int b3s_bulletDNAlen; - -#endif //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - -using namespace bParse; - -b3BulletFile::b3BulletFile() - : bFile("", "BULLET ") -{ - mMemoryDNA = new bDNA(); //this memory gets released in the bFile::~bFile destructor,@todo not consistent with the rule 'who allocates it, has to deallocate it" - - m_DnaCopy = 0; - -#ifdef B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES -#ifdef _WIN64 - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen64, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr64, b3s_bulletDNAlen64); - mMemoryDNA->init(m_DnaCopy, b3s_bulletDNAlen64); -#else //_WIN64 - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr, b3s_bulletDNAlen); - mMemoryDNA->init(m_DnaCopy, b3s_bulletDNAlen); -#endif //_WIN64 -#else //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen64, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr64, b3s_bulletDNAlen64); - mMemoryDNA->init(m_DnaCopy, b3s_bulletDNAlen64); - } - else - { - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr, b3s_bulletDNAlen); - mMemoryDNA->init(m_DnaCopy, b3s_bulletDNAlen); - } -#endif //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES -} - -b3BulletFile::b3BulletFile(const char* fileName) - : bFile(fileName, "BULLET ") -{ - m_DnaCopy = 0; -} - -b3BulletFile::b3BulletFile(char* memoryBuffer, int len) - : bFile(memoryBuffer, len, "BULLET ") -{ - m_DnaCopy = 0; -} - -b3BulletFile::~b3BulletFile() -{ - if (m_DnaCopy) - b3AlignedFree(m_DnaCopy); - - while (m_dataBlocks.size()) - { - char* dataBlock = m_dataBlocks[m_dataBlocks.size() - 1]; - delete[] dataBlock; - m_dataBlocks.pop_back(); - } -} - -// ----------------------------------------------------- // -void b3BulletFile::parseData() -{ - // printf ("Building datablocks"); - // printf ("Chunk size = %d",CHUNK_HEADER_LEN); - // printf ("File chunk size = %d",ChunkUtils::getOffset(mFlags)); - - const bool brokenDNA = (mFlags & FD_BROKEN_DNA) != 0; - - //const bool swap = (mFlags&FD_ENDIAN_SWAP)!=0; - - mDataStart = 12; - - char* dataPtr = mFileBuffer + mDataStart; - - bChunkInd dataChunk; - dataChunk.code = 0; - - //dataPtr += ChunkUtils::getNextBlock(&dataChunk, dataPtr, mFlags); - int seek = getNextBlock(&dataChunk, dataPtr, mFlags); - - if (mFlags & FD_ENDIAN_SWAP) - swapLen(dataPtr); - - //dataPtr += ChunkUtils::getOffset(mFlags); - char* dataPtrHead = 0; - - while (dataChunk.code != B3_DNA1) - { - if (!brokenDNA || (dataChunk.code != B3_QUANTIZED_BVH_CODE)) - { - // one behind - if (dataChunk.code == B3_SDNA) break; - //if (dataChunk.code == DNA1) break; - - // same as (BHEAD+DATA dependency) - dataPtrHead = dataPtr + ChunkUtils::getOffset(mFlags); - if (dataChunk.dna_nr >= 0) - { - char* id = readStruct(dataPtrHead, dataChunk); - - // lookup maps - if (id) - { - m_chunkPtrPtrMap.insert(dataChunk.oldPtr, dataChunk); - mLibPointers.insert(dataChunk.oldPtr, (bStructHandle*)id); - - m_chunks.push_back(dataChunk); - // block it - //bListBasePtr *listID = mMain->getListBasePtr(dataChunk.code); - //if (listID) - // listID->push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_SOFTBODY_CODE) - { - m_softBodies.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_RIGIDBODY_CODE) - { - m_rigidBodies.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_DYNAMICSWORLD_CODE) - { - m_dynamicsWorldInfo.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_CONSTRAINT_CODE) - { - m_constraints.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_QUANTIZED_BVH_CODE) - { - m_bvhs.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_TRIANLGE_INFO_MAP) - { - m_triangleInfoMaps.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_COLLISIONOBJECT_CODE) - { - m_collisionObjects.push_back((bStructHandle*)id); - } - - if (dataChunk.code == B3_SHAPE_CODE) - { - m_collisionShapes.push_back((bStructHandle*)id); - } - - // if (dataChunk.code == GLOB) - // { - // m_glob = (bStructHandle*) id; - // } - } - else - { - //printf("unknown chunk\n"); - - mLibPointers.insert(dataChunk.oldPtr, (bStructHandle*)dataPtrHead); - } - } - else - { - printf("skipping B3_QUANTIZED_BVH_CODE due to broken DNA\n"); - } - - dataPtr += seek; - - seek = getNextBlock(&dataChunk, dataPtr, mFlags); - if (mFlags & FD_ENDIAN_SWAP) - swapLen(dataPtr); - - if (seek < 0) - break; - } -} - -void b3BulletFile::addDataBlock(char* dataBlock) -{ - m_dataBlocks.push_back(dataBlock); -} - -void b3BulletFile::writeDNA(FILE* fp) -{ - bChunkInd dataChunk; - dataChunk.code = B3_DNA1; - dataChunk.dna_nr = 0; - dataChunk.nr = 1; -#ifdef B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { -#ifdef _WIN64 - dataChunk.len = b3s_bulletDNAlen64; - dataChunk.oldPtr = b3s_bulletDNAstr64; - fwrite(&dataChunk, sizeof(bChunkInd), 1, fp); - fwrite(b3s_bulletDNAstr64, b3s_bulletDNAlen64, 1, fp); -#else - b3Assert(0); -#endif - } - else - { -#ifndef _WIN64 - dataChunk.len = b3s_bulletDNAlen; - dataChunk.oldPtr = b3s_bulletDNAstr; - fwrite(&dataChunk, sizeof(bChunkInd), 1, fp); - fwrite(b3s_bulletDNAstr, b3s_bulletDNAlen, 1, fp); -#else //_WIN64 - b3Assert(0); -#endif //_WIN64 - } -#else //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { - dataChunk.len = b3s_bulletDNAlen64; - dataChunk.oldPtr = b3s_bulletDNAstr64; - fwrite(&dataChunk, sizeof(bChunkInd), 1, fp); - fwrite(b3s_bulletDNAstr64, b3s_bulletDNAlen64, 1, fp); - } - else - { - dataChunk.len = b3s_bulletDNAlen; - dataChunk.oldPtr = b3s_bulletDNAstr; - fwrite(&dataChunk, sizeof(bChunkInd), 1, fp); - fwrite(b3s_bulletDNAstr, b3s_bulletDNAlen, 1, fp); - } -#endif //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES -} - -void b3BulletFile::parse(int verboseMode) -{ -#ifdef B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { -#ifdef _WIN64 - - if (m_DnaCopy) - delete m_DnaCopy; - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen64, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr64, b3s_bulletDNAlen64); - parseInternal(verboseMode, (char*)b3s_bulletDNAstr64, b3s_bulletDNAlen64); -#else - b3Assert(0); -#endif - } - else - { -#ifndef _WIN64 - - if (m_DnaCopy) - delete m_DnaCopy; - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr, b3s_bulletDNAlen); - parseInternal(verboseMode, m_DnaCopy, b3s_bulletDNAlen); -#else - b3Assert(0); -#endif - } -#else //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { - if (m_DnaCopy) - delete m_DnaCopy; - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen64, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr64, b3s_bulletDNAlen64); - parseInternal(verboseMode, m_DnaCopy, b3s_bulletDNAlen64); - } - else - { - if (m_DnaCopy) - delete m_DnaCopy; - m_DnaCopy = (char*)b3AlignedAlloc(b3s_bulletDNAlen, 16); - memcpy(m_DnaCopy, b3s_bulletDNAstr, b3s_bulletDNAlen); - parseInternal(verboseMode, m_DnaCopy, b3s_bulletDNAlen); - } -#endif //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - - //the parsing will convert to cpu endian - mFlags &= ~FD_ENDIAN_SWAP; - - int littleEndian = 1; - littleEndian = ((char*)&littleEndian)[0]; - - mFileBuffer[8] = littleEndian ? 'v' : 'V'; -} - -// experimental -int b3BulletFile::write(const char* fileName, bool fixupPointers) -{ - FILE* fp = fopen(fileName, "wb"); - if (fp) - { - char header[B3_SIZEOFBLENDERHEADER]; - memcpy(header, m_headerString, 7); - int endian = 1; - endian = ((char*)&endian)[0]; - - if (endian) - { - header[7] = '_'; - } - else - { - header[7] = '-'; - } - if (VOID_IS_8) - { - header[8] = 'V'; - } - else - { - header[8] = 'v'; - } - - header[9] = '2'; - header[10] = '7'; - header[11] = '5'; - - fwrite(header, B3_SIZEOFBLENDERHEADER, 1, fp); - - writeChunks(fp, fixupPointers); - - writeDNA(fp); - - fclose(fp); - } - else - { - printf("Error: cannot open file %s for writing\n", fileName); - return 0; - } - return 1; -} - -void b3BulletFile::addStruct(const char* structType, void* data, int len, void* oldPtr, int code) -{ - bParse::bChunkInd dataChunk; - dataChunk.code = code; - dataChunk.nr = 1; - dataChunk.len = len; - dataChunk.dna_nr = mMemoryDNA->getReverseType(structType); - dataChunk.oldPtr = oldPtr; - - ///Perform structure size validation - short* structInfo = mMemoryDNA->getStruct(dataChunk.dna_nr); - int elemBytes; - elemBytes = mMemoryDNA->getLength(structInfo[0]); - // int elemBytes = mMemoryDNA->getElementSize(structInfo[0],structInfo[1]); - assert(len == elemBytes); - - mLibPointers.insert(dataChunk.oldPtr, (bStructHandle*)data); - m_chunks.push_back(dataChunk); -} diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3BulletFile.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3BulletFile.h deleted file mode 100644 index ede1d378ae0..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3BulletFile.h +++ /dev/null @@ -1,74 +0,0 @@ -/* -bParse -Copyright (c) 2006-2010 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_BULLET_FILE_H -#define B3_BULLET_FILE_H - -#include "b3File.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "b3Defines.h" - -#include "Bullet3Serialize/Bullet2FileLoader/b3Serializer.h" - -namespace bParse -{ -// ----------------------------------------------------- // -class b3BulletFile : public bFile -{ -protected: - char* m_DnaCopy; - -public: - b3AlignedObjectArray m_softBodies; - - b3AlignedObjectArray m_rigidBodies; - - b3AlignedObjectArray m_collisionObjects; - - b3AlignedObjectArray m_collisionShapes; - - b3AlignedObjectArray m_constraints; - - b3AlignedObjectArray m_bvhs; - - b3AlignedObjectArray m_triangleInfoMaps; - - b3AlignedObjectArray m_dynamicsWorldInfo; - - b3AlignedObjectArray m_dataBlocks; - b3BulletFile(); - - b3BulletFile(const char* fileName); - - b3BulletFile(char* memoryBuffer, int len); - - virtual ~b3BulletFile(); - - virtual void addDataBlock(char* dataBlock); - - // experimental - virtual int write(const char* fileName, bool fixupPointers = false); - - virtual void parse(int verboseMode); - - virtual void parseData(); - - virtual void writeDNA(FILE* fp); - - void addStruct(const char* structType, void* data, int len, void* oldPtr, int code); -}; -}; // namespace bParse - -#endif //B3_BULLET_FILE_H diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Chunk.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Chunk.cpp deleted file mode 100644 index ff75ff8cc4c..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Chunk.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3Chunk.h" -#include "b3Defines.h" -#include "b3File.h" - -#if !defined(__CELLOS_LV2__) && !defined(__MWERKS__) -#include -#endif -#include - -using namespace bParse; - -// ----------------------------------------------------- // -short ChunkUtils::swapShort(short sht) -{ - B3_SWITCH_SHORT(sht); - return sht; -} - -// ----------------------------------------------------- // -int ChunkUtils::swapInt(int inte) -{ - B3_SWITCH_INT(inte); - return inte; -} - -// ----------------------------------------------------- // -b3Long64 ChunkUtils::swapLong64(b3Long64 lng) -{ - B3_SWITCH_LONGINT(lng); - return lng; -} - -// ----------------------------------------------------- // -int ChunkUtils::getOffset(int flags) -{ - // if the file is saved in a - // different format, get the - // file's chunk size - int res = CHUNK_HEADER_LEN; - - if (VOID_IS_8) - { - if (flags & FD_BITS_VARIES) - res = sizeof(bChunkPtr4); - } - else - { - if (flags & FD_BITS_VARIES) - res = sizeof(bChunkPtr8); - } - return res; -} - -//eof diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Chunk.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Chunk.h deleted file mode 100644 index c9d0f37d9ec..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Chunk.h +++ /dev/null @@ -1,84 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BCHUNK_H__ -#define __BCHUNK_H__ - -#if defined(_WIN32) && !defined(__MINGW32__) -#define b3Long64 __int64 -#elif defined(__MINGW32__) -#include -#define b3Long64 int64_t -#else -#define b3Long64 long long -#endif - -namespace bParse -{ -// ----------------------------------------------------- // -class bChunkPtr4 -{ -public: - bChunkPtr4() {} - int code; - int len; - union { - int m_uniqueInt; - }; - int dna_nr; - int nr; -}; - -// ----------------------------------------------------- // -class bChunkPtr8 -{ -public: - bChunkPtr8() {} - int code, len; - union { - b3Long64 oldPrev; - int m_uniqueInts[2]; - }; - int dna_nr, nr; -}; - -// ----------------------------------------------------- // -class bChunkInd -{ -public: - bChunkInd() {} - int code, len; - void *oldPtr; - int dna_nr, nr; -}; - -// ----------------------------------------------------- // -class ChunkUtils -{ -public: - // file chunk offset - static int getOffset(int flags); - - // endian utils - static short swapShort(short sht); - static int swapInt(int inte); - static b3Long64 swapLong64(b3Long64 lng); -}; - -const int CHUNK_HEADER_LEN = ((sizeof(bChunkInd))); -const bool VOID_IS_8 = ((sizeof(void *) == 8)); -} // namespace bParse - -#endif //__BCHUNK_H__ diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Common.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Common.h deleted file mode 100644 index 5884fad4d6f..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Common.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BCOMMON_H__ -#define __BCOMMON_H__ - -#include -//#include "bLog.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3HashMap.h" - -namespace bParse -{ -class bMain; -class bFileData; -class bFile; -class bDNA; - -// delete void* undefined -typedef struct bStructHandle -{ - int unused; -} bStructHandle; -typedef b3AlignedObjectArray bListBasePtr; -typedef b3HashMap bPtrMap; -} // namespace bParse - -#endif //__BCOMMON_H__ diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3DNA.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3DNA.cpp deleted file mode 100644 index 09c8f23859a..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3DNA.cpp +++ /dev/null @@ -1,616 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -#include - -#include "b3DNA.h" -#include "b3Chunk.h" -#include -#include -#include - -//this define will force traversal of structures, to check backward (and forward) compatibility -//#define TEST_BACKWARD_FORWARD_COMPATIBILITY - -using namespace bParse; - -// ----------------------------------------------------- // -bDNA::bDNA() - : mPtrLen(0) -{ - // -- -} - -// ----------------------------------------------------- // -bDNA::~bDNA() -{ - // -- -} - -// ----------------------------------------------------- // -bool bDNA::lessThan(bDNA *file) -{ - return (m_Names.size() < file->m_Names.size()); -} - -// ----------------------------------------------------- // -char *bDNA::getName(int ind) -{ - assert(ind <= (int)m_Names.size()); - return m_Names[ind].m_name; -} - -// ----------------------------------------------------- // -char *bDNA::getType(int ind) -{ - assert(ind <= (int)mTypes.size()); - return mTypes[ind]; -} - -// ----------------------------------------------------- // -short *bDNA::getStruct(int ind) -{ - assert(ind <= (int)mStructs.size()); - return mStructs[ind]; -} - -// ----------------------------------------------------- // -short bDNA::getLength(int ind) -{ - assert(ind <= (int)mTlens.size()); - return mTlens[ind]; -} - -// ----------------------------------------------------- // -int bDNA::getReverseType(short type) -{ - int *intPtr = mStructReverse.find(type); - if (intPtr) - return *intPtr; - - return -1; -} - -// ----------------------------------------------------- // -int bDNA::getReverseType(const char *type) -{ - b3HashString key(type); - int *valuePtr = mTypeLookup.find(key); - if (valuePtr) - return *valuePtr; - - return -1; -} - -// ----------------------------------------------------- // -int bDNA::getNumStructs() -{ - return (int)mStructs.size(); -} - -// ----------------------------------------------------- // -bool bDNA::flagNotEqual(int dna_nr) -{ - assert(dna_nr <= (int)mCMPFlags.size()); - return mCMPFlags[dna_nr] == FDF_STRUCT_NEQU; -} - -// ----------------------------------------------------- // -bool bDNA::flagEqual(int dna_nr) -{ - assert(dna_nr <= (int)mCMPFlags.size()); - int flag = mCMPFlags[dna_nr]; - return flag == FDF_STRUCT_EQU; -} - -// ----------------------------------------------------- // -bool bDNA::flagNone(int dna_nr) -{ - assert(dna_nr <= (int)mCMPFlags.size()); - return mCMPFlags[dna_nr] == FDF_NONE; -} - -// ----------------------------------------------------- // -int bDNA::getPointerSize() -{ - return mPtrLen; -} - -// ----------------------------------------------------- // -void bDNA::initRecurseCmpFlags(int iter) -{ - // iter is FDF_STRUCT_NEQU - - short *oldStrc = mStructs[iter]; - short type = oldStrc[0]; - - for (int i = 0; i < (int)mStructs.size(); i++) - { - if (i != iter && mCMPFlags[i] == FDF_STRUCT_EQU) - { - short *curStruct = mStructs[i]; - int eleLen = curStruct[1]; - curStruct += 2; - - for (int j = 0; j < eleLen; j++, curStruct += 2) - { - if (curStruct[0] == type) - { - //char *name = m_Names[curStruct[1]].m_name; - //if (name[0] != '*') - if (m_Names[curStruct[1]].m_isPointer) - { - mCMPFlags[i] = FDF_STRUCT_NEQU; - initRecurseCmpFlags(i); - } - } - } - } - } -} - -// ----------------------------------------------------- // -void bDNA::initCmpFlags(bDNA *memDNA) -{ - // compare the file to memory - // this ptr should be the file data - - assert(!(m_Names.size() == 0)); // && "SDNA empty!"); - mCMPFlags.resize(mStructs.size(), FDF_NONE); - - int i; - for (i = 0; i < (int)mStructs.size(); i++) - { - short *oldStruct = mStructs[i]; - - int oldLookup = getReverseType(oldStruct[0]); - if (oldLookup == -1) - { - mCMPFlags[i] = FDF_NONE; - continue; - } - //char* typeName = mTypes[oldStruct[0]]; - -//#define SLOW_FORWARD_COMPATIBLE 1 -#ifdef SLOW_FORWARD_COMPATIBLE - char *typeName = mTypes[oldLookup]; - int newLookup = memDNA->getReverseType(typeName); - if (newLookup == -1) - { - mCMPFlags[i] = FDF_NONE; - continue; - } - short *curStruct = memDNA->mStructs[newLookup]; -#else - // memory for file - - if (oldLookup < memDNA->mStructs.size()) - { - short *curStruct = memDNA->mStructs[oldLookup]; -#endif - - // rebuild... - mCMPFlags[i] = FDF_STRUCT_NEQU; - -#ifndef TEST_BACKWARD_FORWARD_COMPATIBILITY - - if (curStruct[1] == oldStruct[1]) - { - // type len same ... - if (mTlens[oldStruct[0]] == memDNA->mTlens[curStruct[0]]) - { - bool isSame = true; - int elementLength = oldStruct[1]; - - curStruct += 2; - oldStruct += 2; - - for (int j = 0; j < elementLength; j++, curStruct += 2, oldStruct += 2) - { - // type the same - //const char* typeFileDNA = mTypes[oldStruct[0]]; - //const char* typeMemDNA = mTypes[curStruct[0]]; - if (strcmp(mTypes[oldStruct[0]], memDNA->mTypes[curStruct[0]]) != 0) - { - isSame = false; - break; - } - - // name the same - if (strcmp(m_Names[oldStruct[1]].m_name, memDNA->m_Names[curStruct[1]].m_name) != 0) - { - isSame = false; - break; - } - } - // flag valid == - if (isSame) - mCMPFlags[i] = FDF_STRUCT_EQU; - } - } -#endif - } -} - -// recurse in -for (i = 0; i < (int)mStructs.size(); i++) -{ - if (mCMPFlags[i] == FDF_STRUCT_NEQU) - initRecurseCmpFlags(i); -} -} - -static int name_is_array(char *name, int *dim1, int *dim2) -{ - int len = strlen(name); - /*fprintf(stderr,"[%s]",name);*/ - /*if (len >= 1) { - if (name[len-1] != ']') - return 1; - } - return 0;*/ - char *bp; - int num; - if (dim1) - { - *dim1 = 1; - } - if (dim2) - { - *dim2 = 1; - } - bp = strchr(name, '['); - if (!bp) - { - return 0; - } - num = 0; - while (++bp < name + len - 1) - { - const char c = *bp; - if (c == ']') - { - break; - } - if (c <= '9' && c >= '0') - { - num *= 10; - num += (c - '0'); - } - else - { - printf("array parse error.\n"); - return 0; - } - } - if (dim2) - { - *dim2 = num; - } - - /* find second dim, if any. */ - bp = strchr(bp, '['); - if (!bp) - { - return 1; /* at least we got the first dim. */ - } - num = 0; - while (++bp < name + len - 1) - { - const char c = *bp; - if (c == ']') - { - break; - } - if (c <= '9' && c >= '0') - { - num *= 10; - num += (c - '0'); - } - else - { - printf("array2 parse error.\n"); - return 1; - } - } - if (dim1) - { - if (dim2) - { - *dim1 = *dim2; - *dim2 = num; - } - else - { - *dim1 = num; - } - } - - return 1; -} - -// ----------------------------------------------------- // -void bDNA::init(char *data, int len, bool swap) -{ - int *intPtr = 0; - short *shtPtr = 0; - char *cp = 0; - int dataLen = 0; - //long nr=0; - intPtr = (int *)data; - - /* - SDNA (4 bytes) (magic number) - NAME (4 bytes) - (4 bytes) amount of names (int) - - - */ - - if (strncmp(data, "SDNA", 4) == 0) - { - // skip ++ NAME - intPtr++; - intPtr++; - } - - // Parse names - if (swap) - { - *intPtr = ChunkUtils::swapInt(*intPtr); - } - dataLen = *intPtr; - intPtr++; - - cp = (char *)intPtr; - int i; - for (i = 0; i < dataLen; i++) - { - bNameInfo info; - info.m_name = cp; - info.m_isPointer = (info.m_name[0] == '*') || (info.m_name[1] == '*'); - name_is_array(info.m_name, &info.m_dim0, &info.m_dim1); - m_Names.push_back(info); - while (*cp) cp++; - cp++; - } - - cp = b3AlignPointer(cp, 4); - - /* - TYPE (4 bytes) - amount of types (int) - - - */ - - intPtr = (int *)cp; - assert(strncmp(cp, "TYPE", 4) == 0); - intPtr++; - - if (swap) - { - *intPtr = ChunkUtils::swapInt(*intPtr); - } - dataLen = *intPtr; - intPtr++; - - cp = (char *)intPtr; - for (i = 0; i < dataLen; i++) - { - mTypes.push_back(cp); - while (*cp) cp++; - cp++; - } - - cp = b3AlignPointer(cp, 4); - - /* - TLEN (4 bytes) - (short) the lengths of types - - */ - - // Parse type lens - intPtr = (int *)cp; - assert(strncmp(cp, "TLEN", 4) == 0); - intPtr++; - - dataLen = (int)mTypes.size(); - - shtPtr = (short *)intPtr; - for (i = 0; i < dataLen; i++, shtPtr++) - { - if (swap) - shtPtr[0] = ChunkUtils::swapShort(shtPtr[0]); - mTlens.push_back(shtPtr[0]); - } - - if (dataLen & 1) shtPtr++; - - /* - STRC (4 bytes) - amount of structs (int) - - - - - - - */ - - intPtr = (int *)shtPtr; - cp = (char *)intPtr; - assert(strncmp(cp, "STRC", 4) == 0); - intPtr++; - - if (swap) - { - *intPtr = ChunkUtils::swapInt(*intPtr); - } - dataLen = *intPtr; - intPtr++; - - shtPtr = (short *)intPtr; - for (i = 0; i < dataLen; i++) - { - mStructs.push_back(shtPtr); - if (swap) - { - shtPtr[0] = ChunkUtils::swapShort(shtPtr[0]); - shtPtr[1] = ChunkUtils::swapShort(shtPtr[1]); - - int len = shtPtr[1]; - shtPtr += 2; - - for (int a = 0; a < len; a++, shtPtr += 2) - { - shtPtr[0] = ChunkUtils::swapShort(shtPtr[0]); - shtPtr[1] = ChunkUtils::swapShort(shtPtr[1]); - } - } - else - shtPtr += (2 * shtPtr[1]) + 2; - } - - // build reverse lookups - for (i = 0; i < (int)mStructs.size(); i++) - { - short *strc = mStructs.at(i); - if (!mPtrLen && strcmp(mTypes[strc[0]], "ListBase") == 0) - { - mPtrLen = mTlens[strc[0]] / 2; - } - - mStructReverse.insert(strc[0], i); - mTypeLookup.insert(b3HashString(mTypes[strc[0]]), i); - } -} - -// ----------------------------------------------------- // -int bDNA::getArraySize(char *string) -{ - int ret = 1; - int len = strlen(string); - - char *next = 0; - for (int i = 0; i < len; i++) - { - char c = string[i]; - - if (c == '[') - next = &string[i + 1]; - else if (c == ']') - if (next) - ret *= atoi(next); - } - - // print (string << ' ' << ret); - return ret; -} - -void bDNA::dumpTypeDefinitions() -{ - int i; - - int numTypes = mTypes.size(); - - for (i = 0; i < numTypes; i++) - { - } - - for (i = 0; i < (int)mStructs.size(); i++) - { - int totalBytes = 0; - short *oldStruct = mStructs[i]; - - int oldLookup = getReverseType(oldStruct[0]); - if (oldLookup == -1) - { - mCMPFlags[i] = FDF_NONE; - continue; - } - - short *newStruct = mStructs[oldLookup]; - char *typeName = mTypes[newStruct[0]]; - printf("%3d: %s ", i, typeName); - - //char *name = mNames[oldStruct[1]]; - int len = oldStruct[1]; - printf(" (%d fields) ", len); - oldStruct += 2; - - printf("{"); - int j; - for (j = 0; j < len; ++j, oldStruct += 2) - { - const char *name = m_Names[oldStruct[1]].m_name; - printf("%s %s", mTypes[oldStruct[0]], name); - int elemNumBytes = 0; - int arrayDimensions = getArraySizeNew(oldStruct[1]); - - if (m_Names[oldStruct[1]].m_isPointer) - { - elemNumBytes = VOID_IS_8 ? 8 : 4; - } - else - { - elemNumBytes = getLength(oldStruct[0]); - } - printf(" /* %d bytes */", elemNumBytes * arrayDimensions); - - if (j == len - 1) - { - printf(";}"); - } - else - { - printf("; "); - } - totalBytes += elemNumBytes * arrayDimensions; - } - printf("\ntotalBytes=%d\n\n", totalBytes); - } - -#if 0 - /* dump out display of types and their sizes */ - for (i=0; itypes_count; ++i) { - /* if (!bf->types[i].is_struct)*/ - { - printf("%3d: sizeof(%s%s)=%d", - i, - bf->types[i].is_struct ? "struct " : "atomic ", - bf->types[i].name, bf->types[i].size); - if (bf->types[i].is_struct) { - int j; - printf(", %d fields: { ", bf->types[i].fieldtypes_count); - for (j=0; jtypes[i].fieldtypes_count; ++j) { - printf("%s %s", - bf->types[bf->types[i].fieldtypes[j]].name, - bf->names[bf->types[i].fieldnames[j]]); - if (j == bf->types[i].fieldtypes_count-1) { - printf(";}"); - } else { - printf("; "); - } - } - } - printf("\n\n"); - - } - } -#endif -} - -//eof diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3DNA.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3DNA.h deleted file mode 100644 index ca6004d960d..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3DNA.h +++ /dev/null @@ -1,101 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BDNA_H__ -#define __BDNA_H__ - -#include "b3Common.h" - -namespace bParse -{ -struct bNameInfo -{ - char *m_name; - bool m_isPointer; - int m_dim0; - int m_dim1; -}; - -class bDNA -{ -public: - bDNA(); - ~bDNA(); - - void init(char *data, int len, bool swap = false); - - int getArraySize(char *str); - int getArraySizeNew(short name) - { - const bNameInfo &nameInfo = m_Names[name]; - return nameInfo.m_dim0 * nameInfo.m_dim1; - } - int getElementSize(short type, short name) - { - const bNameInfo &nameInfo = m_Names[name]; - int size = nameInfo.m_isPointer ? mPtrLen * nameInfo.m_dim0 * nameInfo.m_dim1 : mTlens[type] * nameInfo.m_dim0 * nameInfo.m_dim1; - return size; - } - - int getNumNames() const - { - return m_Names.size(); - } - - char *getName(int ind); - char *getType(int ind); - short *getStruct(int ind); - short getLength(int ind); - int getReverseType(short type); - int getReverseType(const char *type); - - int getNumStructs(); - - // - bool lessThan(bDNA *other); - - void initCmpFlags(bDNA *memDNA); - bool flagNotEqual(int dna_nr); - bool flagEqual(int dna_nr); - bool flagNone(int dna_nr); - - int getPointerSize(); - - void dumpTypeDefinitions(); - -private: - enum FileDNAFlags - { - FDF_NONE = 0, - FDF_STRUCT_NEQU, - FDF_STRUCT_EQU - }; - - void initRecurseCmpFlags(int i); - - b3AlignedObjectArray mCMPFlags; - - b3AlignedObjectArray m_Names; - b3AlignedObjectArray mTypes; - b3AlignedObjectArray mStructs; - b3AlignedObjectArray mTlens; - b3HashMap mStructReverse; - b3HashMap mTypeLookup; - - int mPtrLen; -}; -} // namespace bParse - -#endif //__BDNA_H__ diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Defines.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Defines.h deleted file mode 100644 index 0524c94db18..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Defines.h +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright (C) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com -* -* This software is provided 'as-is', without any express or implied -* warranty. In no event will the authors be held liable for any damages -* arising from the use of this software. -* -* Permission is granted to anyone to use this software for any purpose, -* including commercial applications, and to alter it and redistribute it -* freely, subject to the following restrictions: -* -* 1. The origin of this software must not be misrepresented; you must not -* claim that you wrote the original software. If you use this software -* in a product, an acknowledgment in the product documentation would be -* appreciated but is not required. -* 2. Altered source versions must be plainly marked as such, and must not be -* misrepresented as being the original software. -* 3. This notice may not be removed or altered from any source distribution. -*/ -#ifndef __B_DEFINES_H__ -#define __B_DEFINES_H__ - -// MISC defines, see BKE_global.h, BKE_utildefines.h -#define B3_SIZEOFBLENDERHEADER 12 - -// ------------------------------------------------------------ -#if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined(__PPC__) || defined(__ppc__) || defined(__BIG_ENDIAN__) -#define B3_MAKE_ID(a, b, c, d) ((int)(a) << 24 | (int)(b) << 16 | (c) << 8 | (d)) -#else -#define B3_MAKE_ID(a, b, c, d) ((int)(d) << 24 | (int)(c) << 16 | (b) << 8 | (a)) -#endif - -// ------------------------------------------------------------ -#if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined(__PPC__) || defined(__ppc__) || defined(__BIG_ENDIAN__) -#define B3_MAKE_ID2(c, d) ((c) << 8 | (d)) -#else -#define B3_MAKE_ID2(c, d) ((d) << 8 | (c)) -#endif - -// ------------------------------------------------------------ -#define B3_ID_SCE B3_MAKE_ID2('S', 'C') -#define B3_ID_LI B3_MAKE_ID2('L', 'I') -#define B3_ID_OB B3_MAKE_ID2('O', 'B') -#define B3_ID_ME B3_MAKE_ID2('M', 'E') -#define B3_ID_CU B3_MAKE_ID2('C', 'U') -#define B3_ID_MB B3_MAKE_ID2('M', 'B') -#define B3_ID_MA B3_MAKE_ID2('M', 'A') -#define B3_ID_TE B3_MAKE_ID2('T', 'E') -#define B3_ID_IM B3_MAKE_ID2('I', 'M') -#define B3_ID_IK B3_MAKE_ID2('I', 'K') -#define B3_ID_WV B3_MAKE_ID2('W', 'V') -#define B3_ID_LT B3_MAKE_ID2('L', 'T') -#define B3_ID_SE B3_MAKE_ID2('S', 'E') -#define B3_ID_LF B3_MAKE_ID2('L', 'F') -#define B3_ID_LA B3_MAKE_ID2('L', 'A') -#define B3_ID_CA B3_MAKE_ID2('C', 'A') -#define B3_ID_IP B3_MAKE_ID2('I', 'P') -#define B3_ID_KE B3_MAKE_ID2('K', 'E') -#define B3_ID_WO B3_MAKE_ID2('W', 'O') -#define B3_ID_SCR B3_MAKE_ID2('S', 'R') -#define B3_ID_VF B3_MAKE_ID2('V', 'F') -#define B3_ID_TXT B3_MAKE_ID2('T', 'X') -#define B3_ID_SO B3_MAKE_ID2('S', 'O') -#define B3_ID_SAMPLE B3_MAKE_ID2('S', 'A') -#define B3_ID_GR B3_MAKE_ID2('G', 'R') -#define B3_ID_ID B3_MAKE_ID2('I', 'D') -#define B3_ID_AR B3_MAKE_ID2('A', 'R') -#define B3_ID_AC B3_MAKE_ID2('A', 'C') -#define B3_ID_SCRIPT B3_MAKE_ID2('P', 'Y') -#define B3_ID_FLUIDSIM B3_MAKE_ID2('F', 'S') -#define B3_ID_NT B3_MAKE_ID2('N', 'T') -#define B3_ID_BR B3_MAKE_ID2('B', 'R') - -#define B3_ID_SEQ B3_MAKE_ID2('S', 'Q') -#define B3_ID_CO B3_MAKE_ID2('C', 'O') -#define B3_ID_PO B3_MAKE_ID2('A', 'C') -#define B3_ID_NLA B3_MAKE_ID2('N', 'L') - -#define B3_ID_VS B3_MAKE_ID2('V', 'S') -#define B3_ID_VN B3_MAKE_ID2('V', 'N') - -// ------------------------------------------------------------ -#define B3_FORM B3_MAKE_ID('F', 'O', 'R', 'M') -#define B3_DDG1 B3_MAKE_ID('3', 'D', 'G', '1') -#define B3_DDG2 B3_MAKE_ID('3', 'D', 'G', '2') -#define B3_DDG3 B3_MAKE_ID('3', 'D', 'G', '3') -#define B3_DDG4 B3_MAKE_ID('3', 'D', 'G', '4') -#define B3_GOUR B3_MAKE_ID('G', 'O', 'U', 'R') -#define B3_BLEN B3_MAKE_ID('B', 'L', 'E', 'N') -#define B3_DER_ B3_MAKE_ID('D', 'E', 'R', '_') -#define B3_V100 B3_MAKE_ID('V', '1', '0', '0') -#define B3_DATA B3_MAKE_ID('D', 'A', 'T', 'A') -#define B3_GLOB B3_MAKE_ID('G', 'L', 'O', 'B') -#define B3_IMAG B3_MAKE_ID('I', 'M', 'A', 'G') -#define B3_TEST B3_MAKE_ID('T', 'E', 'S', 'T') -#define B3_USER B3_MAKE_ID('U', 'S', 'E', 'R') - -// ------------------------------------------------------------ -#define B3_DNA1 B3_MAKE_ID('D', 'N', 'A', '1') -#define B3_REND B3_MAKE_ID('R', 'E', 'N', 'D') -#define B3_ENDB B3_MAKE_ID('E', 'N', 'D', 'B') -#define B3_NAME B3_MAKE_ID('N', 'A', 'M', 'E') -#define B3_SDNA B3_MAKE_ID('S', 'D', 'N', 'A') -#define B3_TYPE B3_MAKE_ID('T', 'Y', 'P', 'E') -#define B3_TLEN B3_MAKE_ID('T', 'L', 'E', 'N') -#define B3_STRC B3_MAKE_ID('S', 'T', 'R', 'C') - -// ------------------------------------------------------------ -#define B3_SWITCH_INT(a) \ - { \ - char s_i, *p_i; \ - p_i = (char *)&(a); \ - s_i = p_i[0]; \ - p_i[0] = p_i[3]; \ - p_i[3] = s_i; \ - s_i = p_i[1]; \ - p_i[1] = p_i[2]; \ - p_i[2] = s_i; \ - } - -// ------------------------------------------------------------ -#define B3_SWITCH_SHORT(a) \ - { \ - char s_i, *p_i; \ - p_i = (char *)&(a); \ - s_i = p_i[0]; \ - p_i[0] = p_i[1]; \ - p_i[1] = s_i; \ - } - -// ------------------------------------------------------------ -#define B3_SWITCH_LONGINT(a) \ - { \ - char s_i, *p_i; \ - p_i = (char *)&(a); \ - s_i = p_i[0]; \ - p_i[0] = p_i[7]; \ - p_i[7] = s_i; \ - s_i = p_i[1]; \ - p_i[1] = p_i[6]; \ - p_i[6] = s_i; \ - s_i = p_i[2]; \ - p_i[2] = p_i[5]; \ - p_i[5] = s_i; \ - s_i = p_i[3]; \ - p_i[3] = p_i[4]; \ - p_i[4] = s_i; \ - } - -#endif //__B_DEFINES_H__ diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp deleted file mode 100644 index f6c779a9192..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp +++ /dev/null @@ -1,1653 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -#include "b3File.h" -#include "b3Common.h" -#include "b3Chunk.h" -#include "b3DNA.h" -#include -#include -#include -#include "b3Defines.h" -#include "Bullet3Serialize/Bullet2FileLoader/b3Serializer.h" -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3MinMax.h" - -#define B3_SIZEOFBLENDERHEADER 12 -#define MAX_ARRAY_LENGTH 512 -using namespace bParse; -#define MAX_STRLEN 1024 - -const char *getCleanName(const char *memName, char *buffer) -{ - int slen = strlen(memName); - assert(slen < MAX_STRLEN); - slen = b3Min(slen, MAX_STRLEN); - for (int i = 0; i < slen; i++) - { - if (memName[i] == ']' || memName[i] == '[') - { - buffer[i] = 0; //'_'; - } - else - { - buffer[i] = memName[i]; - } - } - buffer[slen] = 0; - return buffer; -} - -// ----------------------------------------------------- // -bFile::bFile(const char *filename, const char headerString[7]) - : mOwnsBuffer(true), - mFileBuffer(0), - mFileLen(0), - mVersion(0), - mDataStart(0), - mFileDNA(0), - mMemoryDNA(0), - mFlags(FD_INVALID) -{ - for (int i = 0; i < 7; i++) - { - m_headerString[i] = headerString[i]; - } - - FILE *fp = fopen(filename, "rb"); - if (fp) - { - fseek(fp, 0L, SEEK_END); - mFileLen = ftell(fp); - fseek(fp, 0L, SEEK_SET); - - mFileBuffer = (char *)malloc(mFileLen + 1); - int bytesRead; - bytesRead = fread(mFileBuffer, mFileLen, 1, fp); - - fclose(fp); - - // - parseHeader(); - } -} - -// ----------------------------------------------------- // -bFile::bFile(char *memoryBuffer, int len, const char headerString[7]) - : mOwnsBuffer(false), - mFileBuffer(0), - mFileLen(0), - mVersion(0), - mDataStart(0), - mFileDNA(0), - mMemoryDNA(0), - mFlags(FD_INVALID) -{ - for (int i = 0; i < 7; i++) - { - m_headerString[i] = headerString[i]; - } - mFileBuffer = memoryBuffer; - mFileLen = len; - - parseHeader(); -} - -// ----------------------------------------------------- // -bFile::~bFile() -{ - if (mOwnsBuffer && mFileBuffer) - { - free(mFileBuffer); - mFileBuffer = 0; - } - - delete mMemoryDNA; - delete mFileDNA; -} - -// ----------------------------------------------------- // -void bFile::parseHeader() -{ - if (!mFileLen || !mFileBuffer) - return; - - char *blenderBuf = mFileBuffer; - char header[B3_SIZEOFBLENDERHEADER + 1]; - memcpy(header, blenderBuf, B3_SIZEOFBLENDERHEADER); - header[B3_SIZEOFBLENDERHEADER] = '\0'; - - if (strncmp(header, m_headerString, 6) != 0) - { - memcpy(header, m_headerString, B3_SIZEOFBLENDERHEADER); - return; - } - - if (header[6] == 'd') - { - mFlags |= FD_DOUBLE_PRECISION; - } - - char *ver = header + 9; - mVersion = atoi(ver); - if (mVersion <= 241) - { - //printf("Warning, %d not fully tested : <= 242\n", mVersion); - } - - int littleEndian = 1; - littleEndian = ((char *)&littleEndian)[0]; - - // swap ptr sizes... - if (header[7] == '-') - { - mFlags |= FD_FILE_64; - if (!VOID_IS_8) - mFlags |= FD_BITS_VARIES; - } - else if (VOID_IS_8) - mFlags |= FD_BITS_VARIES; - - // swap endian... - if (header[8] == 'V') - { - if (littleEndian == 1) - mFlags |= FD_ENDIAN_SWAP; - } - else if (littleEndian == 0) - mFlags |= FD_ENDIAN_SWAP; - - mFlags |= FD_OK; -} - -// ----------------------------------------------------- // -bool bFile::ok() -{ - return (mFlags & FD_OK) != 0; -} - -// ----------------------------------------------------- // -void bFile::parseInternal(int verboseMode, char *memDna, int memDnaLength) -{ - if ((mFlags & FD_OK) == 0) - return; - - char *blenderData = mFileBuffer; - bChunkInd dna; - dna.oldPtr = 0; - - char *tempBuffer = blenderData; - for (int i = 0; i < mFileLen; i++) - { - // looking for the data's starting position - // and the start of SDNA decls - - if (!mDataStart && strncmp(tempBuffer, "REND", 4) == 0) - mDataStart = i; - - if (strncmp(tempBuffer, "DNA1", 4) == 0) - { - // read the DNA1 block and extract SDNA - if (getNextBlock(&dna, tempBuffer, mFlags) > 0) - { - if (strncmp((tempBuffer + ChunkUtils::getOffset(mFlags)), "SDNANAME", 8) == 0) - dna.oldPtr = (tempBuffer + ChunkUtils::getOffset(mFlags)); - else - dna.oldPtr = 0; - } - else - dna.oldPtr = 0; - } - // Some Bullet files are missing the DNA1 block - // In Blender it's DNA1 + ChunkUtils::getOffset() + SDNA + NAME - // In Bullet tests its SDNA + NAME - else if (strncmp(tempBuffer, "SDNANAME", 8) == 0) - { - dna.oldPtr = blenderData + i; - dna.len = mFileLen - i; - - // Also no REND block, so exit now. - if (mVersion == 276) break; - } - - if (mDataStart && dna.oldPtr) break; - tempBuffer++; - } - if (!dna.oldPtr || !dna.len) - { - //printf("Failed to find DNA1+SDNA pair\n"); - mFlags &= ~FD_OK; - return; - } - - mFileDNA = new bDNA(); - - ///mFileDNA->init will convert part of DNA file endianness to current CPU endianness if necessary - mFileDNA->init((char *)dna.oldPtr, dna.len, (mFlags & FD_ENDIAN_SWAP) != 0); - - if (mVersion == 276) - { - int i; - for (i = 0; i < mFileDNA->getNumNames(); i++) - { - if (strcmp(mFileDNA->getName(i), "int") == 0) - { - mFlags |= FD_BROKEN_DNA; - } - } - if ((mFlags & FD_BROKEN_DNA) != 0) - { - //printf("warning: fixing some broken DNA version\n"); - } - } - - if (verboseMode & FD_VERBOSE_DUMP_DNA_TYPE_DEFINITIONS) - mFileDNA->dumpTypeDefinitions(); - - mMemoryDNA = new bDNA(); - int littleEndian = 1; - littleEndian = ((char *)&littleEndian)[0]; - - mMemoryDNA->init(memDna, memDnaLength, littleEndian == 0); - - ///@todo we need a better version check, add version/sub version info from FileGlobal into memory DNA/header files - if (mMemoryDNA->getNumNames() != mFileDNA->getNumNames()) - { - mFlags |= FD_VERSION_VARIES; - //printf ("Warning, file DNA is different than built in, performance is reduced. Best to re-export file with a matching version/platform"); - } - - // as long as it kept up to date it will be ok!! - if (mMemoryDNA->lessThan(mFileDNA)) - { - //printf ("Warning, file DNA is newer than built in."); - } - - mFileDNA->initCmpFlags(mMemoryDNA); - - parseData(); - - resolvePointers(verboseMode); - - updateOldPointers(); -} - -// ----------------------------------------------------- // -void bFile::swap(char *head, bChunkInd &dataChunk, bool ignoreEndianFlag) -{ - char *data = head; - short *strc = mFileDNA->getStruct(dataChunk.dna_nr); - - const char s[] = "SoftBodyMaterialData"; - int szs = sizeof(s); - if (strncmp((char *)&dataChunk.code, "ARAY", 4) == 0) - { - short *oldStruct = mFileDNA->getStruct(dataChunk.dna_nr); - char *oldType = mFileDNA->getType(oldStruct[0]); - if (strncmp(oldType, s, szs) == 0) - { - return; - } - } - - int len = mFileDNA->getLength(strc[0]); - - for (int i = 0; i < dataChunk.nr; i++) - { - swapStruct(dataChunk.dna_nr, data, ignoreEndianFlag); - data += len; - } -} - -void bFile::swapLen(char *dataPtr) -{ - const bool VOID_IS_8 = ((sizeof(void *) == 8)); - if (VOID_IS_8) - { - if (mFlags & FD_BITS_VARIES) - { - bChunkPtr4 *c = (bChunkPtr4 *)dataPtr; - if ((c->code & 0xFFFF) == 0) - c->code >>= 16; - B3_SWITCH_INT(c->len); - B3_SWITCH_INT(c->dna_nr); - B3_SWITCH_INT(c->nr); - } - else - { - bChunkPtr8 *c = (bChunkPtr8 *)dataPtr; - if ((c->code & 0xFFFF) == 0) - c->code >>= 16; - B3_SWITCH_INT(c->len); - B3_SWITCH_INT(c->dna_nr); - B3_SWITCH_INT(c->nr); - } - } - else - { - if (mFlags & FD_BITS_VARIES) - { - bChunkPtr8 *c = (bChunkPtr8 *)dataPtr; - if ((c->code & 0xFFFF) == 0) - c->code >>= 16; - B3_SWITCH_INT(c->len); - B3_SWITCH_INT(c->dna_nr); - B3_SWITCH_INT(c->nr); - } - else - { - bChunkPtr4 *c = (bChunkPtr4 *)dataPtr; - if ((c->code & 0xFFFF) == 0) - c->code >>= 16; - B3_SWITCH_INT(c->len); - - B3_SWITCH_INT(c->dna_nr); - B3_SWITCH_INT(c->nr); - } - } -} - -void bFile::swapDNA(char *ptr) -{ - bool swap = ((mFlags & FD_ENDIAN_SWAP) != 0); - - char *data = &ptr[20]; - // void bDNA::init(char *data, int len, bool swap) - int *intPtr = 0; - short *shtPtr = 0; - char *cp = 0; - int dataLen = 0; - //long nr=0; - intPtr = (int *)data; - - /* - SDNA (4 bytes) (magic number) - NAME (4 bytes) - (4 bytes) amount of names (int) - - - */ - - if (strncmp(data, "SDNA", 4) == 0) - { - // skip ++ NAME - intPtr++; - intPtr++; - } - - // Parse names - if (swap) - dataLen = ChunkUtils::swapInt(*intPtr); - else - dataLen = *intPtr; - - *intPtr = ChunkUtils::swapInt(*intPtr); - intPtr++; - - cp = (char *)intPtr; - int i; - for (i = 0; i < dataLen; i++) - { - while (*cp) cp++; - cp++; - } - - cp = b3AlignPointer(cp, 4); - - /* - TYPE (4 bytes) - amount of types (int) - - - */ - - intPtr = (int *)cp; - assert(strncmp(cp, "TYPE", 4) == 0); - intPtr++; - - if (swap) - dataLen = ChunkUtils::swapInt(*intPtr); - else - dataLen = *intPtr; - - *intPtr = ChunkUtils::swapInt(*intPtr); - - intPtr++; - - cp = (char *)intPtr; - for (i = 0; i < dataLen; i++) - { - while (*cp) cp++; - cp++; - } - - cp = b3AlignPointer(cp, 4); - - /* - TLEN (4 bytes) - (short) the lengths of types - - */ - - // Parse type lens - intPtr = (int *)cp; - assert(strncmp(cp, "TLEN", 4) == 0); - intPtr++; - - shtPtr = (short *)intPtr; - for (i = 0; i < dataLen; i++, shtPtr++) - { - //??????if (swap) - shtPtr[0] = ChunkUtils::swapShort(shtPtr[0]); - } - - if (dataLen & 1) - shtPtr++; - - /* - STRC (4 bytes) - amount of structs (int) - - - - - - - */ - - intPtr = (int *)shtPtr; - cp = (char *)intPtr; - assert(strncmp(cp, "STRC", 4) == 0); - intPtr++; - - if (swap) - dataLen = ChunkUtils::swapInt(*intPtr); - else - dataLen = *intPtr; - - *intPtr = ChunkUtils::swapInt(*intPtr); - - intPtr++; - - shtPtr = (short *)intPtr; - for (i = 0; i < dataLen; i++) - { - //if (swap) - { - int len = shtPtr[1]; - - shtPtr[0] = ChunkUtils::swapShort(shtPtr[0]); - shtPtr[1] = ChunkUtils::swapShort(shtPtr[1]); - - shtPtr += 2; - - for (int a = 0; a < len; a++, shtPtr += 2) - { - shtPtr[0] = ChunkUtils::swapShort(shtPtr[0]); - shtPtr[1] = ChunkUtils::swapShort(shtPtr[1]); - } - } - // else - // shtPtr+= (2*shtPtr[1])+2; - } -} - -void bFile::writeFile(const char *fileName) -{ - FILE *f = fopen(fileName, "wb"); - fwrite(mFileBuffer, 1, mFileLen, f); - fclose(f); -} - -void bFile::preSwap() -{ - //const bool brokenDNA = (mFlags&FD_BROKEN_DNA)!=0; - //FD_ENDIAN_SWAP - //byte 8 determines the endianness of the file, little (v) versus big (V) - int littleEndian = 1; - littleEndian = ((char *)&littleEndian)[0]; - - if (mFileBuffer[8] == 'V') - { - mFileBuffer[8] = 'v'; - } - else - { - mFileBuffer[8] = 'V'; - } - - mDataStart = 12; - - char *dataPtr = mFileBuffer + mDataStart; - - bChunkInd dataChunk; - dataChunk.code = 0; - bool ignoreEndianFlag = true; - - //we always want to swap here - - int seek = getNextBlock(&dataChunk, dataPtr, mFlags); - //dataPtr += ChunkUtils::getOffset(mFlags); - char *dataPtrHead = 0; - - while (1) - { - // one behind - if (dataChunk.code == B3_SDNA || dataChunk.code == B3_DNA1 || dataChunk.code == B3_TYPE || dataChunk.code == B3_TLEN || dataChunk.code == B3_STRC) - { - swapDNA(dataPtr); - break; - } - else - { - //if (dataChunk.code == DNA1) break; - dataPtrHead = dataPtr + ChunkUtils::getOffset(mFlags); - - swapLen(dataPtr); - if (dataChunk.dna_nr >= 0) - { - swap(dataPtrHead, dataChunk, ignoreEndianFlag); - } - else - { - //printf("unknown chunk\n"); - } - } - - // next please! - dataPtr += seek; - - seek = getNextBlock(&dataChunk, dataPtr, mFlags); - if (seek < 0) - break; - } - - if (mFlags & FD_ENDIAN_SWAP) - { - mFlags &= ~FD_ENDIAN_SWAP; - } - else - { - mFlags |= FD_ENDIAN_SWAP; - } -} - -// ----------------------------------------------------- // -char *bFile::readStruct(char *head, bChunkInd &dataChunk) -{ - bool ignoreEndianFlag = false; - - if (mFlags & FD_ENDIAN_SWAP) - swap(head, dataChunk, ignoreEndianFlag); - - if (!mFileDNA->flagEqual(dataChunk.dna_nr)) - { - // Ouch! need to rebuild the struct - short *oldStruct, *curStruct; - char *oldType, *newType; - int oldLen, curLen, reverseOld; - - oldStruct = mFileDNA->getStruct(dataChunk.dna_nr); - oldType = mFileDNA->getType(oldStruct[0]); - - oldLen = mFileDNA->getLength(oldStruct[0]); - - if ((mFlags & FD_BROKEN_DNA) != 0) - { - if ((strcmp(oldType, "b3QuantizedBvhNodeData") == 0) && oldLen == 20) - { - return 0; - } - if ((strcmp(oldType, "b3ShortIntIndexData") == 0)) - { - int allocLen = 2; - char *dataAlloc = new char[(dataChunk.nr * allocLen) + 1]; - memset(dataAlloc, 0, (dataChunk.nr * allocLen) + 1); - short *dest = (short *)dataAlloc; - const short *src = (short *)head; - for (int i = 0; i < dataChunk.nr; i++) - { - dest[i] = src[i]; - if (mFlags & FD_ENDIAN_SWAP) - { - B3_SWITCH_SHORT(dest[i]); - } - } - addDataBlock(dataAlloc); - return dataAlloc; - } - } - - ///don't try to convert Link block data, just memcpy it. Other data can be converted. - if (strcmp("Link", oldType) != 0) - { - reverseOld = mMemoryDNA->getReverseType(oldType); - - if ((reverseOld != -1)) - { - // make sure it's here - //assert(reverseOld!= -1 && "getReverseType() returned -1, struct required!"); - - // - curStruct = mMemoryDNA->getStruct(reverseOld); - newType = mMemoryDNA->getType(curStruct[0]); - curLen = mMemoryDNA->getLength(curStruct[0]); - - // make sure it's the same - assert((strcmp(oldType, newType) == 0) && "internal error, struct mismatch!"); - - // numBlocks * length - - int allocLen = (curLen); - char *dataAlloc = new char[(dataChunk.nr * allocLen) + 1]; - memset(dataAlloc, 0, (dataChunk.nr * allocLen)); - - // track allocated - addDataBlock(dataAlloc); - - char *cur = dataAlloc; - char *old = head; - for (int block = 0; block < dataChunk.nr; block++) - { - bool fixupPointers = true; - parseStruct(cur, old, dataChunk.dna_nr, reverseOld, fixupPointers); - mLibPointers.insert(old, (bStructHandle *)cur); - - cur += curLen; - old += oldLen; - } - return dataAlloc; - } - } - else - { - //printf("Link found\n"); - } - } - else - { -//#define DEBUG_EQUAL_STRUCTS -#ifdef DEBUG_EQUAL_STRUCTS - short *oldStruct; - char *oldType; - oldStruct = mFileDNA->getStruct(dataChunk.dna_nr); - oldType = mFileDNA->getType(oldStruct[0]); - printf("%s equal structure, just memcpy\n", oldType); -#endif // - } - - char *dataAlloc = new char[(dataChunk.len) + 1]; - memset(dataAlloc, 0, dataChunk.len + 1); - - // track allocated - addDataBlock(dataAlloc); - - memcpy(dataAlloc, head, dataChunk.len); - return dataAlloc; -} - -// ----------------------------------------------------- // -void bFile::parseStruct(char *strcPtr, char *dtPtr, int old_dna, int new_dna, bool fixupPointers) -{ - if (old_dna == -1) return; - if (new_dna == -1) return; - - //disable this, because we need to fixup pointers/ListBase - if (0) //mFileDNA->flagEqual(old_dna)) - { - short *strc = mFileDNA->getStruct(old_dna); - int len = mFileDNA->getLength(strc[0]); - - memcpy(strcPtr, dtPtr, len); - return; - } - - // Ok, now build the struct - char *memType, *memName, *cpc, *cpo; - short *fileStruct, *filePtrOld, *memoryStruct, *firstStruct; - int elementLength, size, revType, old_nr, new_nr, fpLen; - short firstStructType; - - // File to memory lookup - memoryStruct = mMemoryDNA->getStruct(new_dna); - fileStruct = mFileDNA->getStruct(old_dna); - firstStruct = fileStruct; - - filePtrOld = fileStruct; - firstStructType = mMemoryDNA->getStruct(0)[0]; - - // Get number of elements - elementLength = memoryStruct[1]; - memoryStruct += 2; - - cpc = strcPtr; - cpo = 0; - for (int ele = 0; ele < elementLength; ele++, memoryStruct += 2) - { - memType = mMemoryDNA->getType(memoryStruct[0]); - memName = mMemoryDNA->getName(memoryStruct[1]); - - size = mMemoryDNA->getElementSize(memoryStruct[0], memoryStruct[1]); - revType = mMemoryDNA->getReverseType(memoryStruct[0]); - - if (revType != -1 && memoryStruct[0] >= firstStructType && memName[0] != '*') - { - cpo = getFileElement(firstStruct, memName, memType, dtPtr, &filePtrOld); - if (cpo) - { - int arrayLen = mFileDNA->getArraySizeNew(filePtrOld[1]); - old_nr = mFileDNA->getReverseType(memType); - new_nr = revType; - fpLen = mFileDNA->getElementSize(filePtrOld[0], filePtrOld[1]); - if (arrayLen == 1) - { - parseStruct(cpc, cpo, old_nr, new_nr, fixupPointers); - } - else - { - char *tmpCpc = cpc; - char *tmpCpo = cpo; - - for (int i = 0; i < arrayLen; i++) - { - parseStruct(tmpCpc, tmpCpo, old_nr, new_nr, fixupPointers); - tmpCpc += size / arrayLen; - tmpCpo += fpLen / arrayLen; - } - } - cpc += size; - cpo += fpLen; - } - else - cpc += size; - } - else - { - getMatchingFileDNA(fileStruct, memName, memType, cpc, dtPtr, fixupPointers); - cpc += size; - } - } -} - -// ----------------------------------------------------- // -static void getElement(int arrayLen, const char *cur, const char *old, char *oldPtr, char *curData) -{ -#define b3GetEle(value, current, type, cast, size, ptr) \ - if (strcmp(current, type) == 0) \ - { \ - value = (*(cast *)ptr); \ - ptr += size; \ - } - -#define b3SetEle(value, current, type, cast, size, ptr) \ - if (strcmp(current, type) == 0) \ - { \ - (*(cast *)ptr) = (cast)value; \ - ptr += size; \ - } - double value = 0.0; - - for (int i = 0; i < arrayLen; i++) - { - b3GetEle(value, old, "char", char, sizeof(char), oldPtr); - b3SetEle(value, cur, "char", char, sizeof(char), curData); - b3GetEle(value, old, "short", short, sizeof(short), oldPtr); - b3SetEle(value, cur, "short", short, sizeof(short), curData); - b3GetEle(value, old, "ushort", unsigned short, sizeof(unsigned short), oldPtr); - b3SetEle(value, cur, "ushort", unsigned short, sizeof(unsigned short), curData); - b3GetEle(value, old, "int", int, sizeof(int), oldPtr); - b3SetEle(value, cur, "int", int, sizeof(int), curData); - b3GetEle(value, old, "long", int, sizeof(int), oldPtr); - b3SetEle(value, cur, "long", int, sizeof(int), curData); - b3GetEle(value, old, "float", float, sizeof(float), oldPtr); - b3SetEle(value, cur, "float", float, sizeof(float), curData); - b3GetEle(value, old, "double", double, sizeof(double), oldPtr); - b3SetEle(value, cur, "double", double, sizeof(double), curData); - } -} - -// ----------------------------------------------------- // -void bFile::swapData(char *data, short type, int arraySize, bool ignoreEndianFlag) -{ - if (ignoreEndianFlag || (mFlags & FD_ENDIAN_SWAP)) - { - if (type == 2 || type == 3) - { - short *sp = (short *)data; - for (int i = 0; i < arraySize; i++) - { - sp[0] = ChunkUtils::swapShort(sp[0]); - sp++; - } - } - if (type > 3 && type < 8) - { - char c; - char *cp = data; - for (int i = 0; i < arraySize; i++) - { - c = cp[0]; - cp[0] = cp[3]; - cp[3] = c; - c = cp[1]; - cp[1] = cp[2]; - cp[2] = c; - cp += 4; - } - } - } -} - -void bFile::safeSwapPtr(char *dst, const char *src) -{ - if (!src || !dst) - return; - - int ptrFile = mFileDNA->getPointerSize(); - int ptrMem = mMemoryDNA->getPointerSize(); - - if (ptrFile == ptrMem) - { - memcpy(dst, src, ptrMem); - } - else if (ptrMem == 4 && ptrFile == 8) - { - b3PointerUid *oldPtr = (b3PointerUid *)src; - b3PointerUid *newPtr = (b3PointerUid *)dst; - - if (oldPtr->m_uniqueIds[0] == oldPtr->m_uniqueIds[1]) - { - //Bullet stores the 32bit unique ID in both upper and lower part of 64bit pointers - //so it can be used to distinguish between .blend and .bullet - newPtr->m_uniqueIds[0] = oldPtr->m_uniqueIds[0]; - } - else - { - //deal with pointers the Blender .blend style way, see - //readfile.c in the Blender source tree - b3Long64 longValue = *((b3Long64 *)src); - //endian swap for 64bit pointer otherwise truncation will fail due to trailing zeros - if (mFlags & FD_ENDIAN_SWAP) - B3_SWITCH_LONGINT(longValue); - *((int *)dst) = (int)(longValue >> 3); - } - } - else if (ptrMem == 8 && ptrFile == 4) - { - b3PointerUid *oldPtr = (b3PointerUid *)src; - b3PointerUid *newPtr = (b3PointerUid *)dst; - if (oldPtr->m_uniqueIds[0] == oldPtr->m_uniqueIds[1]) - { - newPtr->m_uniqueIds[0] = oldPtr->m_uniqueIds[0]; - newPtr->m_uniqueIds[1] = 0; - } - else - { - *((b3Long64 *)dst) = *((int *)src); - } - } - else - { - printf("%d %d\n", ptrFile, ptrMem); - assert(0 && "Invalid pointer len"); - } -} - -// ----------------------------------------------------- // -void bFile::getMatchingFileDNA(short *dna_addr, const char *lookupName, const char *lookupType, char *strcData, char *data, bool fixupPointers) -{ - // find the matching memory dna data - // to the file being loaded. Fill the - // memory with the file data... - - int len = dna_addr[1]; - dna_addr += 2; - - for (int i = 0; i < len; i++, dna_addr += 2) - { - const char *type = mFileDNA->getType(dna_addr[0]); - const char *name = mFileDNA->getName(dna_addr[1]); - - int eleLen = mFileDNA->getElementSize(dna_addr[0], dna_addr[1]); - - if ((mFlags & FD_BROKEN_DNA) != 0) - { - if ((strcmp(type, "short") == 0) && (strcmp(name, "int") == 0)) - { - eleLen = 0; - } - } - - if (strcmp(lookupName, name) == 0) - { - //int arrayLenold = mFileDNA->getArraySize((char*)name.c_str()); - int arrayLen = mFileDNA->getArraySizeNew(dna_addr[1]); - //assert(arrayLenold == arrayLen); - - if (name[0] == '*') - { - // cast pointers - int ptrFile = mFileDNA->getPointerSize(); - int ptrMem = mMemoryDNA->getPointerSize(); - safeSwapPtr(strcData, data); - - if (fixupPointers) - { - if (arrayLen > 1) - { - //void **sarray = (void**)strcData; - //void **darray = (void**)data; - - char *cpc, *cpo; - cpc = (char *)strcData; - cpo = (char *)data; - - for (int a = 0; a < arrayLen; a++) - { - safeSwapPtr(cpc, cpo); - m_pointerFixupArray.push_back(cpc); - cpc += ptrMem; - cpo += ptrFile; - } - } - else - { - if (name[1] == '*') - m_pointerPtrFixupArray.push_back(strcData); - else - m_pointerFixupArray.push_back(strcData); - } - } - else - { - // printf("skipped %s %s : %x\n",type.c_str(),name.c_str(),strcData); - } - } - - else if (strcmp(type, lookupType) == 0) - memcpy(strcData, data, eleLen); - else - getElement(arrayLen, lookupType, type, data, strcData); - - // -- - return; - } - data += eleLen; - } -} - -// ----------------------------------------------------- // -char *bFile::getFileElement(short *firstStruct, char *lookupName, char *lookupType, char *data, short **foundPos) -{ - short *old = firstStruct; //mFileDNA->getStruct(old_nr); - int elementLength = old[1]; - old += 2; - - for (int i = 0; i < elementLength; i++, old += 2) - { - char *type = mFileDNA->getType(old[0]); - char *name = mFileDNA->getName(old[1]); - int len = mFileDNA->getElementSize(old[0], old[1]); - - if (strcmp(lookupName, name) == 0) - { - if (strcmp(type, lookupType) == 0) - { - if (foundPos) - *foundPos = old; - return data; - } - return 0; - } - data += len; - } - return 0; -} - -// ----------------------------------------------------- // -void bFile::swapStruct(int dna_nr, char *data, bool ignoreEndianFlag) -{ - if (dna_nr == -1) return; - - short *strc = mFileDNA->getStruct(dna_nr); - //short *firstStrc = strc; - - int elementLen = strc[1]; - strc += 2; - - short first = mFileDNA->getStruct(0)[0]; - - char *buf = data; - for (int i = 0; i < elementLen; i++, strc += 2) - { - char *type = mFileDNA->getType(strc[0]); - char *name = mFileDNA->getName(strc[1]); - - int size = mFileDNA->getElementSize(strc[0], strc[1]); - if (strc[0] >= first && name[0] != '*') - { - int old_nr = mFileDNA->getReverseType(type); - int arrayLen = mFileDNA->getArraySizeNew(strc[1]); - if (arrayLen == 1) - { - swapStruct(old_nr, buf, ignoreEndianFlag); - } - else - { - char *tmpBuf = buf; - for (int i = 0; i < arrayLen; i++) - { - swapStruct(old_nr, tmpBuf, ignoreEndianFlag); - tmpBuf += size / arrayLen; - } - } - } - else - { - //int arrayLenOld = mFileDNA->getArraySize(name); - int arrayLen = mFileDNA->getArraySizeNew(strc[1]); - //assert(arrayLenOld == arrayLen); - swapData(buf, strc[0], arrayLen, ignoreEndianFlag); - } - buf += size; - } -} - -void bFile::resolvePointersMismatch() -{ - // printf("resolvePointersStructMismatch\n"); - - int i; - - for (i = 0; i < m_pointerFixupArray.size(); i++) - { - char *cur = m_pointerFixupArray.at(i); - void **ptrptr = (void **)cur; - void *ptr = *ptrptr; - ptr = findLibPointer(ptr); - if (ptr) - { - //printf("Fixup pointer!\n"); - *(ptrptr) = ptr; - } - else - { - // printf("pointer not found: %x\n",cur); - } - } - - for (i = 0; i < m_pointerPtrFixupArray.size(); i++) - { - char *cur = m_pointerPtrFixupArray.at(i); - void **ptrptr = (void **)cur; - - bChunkInd *block = m_chunkPtrPtrMap.find(*ptrptr); - if (block) - { - int ptrMem = mMemoryDNA->getPointerSize(); - int ptrFile = mFileDNA->getPointerSize(); - - int blockLen = block->len / ptrFile; - - void *onptr = findLibPointer(*ptrptr); - if (onptr) - { - char *newPtr = new char[blockLen * ptrMem]; - addDataBlock(newPtr); - memset(newPtr, 0, blockLen * ptrMem); - - void **onarray = (void **)onptr; - char *oldPtr = (char *)onarray; - - int p = 0; - while (blockLen-- > 0) - { - b3PointerUid dp = {{0}}; - safeSwapPtr((char *)dp.m_uniqueIds, oldPtr); - - void **tptr = (void **)(newPtr + p * ptrMem); - *tptr = findLibPointer(dp.m_ptr); - - oldPtr += ptrFile; - ++p; - } - - *ptrptr = newPtr; - } - } - } -} - -///this loop only works fine if the Blender DNA structure of the file matches the headerfiles -void bFile::resolvePointersChunk(const bChunkInd &dataChunk, int verboseMode) -{ - bParse::bDNA *fileDna = mFileDNA ? mFileDNA : mMemoryDNA; - - short int *oldStruct = fileDna->getStruct(dataChunk.dna_nr); - short oldLen = fileDna->getLength(oldStruct[0]); - //char* structType = fileDna->getType(oldStruct[0]); - - char *cur = (char *)findLibPointer(dataChunk.oldPtr); - for (int block = 0; block < dataChunk.nr; block++) - { - resolvePointersStructRecursive(cur, dataChunk.dna_nr, verboseMode, 1); - cur += oldLen; - } -} - -int bFile::resolvePointersStructRecursive(char *strcPtr, int dna_nr, int verboseMode, int recursion) -{ - bParse::bDNA *fileDna = mFileDNA ? mFileDNA : mMemoryDNA; - - char *memType; - char *memName; - short firstStructType = fileDna->getStruct(0)[0]; - - char *elemPtr = strcPtr; - - short int *oldStruct = fileDna->getStruct(dna_nr); - - int elementLength = oldStruct[1]; - oldStruct += 2; - - int totalSize = 0; - - for (int ele = 0; ele < elementLength; ele++, oldStruct += 2) - { - memType = fileDna->getType(oldStruct[0]); - memName = fileDna->getName(oldStruct[1]); - - int arrayLen = fileDna->getArraySizeNew(oldStruct[1]); - if (memName[0] == '*') - { - if (arrayLen > 1) - { - void **array = (void **)elemPtr; - for (int a = 0; a < arrayLen; a++) - { - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - for (int i = 0; i < recursion; i++) - { - printf(" "); - } - //skip the * - printf("<%s type=\"pointer\"> ", &memName[1]); - printf("%p ", array[a]); - printf("\n", &memName[1]); - } - - array[a] = findLibPointer(array[a]); - } - } - else - { - void **ptrptr = (void **)elemPtr; - void *ptr = *ptrptr; - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - for (int i = 0; i < recursion; i++) - { - printf(" "); - } - printf("<%s type=\"pointer\"> ", &memName[1]); - printf("%p ", ptr); - printf("\n", &memName[1]); - } - ptr = findLibPointer(ptr); - - if (ptr) - { - // printf("Fixup pointer at 0x%x from 0x%x to 0x%x!\n",ptrptr,*ptrptr,ptr); - *(ptrptr) = ptr; - if (memName[1] == '*' && ptrptr && *ptrptr) - { - // This will only work if the given **array is continuous - void **array = (void **)*(ptrptr); - void *np = array[0]; - int n = 0; - while (np) - { - np = findLibPointer(array[n]); - if (np) array[n] = np; - n++; - } - } - } - else - { - // printf("Cannot fixup pointer at 0x%x from 0x%x to 0x%x!\n",ptrptr,*ptrptr,ptr); - } - } - } - else - { - int revType = fileDna->getReverseType(oldStruct[0]); - if (oldStruct[0] >= firstStructType) //revType != -1 && - { - char cleanName[MAX_STRLEN]; - getCleanName(memName, cleanName); - - int arrayLen = fileDna->getArraySizeNew(oldStruct[1]); - int byteOffset = 0; - - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - for (int i = 0; i < recursion; i++) - { - printf(" "); - } - - if (arrayLen > 1) - { - printf("<%s type=\"%s\" count=%d>\n", cleanName, memType, arrayLen); - } - else - { - printf("<%s type=\"%s\">\n", cleanName, memType); - } - } - - for (int i = 0; i < arrayLen; i++) - { - byteOffset += resolvePointersStructRecursive(elemPtr + byteOffset, revType, verboseMode, recursion + 1); - } - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - for (int i = 0; i < recursion; i++) - { - printf(" "); - } - printf("\n", cleanName); - } - } - else - { - //export a simple type - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - if (arrayLen > MAX_ARRAY_LENGTH) - { - printf("too long\n"); - } - else - { - //printf("%s %s\n",memType,memName); - - bool isIntegerType = (strcmp(memType, "char") == 0) || (strcmp(memType, "int") == 0) || (strcmp(memType, "short") == 0); - - if (isIntegerType) - { - const char *newtype = "int"; - int dbarray[MAX_ARRAY_LENGTH]; - int *dbPtr = 0; - char *tmp = elemPtr; - dbPtr = &dbarray[0]; - if (dbPtr) - { - char cleanName[MAX_STRLEN]; - getCleanName(memName, cleanName); - - int i; - getElement(arrayLen, newtype, memType, tmp, (char *)dbPtr); - for (i = 0; i < recursion; i++) - printf(" "); - if (arrayLen == 1) - printf("<%s type=\"%s\">", cleanName, memType); - else - printf("<%s type=\"%s\" count=%d>", cleanName, memType, arrayLen); - for (i = 0; i < arrayLen; i++) - printf(" %d ", dbPtr[i]); - printf("\n", cleanName); - } - } - else - { - const char *newtype = "double"; - double dbarray[MAX_ARRAY_LENGTH]; - double *dbPtr = 0; - char *tmp = elemPtr; - dbPtr = &dbarray[0]; - if (dbPtr) - { - int i; - getElement(arrayLen, newtype, memType, tmp, (char *)dbPtr); - for (i = 0; i < recursion; i++) - printf(" "); - char cleanName[MAX_STRLEN]; - getCleanName(memName, cleanName); - - if (arrayLen == 1) - { - printf("<%s type=\"%s\">", memName, memType); - } - else - { - printf("<%s type=\"%s\" count=%d>", cleanName, memType, arrayLen); - } - for (i = 0; i < arrayLen; i++) - printf(" %f ", dbPtr[i]); - printf("\n", cleanName); - } - } - } - } - } - } - - int size = fileDna->getElementSize(oldStruct[0], oldStruct[1]); - totalSize += size; - elemPtr += size; - } - - return totalSize; -} - -///Resolve pointers replaces the original pointers in structures, and linked lists by the new in-memory structures -void bFile::resolvePointers(int verboseMode) -{ - bParse::bDNA *fileDna = mFileDNA ? mFileDNA : mMemoryDNA; - - //char *dataPtr = mFileBuffer+mDataStart; - - if (1) //mFlags & (FD_BITS_VARIES | FD_VERSION_VARIES)) - { - resolvePointersMismatch(); - } - - { - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - printf("\n"); - int numitems = m_chunks.size(); - printf("\n", b3GetVersion(), numitems); - } - for (int i = 0; i < m_chunks.size(); i++) - { - const bChunkInd &dataChunk = m_chunks.at(i); - - if (!mFileDNA || fileDna->flagEqual(dataChunk.dna_nr)) - { - //dataChunk.len - short int *oldStruct = fileDna->getStruct(dataChunk.dna_nr); - char *oldType = fileDna->getType(oldStruct[0]); - - if (verboseMode & FD_VERBOSE_EXPORT_XML) - printf(" <%s pointer=%p>\n", oldType, dataChunk.oldPtr); - - resolvePointersChunk(dataChunk, verboseMode); - - if (verboseMode & FD_VERBOSE_EXPORT_XML) - printf(" \n", oldType); - } - else - { - //printf("skipping mStruct\n"); - } - } - if (verboseMode & FD_VERBOSE_EXPORT_XML) - { - printf("\n"); - } - } -} - -// ----------------------------------------------------- // -void *bFile::findLibPointer(void *ptr) -{ - bStructHandle **ptrptr = getLibPointers().find(ptr); - if (ptrptr) - return *ptrptr; - return 0; -} - -void bFile::updateOldPointers() -{ - int i; - - for (i = 0; i < m_chunks.size(); i++) - { - bChunkInd &dataChunk = m_chunks[i]; - dataChunk.oldPtr = findLibPointer(dataChunk.oldPtr); - } -} -void bFile::dumpChunks(bParse::bDNA *dna) -{ - int i; - - for (i = 0; i < m_chunks.size(); i++) - { - bChunkInd &dataChunk = m_chunks[i]; - char *codeptr = (char *)&dataChunk.code; - char codestr[5] = {codeptr[0], codeptr[1], codeptr[2], codeptr[3], 0}; - - short *newStruct = dna->getStruct(dataChunk.dna_nr); - char *typeName = dna->getType(newStruct[0]); - printf("%3d: %s ", i, typeName); - - printf("code=%s ", codestr); - - printf("ptr=%p ", dataChunk.oldPtr); - printf("len=%d ", dataChunk.len); - printf("nr=%d ", dataChunk.nr); - if (dataChunk.nr != 1) - { - printf("not 1\n"); - } - printf("\n"); - } - -#if 0 - IDFinderData ifd; - ifd.success = 0; - ifd.IDname = NULL; - ifd.just_print_it = 1; - for (i=0; im_blocks.size(); ++i) - { - BlendBlock* bb = bf->m_blocks[i]; - printf("tag='%s'\tptr=%p\ttype=%s\t[%4d]", bb->tag, bb,bf->types[bb->type_index].name,bb->m_array_entries_.size()); - block_ID_finder(bb, bf, &ifd); - printf("\n"); - } -#endif -} - -void bFile::writeChunks(FILE *fp, bool fixupPointers) -{ - bParse::bDNA *fileDna = mFileDNA ? mFileDNA : mMemoryDNA; - - for (int i = 0; i < m_chunks.size(); i++) - { - bChunkInd &dataChunk = m_chunks.at(i); - - // Ouch! need to rebuild the struct - short *oldStruct, *curStruct; - char *oldType, *newType; - int oldLen, curLen, reverseOld; - - oldStruct = fileDna->getStruct(dataChunk.dna_nr); - oldType = fileDna->getType(oldStruct[0]); - oldLen = fileDna->getLength(oldStruct[0]); - ///don't try to convert Link block data, just memcpy it. Other data can be converted. - reverseOld = mMemoryDNA->getReverseType(oldType); - - if ((reverseOld != -1)) - { - // make sure it's here - //assert(reverseOld!= -1 && "getReverseType() returned -1, struct required!"); - // - curStruct = mMemoryDNA->getStruct(reverseOld); - newType = mMemoryDNA->getType(curStruct[0]); - // make sure it's the same - assert((strcmp(oldType, newType) == 0) && "internal error, struct mismatch!"); - - curLen = mMemoryDNA->getLength(curStruct[0]); - dataChunk.dna_nr = reverseOld; - if (strcmp("Link", oldType) != 0) - { - dataChunk.len = curLen * dataChunk.nr; - } - else - { - // printf("keep length of link = %d\n",dataChunk.len); - } - - //write the structure header - fwrite(&dataChunk, sizeof(bChunkInd), 1, fp); - - short int *curStruct1; - curStruct1 = mMemoryDNA->getStruct(dataChunk.dna_nr); - assert(curStruct1 == curStruct); - - char *cur = fixupPointers ? (char *)findLibPointer(dataChunk.oldPtr) : (char *)dataChunk.oldPtr; - - //write the actual contents of the structure(s) - fwrite(cur, dataChunk.len, 1, fp); - } - else - { - printf("serious error, struct mismatch: don't write\n"); - } - } -} - -// ----------------------------------------------------- // -int bFile::getNextBlock(bChunkInd *dataChunk, const char *dataPtr, const int flags) -{ - bool swap = false; - bool varies = false; - - if (flags & FD_ENDIAN_SWAP) - swap = true; - if (flags & FD_BITS_VARIES) - varies = true; - - if (VOID_IS_8) - { - if (varies) - { - bChunkPtr4 head; - memcpy(&head, dataPtr, sizeof(bChunkPtr4)); - - bChunkPtr8 chunk; - - chunk.code = head.code; - chunk.len = head.len; - chunk.m_uniqueInts[0] = head.m_uniqueInt; - chunk.m_uniqueInts[1] = 0; - chunk.dna_nr = head.dna_nr; - chunk.nr = head.nr; - - if (swap) - { - if ((chunk.code & 0xFFFF) == 0) - chunk.code >>= 16; - - B3_SWITCH_INT(chunk.len); - B3_SWITCH_INT(chunk.dna_nr); - B3_SWITCH_INT(chunk.nr); - } - - memcpy(dataChunk, &chunk, sizeof(bChunkInd)); - } - else - { - bChunkPtr8 c; - memcpy(&c, dataPtr, sizeof(bChunkPtr8)); - - if (swap) - { - if ((c.code & 0xFFFF) == 0) - c.code >>= 16; - - B3_SWITCH_INT(c.len); - B3_SWITCH_INT(c.dna_nr); - B3_SWITCH_INT(c.nr); - } - - memcpy(dataChunk, &c, sizeof(bChunkInd)); - } - } - else - { - if (varies) - { - bChunkPtr8 head; - memcpy(&head, dataPtr, sizeof(bChunkPtr8)); - - bChunkPtr4 chunk; - chunk.code = head.code; - chunk.len = head.len; - - if (head.m_uniqueInts[0] == head.m_uniqueInts[1]) - { - chunk.m_uniqueInt = head.m_uniqueInts[0]; - } - else - { - b3Long64 oldPtr = 0; - memcpy(&oldPtr, &head.m_uniqueInts[0], 8); - if (swap) - B3_SWITCH_LONGINT(oldPtr); - chunk.m_uniqueInt = (int)(oldPtr >> 3); - } - - chunk.dna_nr = head.dna_nr; - chunk.nr = head.nr; - - if (swap) - { - if ((chunk.code & 0xFFFF) == 0) - chunk.code >>= 16; - - B3_SWITCH_INT(chunk.len); - B3_SWITCH_INT(chunk.dna_nr); - B3_SWITCH_INT(chunk.nr); - } - - memcpy(dataChunk, &chunk, sizeof(bChunkInd)); - } - else - { - bChunkPtr4 c; - memcpy(&c, dataPtr, sizeof(bChunkPtr4)); - - if (swap) - { - if ((c.code & 0xFFFF) == 0) - c.code >>= 16; - - B3_SWITCH_INT(c.len); - B3_SWITCH_INT(c.dna_nr); - B3_SWITCH_INT(c.nr); - } - memcpy(dataChunk, &c, sizeof(bChunkInd)); - } - } - - if (dataChunk->len < 0) - return -1; - -#if 0 - print ("----------"); - print (dataChunk->code); - print (dataChunk->len); - print (dataChunk->old); - print (dataChunk->dna_nr); - print (dataChunk->nr); -#endif - return (dataChunk->len + ChunkUtils::getOffset(flags)); -} - -//eof diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.h deleted file mode 100644 index bda229cfbd7..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.h +++ /dev/null @@ -1,158 +0,0 @@ -/* -bParse -Copyright (c) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BFILE_H__ -#define __BFILE_H__ - -#include "b3Common.h" -#include "b3Chunk.h" -#include - -namespace bParse -{ -// ----------------------------------------------------- // -enum bFileFlags -{ - FD_INVALID = 0, - FD_OK = 1, - FD_VOID_IS_8 = 2, - FD_ENDIAN_SWAP = 4, - FD_FILE_64 = 8, - FD_BITS_VARIES = 16, - FD_VERSION_VARIES = 32, - FD_DOUBLE_PRECISION = 64, - FD_BROKEN_DNA = 128 -}; - -enum bFileVerboseMode -{ - FD_VERBOSE_EXPORT_XML = 1, - FD_VERBOSE_DUMP_DNA_TYPE_DEFINITIONS = 2, - FD_VERBOSE_DUMP_CHUNKS = 4, - FD_VERBOSE_DUMP_FILE_INFO = 8, -}; -// ----------------------------------------------------- // -class bFile -{ -protected: - char m_headerString[7]; - - bool mOwnsBuffer; - char *mFileBuffer; - int mFileLen; - int mVersion; - - bPtrMap mLibPointers; - - int mDataStart; - bDNA *mFileDNA; - bDNA *mMemoryDNA; - - b3AlignedObjectArray m_pointerFixupArray; - b3AlignedObjectArray m_pointerPtrFixupArray; - - b3AlignedObjectArray m_chunks; - b3HashMap m_chunkPtrPtrMap; - - // - - bPtrMap mDataPointers; - - int mFlags; - - // //////////////////////////////////////////////////////////////////////////// - - // buffer offset util - int getNextBlock(bChunkInd *dataChunk, const char *dataPtr, const int flags); - void safeSwapPtr(char *dst, const char *src); - - virtual void parseHeader(); - - virtual void parseData() = 0; - - void resolvePointersMismatch(); - void resolvePointersChunk(const bChunkInd &dataChunk, int verboseMode); - - int resolvePointersStructRecursive(char *strcPtr, int old_dna, int verboseMode, int recursion); - //void swapPtr(char *dst, char *src); - - void parseStruct(char *strcPtr, char *dtPtr, int old_dna, int new_dna, bool fixupPointers); - void getMatchingFileDNA(short *old, const char *lookupName, const char *lookupType, char *strcData, char *data, bool fixupPointers); - char *getFileElement(short *firstStruct, char *lookupName, char *lookupType, char *data, short **foundPos); - - void swap(char *head, class bChunkInd &ch, bool ignoreEndianFlag); - void swapData(char *data, short type, int arraySize, bool ignoreEndianFlag); - void swapStruct(int dna_nr, char *data, bool ignoreEndianFlag); - void swapLen(char *dataPtr); - void swapDNA(char *ptr); - - char *readStruct(char *head, class bChunkInd &chunk); - char *getAsString(int code); - - void parseInternal(int verboseMode, char *memDna, int memDnaLength); - -public: - bFile(const char *filename, const char headerString[7]); - - //todo: make memoryBuffer const char - //bFile( const char *memoryBuffer, int len); - bFile(char *memoryBuffer, int len, const char headerString[7]); - virtual ~bFile(); - - bDNA *getFileDNA() - { - return mFileDNA; - } - - virtual void addDataBlock(char *dataBlock) = 0; - - int getFlags() const - { - return mFlags; - } - - bPtrMap &getLibPointers() - { - return mLibPointers; - } - - void *findLibPointer(void *ptr); - - bool ok(); - - virtual void parse(int verboseMode) = 0; - - virtual int write(const char *fileName, bool fixupPointers = false) = 0; - - virtual void writeChunks(FILE *fp, bool fixupPointers); - - virtual void writeDNA(FILE *fp) = 0; - - void updateOldPointers(); - void resolvePointers(int verboseMode); - - void dumpChunks(bDNA *dna); - - int getVersion() const - { - return mVersion; - } - //pre-swap the endianness, so that data loaded on a target with different endianness doesn't need to be swapped - void preSwap(); - void writeFile(const char *fileName); -}; -} // namespace bParse - -#endif //__BFILE_H__ diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Serializer.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Serializer.cpp deleted file mode 100644 index ea4a8e20073..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Serializer.cpp +++ /dev/null @@ -1,18062 +0,0 @@ -char b3s_bulletDNAstr[] = { - char(83), - char(68), - char(78), - char(65), - char(78), - char(65), - char(77), - char(69), - char(63), - char(1), - char(0), - char(0), - char(109), - char(95), - char(115), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(99), - char(97), - char(112), - char(97), - char(99), - char(105), - char(116), - char(121), - char(0), - char(42), - char(109), - char(95), - char(100), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(115), - char(0), - char(42), - char(102), - char(105), - char(114), - char(115), - char(116), - char(0), - char(42), - char(108), - char(97), - char(115), - char(116), - char(0), - char(109), - char(95), - char(102), - char(108), - char(111), - char(97), - char(116), - char(115), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(101), - char(108), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(98), - char(97), - char(115), - char(105), - char(115), - char(0), - char(109), - char(95), - char(111), - char(114), - char(105), - char(103), - char(105), - char(110), - char(0), - char(109), - char(95), - char(114), - char(111), - char(111), - char(116), - char(78), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(115), - char(117), - char(98), - char(116), - char(114), - char(101), - char(101), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(65), - char(97), - char(98), - char(98), - char(77), - char(105), - char(110), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(65), - char(97), - char(98), - char(98), - char(77), - char(97), - char(120), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(97), - char(97), - char(98), - char(98), - char(77), - char(105), - char(110), - char(79), - char(114), - char(103), - char(0), - char(109), - char(95), - char(97), - char(97), - char(98), - char(98), - char(77), - char(97), - char(120), - char(79), - char(114), - char(103), - char(0), - char(109), - char(95), - char(101), - char(115), - char(99), - char(97), - char(112), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(115), - char(117), - char(98), - char(80), - char(97), - char(114), - char(116), - char(0), - char(109), - char(95), - char(116), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(101), - char(115), - char(99), - char(97), - char(112), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(79), - char(114), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(98), - char(118), - char(104), - char(65), - char(97), - char(98), - char(98), - char(77), - char(105), - char(110), - char(0), - char(109), - char(95), - char(98), - char(118), - char(104), - char(65), - char(97), - char(98), - char(98), - char(77), - char(97), - char(120), - char(0), - char(109), - char(95), - char(98), - char(118), - char(104), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(97), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(117), - char(114), - char(78), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(97), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(76), - char(101), - char(97), - char(102), - char(78), - char(111), - char(100), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(67), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(78), - char(111), - char(100), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(99), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(78), - char(111), - char(100), - char(101), - char(115), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(67), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(78), - char(111), - char(100), - char(101), - char(115), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(115), - char(117), - char(98), - char(84), - char(114), - char(101), - char(101), - char(73), - char(110), - char(102), - char(111), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(116), - char(114), - char(97), - char(118), - char(101), - char(114), - char(115), - char(97), - char(108), - char(77), - char(111), - char(100), - char(101), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(83), - char(117), - char(98), - char(116), - char(114), - char(101), - char(101), - char(72), - char(101), - char(97), - char(100), - char(101), - char(114), - char(115), - char(0), - char(42), - char(109), - char(95), - char(110), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(115), - char(104), - char(97), - char(112), - char(101), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(100), - char(105), - char(110), - char(103), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(83), - char(99), - char(97), - char(108), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(112), - char(108), - char(97), - char(110), - char(101), - char(78), - char(111), - char(114), - char(109), - char(97), - char(108), - char(0), - char(109), - char(95), - char(112), - char(108), - char(97), - char(110), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(97), - char(110), - char(116), - char(0), - char(109), - char(95), - char(105), - char(109), - char(112), - char(108), - char(105), - char(99), - char(105), - char(116), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(105), - char(109), - char(101), - char(110), - char(115), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(77), - char(97), - char(114), - char(103), - char(105), - char(110), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(100), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(0), - char(109), - char(95), - char(114), - char(97), - char(100), - char(105), - char(117), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(118), - char(101), - char(120), - char(73), - char(110), - char(116), - char(101), - char(114), - char(110), - char(97), - char(108), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(42), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(65), - char(114), - char(114), - char(97), - char(121), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(65), - char(114), - char(114), - char(97), - char(121), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(118), - char(97), - char(108), - char(117), - char(101), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(118), - char(97), - char(108), - char(117), - char(101), - char(115), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(0), - char(42), - char(109), - char(95), - char(118), - char(101), - char(114), - char(116), - char(105), - char(99), - char(101), - char(115), - char(51), - char(102), - char(0), - char(42), - char(109), - char(95), - char(118), - char(101), - char(114), - char(116), - char(105), - char(99), - char(101), - char(115), - char(51), - char(100), - char(0), - char(42), - char(109), - char(95), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(51), - char(50), - char(0), - char(42), - char(109), - char(95), - char(51), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(49), - char(54), - char(0), - char(42), - char(109), - char(95), - char(51), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(56), - char(0), - char(42), - char(109), - char(95), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(49), - char(54), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(86), - char(101), - char(114), - char(116), - char(105), - char(99), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(109), - char(101), - char(115), - char(104), - char(80), - char(97), - char(114), - char(116), - char(115), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(115), - char(99), - char(97), - char(108), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(77), - char(101), - char(115), - char(104), - char(80), - char(97), - char(114), - char(116), - char(115), - char(0), - char(109), - char(95), - char(109), - char(101), - char(115), - char(104), - char(73), - char(110), - char(116), - char(101), - char(114), - char(102), - char(97), - char(99), - char(101), - char(0), - char(42), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(70), - char(108), - char(111), - char(97), - char(116), - char(66), - char(118), - char(104), - char(0), - char(42), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(66), - char(118), - char(104), - char(0), - char(42), - char(109), - char(95), - char(116), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(102), - char(111), - char(77), - char(97), - char(112), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(51), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(116), - char(114), - char(105), - char(109), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(116), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(0), - char(42), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(0), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(77), - char(97), - char(114), - char(103), - char(105), - char(110), - char(0), - char(42), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(115), - char(0), - char(109), - char(95), - char(117), - char(112), - char(65), - char(120), - char(105), - char(115), - char(0), - char(109), - char(95), - char(102), - char(108), - char(97), - char(103), - char(115), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(86), - char(48), - char(86), - char(49), - char(65), - char(110), - char(103), - char(108), - char(101), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(86), - char(49), - char(86), - char(50), - char(65), - char(110), - char(103), - char(108), - char(101), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(86), - char(50), - char(86), - char(48), - char(65), - char(110), - char(103), - char(108), - char(101), - char(0), - char(42), - char(109), - char(95), - char(104), - char(97), - char(115), - char(104), - char(84), - char(97), - char(98), - char(108), - char(101), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(110), - char(101), - char(120), - char(116), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(118), - char(97), - char(108), - char(117), - char(101), - char(65), - char(114), - char(114), - char(97), - char(121), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(107), - char(101), - char(121), - char(65), - char(114), - char(114), - char(97), - char(121), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(118), - char(101), - char(120), - char(69), - char(112), - char(115), - char(105), - char(108), - char(111), - char(110), - char(0), - char(109), - char(95), - char(112), - char(108), - char(97), - char(110), - char(97), - char(114), - char(69), - char(112), - char(115), - char(105), - char(108), - char(111), - char(110), - char(0), - char(109), - char(95), - char(101), - char(113), - char(117), - char(97), - char(108), - char(86), - char(101), - char(114), - char(116), - char(101), - char(120), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(68), - char(105), - char(115), - char(116), - char(97), - char(110), - char(99), - char(101), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(122), - char(101), - char(114), - char(111), - char(65), - char(114), - char(101), - char(97), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(110), - char(101), - char(120), - char(116), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(104), - char(97), - char(115), - char(104), - char(84), - char(97), - char(98), - char(108), - char(101), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(86), - char(97), - char(108), - char(117), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(75), - char(101), - char(121), - char(115), - char(0), - char(109), - char(95), - char(103), - char(105), - char(109), - char(112), - char(97), - char(99), - char(116), - char(83), - char(117), - char(98), - char(84), - char(121), - char(112), - char(101), - char(0), - char(42), - char(109), - char(95), - char(117), - char(110), - char(115), - char(99), - char(97), - char(108), - char(101), - char(100), - char(80), - char(111), - char(105), - char(110), - char(116), - char(115), - char(70), - char(108), - char(111), - char(97), - char(116), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(117), - char(110), - char(115), - char(99), - char(97), - char(108), - char(101), - char(100), - char(80), - char(111), - char(105), - char(110), - char(116), - char(115), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(85), - char(110), - char(115), - char(99), - char(97), - char(108), - char(101), - char(100), - char(80), - char(111), - char(105), - char(110), - char(116), - char(115), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(100), - char(105), - char(110), - char(103), - char(51), - char(91), - char(52), - char(93), - char(0), - char(42), - char(109), - char(95), - char(98), - char(114), - char(111), - char(97), - char(100), - char(112), - char(104), - char(97), - char(115), - char(101), - char(72), - char(97), - char(110), - char(100), - char(108), - char(101), - char(0), - char(42), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(0), - char(42), - char(109), - char(95), - char(114), - char(111), - char(111), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(0), - char(109), - char(95), - char(119), - char(111), - char(114), - char(108), - char(100), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(112), - char(111), - char(108), - char(97), - char(116), - char(105), - char(111), - char(110), - char(87), - char(111), - char(114), - char(108), - char(100), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(112), - char(111), - char(108), - char(97), - char(116), - char(105), - char(111), - char(110), - char(76), - char(105), - char(110), - char(101), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(112), - char(111), - char(108), - char(97), - char(116), - char(105), - char(111), - char(110), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(110), - char(105), - char(115), - char(111), - char(116), - char(114), - char(111), - char(112), - char(105), - char(99), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(80), - char(114), - char(111), - char(99), - char(101), - char(115), - char(115), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(100), - char(101), - char(97), - char(99), - char(116), - char(105), - char(118), - char(97), - char(116), - char(105), - char(111), - char(110), - char(84), - char(105), - char(109), - char(101), - char(0), - char(109), - char(95), - char(102), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(114), - char(111), - char(108), - char(108), - char(105), - char(110), - char(103), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(105), - char(116), - char(117), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(104), - char(105), - char(116), - char(70), - char(114), - char(97), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(99), - char(100), - char(83), - char(119), - char(101), - char(112), - char(116), - char(83), - char(112), - char(104), - char(101), - char(114), - char(101), - char(82), - char(97), - char(100), - char(105), - char(117), - char(115), - char(0), - char(109), - char(95), - char(99), - char(99), - char(100), - char(77), - char(111), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(104), - char(97), - char(115), - char(65), - char(110), - char(105), - char(115), - char(111), - char(116), - char(114), - char(111), - char(112), - char(105), - char(99), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(70), - char(108), - char(97), - char(103), - char(115), - char(0), - char(109), - char(95), - char(105), - char(115), - char(108), - char(97), - char(110), - char(100), - char(84), - char(97), - char(103), - char(49), - char(0), - char(109), - char(95), - char(99), - char(111), - char(109), - char(112), - char(97), - char(110), - char(105), - char(111), - char(110), - char(73), - char(100), - char(0), - char(109), - char(95), - char(97), - char(99), - char(116), - char(105), - char(118), - char(97), - char(116), - char(105), - char(111), - char(110), - char(83), - char(116), - char(97), - char(116), - char(101), - char(49), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(110), - char(97), - char(108), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(99), - char(104), - char(101), - char(99), - char(107), - char(67), - char(111), - char(108), - char(108), - char(105), - char(100), - char(101), - char(87), - char(105), - char(116), - char(104), - char(0), - char(109), - char(95), - char(115), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(110), - char(102), - char(111), - char(0), - char(109), - char(95), - char(103), - char(114), - char(97), - char(118), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(73), - char(110), - char(101), - char(114), - char(116), - char(105), - char(97), - char(84), - char(101), - char(110), - char(115), - char(111), - char(114), - char(87), - char(111), - char(114), - char(108), - char(100), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(103), - char(114), - char(97), - char(118), - char(105), - char(116), - char(121), - char(95), - char(97), - char(99), - char(99), - char(101), - char(108), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(73), - char(110), - char(101), - char(114), - char(116), - char(105), - char(97), - char(76), - char(111), - char(99), - char(97), - char(108), - char(0), - char(109), - char(95), - char(116), - char(111), - char(116), - char(97), - char(108), - char(70), - char(111), - char(114), - char(99), - char(101), - char(0), - char(109), - char(95), - char(116), - char(111), - char(116), - char(97), - char(108), - char(84), - char(111), - char(114), - char(113), - char(117), - char(101), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(101), - char(114), - char(115), - char(101), - char(77), - char(97), - char(115), - char(115), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(76), - char(105), - char(110), - char(101), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(83), - char(113), - char(114), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(83), - char(113), - char(114), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(83), - char(108), - char(101), - char(101), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(83), - char(108), - char(101), - char(101), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(82), - char(111), - char(119), - char(115), - char(0), - char(110), - char(117), - char(98), - char(0), - char(42), - char(109), - char(95), - char(114), - char(98), - char(65), - char(0), - char(42), - char(109), - char(95), - char(114), - char(98), - char(66), - char(0), - char(109), - char(95), - char(111), - char(98), - char(106), - char(101), - char(99), - char(116), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(73), - char(100), - char(0), - char(109), - char(95), - char(110), - char(101), - char(101), - char(100), - char(115), - char(70), - char(101), - char(101), - char(100), - char(98), - char(97), - char(99), - char(107), - char(0), - char(109), - char(95), - char(97), - char(112), - char(112), - char(108), - char(105), - char(101), - char(100), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(100), - char(98), - char(103), - char(68), - char(114), - char(97), - char(119), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(100), - char(105), - char(115), - char(97), - char(98), - char(108), - char(101), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(115), - char(66), - char(101), - char(116), - char(119), - char(101), - char(101), - char(110), - char(76), - char(105), - char(110), - char(107), - char(101), - char(100), - char(66), - char(111), - char(100), - char(105), - char(101), - char(115), - char(0), - char(109), - char(95), - char(111), - char(118), - char(101), - char(114), - char(114), - char(105), - char(100), - char(101), - char(78), - char(117), - char(109), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(98), - char(114), - char(101), - char(97), - char(107), - char(105), - char(110), - char(103), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(105), - char(115), - char(69), - char(110), - char(97), - char(98), - char(108), - char(101), - char(100), - char(0), - char(109), - char(95), - char(116), - char(121), - char(112), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(112), - char(105), - char(118), - char(111), - char(116), - char(73), - char(110), - char(65), - char(0), - char(109), - char(95), - char(112), - char(105), - char(118), - char(111), - char(116), - char(73), - char(110), - char(66), - char(0), - char(109), - char(95), - char(114), - char(98), - char(65), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(114), - char(98), - char(66), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(82), - char(101), - char(102), - char(101), - char(114), - char(101), - char(110), - char(99), - char(101), - char(70), - char(114), - char(97), - char(109), - char(101), - char(65), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(79), - char(110), - char(108), - char(121), - char(0), - char(109), - char(95), - char(101), - char(110), - char(97), - char(98), - char(108), - char(101), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(77), - char(111), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(109), - char(111), - char(116), - char(111), - char(114), - char(84), - char(97), - char(114), - char(103), - char(101), - char(116), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(77), - char(111), - char(116), - char(111), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(108), - char(111), - char(119), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(117), - char(112), - char(112), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(108), - char(105), - char(109), - char(105), - char(116), - char(83), - char(111), - char(102), - char(116), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(98), - char(105), - char(97), - char(115), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(114), - char(101), - char(108), - char(97), - char(120), - char(97), - char(116), - char(105), - char(111), - char(110), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(115), - char(119), - char(105), - char(110), - char(103), - char(83), - char(112), - char(97), - char(110), - char(49), - char(0), - char(109), - char(95), - char(115), - char(119), - char(105), - char(110), - char(103), - char(83), - char(112), - char(97), - char(110), - char(50), - char(0), - char(109), - char(95), - char(116), - char(119), - char(105), - char(115), - char(116), - char(83), - char(112), - char(97), - char(110), - char(0), - char(109), - char(95), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(85), - char(112), - char(112), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(76), - char(111), - char(119), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(85), - char(112), - char(112), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(76), - char(111), - char(119), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(76), - char(105), - char(110), - char(101), - char(97), - char(114), - char(82), - char(101), - char(102), - char(101), - char(114), - char(101), - char(110), - char(99), - char(101), - char(70), - char(114), - char(97), - char(109), - char(101), - char(65), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(79), - char(102), - char(102), - char(115), - char(101), - char(116), - char(70), - char(111), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(54), - char(100), - char(111), - char(102), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(115), - char(112), - char(114), - char(105), - char(110), - char(103), - char(69), - char(110), - char(97), - char(98), - char(108), - char(101), - char(100), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(101), - char(113), - char(117), - char(105), - char(108), - char(105), - char(98), - char(114), - char(105), - char(117), - char(109), - char(80), - char(111), - char(105), - char(110), - char(116), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(115), - char(112), - char(114), - char(105), - char(110), - char(103), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(115), - char(112), - char(114), - char(105), - char(110), - char(103), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(116), - char(97), - char(117), - char(0), - char(109), - char(95), - char(116), - char(105), - char(109), - char(101), - char(83), - char(116), - char(101), - char(112), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(69), - char(114), - char(114), - char(111), - char(114), - char(82), - char(101), - char(100), - char(117), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(115), - char(111), - char(114), - char(0), - char(109), - char(95), - char(101), - char(114), - char(112), - char(0), - char(109), - char(95), - char(101), - char(114), - char(112), - char(50), - char(0), - char(109), - char(95), - char(103), - char(108), - char(111), - char(98), - char(97), - char(108), - char(67), - char(102), - char(109), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(80), - char(101), - char(110), - char(101), - char(116), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(84), - char(117), - char(114), - char(110), - char(69), - char(114), - char(112), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(83), - char(108), - char(111), - char(112), - char(0), - char(109), - char(95), - char(119), - char(97), - char(114), - char(109), - char(115), - char(116), - char(97), - char(114), - char(116), - char(105), - char(110), - char(103), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(71), - char(121), - char(114), - char(111), - char(115), - char(99), - char(111), - char(112), - char(105), - char(99), - char(70), - char(111), - char(114), - char(99), - char(101), - char(0), - char(109), - char(95), - char(115), - char(105), - char(110), - char(103), - char(108), - char(101), - char(65), - char(120), - char(105), - char(115), - char(82), - char(111), - char(108), - char(108), - char(105), - char(110), - char(103), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(108), - char(118), - char(101), - char(114), - char(77), - char(111), - char(100), - char(101), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(105), - char(110), - char(103), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(82), - char(101), - char(115), - char(116), - char(105), - char(116), - char(117), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(109), - char(105), - char(110), - char(105), - char(109), - char(117), - char(109), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(66), - char(97), - char(116), - char(99), - char(104), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(118), - char(111), - char(108), - char(117), - char(109), - char(101), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(0), - char(42), - char(109), - char(95), - char(109), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(112), - char(114), - char(101), - char(118), - char(105), - char(111), - char(117), - char(115), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(118), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(99), - char(99), - char(117), - char(109), - char(117), - char(108), - char(97), - char(116), - char(101), - char(100), - char(70), - char(111), - char(114), - char(99), - char(101), - char(0), - char(109), - char(95), - char(110), - char(111), - char(114), - char(109), - char(97), - char(108), - char(0), - char(109), - char(95), - char(97), - char(114), - char(101), - char(97), - char(0), - char(109), - char(95), - char(97), - char(116), - char(116), - char(97), - char(99), - char(104), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(76), - char(101), - char(110), - char(103), - char(116), - char(104), - char(0), - char(109), - char(95), - char(98), - char(98), - char(101), - char(110), - char(100), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(65), - char(114), - char(101), - char(97), - char(0), - char(109), - char(95), - char(99), - char(48), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(86), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(99), - char(49), - char(0), - char(109), - char(95), - char(99), - char(50), - char(0), - char(109), - char(95), - char(99), - char(48), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(42), - char(109), - char(95), - char(114), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(97), - char(101), - char(114), - char(111), - char(77), - char(111), - char(100), - char(101), - char(108), - char(0), - char(109), - char(95), - char(98), - char(97), - char(117), - char(109), - char(103), - char(97), - char(114), - char(116), - char(101), - char(0), - char(109), - char(95), - char(100), - char(114), - char(97), - char(103), - char(0), - char(109), - char(95), - char(108), - char(105), - char(102), - char(116), - char(0), - char(109), - char(95), - char(112), - char(114), - char(101), - char(115), - char(115), - char(117), - char(114), - char(101), - char(0), - char(109), - char(95), - char(118), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(100), - char(121), - char(110), - char(97), - char(109), - char(105), - char(99), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(101), - char(77), - char(97), - char(116), - char(99), - char(104), - char(0), - char(109), - char(95), - char(114), - char(105), - char(103), - char(105), - char(100), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(107), - char(105), - char(110), - char(101), - char(116), - char(105), - char(99), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(97), - char(110), - char(99), - char(104), - char(111), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(75), - char(105), - char(110), - char(101), - char(116), - char(105), - char(99), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(83), - char(111), - char(102), - char(116), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(83), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(75), - char(105), - char(110), - char(101), - char(116), - char(105), - char(99), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(83), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(83), - char(111), - char(102), - char(116), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(83), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(86), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(116), - char(105), - char(109), - char(101), - char(83), - char(99), - char(97), - char(108), - char(101), - char(0), - char(109), - char(95), - char(118), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(100), - char(114), - char(105), - char(102), - char(116), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(99), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(114), - char(111), - char(116), - char(0), - char(109), - char(95), - char(115), - char(99), - char(97), - char(108), - char(101), - char(0), - char(109), - char(95), - char(97), - char(113), - char(113), - char(0), - char(109), - char(95), - char(99), - char(111), - char(109), - char(0), - char(42), - char(109), - char(95), - char(112), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(42), - char(109), - char(95), - char(119), - char(101), - char(105), - char(103), - char(104), - char(116), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(87), - char(101), - char(105), - char(103), - char(116), - char(115), - char(0), - char(109), - char(95), - char(98), - char(118), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(98), - char(102), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(102), - char(114), - char(97), - char(109), - char(101), - char(120), - char(102), - char(111), - char(114), - char(109), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(105), - char(105), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(119), - char(105), - char(0), - char(109), - char(95), - char(118), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(100), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(108), - char(118), - char(0), - char(109), - char(95), - char(97), - char(118), - char(0), - char(42), - char(109), - char(95), - char(102), - char(114), - char(97), - char(109), - char(101), - char(114), - char(101), - char(102), - char(115), - char(0), - char(42), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(109), - char(97), - char(115), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(70), - char(114), - char(97), - char(109), - char(101), - char(82), - char(101), - char(102), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(78), - char(111), - char(100), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(77), - char(97), - char(115), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(105), - char(100), - char(109), - char(97), - char(115), - char(115), - char(0), - char(109), - char(95), - char(105), - char(109), - char(97), - char(115), - char(115), - char(0), - char(109), - char(95), - char(110), - char(118), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(100), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(108), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(97), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(109), - char(97), - char(116), - char(99), - char(104), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(83), - char(101), - char(108), - char(102), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(115), - char(101), - char(108), - char(102), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(116), - char(97), - char(105), - char(110), - char(115), - char(65), - char(110), - char(99), - char(104), - char(111), - char(114), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(100), - char(101), - char(0), - char(109), - char(95), - char(99), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(42), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(65), - char(0), - char(42), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(66), - char(0), - char(109), - char(95), - char(114), - char(101), - char(102), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(99), - char(102), - char(109), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(100), - char(101), - char(108), - char(101), - char(116), - char(101), - char(0), - char(109), - char(95), - char(114), - char(101), - char(108), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(65), - char(116), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(66), - char(116), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(106), - char(111), - char(105), - char(110), - char(116), - char(84), - char(121), - char(112), - char(101), - char(0), - char(42), - char(109), - char(95), - char(112), - char(111), - char(115), - char(101), - char(0), - char(42), - char(42), - char(109), - char(95), - char(109), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(115), - char(0), - char(42), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(108), - char(105), - char(110), - char(107), - char(115), - char(0), - char(42), - char(109), - char(95), - char(102), - char(97), - char(99), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(116), - char(101), - char(116), - char(114), - char(97), - char(104), - char(101), - char(100), - char(114), - char(97), - char(0), - char(42), - char(109), - char(95), - char(97), - char(110), - char(99), - char(104), - char(111), - char(114), - char(115), - char(0), - char(42), - char(109), - char(95), - char(99), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(115), - char(0), - char(42), - char(109), - char(95), - char(106), - char(111), - char(105), - char(110), - char(116), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(77), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(76), - char(105), - char(110), - char(107), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(70), - char(97), - char(99), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(84), - char(101), - char(116), - char(114), - char(97), - char(104), - char(101), - char(100), - char(114), - char(97), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(65), - char(110), - char(99), - char(104), - char(111), - char(114), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(74), - char(111), - char(105), - char(110), - char(116), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(102), - char(105), - char(103), - char(0), - char(84), - char(89), - char(80), - char(69), - char(76), - char(0), - char(0), - char(0), - char(99), - char(104), - char(97), - char(114), - char(0), - char(117), - char(99), - char(104), - char(97), - char(114), - char(0), - char(115), - char(104), - char(111), - char(114), - char(116), - char(0), - char(117), - char(115), - char(104), - char(111), - char(114), - char(116), - char(0), - char(105), - char(110), - char(116), - char(0), - char(108), - char(111), - char(110), - char(103), - char(0), - char(117), - char(108), - char(111), - char(110), - char(103), - char(0), - char(102), - char(108), - char(111), - char(97), - char(116), - char(0), - char(100), - char(111), - char(117), - char(98), - char(108), - char(101), - char(0), - char(118), - char(111), - char(105), - char(100), - char(0), - char(80), - char(111), - char(105), - char(110), - char(116), - char(101), - char(114), - char(65), - char(114), - char(114), - char(97), - char(121), - char(0), - char(98), - char(116), - char(80), - char(104), - char(121), - char(115), - char(105), - char(99), - char(115), - char(83), - char(121), - char(115), - char(116), - char(101), - char(109), - char(0), - char(76), - char(105), - char(115), - char(116), - char(66), - char(97), - char(115), - char(101), - char(0), - char(98), - char(116), - char(86), - char(101), - char(99), - char(116), - char(111), - char(114), - char(51), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(86), - char(101), - char(99), - char(116), - char(111), - char(114), - char(51), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(77), - char(97), - char(116), - char(114), - char(105), - char(120), - char(51), - char(120), - char(51), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(77), - char(97), - char(116), - char(114), - char(105), - char(120), - char(51), - char(120), - char(51), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(66), - char(118), - char(104), - char(83), - char(117), - char(98), - char(116), - char(114), - char(101), - char(101), - char(73), - char(110), - char(102), - char(111), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(79), - char(112), - char(116), - char(105), - char(109), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(78), - char(111), - char(100), - char(101), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(79), - char(112), - char(116), - char(105), - char(109), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(78), - char(111), - char(100), - char(101), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(78), - char(111), - char(100), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(116), - char(97), - char(116), - char(105), - char(99), - char(80), - char(108), - char(97), - char(110), - char(101), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(118), - char(101), - char(120), - char(73), - char(110), - char(116), - char(101), - char(114), - char(110), - char(97), - char(108), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(65), - char(110), - char(100), - char(82), - char(97), - char(100), - char(105), - char(117), - char(115), - char(0), - char(98), - char(116), - char(77), - char(117), - char(108), - char(116), - char(105), - char(83), - char(112), - char(104), - char(101), - char(114), - char(101), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(73), - char(110), - char(116), - char(73), - char(110), - char(100), - char(101), - char(120), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(104), - char(111), - char(114), - char(116), - char(73), - char(110), - char(116), - char(73), - char(110), - char(100), - char(101), - char(120), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(104), - char(111), - char(114), - char(116), - char(73), - char(110), - char(116), - char(73), - char(110), - char(100), - char(101), - char(120), - char(84), - char(114), - char(105), - char(112), - char(108), - char(101), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(104), - char(97), - char(114), - char(73), - char(110), - char(100), - char(101), - char(120), - char(84), - char(114), - char(105), - char(112), - char(108), - char(101), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(77), - char(101), - char(115), - char(104), - char(80), - char(97), - char(114), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(116), - char(114), - char(105), - char(100), - char(105), - char(110), - char(103), - char(77), - char(101), - char(115), - char(104), - char(73), - char(110), - char(116), - char(101), - char(114), - char(102), - char(97), - char(99), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(77), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(102), - char(111), - char(77), - char(97), - char(112), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(99), - char(97), - char(108), - char(101), - char(100), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(77), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(109), - char(112), - char(111), - char(117), - char(110), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(67), - char(104), - char(105), - char(108), - char(100), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(109), - char(112), - char(111), - char(117), - char(110), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(121), - char(108), - char(105), - char(110), - char(100), - char(101), - char(114), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(97), - char(112), - char(115), - char(117), - char(108), - char(101), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(102), - char(111), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(71), - char(73), - char(109), - char(112), - char(97), - char(99), - char(116), - char(77), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(118), - char(101), - char(120), - char(72), - char(117), - char(108), - char(108), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(68), - char(121), - char(110), - char(97), - char(109), - char(105), - char(99), - char(115), - char(87), - char(111), - char(114), - char(108), - char(100), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(110), - char(102), - char(111), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(68), - char(121), - char(110), - char(97), - char(109), - char(105), - char(99), - char(115), - char(87), - char(111), - char(114), - char(108), - char(100), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(110), - char(102), - char(111), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(73), - char(110), - char(102), - char(111), - char(49), - char(0), - char(98), - char(116), - char(84), - char(121), - char(112), - char(101), - char(100), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(80), - char(111), - char(105), - char(110), - char(116), - char(50), - char(80), - char(111), - char(105), - char(110), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(80), - char(111), - char(105), - char(110), - char(116), - char(50), - char(80), - char(111), - char(105), - char(110), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(72), - char(105), - char(110), - char(103), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(72), - char(105), - char(110), - char(103), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(101), - char(84), - char(119), - char(105), - char(115), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(71), - char(101), - char(110), - char(101), - char(114), - char(105), - char(99), - char(54), - char(68), - char(111), - char(102), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(71), - char(101), - char(110), - char(101), - char(114), - char(105), - char(99), - char(54), - char(68), - char(111), - char(102), - char(83), - char(112), - char(114), - char(105), - char(110), - char(103), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(108), - char(105), - char(100), - char(101), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(77), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(78), - char(111), - char(100), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(76), - char(105), - char(110), - char(107), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(70), - char(97), - char(99), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(84), - char(101), - char(116), - char(114), - char(97), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(65), - char(110), - char(99), - char(104), - char(111), - char(114), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(67), - char(111), - char(110), - char(102), - char(105), - char(103), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(80), - char(111), - char(115), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(74), - char(111), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(0), - char(0), - char(84), - char(76), - char(69), - char(78), - char(1), - char(0), - char(1), - char(0), - char(2), - char(0), - char(2), - char(0), - char(4), - char(0), - char(4), - char(0), - char(4), - char(0), - char(4), - char(0), - char(8), - char(0), - char(0), - char(0), - char(12), - char(0), - char(36), - char(0), - char(8), - char(0), - char(16), - char(0), - char(32), - char(0), - char(48), - char(0), - char(96), - char(0), - char(64), - char(0), - char(-128), - char(0), - char(20), - char(0), - char(48), - char(0), - char(80), - char(0), - char(16), - char(0), - char(84), - char(0), - char(-124), - char(0), - char(12), - char(0), - char(52), - char(0), - char(52), - char(0), - char(20), - char(0), - char(64), - char(0), - char(4), - char(0), - char(4), - char(0), - char(8), - char(0), - char(4), - char(0), - char(32), - char(0), - char(28), - char(0), - char(60), - char(0), - char(56), - char(0), - char(76), - char(0), - char(76), - char(0), - char(24), - char(0), - char(60), - char(0), - char(60), - char(0), - char(16), - char(0), - char(64), - char(0), - char(68), - char(0), - char(-48), - char(1), - char(0), - char(1), - char(-72), - char(0), - char(-104), - char(0), - char(104), - char(0), - char(88), - char(0), - char(-24), - char(1), - char(-96), - char(3), - char(8), - char(0), - char(52), - char(0), - char(0), - char(0), - char(84), - char(0), - char(116), - char(0), - char(92), - char(1), - char(-36), - char(0), - char(-44), - char(0), - char(-4), - char(0), - char(92), - char(1), - char(-52), - char(0), - char(16), - char(0), - char(100), - char(0), - char(20), - char(0), - char(36), - char(0), - char(100), - char(0), - char(92), - char(0), - char(104), - char(0), - char(-64), - char(0), - char(92), - char(1), - char(104), - char(0), - char(-84), - char(1), - char(83), - char(84), - char(82), - char(67), - char(65), - char(0), - char(0), - char(0), - char(10), - char(0), - char(3), - char(0), - char(4), - char(0), - char(0), - char(0), - char(4), - char(0), - char(1), - char(0), - char(9), - char(0), - char(2), - char(0), - char(11), - char(0), - char(3), - char(0), - char(10), - char(0), - char(3), - char(0), - char(10), - char(0), - char(4), - char(0), - char(10), - char(0), - char(5), - char(0), - char(12), - char(0), - char(2), - char(0), - char(9), - char(0), - char(6), - char(0), - char(9), - char(0), - char(7), - char(0), - char(13), - char(0), - char(1), - char(0), - char(7), - char(0), - char(8), - char(0), - char(14), - char(0), - char(1), - char(0), - char(8), - char(0), - char(8), - char(0), - char(15), - char(0), - char(1), - char(0), - char(13), - char(0), - char(9), - char(0), - char(16), - char(0), - char(1), - char(0), - char(14), - char(0), - char(9), - char(0), - char(17), - char(0), - char(2), - char(0), - char(15), - char(0), - char(10), - char(0), - char(13), - char(0), - char(11), - char(0), - char(18), - char(0), - char(2), - char(0), - char(16), - char(0), - char(10), - char(0), - char(14), - char(0), - char(11), - char(0), - char(19), - char(0), - char(4), - char(0), - char(4), - char(0), - char(12), - char(0), - char(4), - char(0), - char(13), - char(0), - char(2), - char(0), - char(14), - char(0), - char(2), - char(0), - char(15), - char(0), - char(20), - char(0), - char(6), - char(0), - char(13), - char(0), - char(16), - char(0), - char(13), - char(0), - char(17), - char(0), - char(4), - char(0), - char(18), - char(0), - char(4), - char(0), - char(19), - char(0), - char(4), - char(0), - char(20), - char(0), - char(0), - char(0), - char(21), - char(0), - char(21), - char(0), - char(6), - char(0), - char(14), - char(0), - char(16), - char(0), - char(14), - char(0), - char(17), - char(0), - char(4), - char(0), - char(18), - char(0), - char(4), - char(0), - char(19), - char(0), - char(4), - char(0), - char(20), - char(0), - char(0), - char(0), - char(21), - char(0), - char(22), - char(0), - char(3), - char(0), - char(2), - char(0), - char(14), - char(0), - char(2), - char(0), - char(15), - char(0), - char(4), - char(0), - char(22), - char(0), - char(23), - char(0), - char(12), - char(0), - char(13), - char(0), - char(23), - char(0), - char(13), - char(0), - char(24), - char(0), - char(13), - char(0), - char(25), - char(0), - char(4), - char(0), - char(26), - char(0), - char(4), - char(0), - char(27), - char(0), - char(4), - char(0), - char(28), - char(0), - char(4), - char(0), - char(29), - char(0), - char(20), - char(0), - char(30), - char(0), - char(22), - char(0), - char(31), - char(0), - char(19), - char(0), - char(32), - char(0), - char(4), - char(0), - char(33), - char(0), - char(4), - char(0), - char(34), - char(0), - char(24), - char(0), - char(12), - char(0), - char(14), - char(0), - char(23), - char(0), - char(14), - char(0), - char(24), - char(0), - char(14), - char(0), - char(25), - char(0), - char(4), - char(0), - char(26), - char(0), - char(4), - char(0), - char(27), - char(0), - char(4), - char(0), - char(28), - char(0), - char(4), - char(0), - char(29), - char(0), - char(21), - char(0), - char(30), - char(0), - char(22), - char(0), - char(31), - char(0), - char(4), - char(0), - char(33), - char(0), - char(4), - char(0), - char(34), - char(0), - char(19), - char(0), - char(32), - char(0), - char(25), - char(0), - char(3), - char(0), - char(0), - char(0), - char(35), - char(0), - char(4), - char(0), - char(36), - char(0), - char(0), - char(0), - char(37), - char(0), - char(26), - char(0), - char(5), - char(0), - char(25), - char(0), - char(38), - char(0), - char(13), - char(0), - char(39), - char(0), - char(13), - char(0), - char(40), - char(0), - char(7), - char(0), - char(41), - char(0), - char(0), - char(0), - char(21), - char(0), - char(27), - char(0), - char(5), - char(0), - char(25), - char(0), - char(38), - char(0), - char(13), - char(0), - char(39), - char(0), - char(13), - char(0), - char(42), - char(0), - char(7), - char(0), - char(43), - char(0), - char(4), - char(0), - char(44), - char(0), - char(28), - char(0), - char(2), - char(0), - char(13), - char(0), - char(45), - char(0), - char(7), - char(0), - char(46), - char(0), - char(29), - char(0), - char(4), - char(0), - char(27), - char(0), - char(47), - char(0), - char(28), - char(0), - char(48), - char(0), - char(4), - char(0), - char(49), - char(0), - char(0), - char(0), - char(37), - char(0), - char(30), - char(0), - char(1), - char(0), - char(4), - char(0), - char(50), - char(0), - char(31), - char(0), - char(2), - char(0), - char(2), - char(0), - char(50), - char(0), - char(0), - char(0), - char(51), - char(0), - char(32), - char(0), - char(2), - char(0), - char(2), - char(0), - char(52), - char(0), - char(0), - char(0), - char(51), - char(0), - char(33), - char(0), - char(2), - char(0), - char(0), - char(0), - char(52), - char(0), - char(0), - char(0), - char(53), - char(0), - char(34), - char(0), - char(8), - char(0), - char(13), - char(0), - char(54), - char(0), - char(14), - char(0), - char(55), - char(0), - char(30), - char(0), - char(56), - char(0), - char(32), - char(0), - char(57), - char(0), - char(33), - char(0), - char(58), - char(0), - char(31), - char(0), - char(59), - char(0), - char(4), - char(0), - char(60), - char(0), - char(4), - char(0), - char(61), - char(0), - char(35), - char(0), - char(4), - char(0), - char(34), - char(0), - char(62), - char(0), - char(13), - char(0), - char(63), - char(0), - char(4), - char(0), - char(64), - char(0), - char(0), - char(0), - char(37), - char(0), - char(36), - char(0), - char(7), - char(0), - char(25), - char(0), - char(38), - char(0), - char(35), - char(0), - char(65), - char(0), - char(23), - char(0), - char(66), - char(0), - char(24), - char(0), - char(67), - char(0), - char(37), - char(0), - char(68), - char(0), - char(7), - char(0), - char(43), - char(0), - char(0), - char(0), - char(69), - char(0), - char(38), - char(0), - char(2), - char(0), - char(36), - char(0), - char(70), - char(0), - char(13), - char(0), - char(39), - char(0), - char(39), - char(0), - char(4), - char(0), - char(17), - char(0), - char(71), - char(0), - char(25), - char(0), - char(72), - char(0), - char(4), - char(0), - char(73), - char(0), - char(7), - char(0), - char(74), - char(0), - char(40), - char(0), - char(4), - char(0), - char(25), - char(0), - char(38), - char(0), - char(39), - char(0), - char(75), - char(0), - char(4), - char(0), - char(76), - char(0), - char(7), - char(0), - char(43), - char(0), - char(41), - char(0), - char(3), - char(0), - char(27), - char(0), - char(47), - char(0), - char(4), - char(0), - char(77), - char(0), - char(0), - char(0), - char(37), - char(0), - char(42), - char(0), - char(3), - char(0), - char(27), - char(0), - char(47), - char(0), - char(4), - char(0), - char(77), - char(0), - char(0), - char(0), - char(37), - char(0), - char(43), - char(0), - char(4), - char(0), - char(4), - char(0), - char(78), - char(0), - char(7), - char(0), - char(79), - char(0), - char(7), - char(0), - char(80), - char(0), - char(7), - char(0), - char(81), - char(0), - char(37), - char(0), - char(14), - char(0), - char(4), - char(0), - char(82), - char(0), - char(4), - char(0), - char(83), - char(0), - char(43), - char(0), - char(84), - char(0), - char(4), - char(0), - char(85), - char(0), - char(7), - char(0), - char(86), - char(0), - char(7), - char(0), - char(87), - char(0), - char(7), - char(0), - char(88), - char(0), - char(7), - char(0), - char(89), - char(0), - char(7), - char(0), - char(90), - char(0), - char(4), - char(0), - char(91), - char(0), - char(4), - char(0), - char(92), - char(0), - char(4), - char(0), - char(93), - char(0), - char(4), - char(0), - char(94), - char(0), - char(0), - char(0), - char(37), - char(0), - char(44), - char(0), - char(5), - char(0), - char(25), - char(0), - char(38), - char(0), - char(35), - char(0), - char(65), - char(0), - char(13), - char(0), - char(39), - char(0), - char(7), - char(0), - char(43), - char(0), - char(4), - char(0), - char(95), - char(0), - char(45), - char(0), - char(5), - char(0), - char(27), - char(0), - char(47), - char(0), - char(13), - char(0), - char(96), - char(0), - char(14), - char(0), - char(97), - char(0), - char(4), - char(0), - char(98), - char(0), - char(0), - char(0), - char(99), - char(0), - char(46), - char(0), - char(25), - char(0), - char(9), - char(0), - char(100), - char(0), - char(9), - char(0), - char(101), - char(0), - char(25), - char(0), - char(102), - char(0), - char(0), - char(0), - char(35), - char(0), - char(18), - char(0), - char(103), - char(0), - char(18), - char(0), - char(104), - char(0), - char(14), - char(0), - char(105), - char(0), - char(14), - char(0), - char(106), - char(0), - char(14), - char(0), - char(107), - char(0), - char(8), - char(0), - char(108), - char(0), - char(8), - char(0), - char(109), - char(0), - char(8), - char(0), - char(110), - char(0), - char(8), - char(0), - char(111), - char(0), - char(8), - char(0), - char(112), - char(0), - char(8), - char(0), - char(113), - char(0), - char(8), - char(0), - char(114), - char(0), - char(8), - char(0), - char(115), - char(0), - char(4), - char(0), - char(116), - char(0), - char(4), - char(0), - char(117), - char(0), - char(4), - char(0), - char(118), - char(0), - char(4), - char(0), - char(119), - char(0), - char(4), - char(0), - char(120), - char(0), - char(4), - char(0), - char(121), - char(0), - char(4), - char(0), - char(122), - char(0), - char(0), - char(0), - char(37), - char(0), - char(47), - char(0), - char(25), - char(0), - char(9), - char(0), - char(100), - char(0), - char(9), - char(0), - char(101), - char(0), - char(25), - char(0), - char(102), - char(0), - char(0), - char(0), - char(35), - char(0), - char(17), - char(0), - char(103), - char(0), - char(17), - char(0), - char(104), - char(0), - char(13), - char(0), - char(105), - char(0), - char(13), - char(0), - char(106), - char(0), - char(13), - char(0), - char(107), - char(0), - char(7), - char(0), - char(108), - char(0), - char(7), - char(0), - char(109), - char(0), - char(7), - char(0), - char(110), - char(0), - char(7), - char(0), - char(111), - char(0), - char(7), - char(0), - char(112), - char(0), - char(7), - char(0), - char(113), - char(0), - char(7), - char(0), - char(114), - char(0), - char(7), - char(0), - char(115), - char(0), - char(4), - char(0), - char(116), - char(0), - char(4), - char(0), - char(117), - char(0), - char(4), - char(0), - char(118), - char(0), - char(4), - char(0), - char(119), - char(0), - char(4), - char(0), - char(120), - char(0), - char(4), - char(0), - char(121), - char(0), - char(4), - char(0), - char(122), - char(0), - char(0), - char(0), - char(37), - char(0), - char(48), - char(0), - char(2), - char(0), - char(49), - char(0), - char(123), - char(0), - char(14), - char(0), - char(124), - char(0), - char(50), - char(0), - char(2), - char(0), - char(51), - char(0), - char(123), - char(0), - char(13), - char(0), - char(124), - char(0), - char(52), - char(0), - char(21), - char(0), - char(47), - char(0), - char(125), - char(0), - char(15), - char(0), - char(126), - char(0), - char(13), - char(0), - char(127), - char(0), - char(13), - char(0), - char(-128), - char(0), - char(13), - char(0), - char(-127), - char(0), - char(13), - char(0), - char(-126), - char(0), - char(13), - char(0), - char(124), - char(0), - char(13), - char(0), - char(-125), - char(0), - char(13), - char(0), - char(-124), - char(0), - char(13), - char(0), - char(-123), - char(0), - char(13), - char(0), - char(-122), - char(0), - char(7), - char(0), - char(-121), - char(0), - char(7), - char(0), - char(-120), - char(0), - char(7), - char(0), - char(-119), - char(0), - char(7), - char(0), - char(-118), - char(0), - char(7), - char(0), - char(-117), - char(0), - char(7), - char(0), - char(-116), - char(0), - char(7), - char(0), - char(-115), - char(0), - char(7), - char(0), - char(-114), - char(0), - char(7), - char(0), - char(-113), - char(0), - char(4), - char(0), - char(-112), - char(0), - char(53), - char(0), - char(22), - char(0), - char(46), - char(0), - char(125), - char(0), - char(16), - char(0), - char(126), - char(0), - char(14), - char(0), - char(127), - char(0), - char(14), - char(0), - char(-128), - char(0), - char(14), - char(0), - char(-127), - char(0), - char(14), - char(0), - char(-126), - char(0), - char(14), - char(0), - char(124), - char(0), - char(14), - char(0), - char(-125), - char(0), - char(14), - char(0), - char(-124), - char(0), - char(14), - char(0), - char(-123), - char(0), - char(14), - char(0), - char(-122), - char(0), - char(8), - char(0), - char(-121), - char(0), - char(8), - char(0), - char(-120), - char(0), - char(8), - char(0), - char(-119), - char(0), - char(8), - char(0), - char(-118), - char(0), - char(8), - char(0), - char(-117), - char(0), - char(8), - char(0), - char(-116), - char(0), - char(8), - char(0), - char(-115), - char(0), - char(8), - char(0), - char(-114), - char(0), - char(8), - char(0), - char(-113), - char(0), - char(4), - char(0), - char(-112), - char(0), - char(0), - char(0), - char(37), - char(0), - char(54), - char(0), - char(2), - char(0), - char(4), - char(0), - char(-111), - char(0), - char(4), - char(0), - char(-110), - char(0), - char(55), - char(0), - char(13), - char(0), - char(56), - char(0), - char(-109), - char(0), - char(56), - char(0), - char(-108), - char(0), - char(0), - char(0), - char(35), - char(0), - char(4), - char(0), - char(-107), - char(0), - char(4), - char(0), - char(-106), - char(0), - char(4), - char(0), - char(-105), - char(0), - char(4), - char(0), - char(-104), - char(0), - char(7), - char(0), - char(-103), - char(0), - char(7), - char(0), - char(-102), - char(0), - char(4), - char(0), - char(-101), - char(0), - char(4), - char(0), - char(-100), - char(0), - char(7), - char(0), - char(-99), - char(0), - char(4), - char(0), - char(-98), - char(0), - char(57), - char(0), - char(3), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(13), - char(0), - char(-96), - char(0), - char(13), - char(0), - char(-95), - char(0), - char(58), - char(0), - char(3), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(14), - char(0), - char(-96), - char(0), - char(14), - char(0), - char(-95), - char(0), - char(59), - char(0), - char(13), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(18), - char(0), - char(-94), - char(0), - char(18), - char(0), - char(-93), - char(0), - char(4), - char(0), - char(-92), - char(0), - char(4), - char(0), - char(-91), - char(0), - char(4), - char(0), - char(-90), - char(0), - char(7), - char(0), - char(-89), - char(0), - char(7), - char(0), - char(-88), - char(0), - char(7), - char(0), - char(-87), - char(0), - char(7), - char(0), - char(-86), - char(0), - char(7), - char(0), - char(-85), - char(0), - char(7), - char(0), - char(-84), - char(0), - char(7), - char(0), - char(-83), - char(0), - char(60), - char(0), - char(13), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(4), - char(0), - char(-92), - char(0), - char(4), - char(0), - char(-91), - char(0), - char(4), - char(0), - char(-90), - char(0), - char(7), - char(0), - char(-89), - char(0), - char(7), - char(0), - char(-88), - char(0), - char(7), - char(0), - char(-87), - char(0), - char(7), - char(0), - char(-86), - char(0), - char(7), - char(0), - char(-85), - char(0), - char(7), - char(0), - char(-84), - char(0), - char(7), - char(0), - char(-83), - char(0), - char(61), - char(0), - char(11), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(7), - char(0), - char(-82), - char(0), - char(7), - char(0), - char(-81), - char(0), - char(7), - char(0), - char(-80), - char(0), - char(7), - char(0), - char(-85), - char(0), - char(7), - char(0), - char(-84), - char(0), - char(7), - char(0), - char(-83), - char(0), - char(7), - char(0), - char(-79), - char(0), - char(0), - char(0), - char(21), - char(0), - char(62), - char(0), - char(9), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(13), - char(0), - char(-78), - char(0), - char(13), - char(0), - char(-77), - char(0), - char(13), - char(0), - char(-76), - char(0), - char(13), - char(0), - char(-75), - char(0), - char(4), - char(0), - char(-74), - char(0), - char(4), - char(0), - char(-73), - char(0), - char(63), - char(0), - char(5), - char(0), - char(62), - char(0), - char(-72), - char(0), - char(4), - char(0), - char(-71), - char(0), - char(7), - char(0), - char(-70), - char(0), - char(7), - char(0), - char(-69), - char(0), - char(7), - char(0), - char(-68), - char(0), - char(64), - char(0), - char(9), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(7), - char(0), - char(-78), - char(0), - char(7), - char(0), - char(-77), - char(0), - char(7), - char(0), - char(-76), - char(0), - char(7), - char(0), - char(-75), - char(0), - char(4), - char(0), - char(-74), - char(0), - char(4), - char(0), - char(-73), - char(0), - char(49), - char(0), - char(22), - char(0), - char(8), - char(0), - char(-67), - char(0), - char(8), - char(0), - char(-79), - char(0), - char(8), - char(0), - char(110), - char(0), - char(8), - char(0), - char(-66), - char(0), - char(8), - char(0), - char(112), - char(0), - char(8), - char(0), - char(-65), - char(0), - char(8), - char(0), - char(-64), - char(0), - char(8), - char(0), - char(-63), - char(0), - char(8), - char(0), - char(-62), - char(0), - char(8), - char(0), - char(-61), - char(0), - char(8), - char(0), - char(-60), - char(0), - char(8), - char(0), - char(-59), - char(0), - char(8), - char(0), - char(-58), - char(0), - char(8), - char(0), - char(-57), - char(0), - char(8), - char(0), - char(-56), - char(0), - char(8), - char(0), - char(-55), - char(0), - char(4), - char(0), - char(-54), - char(0), - char(4), - char(0), - char(-53), - char(0), - char(4), - char(0), - char(-52), - char(0), - char(4), - char(0), - char(-51), - char(0), - char(4), - char(0), - char(-50), - char(0), - char(0), - char(0), - char(37), - char(0), - char(51), - char(0), - char(22), - char(0), - char(7), - char(0), - char(-67), - char(0), - char(7), - char(0), - char(-79), - char(0), - char(7), - char(0), - char(110), - char(0), - char(7), - char(0), - char(-66), - char(0), - char(7), - char(0), - char(112), - char(0), - char(7), - char(0), - char(-65), - char(0), - char(7), - char(0), - char(-64), - char(0), - char(7), - char(0), - char(-63), - char(0), - char(7), - char(0), - char(-62), - char(0), - char(7), - char(0), - char(-61), - char(0), - char(7), - char(0), - char(-60), - char(0), - char(7), - char(0), - char(-59), - char(0), - char(7), - char(0), - char(-58), - char(0), - char(7), - char(0), - char(-57), - char(0), - char(7), - char(0), - char(-56), - char(0), - char(7), - char(0), - char(-55), - char(0), - char(4), - char(0), - char(-54), - char(0), - char(4), - char(0), - char(-53), - char(0), - char(4), - char(0), - char(-52), - char(0), - char(4), - char(0), - char(-51), - char(0), - char(4), - char(0), - char(-50), - char(0), - char(0), - char(0), - char(37), - char(0), - char(65), - char(0), - char(4), - char(0), - char(7), - char(0), - char(-49), - char(0), - char(7), - char(0), - char(-48), - char(0), - char(7), - char(0), - char(-47), - char(0), - char(4), - char(0), - char(78), - char(0), - char(66), - char(0), - char(10), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(13), - char(0), - char(-45), - char(0), - char(13), - char(0), - char(-44), - char(0), - char(13), - char(0), - char(-43), - char(0), - char(13), - char(0), - char(-42), - char(0), - char(13), - char(0), - char(-41), - char(0), - char(7), - char(0), - char(-121), - char(0), - char(7), - char(0), - char(-40), - char(0), - char(4), - char(0), - char(-39), - char(0), - char(4), - char(0), - char(53), - char(0), - char(67), - char(0), - char(4), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(4), - char(0), - char(-38), - char(0), - char(7), - char(0), - char(-37), - char(0), - char(4), - char(0), - char(-36), - char(0), - char(68), - char(0), - char(4), - char(0), - char(13), - char(0), - char(-41), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(4), - char(0), - char(-35), - char(0), - char(7), - char(0), - char(-34), - char(0), - char(69), - char(0), - char(7), - char(0), - char(13), - char(0), - char(-33), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(4), - char(0), - char(-32), - char(0), - char(7), - char(0), - char(-31), - char(0), - char(7), - char(0), - char(-30), - char(0), - char(7), - char(0), - char(-29), - char(0), - char(4), - char(0), - char(53), - char(0), - char(70), - char(0), - char(6), - char(0), - char(15), - char(0), - char(-28), - char(0), - char(13), - char(0), - char(-30), - char(0), - char(13), - char(0), - char(-27), - char(0), - char(56), - char(0), - char(-26), - char(0), - char(4), - char(0), - char(-25), - char(0), - char(7), - char(0), - char(-29), - char(0), - char(71), - char(0), - char(26), - char(0), - char(4), - char(0), - char(-24), - char(0), - char(7), - char(0), - char(-23), - char(0), - char(7), - char(0), - char(-79), - char(0), - char(7), - char(0), - char(-22), - char(0), - char(7), - char(0), - char(-21), - char(0), - char(7), - char(0), - char(-20), - char(0), - char(7), - char(0), - char(-19), - char(0), - char(7), - char(0), - char(-18), - char(0), - char(7), - char(0), - char(-17), - char(0), - char(7), - char(0), - char(-16), - char(0), - char(7), - char(0), - char(-15), - char(0), - char(7), - char(0), - char(-14), - char(0), - char(7), - char(0), - char(-13), - char(0), - char(7), - char(0), - char(-12), - char(0), - char(7), - char(0), - char(-11), - char(0), - char(7), - char(0), - char(-10), - char(0), - char(7), - char(0), - char(-9), - char(0), - char(7), - char(0), - char(-8), - char(0), - char(7), - char(0), - char(-7), - char(0), - char(7), - char(0), - char(-6), - char(0), - char(7), - char(0), - char(-5), - char(0), - char(4), - char(0), - char(-4), - char(0), - char(4), - char(0), - char(-3), - char(0), - char(4), - char(0), - char(-2), - char(0), - char(4), - char(0), - char(-1), - char(0), - char(4), - char(0), - char(117), - char(0), - char(72), - char(0), - char(12), - char(0), - char(15), - char(0), - char(0), - char(1), - char(15), - char(0), - char(1), - char(1), - char(15), - char(0), - char(2), - char(1), - char(13), - char(0), - char(3), - char(1), - char(13), - char(0), - char(4), - char(1), - char(7), - char(0), - char(5), - char(1), - char(4), - char(0), - char(6), - char(1), - char(4), - char(0), - char(7), - char(1), - char(4), - char(0), - char(8), - char(1), - char(4), - char(0), - char(9), - char(1), - char(7), - char(0), - char(-31), - char(0), - char(4), - char(0), - char(53), - char(0), - char(73), - char(0), - char(27), - char(0), - char(17), - char(0), - char(10), - char(1), - char(15), - char(0), - char(11), - char(1), - char(15), - char(0), - char(12), - char(1), - char(13), - char(0), - char(3), - char(1), - char(13), - char(0), - char(13), - char(1), - char(13), - char(0), - char(14), - char(1), - char(13), - char(0), - char(15), - char(1), - char(13), - char(0), - char(16), - char(1), - char(13), - char(0), - char(17), - char(1), - char(4), - char(0), - char(18), - char(1), - char(7), - char(0), - char(19), - char(1), - char(4), - char(0), - char(20), - char(1), - char(4), - char(0), - char(21), - char(1), - char(4), - char(0), - char(22), - char(1), - char(7), - char(0), - char(23), - char(1), - char(7), - char(0), - char(24), - char(1), - char(4), - char(0), - char(25), - char(1), - char(4), - char(0), - char(26), - char(1), - char(7), - char(0), - char(27), - char(1), - char(7), - char(0), - char(28), - char(1), - char(7), - char(0), - char(29), - char(1), - char(7), - char(0), - char(30), - char(1), - char(7), - char(0), - char(31), - char(1), - char(7), - char(0), - char(32), - char(1), - char(4), - char(0), - char(33), - char(1), - char(4), - char(0), - char(34), - char(1), - char(4), - char(0), - char(35), - char(1), - char(74), - char(0), - char(12), - char(0), - char(9), - char(0), - char(36), - char(1), - char(9), - char(0), - char(37), - char(1), - char(13), - char(0), - char(38), - char(1), - char(7), - char(0), - char(39), - char(1), - char(7), - char(0), - char(-63), - char(0), - char(7), - char(0), - char(40), - char(1), - char(4), - char(0), - char(41), - char(1), - char(13), - char(0), - char(42), - char(1), - char(4), - char(0), - char(43), - char(1), - char(4), - char(0), - char(44), - char(1), - char(4), - char(0), - char(45), - char(1), - char(4), - char(0), - char(53), - char(0), - char(75), - char(0), - char(19), - char(0), - char(47), - char(0), - char(125), - char(0), - char(72), - char(0), - char(46), - char(1), - char(65), - char(0), - char(47), - char(1), - char(66), - char(0), - char(48), - char(1), - char(67), - char(0), - char(49), - char(1), - char(68), - char(0), - char(50), - char(1), - char(69), - char(0), - char(51), - char(1), - char(70), - char(0), - char(52), - char(1), - char(73), - char(0), - char(53), - char(1), - char(74), - char(0), - char(54), - char(1), - char(4), - char(0), - char(55), - char(1), - char(4), - char(0), - char(21), - char(1), - char(4), - char(0), - char(56), - char(1), - char(4), - char(0), - char(57), - char(1), - char(4), - char(0), - char(58), - char(1), - char(4), - char(0), - char(59), - char(1), - char(4), - char(0), - char(60), - char(1), - char(4), - char(0), - char(61), - char(1), - char(71), - char(0), - char(62), - char(1), -}; -int b3s_bulletDNAlen = sizeof(b3s_bulletDNAstr); -char b3s_bulletDNAstr64[] = { - char(83), - char(68), - char(78), - char(65), - char(78), - char(65), - char(77), - char(69), - char(63), - char(1), - char(0), - char(0), - char(109), - char(95), - char(115), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(99), - char(97), - char(112), - char(97), - char(99), - char(105), - char(116), - char(121), - char(0), - char(42), - char(109), - char(95), - char(100), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(115), - char(0), - char(42), - char(102), - char(105), - char(114), - char(115), - char(116), - char(0), - char(42), - char(108), - char(97), - char(115), - char(116), - char(0), - char(109), - char(95), - char(102), - char(108), - char(111), - char(97), - char(116), - char(115), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(101), - char(108), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(98), - char(97), - char(115), - char(105), - char(115), - char(0), - char(109), - char(95), - char(111), - char(114), - char(105), - char(103), - char(105), - char(110), - char(0), - char(109), - char(95), - char(114), - char(111), - char(111), - char(116), - char(78), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(115), - char(117), - char(98), - char(116), - char(114), - char(101), - char(101), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(65), - char(97), - char(98), - char(98), - char(77), - char(105), - char(110), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(65), - char(97), - char(98), - char(98), - char(77), - char(97), - char(120), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(97), - char(97), - char(98), - char(98), - char(77), - char(105), - char(110), - char(79), - char(114), - char(103), - char(0), - char(109), - char(95), - char(97), - char(97), - char(98), - char(98), - char(77), - char(97), - char(120), - char(79), - char(114), - char(103), - char(0), - char(109), - char(95), - char(101), - char(115), - char(99), - char(97), - char(112), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(115), - char(117), - char(98), - char(80), - char(97), - char(114), - char(116), - char(0), - char(109), - char(95), - char(116), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(101), - char(115), - char(99), - char(97), - char(112), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(79), - char(114), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(98), - char(118), - char(104), - char(65), - char(97), - char(98), - char(98), - char(77), - char(105), - char(110), - char(0), - char(109), - char(95), - char(98), - char(118), - char(104), - char(65), - char(97), - char(98), - char(98), - char(77), - char(97), - char(120), - char(0), - char(109), - char(95), - char(98), - char(118), - char(104), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(97), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(117), - char(114), - char(78), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(97), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(76), - char(101), - char(97), - char(102), - char(78), - char(111), - char(100), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(67), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(78), - char(111), - char(100), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(99), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(78), - char(111), - char(100), - char(101), - char(115), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(67), - char(111), - char(110), - char(116), - char(105), - char(103), - char(117), - char(111), - char(117), - char(115), - char(78), - char(111), - char(100), - char(101), - char(115), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(115), - char(117), - char(98), - char(84), - char(114), - char(101), - char(101), - char(73), - char(110), - char(102), - char(111), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(116), - char(114), - char(97), - char(118), - char(101), - char(114), - char(115), - char(97), - char(108), - char(77), - char(111), - char(100), - char(101), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(83), - char(117), - char(98), - char(116), - char(114), - char(101), - char(101), - char(72), - char(101), - char(97), - char(100), - char(101), - char(114), - char(115), - char(0), - char(42), - char(109), - char(95), - char(110), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(115), - char(104), - char(97), - char(112), - char(101), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(100), - char(105), - char(110), - char(103), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(83), - char(99), - char(97), - char(108), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(112), - char(108), - char(97), - char(110), - char(101), - char(78), - char(111), - char(114), - char(109), - char(97), - char(108), - char(0), - char(109), - char(95), - char(112), - char(108), - char(97), - char(110), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(97), - char(110), - char(116), - char(0), - char(109), - char(95), - char(105), - char(109), - char(112), - char(108), - char(105), - char(99), - char(105), - char(116), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(105), - char(109), - char(101), - char(110), - char(115), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(77), - char(97), - char(114), - char(103), - char(105), - char(110), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(100), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(0), - char(109), - char(95), - char(114), - char(97), - char(100), - char(105), - char(117), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(118), - char(101), - char(120), - char(73), - char(110), - char(116), - char(101), - char(114), - char(110), - char(97), - char(108), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(42), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(65), - char(114), - char(114), - char(97), - char(121), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(65), - char(114), - char(114), - char(97), - char(121), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(118), - char(97), - char(108), - char(117), - char(101), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(118), - char(97), - char(108), - char(117), - char(101), - char(115), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(0), - char(42), - char(109), - char(95), - char(118), - char(101), - char(114), - char(116), - char(105), - char(99), - char(101), - char(115), - char(51), - char(102), - char(0), - char(42), - char(109), - char(95), - char(118), - char(101), - char(114), - char(116), - char(105), - char(99), - char(101), - char(115), - char(51), - char(100), - char(0), - char(42), - char(109), - char(95), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(51), - char(50), - char(0), - char(42), - char(109), - char(95), - char(51), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(49), - char(54), - char(0), - char(42), - char(109), - char(95), - char(51), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(56), - char(0), - char(42), - char(109), - char(95), - char(105), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(49), - char(54), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(86), - char(101), - char(114), - char(116), - char(105), - char(99), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(109), - char(101), - char(115), - char(104), - char(80), - char(97), - char(114), - char(116), - char(115), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(115), - char(99), - char(97), - char(108), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(77), - char(101), - char(115), - char(104), - char(80), - char(97), - char(114), - char(116), - char(115), - char(0), - char(109), - char(95), - char(109), - char(101), - char(115), - char(104), - char(73), - char(110), - char(116), - char(101), - char(114), - char(102), - char(97), - char(99), - char(101), - char(0), - char(42), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(70), - char(108), - char(111), - char(97), - char(116), - char(66), - char(118), - char(104), - char(0), - char(42), - char(109), - char(95), - char(113), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(66), - char(118), - char(104), - char(0), - char(42), - char(109), - char(95), - char(116), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(102), - char(111), - char(77), - char(97), - char(112), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(51), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(116), - char(114), - char(105), - char(109), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(116), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(0), - char(42), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(0), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(77), - char(97), - char(114), - char(103), - char(105), - char(110), - char(0), - char(42), - char(109), - char(95), - char(99), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(104), - char(105), - char(108), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(115), - char(0), - char(109), - char(95), - char(117), - char(112), - char(65), - char(120), - char(105), - char(115), - char(0), - char(109), - char(95), - char(102), - char(108), - char(97), - char(103), - char(115), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(86), - char(48), - char(86), - char(49), - char(65), - char(110), - char(103), - char(108), - char(101), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(86), - char(49), - char(86), - char(50), - char(65), - char(110), - char(103), - char(108), - char(101), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(86), - char(50), - char(86), - char(48), - char(65), - char(110), - char(103), - char(108), - char(101), - char(0), - char(42), - char(109), - char(95), - char(104), - char(97), - char(115), - char(104), - char(84), - char(97), - char(98), - char(108), - char(101), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(110), - char(101), - char(120), - char(116), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(118), - char(97), - char(108), - char(117), - char(101), - char(65), - char(114), - char(114), - char(97), - char(121), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(107), - char(101), - char(121), - char(65), - char(114), - char(114), - char(97), - char(121), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(118), - char(101), - char(120), - char(69), - char(112), - char(115), - char(105), - char(108), - char(111), - char(110), - char(0), - char(109), - char(95), - char(112), - char(108), - char(97), - char(110), - char(97), - char(114), - char(69), - char(112), - char(115), - char(105), - char(108), - char(111), - char(110), - char(0), - char(109), - char(95), - char(101), - char(113), - char(117), - char(97), - char(108), - char(86), - char(101), - char(114), - char(116), - char(101), - char(120), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(101), - char(100), - char(103), - char(101), - char(68), - char(105), - char(115), - char(116), - char(97), - char(110), - char(99), - char(101), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(122), - char(101), - char(114), - char(111), - char(65), - char(114), - char(101), - char(97), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(110), - char(101), - char(120), - char(116), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(104), - char(97), - char(115), - char(104), - char(84), - char(97), - char(98), - char(108), - char(101), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(86), - char(97), - char(108), - char(117), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(75), - char(101), - char(121), - char(115), - char(0), - char(109), - char(95), - char(103), - char(105), - char(109), - char(112), - char(97), - char(99), - char(116), - char(83), - char(117), - char(98), - char(84), - char(121), - char(112), - char(101), - char(0), - char(42), - char(109), - char(95), - char(117), - char(110), - char(115), - char(99), - char(97), - char(108), - char(101), - char(100), - char(80), - char(111), - char(105), - char(110), - char(116), - char(115), - char(70), - char(108), - char(111), - char(97), - char(116), - char(80), - char(116), - char(114), - char(0), - char(42), - char(109), - char(95), - char(117), - char(110), - char(115), - char(99), - char(97), - char(108), - char(101), - char(100), - char(80), - char(111), - char(105), - char(110), - char(116), - char(115), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(80), - char(116), - char(114), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(85), - char(110), - char(115), - char(99), - char(97), - char(108), - char(101), - char(100), - char(80), - char(111), - char(105), - char(110), - char(116), - char(115), - char(0), - char(109), - char(95), - char(112), - char(97), - char(100), - char(100), - char(105), - char(110), - char(103), - char(51), - char(91), - char(52), - char(93), - char(0), - char(42), - char(109), - char(95), - char(98), - char(114), - char(111), - char(97), - char(100), - char(112), - char(104), - char(97), - char(115), - char(101), - char(72), - char(97), - char(110), - char(100), - char(108), - char(101), - char(0), - char(42), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(0), - char(42), - char(109), - char(95), - char(114), - char(111), - char(111), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(0), - char(109), - char(95), - char(119), - char(111), - char(114), - char(108), - char(100), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(112), - char(111), - char(108), - char(97), - char(116), - char(105), - char(111), - char(110), - char(87), - char(111), - char(114), - char(108), - char(100), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(112), - char(111), - char(108), - char(97), - char(116), - char(105), - char(111), - char(110), - char(76), - char(105), - char(110), - char(101), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(112), - char(111), - char(108), - char(97), - char(116), - char(105), - char(111), - char(110), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(110), - char(105), - char(115), - char(111), - char(116), - char(114), - char(111), - char(112), - char(105), - char(99), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(80), - char(114), - char(111), - char(99), - char(101), - char(115), - char(115), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(100), - char(101), - char(97), - char(99), - char(116), - char(105), - char(118), - char(97), - char(116), - char(105), - char(111), - char(110), - char(84), - char(105), - char(109), - char(101), - char(0), - char(109), - char(95), - char(102), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(114), - char(111), - char(108), - char(108), - char(105), - char(110), - char(103), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(105), - char(116), - char(117), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(104), - char(105), - char(116), - char(70), - char(114), - char(97), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(99), - char(100), - char(83), - char(119), - char(101), - char(112), - char(116), - char(83), - char(112), - char(104), - char(101), - char(114), - char(101), - char(82), - char(97), - char(100), - char(105), - char(117), - char(115), - char(0), - char(109), - char(95), - char(99), - char(99), - char(100), - char(77), - char(111), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(104), - char(97), - char(115), - char(65), - char(110), - char(105), - char(115), - char(111), - char(116), - char(114), - char(111), - char(112), - char(105), - char(99), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(70), - char(108), - char(97), - char(103), - char(115), - char(0), - char(109), - char(95), - char(105), - char(115), - char(108), - char(97), - char(110), - char(100), - char(84), - char(97), - char(103), - char(49), - char(0), - char(109), - char(95), - char(99), - char(111), - char(109), - char(112), - char(97), - char(110), - char(105), - char(111), - char(110), - char(73), - char(100), - char(0), - char(109), - char(95), - char(97), - char(99), - char(116), - char(105), - char(118), - char(97), - char(116), - char(105), - char(111), - char(110), - char(83), - char(116), - char(97), - char(116), - char(101), - char(49), - char(0), - char(109), - char(95), - char(105), - char(110), - char(116), - char(101), - char(114), - char(110), - char(97), - char(108), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(99), - char(104), - char(101), - char(99), - char(107), - char(67), - char(111), - char(108), - char(108), - char(105), - char(100), - char(101), - char(87), - char(105), - char(116), - char(104), - char(0), - char(109), - char(95), - char(115), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(110), - char(102), - char(111), - char(0), - char(109), - char(95), - char(103), - char(114), - char(97), - char(118), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(73), - char(110), - char(101), - char(114), - char(116), - char(105), - char(97), - char(84), - char(101), - char(110), - char(115), - char(111), - char(114), - char(87), - char(111), - char(114), - char(108), - char(100), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(103), - char(114), - char(97), - char(118), - char(105), - char(116), - char(121), - char(95), - char(97), - char(99), - char(99), - char(101), - char(108), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(73), - char(110), - char(101), - char(114), - char(116), - char(105), - char(97), - char(76), - char(111), - char(99), - char(97), - char(108), - char(0), - char(109), - char(95), - char(116), - char(111), - char(116), - char(97), - char(108), - char(70), - char(111), - char(114), - char(99), - char(101), - char(0), - char(109), - char(95), - char(116), - char(111), - char(116), - char(97), - char(108), - char(84), - char(111), - char(114), - char(113), - char(117), - char(101), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(101), - char(114), - char(115), - char(101), - char(77), - char(97), - char(115), - char(115), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(76), - char(105), - char(110), - char(101), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(83), - char(113), - char(114), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(83), - char(113), - char(114), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(83), - char(108), - char(101), - char(101), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(83), - char(108), - char(101), - char(101), - char(112), - char(105), - char(110), - char(103), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(97), - char(100), - char(100), - char(105), - char(116), - char(105), - char(111), - char(110), - char(97), - char(108), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(82), - char(111), - char(119), - char(115), - char(0), - char(110), - char(117), - char(98), - char(0), - char(42), - char(109), - char(95), - char(114), - char(98), - char(65), - char(0), - char(42), - char(109), - char(95), - char(114), - char(98), - char(66), - char(0), - char(109), - char(95), - char(111), - char(98), - char(106), - char(101), - char(99), - char(116), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(84), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(73), - char(100), - char(0), - char(109), - char(95), - char(110), - char(101), - char(101), - char(100), - char(115), - char(70), - char(101), - char(101), - char(100), - char(98), - char(97), - char(99), - char(107), - char(0), - char(109), - char(95), - char(97), - char(112), - char(112), - char(108), - char(105), - char(101), - char(100), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(100), - char(98), - char(103), - char(68), - char(114), - char(97), - char(119), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(100), - char(105), - char(115), - char(97), - char(98), - char(108), - char(101), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(115), - char(66), - char(101), - char(116), - char(119), - char(101), - char(101), - char(110), - char(76), - char(105), - char(110), - char(107), - char(101), - char(100), - char(66), - char(111), - char(100), - char(105), - char(101), - char(115), - char(0), - char(109), - char(95), - char(111), - char(118), - char(101), - char(114), - char(114), - char(105), - char(100), - char(101), - char(78), - char(117), - char(109), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(98), - char(114), - char(101), - char(97), - char(107), - char(105), - char(110), - char(103), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(105), - char(115), - char(69), - char(110), - char(97), - char(98), - char(108), - char(101), - char(100), - char(0), - char(109), - char(95), - char(116), - char(121), - char(112), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(112), - char(105), - char(118), - char(111), - char(116), - char(73), - char(110), - char(65), - char(0), - char(109), - char(95), - char(112), - char(105), - char(118), - char(111), - char(116), - char(73), - char(110), - char(66), - char(0), - char(109), - char(95), - char(114), - char(98), - char(65), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(114), - char(98), - char(66), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(82), - char(101), - char(102), - char(101), - char(114), - char(101), - char(110), - char(99), - char(101), - char(70), - char(114), - char(97), - char(109), - char(101), - char(65), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(79), - char(110), - char(108), - char(121), - char(0), - char(109), - char(95), - char(101), - char(110), - char(97), - char(98), - char(108), - char(101), - char(65), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(77), - char(111), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(109), - char(111), - char(116), - char(111), - char(114), - char(84), - char(97), - char(114), - char(103), - char(101), - char(116), - char(86), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(77), - char(111), - char(116), - char(111), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(108), - char(111), - char(119), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(117), - char(112), - char(112), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(108), - char(105), - char(109), - char(105), - char(116), - char(83), - char(111), - char(102), - char(116), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(98), - char(105), - char(97), - char(115), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(114), - char(101), - char(108), - char(97), - char(120), - char(97), - char(116), - char(105), - char(111), - char(110), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(115), - char(119), - char(105), - char(110), - char(103), - char(83), - char(112), - char(97), - char(110), - char(49), - char(0), - char(109), - char(95), - char(115), - char(119), - char(105), - char(110), - char(103), - char(83), - char(112), - char(97), - char(110), - char(50), - char(0), - char(109), - char(95), - char(116), - char(119), - char(105), - char(115), - char(116), - char(83), - char(112), - char(97), - char(110), - char(0), - char(109), - char(95), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(85), - char(112), - char(112), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(76), - char(111), - char(119), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(85), - char(112), - char(112), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(76), - char(111), - char(119), - char(101), - char(114), - char(76), - char(105), - char(109), - char(105), - char(116), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(76), - char(105), - char(110), - char(101), - char(97), - char(114), - char(82), - char(101), - char(102), - char(101), - char(114), - char(101), - char(110), - char(99), - char(101), - char(70), - char(114), - char(97), - char(109), - char(101), - char(65), - char(0), - char(109), - char(95), - char(117), - char(115), - char(101), - char(79), - char(102), - char(102), - char(115), - char(101), - char(116), - char(70), - char(111), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(54), - char(100), - char(111), - char(102), - char(68), - char(97), - char(116), - char(97), - char(0), - char(109), - char(95), - char(115), - char(112), - char(114), - char(105), - char(110), - char(103), - char(69), - char(110), - char(97), - char(98), - char(108), - char(101), - char(100), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(101), - char(113), - char(117), - char(105), - char(108), - char(105), - char(98), - char(114), - char(105), - char(117), - char(109), - char(80), - char(111), - char(105), - char(110), - char(116), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(115), - char(112), - char(114), - char(105), - char(110), - char(103), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(115), - char(112), - char(114), - char(105), - char(110), - char(103), - char(68), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(91), - char(54), - char(93), - char(0), - char(109), - char(95), - char(116), - char(97), - char(117), - char(0), - char(109), - char(95), - char(116), - char(105), - char(109), - char(101), - char(83), - char(116), - char(101), - char(112), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(69), - char(114), - char(114), - char(111), - char(114), - char(82), - char(101), - char(100), - char(117), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(115), - char(111), - char(114), - char(0), - char(109), - char(95), - char(101), - char(114), - char(112), - char(0), - char(109), - char(95), - char(101), - char(114), - char(112), - char(50), - char(0), - char(109), - char(95), - char(103), - char(108), - char(111), - char(98), - char(97), - char(108), - char(67), - char(102), - char(109), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(80), - char(101), - char(110), - char(101), - char(116), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(84), - char(117), - char(114), - char(110), - char(69), - char(114), - char(112), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(83), - char(108), - char(111), - char(112), - char(0), - char(109), - char(95), - char(119), - char(97), - char(114), - char(109), - char(115), - char(116), - char(97), - char(114), - char(116), - char(105), - char(110), - char(103), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(71), - char(121), - char(114), - char(111), - char(115), - char(99), - char(111), - char(112), - char(105), - char(99), - char(70), - char(111), - char(114), - char(99), - char(101), - char(0), - char(109), - char(95), - char(115), - char(105), - char(110), - char(103), - char(108), - char(101), - char(65), - char(120), - char(105), - char(115), - char(82), - char(111), - char(108), - char(108), - char(105), - char(110), - char(103), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(108), - char(118), - char(101), - char(114), - char(77), - char(111), - char(100), - char(101), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(105), - char(110), - char(103), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(82), - char(101), - char(115), - char(116), - char(105), - char(116), - char(117), - char(116), - char(105), - char(111), - char(110), - char(84), - char(104), - char(114), - char(101), - char(115), - char(104), - char(111), - char(108), - char(100), - char(0), - char(109), - char(95), - char(109), - char(105), - char(110), - char(105), - char(109), - char(117), - char(109), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(66), - char(97), - char(116), - char(99), - char(104), - char(83), - char(105), - char(122), - char(101), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(108), - char(105), - char(110), - char(101), - char(97), - char(114), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(97), - char(110), - char(103), - char(117), - char(108), - char(97), - char(114), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(118), - char(111), - char(108), - char(117), - char(109), - char(101), - char(83), - char(116), - char(105), - char(102), - char(102), - char(110), - char(101), - char(115), - char(115), - char(0), - char(42), - char(109), - char(95), - char(109), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(112), - char(114), - char(101), - char(118), - char(105), - char(111), - char(117), - char(115), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(118), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(0), - char(109), - char(95), - char(97), - char(99), - char(99), - char(117), - char(109), - char(117), - char(108), - char(97), - char(116), - char(101), - char(100), - char(70), - char(111), - char(114), - char(99), - char(101), - char(0), - char(109), - char(95), - char(110), - char(111), - char(114), - char(109), - char(97), - char(108), - char(0), - char(109), - char(95), - char(97), - char(114), - char(101), - char(97), - char(0), - char(109), - char(95), - char(97), - char(116), - char(116), - char(97), - char(99), - char(104), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(76), - char(101), - char(110), - char(103), - char(116), - char(104), - char(0), - char(109), - char(95), - char(98), - char(98), - char(101), - char(110), - char(100), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(91), - char(51), - char(93), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(65), - char(114), - char(101), - char(97), - char(0), - char(109), - char(95), - char(99), - char(48), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(91), - char(52), - char(93), - char(0), - char(109), - char(95), - char(114), - char(101), - char(115), - char(116), - char(86), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(99), - char(49), - char(0), - char(109), - char(95), - char(99), - char(50), - char(0), - char(109), - char(95), - char(99), - char(48), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(97), - char(108), - char(70), - char(114), - char(97), - char(109), - char(101), - char(0), - char(42), - char(109), - char(95), - char(114), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(0), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(109), - char(95), - char(97), - char(101), - char(114), - char(111), - char(77), - char(111), - char(100), - char(101), - char(108), - char(0), - char(109), - char(95), - char(98), - char(97), - char(117), - char(109), - char(103), - char(97), - char(114), - char(116), - char(101), - char(0), - char(109), - char(95), - char(100), - char(114), - char(97), - char(103), - char(0), - char(109), - char(95), - char(108), - char(105), - char(102), - char(116), - char(0), - char(109), - char(95), - char(112), - char(114), - char(101), - char(115), - char(115), - char(117), - char(114), - char(101), - char(0), - char(109), - char(95), - char(118), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(100), - char(121), - char(110), - char(97), - char(109), - char(105), - char(99), - char(70), - char(114), - char(105), - char(99), - char(116), - char(105), - char(111), - char(110), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(101), - char(77), - char(97), - char(116), - char(99), - char(104), - char(0), - char(109), - char(95), - char(114), - char(105), - char(103), - char(105), - char(100), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(107), - char(105), - char(110), - char(101), - char(116), - char(105), - char(99), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(97), - char(110), - char(99), - char(104), - char(111), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(75), - char(105), - char(110), - char(101), - char(116), - char(105), - char(99), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(83), - char(111), - char(102), - char(116), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(72), - char(97), - char(114), - char(100), - char(110), - char(101), - char(115), - char(115), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(83), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(75), - char(105), - char(110), - char(101), - char(116), - char(105), - char(99), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(83), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(115), - char(111), - char(102), - char(116), - char(83), - char(111), - char(102), - char(116), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(83), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(86), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(116), - char(105), - char(109), - char(101), - char(83), - char(99), - char(97), - char(108), - char(101), - char(0), - char(109), - char(95), - char(118), - char(101), - char(108), - char(111), - char(99), - char(105), - char(116), - char(121), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(112), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(100), - char(114), - char(105), - char(102), - char(116), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(99), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(116), - char(101), - char(114), - char(97), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(114), - char(111), - char(116), - char(0), - char(109), - char(95), - char(115), - char(99), - char(97), - char(108), - char(101), - char(0), - char(109), - char(95), - char(97), - char(113), - char(113), - char(0), - char(109), - char(95), - char(99), - char(111), - char(109), - char(0), - char(42), - char(109), - char(95), - char(112), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(42), - char(109), - char(95), - char(119), - char(101), - char(105), - char(103), - char(104), - char(116), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(87), - char(101), - char(105), - char(103), - char(116), - char(115), - char(0), - char(109), - char(95), - char(98), - char(118), - char(111), - char(108), - char(117), - char(109), - char(101), - char(0), - char(109), - char(95), - char(98), - char(102), - char(114), - char(97), - char(109), - char(101), - char(0), - char(109), - char(95), - char(102), - char(114), - char(97), - char(109), - char(101), - char(120), - char(102), - char(111), - char(114), - char(109), - char(0), - char(109), - char(95), - char(108), - char(111), - char(99), - char(105), - char(105), - char(0), - char(109), - char(95), - char(105), - char(110), - char(118), - char(119), - char(105), - char(0), - char(109), - char(95), - char(118), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(100), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(108), - char(118), - char(0), - char(109), - char(95), - char(97), - char(118), - char(0), - char(42), - char(109), - char(95), - char(102), - char(114), - char(97), - char(109), - char(101), - char(114), - char(101), - char(102), - char(115), - char(0), - char(42), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(73), - char(110), - char(100), - char(105), - char(99), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(109), - char(97), - char(115), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(70), - char(114), - char(97), - char(109), - char(101), - char(82), - char(101), - char(102), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(78), - char(111), - char(100), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(77), - char(97), - char(115), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(105), - char(100), - char(109), - char(97), - char(115), - char(115), - char(0), - char(109), - char(95), - char(105), - char(109), - char(97), - char(115), - char(115), - char(0), - char(109), - char(95), - char(110), - char(118), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(100), - char(105), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(108), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(97), - char(100), - char(97), - char(109), - char(112), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(109), - char(97), - char(116), - char(99), - char(104), - char(105), - char(110), - char(103), - char(0), - char(109), - char(95), - char(109), - char(97), - char(120), - char(83), - char(101), - char(108), - char(102), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(0), - char(109), - char(95), - char(115), - char(101), - char(108), - char(102), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(73), - char(109), - char(112), - char(117), - char(108), - char(115), - char(101), - char(70), - char(97), - char(99), - char(116), - char(111), - char(114), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(116), - char(97), - char(105), - char(110), - char(115), - char(65), - char(110), - char(99), - char(104), - char(111), - char(114), - char(0), - char(109), - char(95), - char(99), - char(111), - char(108), - char(108), - char(105), - char(100), - char(101), - char(0), - char(109), - char(95), - char(99), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(73), - char(110), - char(100), - char(101), - char(120), - char(0), - char(42), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(65), - char(0), - char(42), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(66), - char(0), - char(109), - char(95), - char(114), - char(101), - char(102), - char(115), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(99), - char(102), - char(109), - char(0), - char(109), - char(95), - char(115), - char(112), - char(108), - char(105), - char(116), - char(0), - char(109), - char(95), - char(100), - char(101), - char(108), - char(101), - char(116), - char(101), - char(0), - char(109), - char(95), - char(114), - char(101), - char(108), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(91), - char(50), - char(93), - char(0), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(65), - char(116), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(98), - char(111), - char(100), - char(121), - char(66), - char(116), - char(121), - char(112), - char(101), - char(0), - char(109), - char(95), - char(106), - char(111), - char(105), - char(110), - char(116), - char(84), - char(121), - char(112), - char(101), - char(0), - char(42), - char(109), - char(95), - char(112), - char(111), - char(115), - char(101), - char(0), - char(42), - char(42), - char(109), - char(95), - char(109), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(115), - char(0), - char(42), - char(109), - char(95), - char(110), - char(111), - char(100), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(108), - char(105), - char(110), - char(107), - char(115), - char(0), - char(42), - char(109), - char(95), - char(102), - char(97), - char(99), - char(101), - char(115), - char(0), - char(42), - char(109), - char(95), - char(116), - char(101), - char(116), - char(114), - char(97), - char(104), - char(101), - char(100), - char(114), - char(97), - char(0), - char(42), - char(109), - char(95), - char(97), - char(110), - char(99), - char(104), - char(111), - char(114), - char(115), - char(0), - char(42), - char(109), - char(95), - char(99), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(115), - char(0), - char(42), - char(109), - char(95), - char(106), - char(111), - char(105), - char(110), - char(116), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(77), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(76), - char(105), - char(110), - char(107), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(70), - char(97), - char(99), - char(101), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(84), - char(101), - char(116), - char(114), - char(97), - char(104), - char(101), - char(100), - char(114), - char(97), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(65), - char(110), - char(99), - char(104), - char(111), - char(114), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(115), - char(0), - char(109), - char(95), - char(110), - char(117), - char(109), - char(74), - char(111), - char(105), - char(110), - char(116), - char(115), - char(0), - char(109), - char(95), - char(99), - char(111), - char(110), - char(102), - char(105), - char(103), - char(0), - char(84), - char(89), - char(80), - char(69), - char(76), - char(0), - char(0), - char(0), - char(99), - char(104), - char(97), - char(114), - char(0), - char(117), - char(99), - char(104), - char(97), - char(114), - char(0), - char(115), - char(104), - char(111), - char(114), - char(116), - char(0), - char(117), - char(115), - char(104), - char(111), - char(114), - char(116), - char(0), - char(105), - char(110), - char(116), - char(0), - char(108), - char(111), - char(110), - char(103), - char(0), - char(117), - char(108), - char(111), - char(110), - char(103), - char(0), - char(102), - char(108), - char(111), - char(97), - char(116), - char(0), - char(100), - char(111), - char(117), - char(98), - char(108), - char(101), - char(0), - char(118), - char(111), - char(105), - char(100), - char(0), - char(80), - char(111), - char(105), - char(110), - char(116), - char(101), - char(114), - char(65), - char(114), - char(114), - char(97), - char(121), - char(0), - char(98), - char(116), - char(80), - char(104), - char(121), - char(115), - char(105), - char(99), - char(115), - char(83), - char(121), - char(115), - char(116), - char(101), - char(109), - char(0), - char(76), - char(105), - char(115), - char(116), - char(66), - char(97), - char(115), - char(101), - char(0), - char(98), - char(116), - char(86), - char(101), - char(99), - char(116), - char(111), - char(114), - char(51), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(86), - char(101), - char(99), - char(116), - char(111), - char(114), - char(51), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(77), - char(97), - char(116), - char(114), - char(105), - char(120), - char(51), - char(120), - char(51), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(77), - char(97), - char(116), - char(114), - char(105), - char(120), - char(51), - char(120), - char(51), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(97), - char(110), - char(115), - char(102), - char(111), - char(114), - char(109), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(66), - char(118), - char(104), - char(83), - char(117), - char(98), - char(116), - char(114), - char(101), - char(101), - char(73), - char(110), - char(102), - char(111), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(79), - char(112), - char(116), - char(105), - char(109), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(78), - char(111), - char(100), - char(101), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(79), - char(112), - char(116), - char(105), - char(109), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(78), - char(111), - char(100), - char(101), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(78), - char(111), - char(100), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(81), - char(117), - char(97), - char(110), - char(116), - char(105), - char(122), - char(101), - char(100), - char(66), - char(118), - char(104), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(116), - char(97), - char(116), - char(105), - char(99), - char(80), - char(108), - char(97), - char(110), - char(101), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(118), - char(101), - char(120), - char(73), - char(110), - char(116), - char(101), - char(114), - char(110), - char(97), - char(108), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(80), - char(111), - char(115), - char(105), - char(116), - char(105), - char(111), - char(110), - char(65), - char(110), - char(100), - char(82), - char(97), - char(100), - char(105), - char(117), - char(115), - char(0), - char(98), - char(116), - char(77), - char(117), - char(108), - char(116), - char(105), - char(83), - char(112), - char(104), - char(101), - char(114), - char(101), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(73), - char(110), - char(116), - char(73), - char(110), - char(100), - char(101), - char(120), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(104), - char(111), - char(114), - char(116), - char(73), - char(110), - char(116), - char(73), - char(110), - char(100), - char(101), - char(120), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(104), - char(111), - char(114), - char(116), - char(73), - char(110), - char(116), - char(73), - char(110), - char(100), - char(101), - char(120), - char(84), - char(114), - char(105), - char(112), - char(108), - char(101), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(104), - char(97), - char(114), - char(73), - char(110), - char(100), - char(101), - char(120), - char(84), - char(114), - char(105), - char(112), - char(108), - char(101), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(77), - char(101), - char(115), - char(104), - char(80), - char(97), - char(114), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(116), - char(114), - char(105), - char(100), - char(105), - char(110), - char(103), - char(77), - char(101), - char(115), - char(104), - char(73), - char(110), - char(116), - char(101), - char(114), - char(102), - char(97), - char(99), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(77), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(102), - char(111), - char(77), - char(97), - char(112), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(99), - char(97), - char(108), - char(101), - char(100), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(77), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(109), - char(112), - char(111), - char(117), - char(110), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(67), - char(104), - char(105), - char(108), - char(100), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(109), - char(112), - char(111), - char(117), - char(110), - char(100), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(121), - char(108), - char(105), - char(110), - char(100), - char(101), - char(114), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(97), - char(112), - char(115), - char(117), - char(108), - char(101), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(84), - char(114), - char(105), - char(97), - char(110), - char(103), - char(108), - char(101), - char(73), - char(110), - char(102), - char(111), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(71), - char(73), - char(109), - char(112), - char(97), - char(99), - char(116), - char(77), - char(101), - char(115), - char(104), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(118), - char(101), - char(120), - char(72), - char(117), - char(108), - char(108), - char(83), - char(104), - char(97), - char(112), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(108), - char(108), - char(105), - char(115), - char(105), - char(111), - char(110), - char(79), - char(98), - char(106), - char(101), - char(99), - char(116), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(68), - char(121), - char(110), - char(97), - char(109), - char(105), - char(99), - char(115), - char(87), - char(111), - char(114), - char(108), - char(100), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(110), - char(102), - char(111), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(68), - char(121), - char(110), - char(97), - char(109), - char(105), - char(99), - char(115), - char(87), - char(111), - char(114), - char(108), - char(100), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(116), - char(97), - char(99), - char(116), - char(83), - char(111), - char(108), - char(118), - char(101), - char(114), - char(73), - char(110), - char(102), - char(111), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(73), - char(110), - char(102), - char(111), - char(49), - char(0), - char(98), - char(116), - char(84), - char(121), - char(112), - char(101), - char(100), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(66), - char(111), - char(100), - char(121), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(80), - char(111), - char(105), - char(110), - char(116), - char(50), - char(80), - char(111), - char(105), - char(110), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(80), - char(111), - char(105), - char(110), - char(116), - char(50), - char(80), - char(111), - char(105), - char(110), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(72), - char(105), - char(110), - char(103), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(111), - char(117), - char(98), - char(108), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(72), - char(105), - char(110), - char(103), - char(101), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(67), - char(111), - char(110), - char(101), - char(84), - char(119), - char(105), - char(115), - char(116), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(71), - char(101), - char(110), - char(101), - char(114), - char(105), - char(99), - char(54), - char(68), - char(111), - char(102), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(71), - char(101), - char(110), - char(101), - char(114), - char(105), - char(99), - char(54), - char(68), - char(111), - char(102), - char(83), - char(112), - char(114), - char(105), - char(110), - char(103), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(108), - char(105), - char(100), - char(101), - char(114), - char(67), - char(111), - char(110), - char(115), - char(116), - char(114), - char(97), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(77), - char(97), - char(116), - char(101), - char(114), - char(105), - char(97), - char(108), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(78), - char(111), - char(100), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(76), - char(105), - char(110), - char(107), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(70), - char(97), - char(99), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(84), - char(101), - char(116), - char(114), - char(97), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(82), - char(105), - char(103), - char(105), - char(100), - char(65), - char(110), - char(99), - char(104), - char(111), - char(114), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(67), - char(111), - char(110), - char(102), - char(105), - char(103), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(80), - char(111), - char(115), - char(101), - char(68), - char(97), - char(116), - char(97), - char(0), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(67), - char(108), - char(117), - char(115), - char(116), - char(101), - char(114), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(74), - char(111), - char(105), - char(110), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(98), - char(116), - char(83), - char(111), - char(102), - char(116), - char(66), - char(111), - char(100), - char(121), - char(70), - char(108), - char(111), - char(97), - char(116), - char(68), - char(97), - char(116), - char(97), - char(0), - char(0), - char(0), - char(84), - char(76), - char(69), - char(78), - char(1), - char(0), - char(1), - char(0), - char(2), - char(0), - char(2), - char(0), - char(4), - char(0), - char(4), - char(0), - char(4), - char(0), - char(4), - char(0), - char(8), - char(0), - char(0), - char(0), - char(16), - char(0), - char(48), - char(0), - char(16), - char(0), - char(16), - char(0), - char(32), - char(0), - char(48), - char(0), - char(96), - char(0), - char(64), - char(0), - char(-128), - char(0), - char(20), - char(0), - char(48), - char(0), - char(80), - char(0), - char(16), - char(0), - char(96), - char(0), - char(-112), - char(0), - char(16), - char(0), - char(56), - char(0), - char(56), - char(0), - char(20), - char(0), - char(72), - char(0), - char(4), - char(0), - char(4), - char(0), - char(8), - char(0), - char(4), - char(0), - char(56), - char(0), - char(32), - char(0), - char(80), - char(0), - char(72), - char(0), - char(96), - char(0), - char(80), - char(0), - char(32), - char(0), - char(64), - char(0), - char(64), - char(0), - char(16), - char(0), - char(72), - char(0), - char(80), - char(0), - char(-32), - char(1), - char(16), - char(1), - char(-72), - char(0), - char(-104), - char(0), - char(104), - char(0), - char(88), - char(0), - char(-8), - char(1), - char(-80), - char(3), - char(8), - char(0), - char(64), - char(0), - char(0), - char(0), - char(96), - char(0), - char(-128), - char(0), - char(104), - char(1), - char(-24), - char(0), - char(-32), - char(0), - char(8), - char(1), - char(104), - char(1), - char(-40), - char(0), - char(16), - char(0), - char(104), - char(0), - char(24), - char(0), - char(40), - char(0), - char(104), - char(0), - char(96), - char(0), - char(104), - char(0), - char(-56), - char(0), - char(104), - char(1), - char(112), - char(0), - char(-32), - char(1), - char(83), - char(84), - char(82), - char(67), - char(65), - char(0), - char(0), - char(0), - char(10), - char(0), - char(3), - char(0), - char(4), - char(0), - char(0), - char(0), - char(4), - char(0), - char(1), - char(0), - char(9), - char(0), - char(2), - char(0), - char(11), - char(0), - char(3), - char(0), - char(10), - char(0), - char(3), - char(0), - char(10), - char(0), - char(4), - char(0), - char(10), - char(0), - char(5), - char(0), - char(12), - char(0), - char(2), - char(0), - char(9), - char(0), - char(6), - char(0), - char(9), - char(0), - char(7), - char(0), - char(13), - char(0), - char(1), - char(0), - char(7), - char(0), - char(8), - char(0), - char(14), - char(0), - char(1), - char(0), - char(8), - char(0), - char(8), - char(0), - char(15), - char(0), - char(1), - char(0), - char(13), - char(0), - char(9), - char(0), - char(16), - char(0), - char(1), - char(0), - char(14), - char(0), - char(9), - char(0), - char(17), - char(0), - char(2), - char(0), - char(15), - char(0), - char(10), - char(0), - char(13), - char(0), - char(11), - char(0), - char(18), - char(0), - char(2), - char(0), - char(16), - char(0), - char(10), - char(0), - char(14), - char(0), - char(11), - char(0), - char(19), - char(0), - char(4), - char(0), - char(4), - char(0), - char(12), - char(0), - char(4), - char(0), - char(13), - char(0), - char(2), - char(0), - char(14), - char(0), - char(2), - char(0), - char(15), - char(0), - char(20), - char(0), - char(6), - char(0), - char(13), - char(0), - char(16), - char(0), - char(13), - char(0), - char(17), - char(0), - char(4), - char(0), - char(18), - char(0), - char(4), - char(0), - char(19), - char(0), - char(4), - char(0), - char(20), - char(0), - char(0), - char(0), - char(21), - char(0), - char(21), - char(0), - char(6), - char(0), - char(14), - char(0), - char(16), - char(0), - char(14), - char(0), - char(17), - char(0), - char(4), - char(0), - char(18), - char(0), - char(4), - char(0), - char(19), - char(0), - char(4), - char(0), - char(20), - char(0), - char(0), - char(0), - char(21), - char(0), - char(22), - char(0), - char(3), - char(0), - char(2), - char(0), - char(14), - char(0), - char(2), - char(0), - char(15), - char(0), - char(4), - char(0), - char(22), - char(0), - char(23), - char(0), - char(12), - char(0), - char(13), - char(0), - char(23), - char(0), - char(13), - char(0), - char(24), - char(0), - char(13), - char(0), - char(25), - char(0), - char(4), - char(0), - char(26), - char(0), - char(4), - char(0), - char(27), - char(0), - char(4), - char(0), - char(28), - char(0), - char(4), - char(0), - char(29), - char(0), - char(20), - char(0), - char(30), - char(0), - char(22), - char(0), - char(31), - char(0), - char(19), - char(0), - char(32), - char(0), - char(4), - char(0), - char(33), - char(0), - char(4), - char(0), - char(34), - char(0), - char(24), - char(0), - char(12), - char(0), - char(14), - char(0), - char(23), - char(0), - char(14), - char(0), - char(24), - char(0), - char(14), - char(0), - char(25), - char(0), - char(4), - char(0), - char(26), - char(0), - char(4), - char(0), - char(27), - char(0), - char(4), - char(0), - char(28), - char(0), - char(4), - char(0), - char(29), - char(0), - char(21), - char(0), - char(30), - char(0), - char(22), - char(0), - char(31), - char(0), - char(4), - char(0), - char(33), - char(0), - char(4), - char(0), - char(34), - char(0), - char(19), - char(0), - char(32), - char(0), - char(25), - char(0), - char(3), - char(0), - char(0), - char(0), - char(35), - char(0), - char(4), - char(0), - char(36), - char(0), - char(0), - char(0), - char(37), - char(0), - char(26), - char(0), - char(5), - char(0), - char(25), - char(0), - char(38), - char(0), - char(13), - char(0), - char(39), - char(0), - char(13), - char(0), - char(40), - char(0), - char(7), - char(0), - char(41), - char(0), - char(0), - char(0), - char(21), - char(0), - char(27), - char(0), - char(5), - char(0), - char(25), - char(0), - char(38), - char(0), - char(13), - char(0), - char(39), - char(0), - char(13), - char(0), - char(42), - char(0), - char(7), - char(0), - char(43), - char(0), - char(4), - char(0), - char(44), - char(0), - char(28), - char(0), - char(2), - char(0), - char(13), - char(0), - char(45), - char(0), - char(7), - char(0), - char(46), - char(0), - char(29), - char(0), - char(4), - char(0), - char(27), - char(0), - char(47), - char(0), - char(28), - char(0), - char(48), - char(0), - char(4), - char(0), - char(49), - char(0), - char(0), - char(0), - char(37), - char(0), - char(30), - char(0), - char(1), - char(0), - char(4), - char(0), - char(50), - char(0), - char(31), - char(0), - char(2), - char(0), - char(2), - char(0), - char(50), - char(0), - char(0), - char(0), - char(51), - char(0), - char(32), - char(0), - char(2), - char(0), - char(2), - char(0), - char(52), - char(0), - char(0), - char(0), - char(51), - char(0), - char(33), - char(0), - char(2), - char(0), - char(0), - char(0), - char(52), - char(0), - char(0), - char(0), - char(53), - char(0), - char(34), - char(0), - char(8), - char(0), - char(13), - char(0), - char(54), - char(0), - char(14), - char(0), - char(55), - char(0), - char(30), - char(0), - char(56), - char(0), - char(32), - char(0), - char(57), - char(0), - char(33), - char(0), - char(58), - char(0), - char(31), - char(0), - char(59), - char(0), - char(4), - char(0), - char(60), - char(0), - char(4), - char(0), - char(61), - char(0), - char(35), - char(0), - char(4), - char(0), - char(34), - char(0), - char(62), - char(0), - char(13), - char(0), - char(63), - char(0), - char(4), - char(0), - char(64), - char(0), - char(0), - char(0), - char(37), - char(0), - char(36), - char(0), - char(7), - char(0), - char(25), - char(0), - char(38), - char(0), - char(35), - char(0), - char(65), - char(0), - char(23), - char(0), - char(66), - char(0), - char(24), - char(0), - char(67), - char(0), - char(37), - char(0), - char(68), - char(0), - char(7), - char(0), - char(43), - char(0), - char(0), - char(0), - char(69), - char(0), - char(38), - char(0), - char(2), - char(0), - char(36), - char(0), - char(70), - char(0), - char(13), - char(0), - char(39), - char(0), - char(39), - char(0), - char(4), - char(0), - char(17), - char(0), - char(71), - char(0), - char(25), - char(0), - char(72), - char(0), - char(4), - char(0), - char(73), - char(0), - char(7), - char(0), - char(74), - char(0), - char(40), - char(0), - char(4), - char(0), - char(25), - char(0), - char(38), - char(0), - char(39), - char(0), - char(75), - char(0), - char(4), - char(0), - char(76), - char(0), - char(7), - char(0), - char(43), - char(0), - char(41), - char(0), - char(3), - char(0), - char(27), - char(0), - char(47), - char(0), - char(4), - char(0), - char(77), - char(0), - char(0), - char(0), - char(37), - char(0), - char(42), - char(0), - char(3), - char(0), - char(27), - char(0), - char(47), - char(0), - char(4), - char(0), - char(77), - char(0), - char(0), - char(0), - char(37), - char(0), - char(43), - char(0), - char(4), - char(0), - char(4), - char(0), - char(78), - char(0), - char(7), - char(0), - char(79), - char(0), - char(7), - char(0), - char(80), - char(0), - char(7), - char(0), - char(81), - char(0), - char(37), - char(0), - char(14), - char(0), - char(4), - char(0), - char(82), - char(0), - char(4), - char(0), - char(83), - char(0), - char(43), - char(0), - char(84), - char(0), - char(4), - char(0), - char(85), - char(0), - char(7), - char(0), - char(86), - char(0), - char(7), - char(0), - char(87), - char(0), - char(7), - char(0), - char(88), - char(0), - char(7), - char(0), - char(89), - char(0), - char(7), - char(0), - char(90), - char(0), - char(4), - char(0), - char(91), - char(0), - char(4), - char(0), - char(92), - char(0), - char(4), - char(0), - char(93), - char(0), - char(4), - char(0), - char(94), - char(0), - char(0), - char(0), - char(37), - char(0), - char(44), - char(0), - char(5), - char(0), - char(25), - char(0), - char(38), - char(0), - char(35), - char(0), - char(65), - char(0), - char(13), - char(0), - char(39), - char(0), - char(7), - char(0), - char(43), - char(0), - char(4), - char(0), - char(95), - char(0), - char(45), - char(0), - char(5), - char(0), - char(27), - char(0), - char(47), - char(0), - char(13), - char(0), - char(96), - char(0), - char(14), - char(0), - char(97), - char(0), - char(4), - char(0), - char(98), - char(0), - char(0), - char(0), - char(99), - char(0), - char(46), - char(0), - char(25), - char(0), - char(9), - char(0), - char(100), - char(0), - char(9), - char(0), - char(101), - char(0), - char(25), - char(0), - char(102), - char(0), - char(0), - char(0), - char(35), - char(0), - char(18), - char(0), - char(103), - char(0), - char(18), - char(0), - char(104), - char(0), - char(14), - char(0), - char(105), - char(0), - char(14), - char(0), - char(106), - char(0), - char(14), - char(0), - char(107), - char(0), - char(8), - char(0), - char(108), - char(0), - char(8), - char(0), - char(109), - char(0), - char(8), - char(0), - char(110), - char(0), - char(8), - char(0), - char(111), - char(0), - char(8), - char(0), - char(112), - char(0), - char(8), - char(0), - char(113), - char(0), - char(8), - char(0), - char(114), - char(0), - char(8), - char(0), - char(115), - char(0), - char(4), - char(0), - char(116), - char(0), - char(4), - char(0), - char(117), - char(0), - char(4), - char(0), - char(118), - char(0), - char(4), - char(0), - char(119), - char(0), - char(4), - char(0), - char(120), - char(0), - char(4), - char(0), - char(121), - char(0), - char(4), - char(0), - char(122), - char(0), - char(0), - char(0), - char(37), - char(0), - char(47), - char(0), - char(25), - char(0), - char(9), - char(0), - char(100), - char(0), - char(9), - char(0), - char(101), - char(0), - char(25), - char(0), - char(102), - char(0), - char(0), - char(0), - char(35), - char(0), - char(17), - char(0), - char(103), - char(0), - char(17), - char(0), - char(104), - char(0), - char(13), - char(0), - char(105), - char(0), - char(13), - char(0), - char(106), - char(0), - char(13), - char(0), - char(107), - char(0), - char(7), - char(0), - char(108), - char(0), - char(7), - char(0), - char(109), - char(0), - char(7), - char(0), - char(110), - char(0), - char(7), - char(0), - char(111), - char(0), - char(7), - char(0), - char(112), - char(0), - char(7), - char(0), - char(113), - char(0), - char(7), - char(0), - char(114), - char(0), - char(7), - char(0), - char(115), - char(0), - char(4), - char(0), - char(116), - char(0), - char(4), - char(0), - char(117), - char(0), - char(4), - char(0), - char(118), - char(0), - char(4), - char(0), - char(119), - char(0), - char(4), - char(0), - char(120), - char(0), - char(4), - char(0), - char(121), - char(0), - char(4), - char(0), - char(122), - char(0), - char(0), - char(0), - char(37), - char(0), - char(48), - char(0), - char(2), - char(0), - char(49), - char(0), - char(123), - char(0), - char(14), - char(0), - char(124), - char(0), - char(50), - char(0), - char(2), - char(0), - char(51), - char(0), - char(123), - char(0), - char(13), - char(0), - char(124), - char(0), - char(52), - char(0), - char(21), - char(0), - char(47), - char(0), - char(125), - char(0), - char(15), - char(0), - char(126), - char(0), - char(13), - char(0), - char(127), - char(0), - char(13), - char(0), - char(-128), - char(0), - char(13), - char(0), - char(-127), - char(0), - char(13), - char(0), - char(-126), - char(0), - char(13), - char(0), - char(124), - char(0), - char(13), - char(0), - char(-125), - char(0), - char(13), - char(0), - char(-124), - char(0), - char(13), - char(0), - char(-123), - char(0), - char(13), - char(0), - char(-122), - char(0), - char(7), - char(0), - char(-121), - char(0), - char(7), - char(0), - char(-120), - char(0), - char(7), - char(0), - char(-119), - char(0), - char(7), - char(0), - char(-118), - char(0), - char(7), - char(0), - char(-117), - char(0), - char(7), - char(0), - char(-116), - char(0), - char(7), - char(0), - char(-115), - char(0), - char(7), - char(0), - char(-114), - char(0), - char(7), - char(0), - char(-113), - char(0), - char(4), - char(0), - char(-112), - char(0), - char(53), - char(0), - char(22), - char(0), - char(46), - char(0), - char(125), - char(0), - char(16), - char(0), - char(126), - char(0), - char(14), - char(0), - char(127), - char(0), - char(14), - char(0), - char(-128), - char(0), - char(14), - char(0), - char(-127), - char(0), - char(14), - char(0), - char(-126), - char(0), - char(14), - char(0), - char(124), - char(0), - char(14), - char(0), - char(-125), - char(0), - char(14), - char(0), - char(-124), - char(0), - char(14), - char(0), - char(-123), - char(0), - char(14), - char(0), - char(-122), - char(0), - char(8), - char(0), - char(-121), - char(0), - char(8), - char(0), - char(-120), - char(0), - char(8), - char(0), - char(-119), - char(0), - char(8), - char(0), - char(-118), - char(0), - char(8), - char(0), - char(-117), - char(0), - char(8), - char(0), - char(-116), - char(0), - char(8), - char(0), - char(-115), - char(0), - char(8), - char(0), - char(-114), - char(0), - char(8), - char(0), - char(-113), - char(0), - char(4), - char(0), - char(-112), - char(0), - char(0), - char(0), - char(37), - char(0), - char(54), - char(0), - char(2), - char(0), - char(4), - char(0), - char(-111), - char(0), - char(4), - char(0), - char(-110), - char(0), - char(55), - char(0), - char(13), - char(0), - char(56), - char(0), - char(-109), - char(0), - char(56), - char(0), - char(-108), - char(0), - char(0), - char(0), - char(35), - char(0), - char(4), - char(0), - char(-107), - char(0), - char(4), - char(0), - char(-106), - char(0), - char(4), - char(0), - char(-105), - char(0), - char(4), - char(0), - char(-104), - char(0), - char(7), - char(0), - char(-103), - char(0), - char(7), - char(0), - char(-102), - char(0), - char(4), - char(0), - char(-101), - char(0), - char(4), - char(0), - char(-100), - char(0), - char(7), - char(0), - char(-99), - char(0), - char(4), - char(0), - char(-98), - char(0), - char(57), - char(0), - char(3), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(13), - char(0), - char(-96), - char(0), - char(13), - char(0), - char(-95), - char(0), - char(58), - char(0), - char(3), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(14), - char(0), - char(-96), - char(0), - char(14), - char(0), - char(-95), - char(0), - char(59), - char(0), - char(13), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(18), - char(0), - char(-94), - char(0), - char(18), - char(0), - char(-93), - char(0), - char(4), - char(0), - char(-92), - char(0), - char(4), - char(0), - char(-91), - char(0), - char(4), - char(0), - char(-90), - char(0), - char(7), - char(0), - char(-89), - char(0), - char(7), - char(0), - char(-88), - char(0), - char(7), - char(0), - char(-87), - char(0), - char(7), - char(0), - char(-86), - char(0), - char(7), - char(0), - char(-85), - char(0), - char(7), - char(0), - char(-84), - char(0), - char(7), - char(0), - char(-83), - char(0), - char(60), - char(0), - char(13), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(4), - char(0), - char(-92), - char(0), - char(4), - char(0), - char(-91), - char(0), - char(4), - char(0), - char(-90), - char(0), - char(7), - char(0), - char(-89), - char(0), - char(7), - char(0), - char(-88), - char(0), - char(7), - char(0), - char(-87), - char(0), - char(7), - char(0), - char(-86), - char(0), - char(7), - char(0), - char(-85), - char(0), - char(7), - char(0), - char(-84), - char(0), - char(7), - char(0), - char(-83), - char(0), - char(61), - char(0), - char(11), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(7), - char(0), - char(-82), - char(0), - char(7), - char(0), - char(-81), - char(0), - char(7), - char(0), - char(-80), - char(0), - char(7), - char(0), - char(-85), - char(0), - char(7), - char(0), - char(-84), - char(0), - char(7), - char(0), - char(-83), - char(0), - char(7), - char(0), - char(-79), - char(0), - char(0), - char(0), - char(21), - char(0), - char(62), - char(0), - char(9), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(13), - char(0), - char(-78), - char(0), - char(13), - char(0), - char(-77), - char(0), - char(13), - char(0), - char(-76), - char(0), - char(13), - char(0), - char(-75), - char(0), - char(4), - char(0), - char(-74), - char(0), - char(4), - char(0), - char(-73), - char(0), - char(63), - char(0), - char(5), - char(0), - char(62), - char(0), - char(-72), - char(0), - char(4), - char(0), - char(-71), - char(0), - char(7), - char(0), - char(-70), - char(0), - char(7), - char(0), - char(-69), - char(0), - char(7), - char(0), - char(-68), - char(0), - char(64), - char(0), - char(9), - char(0), - char(55), - char(0), - char(-97), - char(0), - char(17), - char(0), - char(-94), - char(0), - char(17), - char(0), - char(-93), - char(0), - char(7), - char(0), - char(-78), - char(0), - char(7), - char(0), - char(-77), - char(0), - char(7), - char(0), - char(-76), - char(0), - char(7), - char(0), - char(-75), - char(0), - char(4), - char(0), - char(-74), - char(0), - char(4), - char(0), - char(-73), - char(0), - char(49), - char(0), - char(22), - char(0), - char(8), - char(0), - char(-67), - char(0), - char(8), - char(0), - char(-79), - char(0), - char(8), - char(0), - char(110), - char(0), - char(8), - char(0), - char(-66), - char(0), - char(8), - char(0), - char(112), - char(0), - char(8), - char(0), - char(-65), - char(0), - char(8), - char(0), - char(-64), - char(0), - char(8), - char(0), - char(-63), - char(0), - char(8), - char(0), - char(-62), - char(0), - char(8), - char(0), - char(-61), - char(0), - char(8), - char(0), - char(-60), - char(0), - char(8), - char(0), - char(-59), - char(0), - char(8), - char(0), - char(-58), - char(0), - char(8), - char(0), - char(-57), - char(0), - char(8), - char(0), - char(-56), - char(0), - char(8), - char(0), - char(-55), - char(0), - char(4), - char(0), - char(-54), - char(0), - char(4), - char(0), - char(-53), - char(0), - char(4), - char(0), - char(-52), - char(0), - char(4), - char(0), - char(-51), - char(0), - char(4), - char(0), - char(-50), - char(0), - char(0), - char(0), - char(37), - char(0), - char(51), - char(0), - char(22), - char(0), - char(7), - char(0), - char(-67), - char(0), - char(7), - char(0), - char(-79), - char(0), - char(7), - char(0), - char(110), - char(0), - char(7), - char(0), - char(-66), - char(0), - char(7), - char(0), - char(112), - char(0), - char(7), - char(0), - char(-65), - char(0), - char(7), - char(0), - char(-64), - char(0), - char(7), - char(0), - char(-63), - char(0), - char(7), - char(0), - char(-62), - char(0), - char(7), - char(0), - char(-61), - char(0), - char(7), - char(0), - char(-60), - char(0), - char(7), - char(0), - char(-59), - char(0), - char(7), - char(0), - char(-58), - char(0), - char(7), - char(0), - char(-57), - char(0), - char(7), - char(0), - char(-56), - char(0), - char(7), - char(0), - char(-55), - char(0), - char(4), - char(0), - char(-54), - char(0), - char(4), - char(0), - char(-53), - char(0), - char(4), - char(0), - char(-52), - char(0), - char(4), - char(0), - char(-51), - char(0), - char(4), - char(0), - char(-50), - char(0), - char(0), - char(0), - char(37), - char(0), - char(65), - char(0), - char(4), - char(0), - char(7), - char(0), - char(-49), - char(0), - char(7), - char(0), - char(-48), - char(0), - char(7), - char(0), - char(-47), - char(0), - char(4), - char(0), - char(78), - char(0), - char(66), - char(0), - char(10), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(13), - char(0), - char(-45), - char(0), - char(13), - char(0), - char(-44), - char(0), - char(13), - char(0), - char(-43), - char(0), - char(13), - char(0), - char(-42), - char(0), - char(13), - char(0), - char(-41), - char(0), - char(7), - char(0), - char(-121), - char(0), - char(7), - char(0), - char(-40), - char(0), - char(4), - char(0), - char(-39), - char(0), - char(4), - char(0), - char(53), - char(0), - char(67), - char(0), - char(4), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(4), - char(0), - char(-38), - char(0), - char(7), - char(0), - char(-37), - char(0), - char(4), - char(0), - char(-36), - char(0), - char(68), - char(0), - char(4), - char(0), - char(13), - char(0), - char(-41), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(4), - char(0), - char(-35), - char(0), - char(7), - char(0), - char(-34), - char(0), - char(69), - char(0), - char(7), - char(0), - char(13), - char(0), - char(-33), - char(0), - char(65), - char(0), - char(-46), - char(0), - char(4), - char(0), - char(-32), - char(0), - char(7), - char(0), - char(-31), - char(0), - char(7), - char(0), - char(-30), - char(0), - char(7), - char(0), - char(-29), - char(0), - char(4), - char(0), - char(53), - char(0), - char(70), - char(0), - char(6), - char(0), - char(15), - char(0), - char(-28), - char(0), - char(13), - char(0), - char(-30), - char(0), - char(13), - char(0), - char(-27), - char(0), - char(56), - char(0), - char(-26), - char(0), - char(4), - char(0), - char(-25), - char(0), - char(7), - char(0), - char(-29), - char(0), - char(71), - char(0), - char(26), - char(0), - char(4), - char(0), - char(-24), - char(0), - char(7), - char(0), - char(-23), - char(0), - char(7), - char(0), - char(-79), - char(0), - char(7), - char(0), - char(-22), - char(0), - char(7), - char(0), - char(-21), - char(0), - char(7), - char(0), - char(-20), - char(0), - char(7), - char(0), - char(-19), - char(0), - char(7), - char(0), - char(-18), - char(0), - char(7), - char(0), - char(-17), - char(0), - char(7), - char(0), - char(-16), - char(0), - char(7), - char(0), - char(-15), - char(0), - char(7), - char(0), - char(-14), - char(0), - char(7), - char(0), - char(-13), - char(0), - char(7), - char(0), - char(-12), - char(0), - char(7), - char(0), - char(-11), - char(0), - char(7), - char(0), - char(-10), - char(0), - char(7), - char(0), - char(-9), - char(0), - char(7), - char(0), - char(-8), - char(0), - char(7), - char(0), - char(-7), - char(0), - char(7), - char(0), - char(-6), - char(0), - char(7), - char(0), - char(-5), - char(0), - char(4), - char(0), - char(-4), - char(0), - char(4), - char(0), - char(-3), - char(0), - char(4), - char(0), - char(-2), - char(0), - char(4), - char(0), - char(-1), - char(0), - char(4), - char(0), - char(117), - char(0), - char(72), - char(0), - char(12), - char(0), - char(15), - char(0), - char(0), - char(1), - char(15), - char(0), - char(1), - char(1), - char(15), - char(0), - char(2), - char(1), - char(13), - char(0), - char(3), - char(1), - char(13), - char(0), - char(4), - char(1), - char(7), - char(0), - char(5), - char(1), - char(4), - char(0), - char(6), - char(1), - char(4), - char(0), - char(7), - char(1), - char(4), - char(0), - char(8), - char(1), - char(4), - char(0), - char(9), - char(1), - char(7), - char(0), - char(-31), - char(0), - char(4), - char(0), - char(53), - char(0), - char(73), - char(0), - char(27), - char(0), - char(17), - char(0), - char(10), - char(1), - char(15), - char(0), - char(11), - char(1), - char(15), - char(0), - char(12), - char(1), - char(13), - char(0), - char(3), - char(1), - char(13), - char(0), - char(13), - char(1), - char(13), - char(0), - char(14), - char(1), - char(13), - char(0), - char(15), - char(1), - char(13), - char(0), - char(16), - char(1), - char(13), - char(0), - char(17), - char(1), - char(4), - char(0), - char(18), - char(1), - char(7), - char(0), - char(19), - char(1), - char(4), - char(0), - char(20), - char(1), - char(4), - char(0), - char(21), - char(1), - char(4), - char(0), - char(22), - char(1), - char(7), - char(0), - char(23), - char(1), - char(7), - char(0), - char(24), - char(1), - char(4), - char(0), - char(25), - char(1), - char(4), - char(0), - char(26), - char(1), - char(7), - char(0), - char(27), - char(1), - char(7), - char(0), - char(28), - char(1), - char(7), - char(0), - char(29), - char(1), - char(7), - char(0), - char(30), - char(1), - char(7), - char(0), - char(31), - char(1), - char(7), - char(0), - char(32), - char(1), - char(4), - char(0), - char(33), - char(1), - char(4), - char(0), - char(34), - char(1), - char(4), - char(0), - char(35), - char(1), - char(74), - char(0), - char(12), - char(0), - char(9), - char(0), - char(36), - char(1), - char(9), - char(0), - char(37), - char(1), - char(13), - char(0), - char(38), - char(1), - char(7), - char(0), - char(39), - char(1), - char(7), - char(0), - char(-63), - char(0), - char(7), - char(0), - char(40), - char(1), - char(4), - char(0), - char(41), - char(1), - char(13), - char(0), - char(42), - char(1), - char(4), - char(0), - char(43), - char(1), - char(4), - char(0), - char(44), - char(1), - char(4), - char(0), - char(45), - char(1), - char(4), - char(0), - char(53), - char(0), - char(75), - char(0), - char(19), - char(0), - char(47), - char(0), - char(125), - char(0), - char(72), - char(0), - char(46), - char(1), - char(65), - char(0), - char(47), - char(1), - char(66), - char(0), - char(48), - char(1), - char(67), - char(0), - char(49), - char(1), - char(68), - char(0), - char(50), - char(1), - char(69), - char(0), - char(51), - char(1), - char(70), - char(0), - char(52), - char(1), - char(73), - char(0), - char(53), - char(1), - char(74), - char(0), - char(54), - char(1), - char(4), - char(0), - char(55), - char(1), - char(4), - char(0), - char(21), - char(1), - char(4), - char(0), - char(56), - char(1), - char(4), - char(0), - char(57), - char(1), - char(4), - char(0), - char(58), - char(1), - char(4), - char(0), - char(59), - char(1), - char(4), - char(0), - char(60), - char(1), - char(4), - char(0), - char(61), - char(1), - char(71), - char(0), - char(62), - char(1), -}; -int b3s_bulletDNAlen64 = sizeof(b3s_bulletDNAstr64); diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Serializer.h b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Serializer.h deleted file mode 100644 index d9e153e238f..00000000000 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3Serializer.h +++ /dev/null @@ -1,601 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_SERIALIZER_H -#define B3_SERIALIZER_H - -#include "Bullet3Common/b3Scalar.h" // has definitions like B3_FORCE_INLINE -#include "Bullet3Common/b3StackAlloc.h" -#include "Bullet3Common/b3HashMap.h" - -#if !defined(__CELLOS_LV2__) && !defined(__MWERKS__) -#include -#endif -#include - -extern char b3s_bulletDNAstr[]; -extern int b3s_bulletDNAlen; -extern char b3s_bulletDNAstr64[]; -extern int b3s_bulletDNAlen64; - -B3_FORCE_INLINE int b3StrLen(const char* str) -{ - if (!str) - return (0); - int len = 0; - - while (*str != 0) - { - str++; - len++; - } - - return len; -} - -class b3Chunk -{ -public: - int m_chunkCode; - int m_length; - void* m_oldPtr; - int m_dna_nr; - int m_number; -}; - -enum b3SerializationFlags -{ - B3_SERIALIZE_NO_BVH = 1, - B3_SERIALIZE_NO_TRIANGLEINFOMAP = 2, - B3_SERIALIZE_NO_DUPLICATE_ASSERT = 4 -}; - -class b3Serializer -{ -public: - virtual ~b3Serializer() {} - - virtual const unsigned char* getBufferPointer() const = 0; - - virtual int getCurrentBufferSize() const = 0; - - virtual b3Chunk* allocate(size_t size, int numElements) = 0; - - virtual void finalizeChunk(b3Chunk* chunk, const char* structType, int chunkCode, void* oldPtr) = 0; - - virtual void* findPointer(void* oldPtr) = 0; - - virtual void* getUniquePointer(void* oldPtr) = 0; - - virtual void startSerialization() = 0; - - virtual void finishSerialization() = 0; - - virtual const char* findNameForPointer(const void* ptr) const = 0; - - virtual void registerNameForPointer(const void* ptr, const char* name) = 0; - - virtual void serializeName(const char* ptr) = 0; - - virtual int getSerializationFlags() const = 0; - - virtual void setSerializationFlags(int flags) = 0; -}; - -#define B3_HEADER_LENGTH 12 -#if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined(__PPC__) || defined(__ppc__) || defined(__BIG_ENDIAN__) -#define B3_MAKE_ID(a, b, c, d) ((int)(a) << 24 | (int)(b) << 16 | (c) << 8 | (d)) -#else -#define B3_MAKE_ID(a, b, c, d) ((int)(d) << 24 | (int)(c) << 16 | (b) << 8 | (a)) -#endif - -#define B3_SOFTBODY_CODE B3_MAKE_ID('S', 'B', 'D', 'Y') -#define B3_COLLISIONOBJECT_CODE B3_MAKE_ID('C', 'O', 'B', 'J') -#define B3_RIGIDBODY_CODE B3_MAKE_ID('R', 'B', 'D', 'Y') -#define B3_CONSTRAINT_CODE B3_MAKE_ID('C', 'O', 'N', 'S') -#define B3_BOXSHAPE_CODE B3_MAKE_ID('B', 'O', 'X', 'S') -#define B3_QUANTIZED_BVH_CODE B3_MAKE_ID('Q', 'B', 'V', 'H') -#define B3_TRIANLGE_INFO_MAP B3_MAKE_ID('T', 'M', 'A', 'P') -#define B3_SHAPE_CODE B3_MAKE_ID('S', 'H', 'A', 'P') -#define B3_ARRAY_CODE B3_MAKE_ID('A', 'R', 'A', 'Y') -#define B3_SBMATERIAL_CODE B3_MAKE_ID('S', 'B', 'M', 'T') -#define B3_SBNODE_CODE B3_MAKE_ID('S', 'B', 'N', 'D') -#define B3_DYNAMICSWORLD_CODE B3_MAKE_ID('D', 'W', 'L', 'D') -#define B3_DNA_CODE B3_MAKE_ID('D', 'N', 'A', '1') - -struct b3PointerUid -{ - union { - void* m_ptr; - int m_uniqueIds[2]; - }; -}; - -///The b3DefaultSerializer is the main Bullet serialization class. -///The constructor takes an optional argument for backwards compatibility, it is recommended to leave this empty/zero. -class b3DefaultSerializer : public b3Serializer -{ - b3AlignedObjectArray mTypes; - b3AlignedObjectArray mStructs; - b3AlignedObjectArray mTlens; - b3HashMap mStructReverse; - b3HashMap mTypeLookup; - - b3HashMap m_chunkP; - - b3HashMap m_nameMap; - - b3HashMap m_uniquePointers; - int m_uniqueIdGenerator; - - int m_totalSize; - unsigned char* m_buffer; - int m_currentSize; - void* m_dna; - int m_dnaLength; - - int m_serializationFlags; - - b3AlignedObjectArray m_chunkPtrs; - -protected: - virtual void* findPointer(void* oldPtr) - { - void** ptr = m_chunkP.find(oldPtr); - if (ptr && *ptr) - return *ptr; - return 0; - } - - void writeDNA() - { - b3Chunk* dnaChunk = allocate(m_dnaLength, 1); - memcpy(dnaChunk->m_oldPtr, m_dna, m_dnaLength); - finalizeChunk(dnaChunk, "DNA1", B3_DNA_CODE, m_dna); - } - - int getReverseType(const char* type) const - { - b3HashString key(type); - const int* valuePtr = mTypeLookup.find(key); - if (valuePtr) - return *valuePtr; - - return -1; - } - - void initDNA(const char* bdnaOrg, int dnalen) - { - ///was already initialized - if (m_dna) - return; - - int littleEndian = 1; - littleEndian = ((char*)&littleEndian)[0]; - - m_dna = b3AlignedAlloc(dnalen, 16); - memcpy(m_dna, bdnaOrg, dnalen); - m_dnaLength = dnalen; - - int* intPtr = 0; - short* shtPtr = 0; - char* cp = 0; - int dataLen = 0; - intPtr = (int*)m_dna; - - /* - SDNA (4 bytes) (magic number) - NAME (4 bytes) - (4 bytes) amount of names (int) - - - */ - - if (strncmp((const char*)m_dna, "SDNA", 4) == 0) - { - // skip ++ NAME - intPtr++; - intPtr++; - } - - // Parse names - if (!littleEndian) - *intPtr = b3SwapEndian(*intPtr); - - dataLen = *intPtr; - - intPtr++; - - cp = (char*)intPtr; - int i; - for (i = 0; i < dataLen; i++) - { - while (*cp) cp++; - cp++; - } - cp = b3AlignPointer(cp, 4); - - /* - TYPE (4 bytes) - amount of types (int) - - - */ - - intPtr = (int*)cp; - b3Assert(strncmp(cp, "TYPE", 4) == 0); - intPtr++; - - if (!littleEndian) - *intPtr = b3SwapEndian(*intPtr); - - dataLen = *intPtr; - intPtr++; - - cp = (char*)intPtr; - for (i = 0; i < dataLen; i++) - { - mTypes.push_back(cp); - while (*cp) cp++; - cp++; - } - - cp = b3AlignPointer(cp, 4); - - /* - TLEN (4 bytes) - (short) the lengths of types - - */ - - // Parse type lens - intPtr = (int*)cp; - b3Assert(strncmp(cp, "TLEN", 4) == 0); - intPtr++; - - dataLen = (int)mTypes.size(); - - shtPtr = (short*)intPtr; - for (i = 0; i < dataLen; i++, shtPtr++) - { - if (!littleEndian) - shtPtr[0] = b3SwapEndian(shtPtr[0]); - mTlens.push_back(shtPtr[0]); - } - - if (dataLen & 1) shtPtr++; - - /* - STRC (4 bytes) - amount of structs (int) - - - - - - - */ - - intPtr = (int*)shtPtr; - cp = (char*)intPtr; - b3Assert(strncmp(cp, "STRC", 4) == 0); - intPtr++; - - if (!littleEndian) - *intPtr = b3SwapEndian(*intPtr); - dataLen = *intPtr; - intPtr++; - - shtPtr = (short*)intPtr; - for (i = 0; i < dataLen; i++) - { - mStructs.push_back(shtPtr); - - if (!littleEndian) - { - shtPtr[0] = b3SwapEndian(shtPtr[0]); - shtPtr[1] = b3SwapEndian(shtPtr[1]); - - int len = shtPtr[1]; - shtPtr += 2; - - for (int a = 0; a < len; a++, shtPtr += 2) - { - shtPtr[0] = b3SwapEndian(shtPtr[0]); - shtPtr[1] = b3SwapEndian(shtPtr[1]); - } - } - else - { - shtPtr += (2 * shtPtr[1]) + 2; - } - } - - // build reverse lookups - for (i = 0; i < (int)mStructs.size(); i++) - { - short* strc = mStructs.at(i); - mStructReverse.insert(strc[0], i); - mTypeLookup.insert(b3HashString(mTypes[strc[0]]), i); - } - } - -public: - b3DefaultSerializer(int totalSize = 0) - : m_totalSize(totalSize), - m_currentSize(0), - m_dna(0), - m_dnaLength(0), - m_serializationFlags(0) - { - m_buffer = m_totalSize ? (unsigned char*)b3AlignedAlloc(totalSize, 16) : 0; - - const bool VOID_IS_8 = ((sizeof(void*) == 8)); - -#ifdef B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { -#if _WIN64 - initDNA((const char*)b3s_bulletDNAstr64, b3s_bulletDNAlen64); -#else - b3Assert(0); -#endif - } - else - { -#ifndef _WIN64 - initDNA((const char*)b3s_bulletDNAstr, b3s_bulletDNAlen); -#else - b3Assert(0); -#endif - } - -#else //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - if (VOID_IS_8) - { - initDNA((const char*)b3s_bulletDNAstr64, b3s_bulletDNAlen64); - } - else - { - initDNA((const char*)b3s_bulletDNAstr, b3s_bulletDNAlen); - } -#endif //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES - } - - virtual ~b3DefaultSerializer() - { - if (m_buffer) - b3AlignedFree(m_buffer); - if (m_dna) - b3AlignedFree(m_dna); - } - - void writeHeader(unsigned char* buffer) const - { -#ifdef B3_USE_DOUBLE_PRECISION - memcpy(buffer, "BULLETd", 7); -#else - memcpy(buffer, "BULLETf", 7); -#endif //B3_USE_DOUBLE_PRECISION - - int littleEndian = 1; - littleEndian = ((char*)&littleEndian)[0]; - - if (sizeof(void*) == 8) - { - buffer[7] = '-'; - } - else - { - buffer[7] = '_'; - } - - if (littleEndian) - { - buffer[8] = 'v'; - } - else - { - buffer[8] = 'V'; - } - - buffer[9] = '2'; - buffer[10] = '8'; - buffer[11] = '1'; - } - - virtual void startSerialization() - { - m_uniqueIdGenerator = 1; - if (m_totalSize) - { - unsigned char* buffer = internalAlloc(B3_HEADER_LENGTH); - writeHeader(buffer); - } - } - - virtual void finishSerialization() - { - writeDNA(); - - //if we didn't pre-allocate a buffer, we need to create a contiguous buffer now - int mysize = 0; - if (!m_totalSize) - { - if (m_buffer) - b3AlignedFree(m_buffer); - - m_currentSize += B3_HEADER_LENGTH; - m_buffer = (unsigned char*)b3AlignedAlloc(m_currentSize, 16); - - unsigned char* currentPtr = m_buffer; - writeHeader(m_buffer); - currentPtr += B3_HEADER_LENGTH; - mysize += B3_HEADER_LENGTH; - for (int i = 0; i < m_chunkPtrs.size(); i++) - { - int curLength = sizeof(b3Chunk) + m_chunkPtrs[i]->m_length; - memcpy(currentPtr, m_chunkPtrs[i], curLength); - b3AlignedFree(m_chunkPtrs[i]); - currentPtr += curLength; - mysize += curLength; - } - } - - mTypes.clear(); - mStructs.clear(); - mTlens.clear(); - mStructReverse.clear(); - mTypeLookup.clear(); - m_chunkP.clear(); - m_nameMap.clear(); - m_uniquePointers.clear(); - m_chunkPtrs.clear(); - } - - virtual void* getUniquePointer(void* oldPtr) - { - if (!oldPtr) - return 0; - - b3PointerUid* uptr = (b3PointerUid*)m_uniquePointers.find(oldPtr); - if (uptr) - { - return uptr->m_ptr; - } - m_uniqueIdGenerator++; - - b3PointerUid uid; - uid.m_uniqueIds[0] = m_uniqueIdGenerator; - uid.m_uniqueIds[1] = m_uniqueIdGenerator; - m_uniquePointers.insert(oldPtr, uid); - return uid.m_ptr; - } - - virtual const unsigned char* getBufferPointer() const - { - return m_buffer; - } - - virtual int getCurrentBufferSize() const - { - return m_currentSize; - } - - virtual void finalizeChunk(b3Chunk* chunk, const char* structType, int chunkCode, void* oldPtr) - { - if (!(m_serializationFlags & B3_SERIALIZE_NO_DUPLICATE_ASSERT)) - { - b3Assert(!findPointer(oldPtr)); - } - - chunk->m_dna_nr = getReverseType(structType); - - chunk->m_chunkCode = chunkCode; - - void* uniquePtr = getUniquePointer(oldPtr); - - m_chunkP.insert(oldPtr, uniquePtr); //chunk->m_oldPtr); - chunk->m_oldPtr = uniquePtr; //oldPtr; - } - - virtual unsigned char* internalAlloc(size_t size) - { - unsigned char* ptr = 0; - - if (m_totalSize) - { - ptr = m_buffer + m_currentSize; - m_currentSize += int(size); - b3Assert(m_currentSize < m_totalSize); - } - else - { - ptr = (unsigned char*)b3AlignedAlloc(size, 16); - m_currentSize += int(size); - } - return ptr; - } - - virtual b3Chunk* allocate(size_t size, int numElements) - { - unsigned char* ptr = internalAlloc(int(size) * numElements + sizeof(b3Chunk)); - - unsigned char* data = ptr + sizeof(b3Chunk); - - b3Chunk* chunk = (b3Chunk*)ptr; - chunk->m_chunkCode = 0; - chunk->m_oldPtr = data; - chunk->m_length = int(size) * numElements; - chunk->m_number = numElements; - - m_chunkPtrs.push_back(chunk); - - return chunk; - } - - virtual const char* findNameForPointer(const void* ptr) const - { - const char* const* namePtr = m_nameMap.find(ptr); - if (namePtr && *namePtr) - return *namePtr; - return 0; - } - - virtual void registerNameForPointer(const void* ptr, const char* name) - { - m_nameMap.insert(ptr, name); - } - - virtual void serializeName(const char* name) - { - if (name) - { - //don't serialize name twice - if (findPointer((void*)name)) - return; - - int len = b3StrLen(name); - if (len) - { - int newLen = len + 1; - int padding = ((newLen + 3) & ~3) - newLen; - newLen += padding; - - //serialize name string now - b3Chunk* chunk = allocate(sizeof(char), newLen); - char* destinationName = (char*)chunk->m_oldPtr; - for (int i = 0; i < len; i++) - { - destinationName[i] = name[i]; - } - destinationName[len] = 0; - finalizeChunk(chunk, "char", B3_ARRAY_CODE, (void*)name); - } - } - } - - virtual int getSerializationFlags() const - { - return m_serializationFlags; - } - - virtual void setSerializationFlags(int flags) - { - m_serializationFlags = flags; - } -}; - -#endif //B3_SERIALIZER_H diff --git a/thirdparty/bullet/clew/clew.c b/thirdparty/bullet/clew/clew.c deleted file mode 100644 index 90caced535a..00000000000 --- a/thirdparty/bullet/clew/clew.c +++ /dev/null @@ -1,374 +0,0 @@ -////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009 Organic Vectory B.V. -// Written by George van Venrooij -// -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file license.txt) -////////////////////////////////////////////////////////////////////////// - -#include "clew.h" - -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#define VC_EXTRALEAN -#include - -typedef HMODULE CLEW_DYNLIB_HANDLE; - -#define CLEW_DYNLIB_OPEN LoadLibraryA -#define CLEW_DYNLIB_CLOSE FreeLibrary -#define CLEW_DYNLIB_IMPORT GetProcAddress -#else -#include - -typedef void* CLEW_DYNLIB_HANDLE; - -#define CLEW_DYNLIB_OPEN(path) dlopen(path, RTLD_NOW | RTLD_GLOBAL) -#define CLEW_DYNLIB_CLOSE dlclose -#define CLEW_DYNLIB_IMPORT dlsym -#endif - -#include - -//! \brief module handle -static CLEW_DYNLIB_HANDLE module = NULL; - -// Variables holding function entry points -PFNCLGETPLATFORMIDS __clewGetPlatformIDs = NULL; -PFNCLGETPLATFORMINFO __clewGetPlatformInfo = NULL; -PFNCLGETDEVICEIDS __clewGetDeviceIDs = NULL; -PFNCLGETDEVICEINFO __clewGetDeviceInfo = NULL; -PFNCLCREATECONTEXT __clewCreateContext = NULL; -PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType = NULL; -PFNCLRETAINCONTEXT __clewRetainContext = NULL; -PFNCLRELEASECONTEXT __clewReleaseContext = NULL; -PFNCLGETCONTEXTINFO __clewGetContextInfo = NULL; -PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue = NULL; -PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue = NULL; -PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue = NULL; -PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo = NULL; -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS -PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty = NULL; -#endif -PFNCLCREATEBUFFER __clewCreateBuffer = NULL; -PFNCLCREATESUBBUFFER __clewCreateSubBuffer = NULL; -PFNCLCREATEIMAGE2D __clewCreateImage2D = NULL; -PFNCLCREATEIMAGE3D __clewCreateImage3D = NULL; -PFNCLRETAINMEMOBJECT __clewRetainMemObject = NULL; -PFNCLRELEASEMEMOBJECT __clewReleaseMemObject = NULL; -PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats = NULL; -PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo = NULL; -PFNCLGETIMAGEINFO __clewGetImageInfo = NULL; -PFNCLSETMEMOBJECTDESTRUCTORCALLBACK __clewSetMemObjectDestructorCallback = NULL; -PFNCLCREATESAMPLER __clewCreateSampler = NULL; -PFNCLRETAINSAMPLER __clewRetainSampler = NULL; -PFNCLRELEASESAMPLER __clewReleaseSampler = NULL; -PFNCLGETSAMPLERINFO __clewGetSamplerInfo = NULL; -PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource = NULL; -PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary = NULL; -PFNCLRETAINPROGRAM __clewRetainProgram = NULL; -PFNCLRELEASEPROGRAM __clewReleaseProgram = NULL; -PFNCLBUILDPROGRAM __clewBuildProgram = NULL; -PFNCLUNLOADCOMPILER __clewUnloadCompiler = NULL; -PFNCLGETPROGRAMINFO __clewGetProgramInfo = NULL; -PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo = NULL; -PFNCLCREATEKERNEL __clewCreateKernel = NULL; -PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram = NULL; -PFNCLRETAINKERNEL __clewRetainKernel = NULL; -PFNCLRELEASEKERNEL __clewReleaseKernel = NULL; -PFNCLSETKERNELARG __clewSetKernelArg = NULL; -PFNCLGETKERNELINFO __clewGetKernelInfo = NULL; -PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo = NULL; -PFNCLWAITFOREVENTS __clewWaitForEvents = NULL; -PFNCLGETEVENTINFO __clewGetEventInfo = NULL; -PFNCLCREATEUSEREVENT __clewCreateUserEvent = NULL; -PFNCLRETAINEVENT __clewRetainEvent = NULL; -PFNCLRELEASEEVENT __clewReleaseEvent = NULL; -PFNCLSETUSEREVENTSTATUS __clewSetUserEventStatus = NULL; -PFNCLSETEVENTCALLBACK __clewSetEventCallback = NULL; -PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo = NULL; -PFNCLFLUSH __clewFlush = NULL; -PFNCLFINISH __clewFinish = NULL; -PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer = NULL; -PFNCLENQUEUEREADBUFFERRECT __clewEnqueueReadBufferRect = NULL; -PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer = NULL; -PFNCLENQUEUEWRITEBUFFERRECT __clewEnqueueWriteBufferRect = NULL; -PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer = NULL; -PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage = NULL; -PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage = NULL; -PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage = NULL; -PFNCLENQUEUECOPYBUFFERRECT __clewEnqueueCopyBufferRect = NULL; -PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer = NULL; -PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage = NULL; -PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer = NULL; -PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage = NULL; -PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject = NULL; -PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel = NULL; -PFNCLENQUEUETASK __clewEnqueueTask = NULL; -PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel = NULL; -PFNCLENQUEUEMARKER __clewEnqueueMarker = NULL; -PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents = NULL; -PFNCLENQUEUEBARRIER __clewEnqueueBarrier = NULL; -PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress = NULL; - -void clewExit(void) -{ - if (module != NULL) - { - // Ignore errors - CLEW_DYNLIB_CLOSE(module); - module = NULL; - } -} - -int clewInit(const char* path) -{ - int error = 0; - - // Check if already initialized - if (module != NULL) - { - return CLEW_SUCCESS; - } - - // Load library - module = CLEW_DYNLIB_OPEN(path); - - // Check for errors - if (module == NULL) - { - return CLEW_ERROR_OPEN_FAILED; - } - - // Set unloading - error = atexit(clewExit); - - if (error) - { - // Failure queuing atexit, shutdown with error - CLEW_DYNLIB_CLOSE(module); - module = NULL; - - return CLEW_ERROR_ATEXIT_FAILED; - } - - // Determine function entry-points - __clewGetPlatformIDs = (PFNCLGETPLATFORMIDS)CLEW_DYNLIB_IMPORT(module, "clGetPlatformIDs"); - __clewGetPlatformInfo = (PFNCLGETPLATFORMINFO)CLEW_DYNLIB_IMPORT(module, "clGetPlatformInfo"); - __clewGetDeviceIDs = (PFNCLGETDEVICEIDS)CLEW_DYNLIB_IMPORT(module, "clGetDeviceIDs"); - __clewGetDeviceInfo = (PFNCLGETDEVICEINFO)CLEW_DYNLIB_IMPORT(module, "clGetDeviceInfo"); - __clewCreateContext = (PFNCLCREATECONTEXT)CLEW_DYNLIB_IMPORT(module, "clCreateContext"); - __clewCreateContextFromType = (PFNCLCREATECONTEXTFROMTYPE)CLEW_DYNLIB_IMPORT(module, "clCreateContextFromType"); - __clewRetainContext = (PFNCLRETAINCONTEXT)CLEW_DYNLIB_IMPORT(module, "clRetainContext"); - __clewReleaseContext = (PFNCLRELEASECONTEXT)CLEW_DYNLIB_IMPORT(module, "clReleaseContext"); - __clewGetContextInfo = (PFNCLGETCONTEXTINFO)CLEW_DYNLIB_IMPORT(module, "clGetContextInfo"); - __clewCreateCommandQueue = (PFNCLCREATECOMMANDQUEUE)CLEW_DYNLIB_IMPORT(module, "clCreateCommandQueue"); - __clewRetainCommandQueue = (PFNCLRETAINCOMMANDQUEUE)CLEW_DYNLIB_IMPORT(module, "clRetainCommandQueue"); - __clewReleaseCommandQueue = (PFNCLRELEASECOMMANDQUEUE)CLEW_DYNLIB_IMPORT(module, "clReleaseCommandQueue"); - __clewGetCommandQueueInfo = (PFNCLGETCOMMANDQUEUEINFO)CLEW_DYNLIB_IMPORT(module, "clGetCommandQueueInfo"); -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS - __clewSetCommandQueueProperty = (PFNCLSETCOMMANDQUEUEPROPERTY)CLEW_DYNLIB_IMPORT(module, "clSetCommandQueueProperty"); -#endif - __clewCreateBuffer = (PFNCLCREATEBUFFER)CLEW_DYNLIB_IMPORT(module, "clCreateBuffer"); - __clewCreateSubBuffer = (PFNCLCREATESUBBUFFER)CLEW_DYNLIB_IMPORT(module, "clCreateBuffer"); - __clewCreateImage2D = (PFNCLCREATEIMAGE2D)CLEW_DYNLIB_IMPORT(module, "clCreateImage2D"); - __clewCreateImage3D = (PFNCLCREATEIMAGE3D)CLEW_DYNLIB_IMPORT(module, "clCreateImage3D"); - __clewRetainMemObject = (PFNCLRETAINMEMOBJECT)CLEW_DYNLIB_IMPORT(module, "clRetainMemObject"); - __clewReleaseMemObject = (PFNCLRELEASEMEMOBJECT)CLEW_DYNLIB_IMPORT(module, "clReleaseMemObject"); - __clewGetSupportedImageFormats = (PFNCLGETSUPPORTEDIMAGEFORMATS)CLEW_DYNLIB_IMPORT(module, "clGetSupportedImageFormats"); - __clewGetMemObjectInfo = (PFNCLGETMEMOBJECTINFO)CLEW_DYNLIB_IMPORT(module, "clGetMemObjectInfo"); - __clewGetImageInfo = (PFNCLGETIMAGEINFO)CLEW_DYNLIB_IMPORT(module, "clGetImageInfo"); - __clewSetMemObjectDestructorCallback = (PFNCLSETMEMOBJECTDESTRUCTORCALLBACK)CLEW_DYNLIB_IMPORT(module, "clSetMemObjectDestructorCallback"); - __clewCreateSampler = (PFNCLCREATESAMPLER)CLEW_DYNLIB_IMPORT(module, "clCreateSampler"); - __clewRetainSampler = (PFNCLRETAINSAMPLER)CLEW_DYNLIB_IMPORT(module, "clRetainSampler"); - __clewReleaseSampler = (PFNCLRELEASESAMPLER)CLEW_DYNLIB_IMPORT(module, "clReleaseSampler"); - __clewGetSamplerInfo = (PFNCLGETSAMPLERINFO)CLEW_DYNLIB_IMPORT(module, "clGetSamplerInfo"); - __clewCreateProgramWithSource = (PFNCLCREATEPROGRAMWITHSOURCE)CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithSource"); - __clewCreateProgramWithBinary = (PFNCLCREATEPROGRAMWITHBINARY)CLEW_DYNLIB_IMPORT(module, "clCreateProgramWithBinary"); - __clewRetainProgram = (PFNCLRETAINPROGRAM)CLEW_DYNLIB_IMPORT(module, "clRetainProgram"); - __clewReleaseProgram = (PFNCLRELEASEPROGRAM)CLEW_DYNLIB_IMPORT(module, "clReleaseProgram"); - __clewBuildProgram = (PFNCLBUILDPROGRAM)CLEW_DYNLIB_IMPORT(module, "clBuildProgram"); - __clewUnloadCompiler = (PFNCLUNLOADCOMPILER)CLEW_DYNLIB_IMPORT(module, "clUnloadCompiler"); - __clewGetProgramInfo = (PFNCLGETPROGRAMINFO)CLEW_DYNLIB_IMPORT(module, "clGetProgramInfo"); - __clewGetProgramBuildInfo = (PFNCLGETPROGRAMBUILDINFO)CLEW_DYNLIB_IMPORT(module, "clGetProgramBuildInfo"); - __clewCreateKernel = (PFNCLCREATEKERNEL)CLEW_DYNLIB_IMPORT(module, "clCreateKernel"); - __clewCreateKernelsInProgram = (PFNCLCREATEKERNELSINPROGRAM)CLEW_DYNLIB_IMPORT(module, "clCreateKernelsInProgram"); - __clewRetainKernel = (PFNCLRETAINKERNEL)CLEW_DYNLIB_IMPORT(module, "clRetainKernel"); - __clewReleaseKernel = (PFNCLRELEASEKERNEL)CLEW_DYNLIB_IMPORT(module, "clReleaseKernel"); - __clewSetKernelArg = (PFNCLSETKERNELARG)CLEW_DYNLIB_IMPORT(module, "clSetKernelArg"); - __clewGetKernelInfo = (PFNCLGETKERNELINFO)CLEW_DYNLIB_IMPORT(module, "clGetKernelInfo"); - __clewGetKernelWorkGroupInfo = (PFNCLGETKERNELWORKGROUPINFO)CLEW_DYNLIB_IMPORT(module, "clGetKernelWorkGroupInfo"); - __clewWaitForEvents = (PFNCLWAITFOREVENTS)CLEW_DYNLIB_IMPORT(module, "clWaitForEvents"); - __clewGetEventInfo = (PFNCLGETEVENTINFO)CLEW_DYNLIB_IMPORT(module, "clGetEventInfo"); - __clewCreateUserEvent = (PFNCLCREATEUSEREVENT)CLEW_DYNLIB_IMPORT(module, "clCreateUserEvent"); - __clewRetainEvent = (PFNCLRETAINEVENT)CLEW_DYNLIB_IMPORT(module, "clRetainEvent"); - __clewReleaseEvent = (PFNCLRELEASEEVENT)CLEW_DYNLIB_IMPORT(module, "clReleaseEvent"); - __clewSetUserEventStatus = (PFNCLSETUSEREVENTSTATUS)CLEW_DYNLIB_IMPORT(module, "clSetUserEventStatus"); - __clewSetEventCallback = (PFNCLSETEVENTCALLBACK)CLEW_DYNLIB_IMPORT(module, "clSetEventCallback"); - __clewGetEventProfilingInfo = (PFNCLGETEVENTPROFILINGINFO)CLEW_DYNLIB_IMPORT(module, "clGetEventProfilingInfo"); - __clewFlush = (PFNCLFLUSH)CLEW_DYNLIB_IMPORT(module, "clFlush"); - __clewFinish = (PFNCLFINISH)CLEW_DYNLIB_IMPORT(module, "clFinish"); - __clewEnqueueReadBuffer = (PFNCLENQUEUEREADBUFFER)CLEW_DYNLIB_IMPORT(module, "clEnqueueReadBuffer"); - __clewEnqueueReadBufferRect = (PFNCLENQUEUEREADBUFFERRECT)CLEW_DYNLIB_IMPORT(module, "clEnqueueReadBufferRect"); - __clewEnqueueWriteBuffer = (PFNCLENQUEUEWRITEBUFFER)CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteBuffer"); - __clewEnqueueWriteBufferRect = (PFNCLENQUEUEWRITEBUFFERRECT)CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteBufferRect"); - __clewEnqueueCopyBuffer = (PFNCLENQUEUECOPYBUFFER)CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBuffer"); - __clewEnqueueCopyBufferRect = (PFNCLENQUEUECOPYBUFFERRECT)CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBufferRect"); - __clewEnqueueReadImage = (PFNCLENQUEUEREADIMAGE)CLEW_DYNLIB_IMPORT(module, "clEnqueueReadImage"); - __clewEnqueueWriteImage = (PFNCLENQUEUEWRITEIMAGE)CLEW_DYNLIB_IMPORT(module, "clEnqueueWriteImage"); - __clewEnqueueCopyImage = (PFNCLENQUEUECOPYIMAGE)CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyImage"); - __clewEnqueueCopyImageToBuffer = (PFNCLENQUEUECOPYIMAGETOBUFFER)CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyImageToBuffer"); - __clewEnqueueCopyBufferToImage = (PFNCLENQUEUECOPYBUFFERTOIMAGE)CLEW_DYNLIB_IMPORT(module, "clEnqueueCopyBufferToImage"); - __clewEnqueueMapBuffer = (PFNCLENQUEUEMAPBUFFER)CLEW_DYNLIB_IMPORT(module, "clEnqueueMapBuffer"); - __clewEnqueueMapImage = (PFNCLENQUEUEMAPIMAGE)CLEW_DYNLIB_IMPORT(module, "clEnqueueMapImage"); - __clewEnqueueUnmapMemObject = (PFNCLENQUEUEUNMAPMEMOBJECT)CLEW_DYNLIB_IMPORT(module, "clEnqueueUnmapMemObject"); - __clewEnqueueNDRangeKernel = (PFNCLENQUEUENDRANGEKERNEL)CLEW_DYNLIB_IMPORT(module, "clEnqueueNDRangeKernel"); - __clewEnqueueTask = (PFNCLENQUEUETASK)CLEW_DYNLIB_IMPORT(module, "clEnqueueTask"); - __clewEnqueueNativeKernel = (PFNCLENQUEUENATIVEKERNEL)CLEW_DYNLIB_IMPORT(module, "clEnqueueNativeKernel"); - __clewEnqueueMarker = (PFNCLENQUEUEMARKER)CLEW_DYNLIB_IMPORT(module, "clEnqueueMarker"); - __clewEnqueueWaitForEvents = (PFNCLENQUEUEWAITFOREVENTS)CLEW_DYNLIB_IMPORT(module, "clEnqueueWaitForEvents"); - __clewEnqueueBarrier = (PFNCLENQUEUEBARRIER)CLEW_DYNLIB_IMPORT(module, "clEnqueueBarrier"); - __clewGetExtensionFunctionAddress = (PFNCLGETEXTENSIONFUNCTIONADDRESS)CLEW_DYNLIB_IMPORT(module, "clGetExtensionFunctionAddress"); - - return CLEW_SUCCESS; -} - -const char* clewErrorString(cl_int error) -{ - static const char* strings[] = - { - // Error Codes - "CL_SUCCESS" // 0 - , - "CL_DEVICE_NOT_FOUND" // -1 - , - "CL_DEVICE_NOT_AVAILABLE" // -2 - , - "CL_COMPILER_NOT_AVAILABLE" // -3 - , - "CL_MEM_OBJECT_ALLOCATION_FAILURE" // -4 - , - "CL_OUT_OF_RESOURCES" // -5 - , - "CL_OUT_OF_HOST_MEMORY" // -6 - , - "CL_PROFILING_INFO_NOT_AVAILABLE" // -7 - , - "CL_MEM_COPY_OVERLAP" // -8 - , - "CL_IMAGE_FORMAT_MISMATCH" // -9 - , - "CL_IMAGE_FORMAT_NOT_SUPPORTED" // -10 - , - "CL_BUILD_PROGRAM_FAILURE" // -11 - , - "CL_MAP_FAILURE" // -12 - - , - "" // -13 - , - "" // -14 - , - "" // -15 - , - "" // -16 - , - "" // -17 - , - "" // -18 - , - "" // -19 - - , - "" // -20 - , - "" // -21 - , - "" // -22 - , - "" // -23 - , - "" // -24 - , - "" // -25 - , - "" // -26 - , - "" // -27 - , - "" // -28 - , - "" // -29 - - , - "CL_INVALID_VALUE" // -30 - , - "CL_INVALID_DEVICE_TYPE" // -31 - , - "CL_INVALID_PLATFORM" // -32 - , - "CL_INVALID_DEVICE" // -33 - , - "CL_INVALID_CONTEXT" // -34 - , - "CL_INVALID_QUEUE_PROPERTIES" // -35 - , - "CL_INVALID_COMMAND_QUEUE" // -36 - , - "CL_INVALID_HOST_PTR" // -37 - , - "CL_INVALID_MEM_OBJECT" // -38 - , - "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" // -39 - , - "CL_INVALID_IMAGE_SIZE" // -40 - , - "CL_INVALID_SAMPLER" // -41 - , - "CL_INVALID_BINARY" // -42 - , - "CL_INVALID_BUILD_OPTIONS" // -43 - , - "CL_INVALID_PROGRAM" // -44 - , - "CL_INVALID_PROGRAM_EXECUTABLE" // -45 - , - "CL_INVALID_KERNEL_NAME" // -46 - , - "CL_INVALID_KERNEL_DEFINITION" // -47 - , - "CL_INVALID_KERNEL" // -48 - , - "CL_INVALID_ARG_INDEX" // -49 - , - "CL_INVALID_ARG_VALUE" // -50 - , - "CL_INVALID_ARG_SIZE" // -51 - , - "CL_INVALID_KERNEL_ARGS" // -52 - , - "CL_INVALID_WORK_DIMENSION" // -53 - , - "CL_INVALID_WORK_GROUP_SIZE" // -54 - , - "CL_INVALID_WORK_ITEM_SIZE" // -55 - , - "CL_INVALID_GLOBAL_OFFSET" // -56 - , - "CL_INVALID_EVENT_WAIT_LIST" // -57 - , - "CL_INVALID_EVENT" // -58 - , - "CL_INVALID_OPERATION" // -59 - , - "CL_INVALID_GL_OBJECT" // -60 - , - "CL_INVALID_BUFFER_SIZE" // -61 - , - "CL_INVALID_MIP_LEVEL" // -62 - , - "CL_INVALID_GLOBAL_WORK_SIZE" // -63 - }; - - return strings[-error]; -} diff --git a/thirdparty/bullet/clew/clew.h b/thirdparty/bullet/clew/clew.h deleted file mode 100644 index cba85852339..00000000000 --- a/thirdparty/bullet/clew/clew.h +++ /dev/null @@ -1,2708 +0,0 @@ -#ifndef CLEW_HPP_INCLUDED -#define CLEW_HPP_INCLUDED - -////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Organic Vectory B.V., KindDragon -// Written by George van Venrooij -// -// Distributed under the MIT License. -////////////////////////////////////////////////////////////////////////// - -//! \file clew.h -//! \brief OpenCL run-time loader header -//! -//! This file contains a copy of the contents of CL.H and CL_PLATFORM.H from the -//! official OpenCL spec. The purpose of this code is to load the OpenCL dynamic -//! library at run-time and thus allow the executable to function on many -//! platforms regardless of the vendor of the OpenCL driver actually installed. -//! Some of the techniques used here were inspired by work done in the GLEW -//! library (http://glew.sourceforge.net/) - -// Run-time dynamic linking functionality based on concepts used in GLEW -#ifdef __OPENCL_CL_H -#error cl.h included before clew.h -#endif - -#ifdef __OPENCL_CL_PLATFORM_H -#error cl_platform.h included before clew.h -#endif - -// Prevent cl.h inclusion -#define __OPENCL_CL_H -// Prevent cl_platform.h inclusion -#define __CL_PLATFORM_H - -/******************************************************************************* -* Copyright (c) 2008-2010 The Khronos Group Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and/or associated documentation files (the -* "Materials"), to deal in the Materials without restriction, including -* without limitation the rights to use, copy, modify, merge, publish, -* distribute, sublicense, and/or sell copies of the Materials, and to -* permit persons to whom the Materials are furnished to do so, subject to -* the following conditions: -* -* The above copyright notice and this permission notice shall be included -* in all copies or substantial portions of the Materials. -* -* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. -******************************************************************************/ -#ifdef __APPLE__ -/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */ -#include -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - -#if defined(_WIN32) -#define CL_API_ENTRY -#define CL_API_CALL __stdcall -#define CL_CALLBACK __stdcall -#else -#define CL_API_ENTRY -#define CL_API_CALL -#define CL_CALLBACK -#endif - //disabled the APPLE thing, don't know why it is there, is just causes tons of warnings - -#ifdef __APPLE1__ -#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) -#define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER -#define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER -#define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK -#define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK -#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER -#else -#define CL_EXTENSION_WEAK_LINK -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXT_SUFFIX__VERSION_1_0 -#define CL_API_SUFFIX__VERSION_1_1 -#define CL_EXT_SUFFIX__VERSION_1_1 -#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED -#endif - -#if (defined(_WIN32) && defined(_MSC_VER)) - - /* scalar types */ - typedef signed __int8 cl_char; - typedef unsigned __int8 cl_uchar; - typedef signed __int16 cl_short; - typedef unsigned __int16 cl_ushort; - typedef signed __int32 cl_int; - typedef unsigned __int32 cl_uint; - typedef signed __int64 cl_long; - typedef unsigned __int64 cl_ulong; - - typedef unsigned __int16 cl_half; - typedef float cl_float; - typedef double cl_double; - -/* Macro names and corresponding values defined by OpenCL */ -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127 - 1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767 - 1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647 - 1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long)0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long)-0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong)0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#define CL_FLT_MAX 340282346638528859811704183484516925440.0f -#define CL_FLT_MIN 1.175494350822287507969e-38f -#define CL_FLT_EPSILON 0x1.0p-23f - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 -#define CL_DBL_MIN 2.225073858507201383090e-308 -#define CL_DBL_EPSILON 2.220446049250313080847e-16 - -#define CL_M_E 2.718281828459045090796 -#define CL_M_LOG2E 1.442695040888963387005 -#define CL_M_LOG10E 0.434294481903251816668 -#define CL_M_LN2 0.693147180559945286227 -#define CL_M_LN10 2.302585092994045901094 -#define CL_M_PI 3.141592653589793115998 -#define CL_M_PI_2 1.570796326794896557999 -#define CL_M_PI_4 0.785398163397448278999 -#define CL_M_1_PI 0.318309886183790691216 -#define CL_M_2_PI 0.636619772367581382433 -#define CL_M_2_SQRTPI 1.128379167095512558561 -#define CL_M_SQRT2 1.414213562373095145475 -#define CL_M_SQRT1_2 0.707106781186547572737 - -#define CL_M_E_F 2.71828174591064f -#define CL_M_LOG2E_F 1.44269502162933f -#define CL_M_LOG10E_F 0.43429449200630f -#define CL_M_LN2_F 0.69314718246460f -#define CL_M_LN10_F 2.30258512496948f -#define CL_M_PI_F 3.14159274101257f -#define CL_M_PI_2_F 1.57079637050629f -#define CL_M_PI_4_F 0.78539818525314f -#define CL_M_1_PI_F 0.31830987334251f -#define CL_M_2_PI_F 0.63661974668503f -#define CL_M_2_SQRTPI_F 1.12837922573090f -#define CL_M_SQRT2_F 1.41421353816986f -#define CL_M_SQRT1_2_F 0.70710676908493f - -#define CL_NAN (CL_INFINITY - CL_INFINITY) -#define CL_HUGE_VALF ((cl_float)1e50) -#define CL_HUGE_VAL ((cl_double)1e500) -#define CL_MAXFLOAT CL_FLT_MAX -#define CL_INFINITY CL_HUGE_VALF - -#else - -#include - -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short __attribute__((aligned(2))); -typedef uint16_t cl_ushort __attribute__((aligned(2))); -typedef int32_t cl_int __attribute__((aligned(4))); -typedef uint32_t cl_uint __attribute__((aligned(4))); -typedef int64_t cl_long __attribute__((aligned(8))); -typedef uint64_t cl_ulong __attribute__((aligned(8))); - -typedef uint16_t cl_half __attribute__((aligned(2))); -typedef float cl_float __attribute__((aligned(4))); -typedef double cl_double __attribute__((aligned(8))); - -/* Macro names and corresponding values defined by OpenCL */ -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127 - 1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767 - 1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647 - 1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long)0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long)-0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong)0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#define CL_FLT_MAX 0x1.fffffep127f -#define CL_FLT_MIN 0x1.0p-126f -#define CL_FLT_EPSILON 0x1.0p-23f - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#define CL_DBL_MAX 0x1.fffffffffffffp1023 -#define CL_DBL_MIN 0x1.0p-1022 -#define CL_DBL_EPSILON 0x1.0p-52 - -#define CL_M_E 2.718281828459045090796 -#define CL_M_LOG2E 1.442695040888963387005 -#define CL_M_LOG10E 0.434294481903251816668 -#define CL_M_LN2 0.693147180559945286227 -#define CL_M_LN10 2.302585092994045901094 -#define CL_M_PI 3.141592653589793115998 -#define CL_M_PI_2 1.570796326794896557999 -#define CL_M_PI_4 0.785398163397448278999 -#define CL_M_1_PI 0.318309886183790691216 -#define CL_M_2_PI 0.636619772367581382433 -#define CL_M_2_SQRTPI 1.128379167095512558561 -#define CL_M_SQRT2 1.414213562373095145475 -#define CL_M_SQRT1_2 0.707106781186547572737 - -#define CL_M_E_F 2.71828174591064f -#define CL_M_LOG2E_F 1.44269502162933f -#define CL_M_LOG10E_F 0.43429449200630f -#define CL_M_LN2_F 0.69314718246460f -#define CL_M_LN10_F 2.30258512496948f -#define CL_M_PI_F 3.14159274101257f -#define CL_M_PI_2_F 1.57079637050629f -#define CL_M_PI_4_F 0.78539818525314f -#define CL_M_1_PI_F 0.31830987334251f -#define CL_M_2_PI_F 0.63661974668503f -#define CL_M_2_SQRTPI_F 1.12837922573090f -#define CL_M_SQRT2_F 1.41421353816986f -#define CL_M_SQRT1_2_F 0.70710676908493f - -#if defined(__GNUC__) -#define CL_HUGE_VALF __builtin_huge_valf() -#define CL_HUGE_VAL __builtin_huge_val() -#define CL_NAN __builtin_nanf("") -#else -#define CL_HUGE_VALF ((cl_float)1e50) -#define CL_HUGE_VAL ((cl_double)1e500) -float nanf(const char *); -#define CL_NAN nanf("") -#endif -#define CL_MAXFLOAT CL_FLT_MAX -#define CL_INFINITY CL_HUGE_VALF - -#endif - -#include - - /* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ - typedef unsigned int cl_GLuint; - typedef int cl_GLint; - typedef unsigned int cl_GLenum; - - /* - * Vector types - * - * Note: OpenCL requires that all types be naturally aligned. - * This means that vector types must be naturally aligned. - * For example, a vector of four floats must be aligned to - * a 16 byte boundary (calculated as 4 * the natural 4-byte - * alignment of the float). The alignment qualifiers here - * will only function properly if your compiler supports them - * and if you don't actively work to defeat them. For example, - * in order for a cl_float4 to be 16 byte aligned in a struct, - * the start of the struct must itself be 16-byte aligned. - * - * Maintaining proper alignment is the user's responsibility. - */ - -#ifdef _MSC_VER -#if defined(_M_IX86) -#if _M_IX86_FP >= 0 -#define __SSE__ -#endif -#if _M_IX86_FP >= 1 -#define __SSE2__ -#endif -#elif defined(_M_X64) -#define __SSE__ -#define __SSE2__ -#endif -#endif - -/* Define basic vector types */ -#if defined(__VEC__) -#include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ - typedef vector unsigned char __cl_uchar16; - typedef vector signed char __cl_char16; - typedef vector unsigned short __cl_ushort8; - typedef vector signed short __cl_short8; - typedef vector unsigned int __cl_uint4; - typedef vector signed int __cl_int4; - typedef vector float __cl_float4; -#define __CL_UCHAR16__ 1 -#define __CL_CHAR16__ 1 -#define __CL_USHORT8__ 1 -#define __CL_SHORT8__ 1 -#define __CL_UINT4__ 1 -#define __CL_INT4__ 1 -#define __CL_FLOAT4__ 1 -#endif - -#if defined(__SSE__) -#if defined(__MINGW64__) -#include -#else -#include -#endif -#if defined(__GNUC__) && !defined(__ICC) - typedef float __cl_float4 __attribute__((vector_size(16))); -#else - typedef __m128 __cl_float4; -#endif -#define __CL_FLOAT4__ 1 -#endif - -#if defined(__SSE2__) -#if defined(__MINGW64__) -#include -#else -#include -#endif -#if defined(__GNUC__) && !defined(__ICC) - typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); - typedef cl_char __cl_char16 __attribute__((vector_size(16))); - typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); - typedef cl_short __cl_short8 __attribute__((vector_size(16))); - typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); - typedef cl_int __cl_int4 __attribute__((vector_size(16))); - typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); - typedef cl_long __cl_long2 __attribute__((vector_size(16))); - typedef cl_double __cl_double2 __attribute__((vector_size(16))); -#else - typedef __m128i __cl_uchar16; - typedef __m128i __cl_char16; - typedef __m128i __cl_ushort8; - typedef __m128i __cl_short8; - typedef __m128i __cl_uint4; - typedef __m128i __cl_int4; - typedef __m128i __cl_ulong2; - typedef __m128i __cl_long2; - typedef __m128d __cl_double2; -#endif -#define __CL_UCHAR16__ 1 -#define __CL_CHAR16__ 1 -#define __CL_USHORT8__ 1 -#define __CL_SHORT8__ 1 -#define __CL_INT4__ 1 -#define __CL_UINT4__ 1 -#define __CL_ULONG2__ 1 -#define __CL_LONG2__ 1 -#define __CL_DOUBLE2__ 1 -#endif - -#if defined(__MMX__) -#include -#if defined(__GNUC__) && !defined(__ICC) - typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); - typedef cl_char __cl_char8 __attribute__((vector_size(8))); - typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); - typedef cl_short __cl_short4 __attribute__((vector_size(8))); - typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); - typedef cl_int __cl_int2 __attribute__((vector_size(8))); - typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); - typedef cl_long __cl_long1 __attribute__((vector_size(8))); - typedef cl_float __cl_float2 __attribute__((vector_size(8))); -#else - typedef __m64 __cl_uchar8; - typedef __m64 __cl_char8; - typedef __m64 __cl_ushort4; - typedef __m64 __cl_short4; - typedef __m64 __cl_uint2; - typedef __m64 __cl_int2; - typedef __m64 __cl_ulong1; - typedef __m64 __cl_long1; - typedef __m64 __cl_float2; -#endif -#define __CL_UCHAR8__ 1 -#define __CL_CHAR8__ 1 -#define __CL_USHORT4__ 1 -#define __CL_SHORT4__ 1 -#define __CL_INT2__ 1 -#define __CL_UINT2__ 1 -#define __CL_ULONG1__ 1 -#define __CL_LONG1__ 1 -#define __CL_FLOAT2__ 1 -#endif - -#if defined(__AVX__) -#if defined(__MINGW64__) -#include -#else -#include -#endif -#if defined(__GNUC__) && !defined(__ICC) - typedef cl_float __cl_float8 __attribute__((vector_size(32))); - typedef cl_double __cl_double4 __attribute__((vector_size(32))); -#else - typedef __m256 __cl_float8; - typedef __m256d __cl_double4; -#endif -#define __CL_FLOAT8__ 1 -#define __CL_DOUBLE4__ 1 -#endif - -/* Define alignment keys */ -#if defined(__GNUC__) -#define CL_ALIGNED(_x) __attribute__((aligned(_x))) -#elif defined(_WIN32) && (_MSC_VER) -/* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ -/* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ -/* #include */ -/* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ -#define CL_ALIGNED(_x) -#else -#warning Need to implement some method to align data here -#define CL_ALIGNED(_x) -#endif - -/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || (defined(_MSC_VER) && !defined(__STDC__)) -/* .xyzw and .s0123...{f|F} are supported */ -#define CL_HAS_NAMED_VECTOR_FIELDS 1 -/* .hi and .lo are supported */ -#define CL_HAS_HI_LO_VECTOR_FIELDS 1 - -#define CL_NAMED_STRUCT_SUPPORTED -#endif - -#if defined(CL_NAMED_STRUCT_SUPPORTED) && defined(_MSC_VER) -#define __extension__ __pragma(warning(suppress : 4201)) -#endif - - /* Define cl_vector types */ - - /* ---- cl_charn ---- */ - typedef union { - cl_char CL_ALIGNED(2) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_char x, y; - }; - __extension__ struct - { - cl_char s0, s1; - }; - __extension__ struct - { - cl_char lo, hi; - }; -#endif -#if defined(__CL_CHAR2__) - __cl_char2 v2; -#endif - } cl_char2; - - typedef union { - cl_char CL_ALIGNED(4) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_char x, y, z, w; - }; - __extension__ struct - { - cl_char s0, s1, s2, s3; - }; - __extension__ struct - { - cl_char2 lo, hi; - }; -#endif -#if defined(__CL_CHAR2__) - __cl_char2 v2[2]; -#endif -#if defined(__CL_CHAR4__) - __cl_char4 v4; -#endif - } cl_char4; - - /* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ - typedef cl_char4 cl_char3; - - typedef union { - cl_char CL_ALIGNED(8) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_char x, y, z, w; - }; - __extension__ struct - { - cl_char s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_char4 lo, hi; - }; -#endif -#if defined(__CL_CHAR2__) - __cl_char2 v2[4]; -#endif -#if defined(__CL_CHAR4__) - __cl_char4 v4[2]; -#endif -#if defined(__CL_CHAR8__) - __cl_char8 v8; -#endif - } cl_char8; - - typedef union { - cl_char CL_ALIGNED(16) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_char8 lo, hi; - }; -#endif -#if defined(__CL_CHAR2__) - __cl_char2 v2[8]; -#endif -#if defined(__CL_CHAR4__) - __cl_char4 v4[4]; -#endif -#if defined(__CL_CHAR8__) - __cl_char8 v8[2]; -#endif -#if defined(__CL_CHAR16__) - __cl_char16 v16; -#endif - } cl_char16; - - /* ---- cl_ucharn ---- */ - typedef union { - cl_uchar CL_ALIGNED(2) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uchar x, y; - }; - __extension__ struct - { - cl_uchar s0, s1; - }; - __extension__ struct - { - cl_uchar lo, hi; - }; -#endif -#if defined(__cl_uchar2__) - __cl_uchar2 v2; -#endif - } cl_uchar2; - - typedef union { - cl_uchar CL_ALIGNED(4) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uchar x, y, z, w; - }; - __extension__ struct - { - cl_uchar s0, s1, s2, s3; - }; - __extension__ struct - { - cl_uchar2 lo, hi; - }; -#endif -#if defined(__CL_UCHAR2__) - __cl_uchar2 v2[2]; -#endif -#if defined(__CL_UCHAR4__) - __cl_uchar4 v4; -#endif - } cl_uchar4; - - /* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ - typedef cl_uchar4 cl_uchar3; - - typedef union { - cl_uchar CL_ALIGNED(8) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uchar x, y, z, w; - }; - __extension__ struct - { - cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_uchar4 lo, hi; - }; -#endif -#if defined(__CL_UCHAR2__) - __cl_uchar2 v2[4]; -#endif -#if defined(__CL_UCHAR4__) - __cl_uchar4 v4[2]; -#endif -#if defined(__CL_UCHAR8__) - __cl_uchar8 v8; -#endif - } cl_uchar8; - - typedef union { - cl_uchar CL_ALIGNED(16) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_uchar8 lo, hi; - }; -#endif -#if defined(__CL_UCHAR2__) - __cl_uchar2 v2[8]; -#endif -#if defined(__CL_UCHAR4__) - __cl_uchar4 v4[4]; -#endif -#if defined(__CL_UCHAR8__) - __cl_uchar8 v8[2]; -#endif -#if defined(__CL_UCHAR16__) - __cl_uchar16 v16; -#endif - } cl_uchar16; - - /* ---- cl_shortn ---- */ - typedef union { - cl_short CL_ALIGNED(4) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_short x, y; - }; - __extension__ struct - { - cl_short s0, s1; - }; - __extension__ struct - { - cl_short lo, hi; - }; -#endif -#if defined(__CL_SHORT2__) - __cl_short2 v2; -#endif - } cl_short2; - - typedef union { - cl_short CL_ALIGNED(8) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_short x, y, z, w; - }; - __extension__ struct - { - cl_short s0, s1, s2, s3; - }; - __extension__ struct - { - cl_short2 lo, hi; - }; -#endif -#if defined(__CL_SHORT2__) - __cl_short2 v2[2]; -#endif -#if defined(__CL_SHORT4__) - __cl_short4 v4; -#endif - } cl_short4; - - /* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ - typedef cl_short4 cl_short3; - - typedef union { - cl_short CL_ALIGNED(16) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_short x, y, z, w; - }; - __extension__ struct - { - cl_short s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_short4 lo, hi; - }; -#endif -#if defined(__CL_SHORT2__) - __cl_short2 v2[4]; -#endif -#if defined(__CL_SHORT4__) - __cl_short4 v4[2]; -#endif -#if defined(__CL_SHORT8__) - __cl_short8 v8; -#endif - } cl_short8; - - typedef union { - cl_short CL_ALIGNED(32) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_short8 lo, hi; - }; -#endif -#if defined(__CL_SHORT2__) - __cl_short2 v2[8]; -#endif -#if defined(__CL_SHORT4__) - __cl_short4 v4[4]; -#endif -#if defined(__CL_SHORT8__) - __cl_short8 v8[2]; -#endif -#if defined(__CL_SHORT16__) - __cl_short16 v16; -#endif - } cl_short16; - - /* ---- cl_ushortn ---- */ - typedef union { - cl_ushort CL_ALIGNED(4) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ushort x, y; - }; - __extension__ struct - { - cl_ushort s0, s1; - }; - __extension__ struct - { - cl_ushort lo, hi; - }; -#endif -#if defined(__CL_USHORT2__) - __cl_ushort2 v2; -#endif - } cl_ushort2; - - typedef union { - cl_ushort CL_ALIGNED(8) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ushort x, y, z, w; - }; - __extension__ struct - { - cl_ushort s0, s1, s2, s3; - }; - __extension__ struct - { - cl_ushort2 lo, hi; - }; -#endif -#if defined(__CL_USHORT2__) - __cl_ushort2 v2[2]; -#endif -#if defined(__CL_USHORT4__) - __cl_ushort4 v4; -#endif - } cl_ushort4; - - /* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ - typedef cl_ushort4 cl_ushort3; - - typedef union { - cl_ushort CL_ALIGNED(16) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ushort x, y, z, w; - }; - __extension__ struct - { - cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_ushort4 lo, hi; - }; -#endif -#if defined(__CL_USHORT2__) - __cl_ushort2 v2[4]; -#endif -#if defined(__CL_USHORT4__) - __cl_ushort4 v4[2]; -#endif -#if defined(__CL_USHORT8__) - __cl_ushort8 v8; -#endif - } cl_ushort8; - - typedef union { - cl_ushort CL_ALIGNED(32) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_ushort8 lo, hi; - }; -#endif -#if defined(__CL_USHORT2__) - __cl_ushort2 v2[8]; -#endif -#if defined(__CL_USHORT4__) - __cl_ushort4 v4[4]; -#endif -#if defined(__CL_USHORT8__) - __cl_ushort8 v8[2]; -#endif -#if defined(__CL_USHORT16__) - __cl_ushort16 v16; -#endif - } cl_ushort16; - - /* ---- cl_intn ---- */ - typedef union { - cl_int CL_ALIGNED(8) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_int x, y; - }; - __extension__ struct - { - cl_int s0, s1; - }; - __extension__ struct - { - cl_int lo, hi; - }; -#endif -#if defined(__CL_INT2__) - __cl_int2 v2; -#endif - } cl_int2; - - typedef union { - cl_int CL_ALIGNED(16) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_int x, y, z, w; - }; - __extension__ struct - { - cl_int s0, s1, s2, s3; - }; - __extension__ struct - { - cl_int2 lo, hi; - }; -#endif -#if defined(__CL_INT2__) - __cl_int2 v2[2]; -#endif -#if defined(__CL_INT4__) - __cl_int4 v4; -#endif - } cl_int4; - - /* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ - typedef cl_int4 cl_int3; - - typedef union { - cl_int CL_ALIGNED(32) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_int x, y, z, w; - }; - __extension__ struct - { - cl_int s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_int4 lo, hi; - }; -#endif -#if defined(__CL_INT2__) - __cl_int2 v2[4]; -#endif -#if defined(__CL_INT4__) - __cl_int4 v4[2]; -#endif -#if defined(__CL_INT8__) - __cl_int8 v8; -#endif - } cl_int8; - - typedef union { - cl_int CL_ALIGNED(64) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_int8 lo, hi; - }; -#endif -#if defined(__CL_INT2__) - __cl_int2 v2[8]; -#endif -#if defined(__CL_INT4__) - __cl_int4 v4[4]; -#endif -#if defined(__CL_INT8__) - __cl_int8 v8[2]; -#endif -#if defined(__CL_INT16__) - __cl_int16 v16; -#endif - } cl_int16; - - /* ---- cl_uintn ---- */ - typedef union { - cl_uint CL_ALIGNED(8) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uint x, y; - }; - __extension__ struct - { - cl_uint s0, s1; - }; - __extension__ struct - { - cl_uint lo, hi; - }; -#endif -#if defined(__CL_UINT2__) - __cl_uint2 v2; -#endif - } cl_uint2; - - typedef union { - cl_uint CL_ALIGNED(16) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uint x, y, z, w; - }; - __extension__ struct - { - cl_uint s0, s1, s2, s3; - }; - __extension__ struct - { - cl_uint2 lo, hi; - }; -#endif -#if defined(__CL_UINT2__) - __cl_uint2 v2[2]; -#endif -#if defined(__CL_UINT4__) - __cl_uint4 v4; -#endif - } cl_uint4; - - /* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ - typedef cl_uint4 cl_uint3; - - typedef union { - cl_uint CL_ALIGNED(32) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uint x, y, z, w; - }; - __extension__ struct - { - cl_uint s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_uint4 lo, hi; - }; -#endif -#if defined(__CL_UINT2__) - __cl_uint2 v2[4]; -#endif -#if defined(__CL_UINT4__) - __cl_uint4 v4[2]; -#endif -#if defined(__CL_UINT8__) - __cl_uint8 v8; -#endif - } cl_uint8; - - typedef union { - cl_uint CL_ALIGNED(64) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_uint8 lo, hi; - }; -#endif -#if defined(__CL_UINT2__) - __cl_uint2 v2[8]; -#endif -#if defined(__CL_UINT4__) - __cl_uint4 v4[4]; -#endif -#if defined(__CL_UINT8__) - __cl_uint8 v8[2]; -#endif -#if defined(__CL_UINT16__) - __cl_uint16 v16; -#endif - } cl_uint16; - - /* ---- cl_longn ---- */ - typedef union { - cl_long CL_ALIGNED(16) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_long x, y; - }; - __extension__ struct - { - cl_long s0, s1; - }; - __extension__ struct - { - cl_long lo, hi; - }; -#endif -#if defined(__CL_LONG2__) - __cl_long2 v2; -#endif - } cl_long2; - - typedef union { - cl_long CL_ALIGNED(32) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_long x, y, z, w; - }; - __extension__ struct - { - cl_long s0, s1, s2, s3; - }; - __extension__ struct - { - cl_long2 lo, hi; - }; -#endif -#if defined(__CL_LONG2__) - __cl_long2 v2[2]; -#endif -#if defined(__CL_LONG4__) - __cl_long4 v4; -#endif - } cl_long4; - - /* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ - typedef cl_long4 cl_long3; - - typedef union { - cl_long CL_ALIGNED(64) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_long x, y, z, w; - }; - __extension__ struct - { - cl_long s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_long4 lo, hi; - }; -#endif -#if defined(__CL_LONG2__) - __cl_long2 v2[4]; -#endif -#if defined(__CL_LONG4__) - __cl_long4 v4[2]; -#endif -#if defined(__CL_LONG8__) - __cl_long8 v8; -#endif - } cl_long8; - - typedef union { - cl_long CL_ALIGNED(128) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_long8 lo, hi; - }; -#endif -#if defined(__CL_LONG2__) - __cl_long2 v2[8]; -#endif -#if defined(__CL_LONG4__) - __cl_long4 v4[4]; -#endif -#if defined(__CL_LONG8__) - __cl_long8 v8[2]; -#endif -#if defined(__CL_LONG16__) - __cl_long16 v16; -#endif - } cl_long16; - - /* ---- cl_ulongn ---- */ - typedef union { - cl_ulong CL_ALIGNED(16) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ulong x, y; - }; - __extension__ struct - { - cl_ulong s0, s1; - }; - __extension__ struct - { - cl_ulong lo, hi; - }; -#endif -#if defined(__CL_ULONG2__) - __cl_ulong2 v2; -#endif - } cl_ulong2; - - typedef union { - cl_ulong CL_ALIGNED(32) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ulong x, y, z, w; - }; - __extension__ struct - { - cl_ulong s0, s1, s2, s3; - }; - __extension__ struct - { - cl_ulong2 lo, hi; - }; -#endif -#if defined(__CL_ULONG2__) - __cl_ulong2 v2[2]; -#endif -#if defined(__CL_ULONG4__) - __cl_ulong4 v4; -#endif - } cl_ulong4; - - /* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ - typedef cl_ulong4 cl_ulong3; - - typedef union { - cl_ulong CL_ALIGNED(64) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ulong x, y, z, w; - }; - __extension__ struct - { - cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_ulong4 lo, hi; - }; -#endif -#if defined(__CL_ULONG2__) - __cl_ulong2 v2[4]; -#endif -#if defined(__CL_ULONG4__) - __cl_ulong4 v4[2]; -#endif -#if defined(__CL_ULONG8__) - __cl_ulong8 v8; -#endif - } cl_ulong8; - - typedef union { - cl_ulong CL_ALIGNED(128) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_ulong8 lo, hi; - }; -#endif -#if defined(__CL_ULONG2__) - __cl_ulong2 v2[8]; -#endif -#if defined(__CL_ULONG4__) - __cl_ulong4 v4[4]; -#endif -#if defined(__CL_ULONG8__) - __cl_ulong8 v8[2]; -#endif -#if defined(__CL_ULONG16__) - __cl_ulong16 v16; -#endif - } cl_ulong16; - - /* --- cl_floatn ---- */ - - typedef union { - cl_float CL_ALIGNED(8) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_float x, y; - }; - __extension__ struct - { - cl_float s0, s1; - }; - __extension__ struct - { - cl_float lo, hi; - }; -#endif -#if defined(__CL_FLOAT2__) - __cl_float2 v2; -#endif - } cl_float2; - - typedef union { - cl_float CL_ALIGNED(16) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_float x, y, z, w; - }; - __extension__ struct - { - cl_float s0, s1, s2, s3; - }; - __extension__ struct - { - cl_float2 lo, hi; - }; -#endif -#if defined(__CL_FLOAT2__) - __cl_float2 v2[2]; -#endif -#if defined(__CL_FLOAT4__) - __cl_float4 v4; -#endif - } cl_float4; - - /* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ - typedef cl_float4 cl_float3; - - typedef union { - cl_float CL_ALIGNED(32) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_float x, y, z, w; - }; - __extension__ struct - { - cl_float s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_float4 lo, hi; - }; -#endif -#if defined(__CL_FLOAT2__) - __cl_float2 v2[4]; -#endif -#if defined(__CL_FLOAT4__) - __cl_float4 v4[2]; -#endif -#if defined(__CL_FLOAT8__) - __cl_float8 v8; -#endif - } cl_float8; - - typedef union { - cl_float CL_ALIGNED(64) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_float8 lo, hi; - }; -#endif -#if defined(__CL_FLOAT2__) - __cl_float2 v2[8]; -#endif -#if defined(__CL_FLOAT4__) - __cl_float4 v4[4]; -#endif -#if defined(__CL_FLOAT8__) - __cl_float8 v8[2]; -#endif -#if defined(__CL_FLOAT16__) - __cl_float16 v16; -#endif - } cl_float16; - - /* --- cl_doublen ---- */ - - typedef union { - cl_double CL_ALIGNED(16) s[2]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_double x, y; - }; - __extension__ struct - { - cl_double s0, s1; - }; - __extension__ struct - { - cl_double lo, hi; - }; -#endif -#if defined(__CL_DOUBLE2__) - __cl_double2 v2; -#endif - } cl_double2; - - typedef union { - cl_double CL_ALIGNED(32) s[4]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_double x, y, z, w; - }; - __extension__ struct - { - cl_double s0, s1, s2, s3; - }; - __extension__ struct - { - cl_double2 lo, hi; - }; -#endif -#if defined(__CL_DOUBLE2__) - __cl_double2 v2[2]; -#endif -#if defined(__CL_DOUBLE4__) - __cl_double4 v4; -#endif - } cl_double4; - - /* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ - typedef cl_double4 cl_double3; - - typedef union { - cl_double CL_ALIGNED(64) s[8]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_double x, y, z, w; - }; - __extension__ struct - { - cl_double s0, s1, s2, s3, s4, s5, s6, s7; - }; - __extension__ struct - { - cl_double4 lo, hi; - }; -#endif -#if defined(__CL_DOUBLE2__) - __cl_double2 v2[4]; -#endif -#if defined(__CL_DOUBLE4__) - __cl_double4 v4[2]; -#endif -#if defined(__CL_DOUBLE8__) - __cl_double8 v8; -#endif - } cl_double8; - - typedef union { - cl_double CL_ALIGNED(128) s[16]; -#if defined(CL_NAMED_STRUCT_SUPPORTED) - __extension__ struct - { - cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; - }; - __extension__ struct - { - cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; - }; - __extension__ struct - { - cl_double8 lo, hi; - }; -#endif -#if defined(__CL_DOUBLE2__) - __cl_double2 v2[8]; -#endif -#if defined(__CL_DOUBLE4__) - __cl_double4 v4[4]; -#endif -#if defined(__CL_DOUBLE8__) - __cl_double8 v8[2]; -#endif -#if defined(__CL_DOUBLE16__) - __cl_double16 v16; -#endif - } cl_double16; - -/* Macro to facilitate debugging - * Usage: - * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. - * The first line ends with: CL_PROGRAM_STRING_BEGIN \" - * Each line thereafter of OpenCL C source must end with: \n\ - * The last line ends in "; - * - * Example: - * - * const char *my_program = CL_PROGRAM_STRING_BEGIN "\ - * kernel void foo( int a, float * b ) \n\ - * { \n\ - * // my comment \n\ - * *b[ get_global_id(0)] = a; \n\ - * } \n\ - * "; - * - * This should correctly set up the line, (column) and file information for your source - * string so you can do source level debugging. - */ -#define __CL_STRINGIFY(_x) #_x -#define _CL_STRINGIFY(_x) __CL_STRINGIFY(_x) -#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" - - // CL.h contents - /******************************************************************************/ - - typedef struct _cl_platform_id *cl_platform_id; - typedef struct _cl_device_id *cl_device_id; - typedef struct _cl_context *cl_context; - typedef struct _cl_command_queue *cl_command_queue; - typedef struct _cl_mem *cl_mem; - typedef struct _cl_program *cl_program; - typedef struct _cl_kernel *cl_kernel; - typedef struct _cl_event *cl_event; - typedef struct _cl_sampler *cl_sampler; - - typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ - typedef cl_ulong cl_bitfield; - typedef cl_bitfield cl_device_type; - typedef cl_uint cl_platform_info; - typedef cl_uint cl_device_info; - typedef cl_bitfield cl_device_fp_config; - typedef cl_uint cl_device_mem_cache_type; - typedef cl_uint cl_device_local_mem_type; - typedef cl_bitfield cl_device_exec_capabilities; - typedef cl_bitfield cl_command_queue_properties; - - typedef intptr_t cl_context_properties; - typedef cl_uint cl_context_info; - typedef cl_uint cl_command_queue_info; - typedef cl_uint cl_channel_order; - typedef cl_uint cl_channel_type; - typedef cl_bitfield cl_mem_flags; - typedef cl_uint cl_mem_object_type; - typedef cl_uint cl_mem_info; - typedef cl_uint cl_image_info; - typedef cl_uint cl_buffer_create_type; - typedef cl_uint cl_addressing_mode; - typedef cl_uint cl_filter_mode; - typedef cl_uint cl_sampler_info; - typedef cl_bitfield cl_map_flags; - typedef cl_uint cl_program_info; - typedef cl_uint cl_program_build_info; - typedef cl_int cl_build_status; - typedef cl_uint cl_kernel_info; - typedef cl_uint cl_kernel_work_group_info; - typedef cl_uint cl_event_info; - typedef cl_uint cl_command_type; - typedef cl_uint cl_profiling_info; - - typedef struct _cl_image_format - { - cl_channel_order image_channel_order; - cl_channel_type image_channel_data_type; - } cl_image_format; - - typedef struct _cl_buffer_region - { - size_t origin; - size_t size; - } cl_buffer_region; - -/******************************************************************************/ - -/* Error Codes */ -#define CL_SUCCESS 0 -#define CL_DEVICE_NOT_FOUND -1 -#define CL_DEVICE_NOT_AVAILABLE -2 -#define CL_COMPILER_NOT_AVAILABLE -3 -#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 -#define CL_OUT_OF_RESOURCES -5 -#define CL_OUT_OF_HOST_MEMORY -6 -#define CL_PROFILING_INFO_NOT_AVAILABLE -7 -#define CL_MEM_COPY_OVERLAP -8 -#define CL_IMAGE_FORMAT_MISMATCH -9 -#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 -#define CL_BUILD_PROGRAM_FAILURE -11 -#define CL_MAP_FAILURE -12 -#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 -#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 - -#define CL_INVALID_VALUE -30 -#define CL_INVALID_DEVICE_TYPE -31 -#define CL_INVALID_PLATFORM -32 -#define CL_INVALID_DEVICE -33 -#define CL_INVALID_CONTEXT -34 -#define CL_INVALID_QUEUE_PROPERTIES -35 -#define CL_INVALID_COMMAND_QUEUE -36 -#define CL_INVALID_HOST_PTR -37 -#define CL_INVALID_MEM_OBJECT -38 -#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 -#define CL_INVALID_IMAGE_SIZE -40 -#define CL_INVALID_SAMPLER -41 -#define CL_INVALID_BINARY -42 -#define CL_INVALID_BUILD_OPTIONS -43 -#define CL_INVALID_PROGRAM -44 -#define CL_INVALID_PROGRAM_EXECUTABLE -45 -#define CL_INVALID_KERNEL_NAME -46 -#define CL_INVALID_KERNEL_DEFINITION -47 -#define CL_INVALID_KERNEL -48 -#define CL_INVALID_ARG_INDEX -49 -#define CL_INVALID_ARG_VALUE -50 -#define CL_INVALID_ARG_SIZE -51 -#define CL_INVALID_KERNEL_ARGS -52 -#define CL_INVALID_WORK_DIMENSION -53 -#define CL_INVALID_WORK_GROUP_SIZE -54 -#define CL_INVALID_WORK_ITEM_SIZE -55 -#define CL_INVALID_GLOBAL_OFFSET -56 -#define CL_INVALID_EVENT_WAIT_LIST -57 -#define CL_INVALID_EVENT -58 -#define CL_INVALID_OPERATION -59 -#define CL_INVALID_GL_OBJECT -60 -#define CL_INVALID_BUFFER_SIZE -61 -#define CL_INVALID_MIP_LEVEL -62 -#define CL_INVALID_GLOBAL_WORK_SIZE -63 -#define CL_INVALID_PROPERTY -64 - -/* OpenCL Version */ -#define CL_VERSION_1_0 1 -#define CL_VERSION_1_1 1 - -/* cl_bool */ -#define CL_FALSE 0 -#define CL_TRUE 1 - -/* cl_platform_info */ -#define CL_PLATFORM_PROFILE 0x0900 -#define CL_PLATFORM_VERSION 0x0901 -#define CL_PLATFORM_NAME 0x0902 -#define CL_PLATFORM_VENDOR 0x0903 -#define CL_PLATFORM_EXTENSIONS 0x0904 - -/* cl_device_type - bitfield */ -#define CL_DEVICE_TYPE_DEFAULT (1 << 0) -#define CL_DEVICE_TYPE_CPU (1 << 1) -#define CL_DEVICE_TYPE_GPU (1 << 2) -#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) -#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF - -/* cl_device_info */ -#define CL_DEVICE_TYPE 0x1000 -#define CL_DEVICE_VENDOR_ID 0x1001 -#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 -#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 -#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 -#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B -#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C -#define CL_DEVICE_ADDRESS_BITS 0x100D -#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E -#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F -#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 -#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 -#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 -#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 -#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 -#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 -#define CL_DEVICE_IMAGE_SUPPORT 0x1016 -#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 -#define CL_DEVICE_MAX_SAMPLERS 0x1018 -#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 -#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A -#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B -#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C -#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D -#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E -#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F -#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 -#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 -#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 -#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 -#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 -#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 -#define CL_DEVICE_ENDIAN_LITTLE 0x1026 -#define CL_DEVICE_AVAILABLE 0x1027 -#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 -#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 -#define CL_DEVICE_QUEUE_PROPERTIES 0x102A -#define CL_DEVICE_NAME 0x102B -#define CL_DEVICE_VENDOR 0x102C -#define CL_DRIVER_VERSION 0x102D -#define CL_DEVICE_PROFILE 0x102E -#define CL_DEVICE_VERSION 0x102F -#define CL_DEVICE_EXTENSIONS 0x1030 -#define CL_DEVICE_PLATFORM 0x1031 -/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */ -/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 -#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C -#define CL_DEVICE_OPENCL_C_VERSION 0x103D - -/* cl_device_fp_config - bitfield */ -#define CL_FP_DENORM (1 << 0) -#define CL_FP_INF_NAN (1 << 1) -#define CL_FP_ROUND_TO_NEAREST (1 << 2) -#define CL_FP_ROUND_TO_ZERO (1 << 3) -#define CL_FP_ROUND_TO_INF (1 << 4) -#define CL_FP_FMA (1 << 5) -#define CL_FP_SOFT_FLOAT (1 << 6) - -/* cl_device_mem_cache_type */ -#define CL_NONE 0x0 -#define CL_READ_ONLY_CACHE 0x1 -#define CL_READ_WRITE_CACHE 0x2 - -/* cl_device_local_mem_type */ -#define CL_LOCAL 0x1 -#define CL_GLOBAL 0x2 - -/* cl_device_exec_capabilities - bitfield */ -#define CL_EXEC_KERNEL (1 << 0) -#define CL_EXEC_NATIVE_KERNEL (1 << 1) - -/* cl_command_queue_properties - bitfield */ -#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) -#define CL_QUEUE_PROFILING_ENABLE (1 << 1) - -/* cl_context_info */ -#define CL_CONTEXT_REFERENCE_COUNT 0x1080 -#define CL_CONTEXT_DEVICES 0x1081 -#define CL_CONTEXT_PROPERTIES 0x1082 -#define CL_CONTEXT_NUM_DEVICES 0x1083 - -/* cl_context_info + cl_context_properties */ -#define CL_CONTEXT_PLATFORM 0x1084 - -/* cl_command_queue_info */ -#define CL_QUEUE_CONTEXT 0x1090 -#define CL_QUEUE_DEVICE 0x1091 -#define CL_QUEUE_REFERENCE_COUNT 0x1092 -#define CL_QUEUE_PROPERTIES 0x1093 - -/* cl_mem_flags - bitfield */ -#define CL_MEM_READ_WRITE (1 << 0) -#define CL_MEM_WRITE_ONLY (1 << 1) -#define CL_MEM_READ_ONLY (1 << 2) -#define CL_MEM_USE_HOST_PTR (1 << 3) -#define CL_MEM_ALLOC_HOST_PTR (1 << 4) -#define CL_MEM_COPY_HOST_PTR (1 << 5) - -/* cl_channel_order */ -#define CL_R 0x10B0 -#define CL_A 0x10B1 -#define CL_RG 0x10B2 -#define CL_RA 0x10B3 -#define CL_RGB 0x10B4 -#define CL_RGBA 0x10B5 -#define CL_BGRA 0x10B6 -#define CL_ARGB 0x10B7 -#define CL_INTENSITY 0x10B8 -#define CL_LUMINANCE 0x10B9 -#define CL_Rx 0x10BA -#define CL_RGx 0x10BB -#define CL_RGBx 0x10BC - -/* cl_channel_type */ -#define CL_SNORM_INT8 0x10D0 -#define CL_SNORM_INT16 0x10D1 -#define CL_UNORM_INT8 0x10D2 -#define CL_UNORM_INT16 0x10D3 -#define CL_UNORM_SHORT_565 0x10D4 -#define CL_UNORM_SHORT_555 0x10D5 -#define CL_UNORM_INT_101010 0x10D6 -#define CL_SIGNED_INT8 0x10D7 -#define CL_SIGNED_INT16 0x10D8 -#define CL_SIGNED_INT32 0x10D9 -#define CL_UNSIGNED_INT8 0x10DA -#define CL_UNSIGNED_INT16 0x10DB -#define CL_UNSIGNED_INT32 0x10DC -#define CL_HALF_FLOAT 0x10DD -#define CL_FLOAT 0x10DE - -/* cl_mem_object_type */ -#define CL_MEM_OBJECT_BUFFER 0x10F0 -#define CL_MEM_OBJECT_IMAGE2D 0x10F1 -#define CL_MEM_OBJECT_IMAGE3D 0x10F2 - -/* cl_mem_info */ -#define CL_MEM_TYPE 0x1100 -#define CL_MEM_FLAGS 0x1101 -#define CL_MEM_SIZE 0x1102 -#define CL_MEM_HOST_PTR 0x1103 -#define CL_MEM_MAP_COUNT 0x1104 -#define CL_MEM_REFERENCE_COUNT 0x1105 -#define CL_MEM_CONTEXT 0x1106 -#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 -#define CL_MEM_OFFSET 0x1108 - -/* cl_image_info */ -#define CL_IMAGE_FORMAT 0x1110 -#define CL_IMAGE_ELEMENT_SIZE 0x1111 -#define CL_IMAGE_ROW_PITCH 0x1112 -#define CL_IMAGE_SLICE_PITCH 0x1113 -#define CL_IMAGE_WIDTH 0x1114 -#define CL_IMAGE_HEIGHT 0x1115 -#define CL_IMAGE_DEPTH 0x1116 - -/* cl_addressing_mode */ -#define CL_ADDRESS_NONE 0x1130 -#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 -#define CL_ADDRESS_CLAMP 0x1132 -#define CL_ADDRESS_REPEAT 0x1133 -#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 - -/* cl_filter_mode */ -#define CL_FILTER_NEAREST 0x1140 -#define CL_FILTER_LINEAR 0x1141 - -/* cl_sampler_info */ -#define CL_SAMPLER_REFERENCE_COUNT 0x1150 -#define CL_SAMPLER_CONTEXT 0x1151 -#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 -#define CL_SAMPLER_ADDRESSING_MODE 0x1153 -#define CL_SAMPLER_FILTER_MODE 0x1154 - -/* cl_map_flags - bitfield */ -#define CL_MAP_READ (1 << 0) -#define CL_MAP_WRITE (1 << 1) - -/* cl_program_info */ -#define CL_PROGRAM_REFERENCE_COUNT 0x1160 -#define CL_PROGRAM_CONTEXT 0x1161 -#define CL_PROGRAM_NUM_DEVICES 0x1162 -#define CL_PROGRAM_DEVICES 0x1163 -#define CL_PROGRAM_SOURCE 0x1164 -#define CL_PROGRAM_BINARY_SIZES 0x1165 -#define CL_PROGRAM_BINARIES 0x1166 - -/* cl_program_build_info */ -#define CL_PROGRAM_BUILD_STATUS 0x1181 -#define CL_PROGRAM_BUILD_OPTIONS 0x1182 -#define CL_PROGRAM_BUILD_LOG 0x1183 - -/* cl_build_status */ -#define CL_BUILD_SUCCESS 0 -#define CL_BUILD_NONE -1 -#define CL_BUILD_ERROR -2 -#define CL_BUILD_IN_PROGRESS -3 - -/* cl_kernel_info */ -#define CL_KERNEL_FUNCTION_NAME 0x1190 -#define CL_KERNEL_NUM_ARGS 0x1191 -#define CL_KERNEL_REFERENCE_COUNT 0x1192 -#define CL_KERNEL_CONTEXT 0x1193 -#define CL_KERNEL_PROGRAM 0x1194 - -/* cl_kernel_work_group_info */ -#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 -#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 -#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 -#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 -#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 - -/* cl_event_info */ -#define CL_EVENT_COMMAND_QUEUE 0x11D0 -#define CL_EVENT_COMMAND_TYPE 0x11D1 -#define CL_EVENT_REFERENCE_COUNT 0x11D2 -#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 -#define CL_EVENT_CONTEXT 0x11D4 - -/* cl_command_type */ -#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 -#define CL_COMMAND_TASK 0x11F1 -#define CL_COMMAND_NATIVE_KERNEL 0x11F2 -#define CL_COMMAND_READ_BUFFER 0x11F3 -#define CL_COMMAND_WRITE_BUFFER 0x11F4 -#define CL_COMMAND_COPY_BUFFER 0x11F5 -#define CL_COMMAND_READ_IMAGE 0x11F6 -#define CL_COMMAND_WRITE_IMAGE 0x11F7 -#define CL_COMMAND_COPY_IMAGE 0x11F8 -#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 -#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA -#define CL_COMMAND_MAP_BUFFER 0x11FB -#define CL_COMMAND_MAP_IMAGE 0x11FC -#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD -#define CL_COMMAND_MARKER 0x11FE -#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF -#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 -#define CL_COMMAND_READ_BUFFER_RECT 0x1201 -#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 -#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 -#define CL_COMMAND_USER 0x1204 - -/* command execution status */ -#define CL_COMPLETE 0x0 -#define CL_RUNNING 0x1 -#define CL_SUBMITTED 0x2 -#define CL_QUEUED 0x3 - -/* cl_buffer_create_type */ -#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 - -/* cl_profiling_info */ -#define CL_PROFILING_COMMAND_QUEUED 0x1280 -#define CL_PROFILING_COMMAND_SUBMIT 0x1281 -#define CL_PROFILING_COMMAND_START 0x1282 -#define CL_PROFILING_COMMAND_END 0x1283 - - /********************************************************************************************************/ - - /********************************************************************************************************/ - - /* Function signature typedef's */ - - /* Platform API */ - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETPLATFORMIDS)(cl_uint /* num_entries */, - cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETPLATFORMINFO)(cl_platform_id /* platform */, - cl_platform_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - /* Device APIs */ - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETDEVICEIDS)(cl_platform_id /* platform */, - cl_device_type /* device_type */, - cl_uint /* num_entries */, - cl_device_id * /* devices */, - cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETDEVICEINFO)(cl_device_id /* device */, - cl_device_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - // Context APIs - typedef CL_API_ENTRY cl_context(CL_API_CALL * - PFNCLCREATECONTEXT)(const cl_context_properties * /* properties */, - cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void(CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_context(CL_API_CALL * - PFNCLCREATECONTEXTFROMTYPE)(const cl_context_properties * /* properties */, - cl_device_type /* device_type */, - void(CL_CALLBACK * /* pfn_notify*/)(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINCONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASECONTEXT)(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETCONTEXTINFO)(cl_context /* context */, - cl_context_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - /* Command Queue APIs */ - typedef CL_API_ENTRY cl_command_queue(CL_API_CALL * - PFNCLCREATECOMMANDQUEUE)(cl_context /* context */, - cl_device_id /* device */, - cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINCOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASECOMMANDQUEUE)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETCOMMANDQUEUEINFO)(cl_command_queue /* command_queue */, - cl_command_queue_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLSETCOMMANDQUEUEPROPERTY)(cl_command_queue /* command_queue */, - cl_command_queue_properties /* properties */, - cl_bool /* enable */, - cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0; - - /* Memory Object APIs */ - typedef CL_API_ENTRY cl_mem(CL_API_CALL * - PFNCLCREATEBUFFER)(cl_context /* context */, - cl_mem_flags /* flags */, - size_t /* size */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_mem(CL_API_CALL * - PFNCLCREATESUBBUFFER)(cl_mem /* buffer */, - cl_mem_flags /* flags */, - cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; - - typedef CL_API_ENTRY cl_mem(CL_API_CALL * - PFNCLCREATEIMAGE2D)(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_mem(CL_API_CALL * - PFNCLCREATEIMAGE3D)(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_depth */, - size_t /* image_row_pitch */, - size_t /* image_slice_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASEMEMOBJECT)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETSUPPORTEDIMAGEFORMATS)(cl_context /* context */, - cl_mem_flags /* flags */, - cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, - cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETMEMOBJECTINFO)(cl_mem /* memobj */, - cl_mem_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETIMAGEINFO)(cl_mem /* image */, - cl_image_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLSETMEMOBJECTDESTRUCTORCALLBACK)(cl_mem /* memobj */, - void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/), - void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; - - /* Sampler APIs */ - typedef CL_API_ENTRY cl_sampler(CL_API_CALL * - PFNCLCREATESAMPLER)(cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINSAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASESAMPLER)(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETSAMPLERINFO)(cl_sampler /* sampler */, - cl_sampler_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - /* Program Object APIs */ - typedef CL_API_ENTRY cl_program(CL_API_CALL * - PFNCLCREATEPROGRAMWITHSOURCE)(cl_context /* context */, - cl_uint /* count */, - const char ** /* strings */, - const size_t * /* lengths */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_program(CL_API_CALL * - PFNCLCREATEPROGRAMWITHBINARY)(cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const size_t * /* lengths */, - const unsigned char ** /* binaries */, - cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASEPROGRAM)(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLBUILDPROGRAM)(cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLUNLOADCOMPILER)(void) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETPROGRAMINFO)(cl_program /* program */, - cl_program_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETPROGRAMBUILDINFO)(cl_program /* program */, - cl_device_id /* device */, - cl_program_build_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - /* Kernel Object APIs */ - typedef CL_API_ENTRY cl_kernel(CL_API_CALL * - PFNCLCREATEKERNEL)(cl_program /* program */, - const char * /* kernel_name */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLCREATEKERNELSINPROGRAM)(cl_program /* program */, - cl_uint /* num_kernels */, - cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASEKERNEL)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLSETKERNELARG)(cl_kernel /* kernel */, - cl_uint /* arg_index */, - size_t /* arg_size */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETKERNELINFO)(cl_kernel /* kernel */, - cl_kernel_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETKERNELWORKGROUPINFO)(cl_kernel /* kernel */, - cl_device_id /* device */, - cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - // Event Object APIs - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLWAITFOREVENTS)(cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETEVENTINFO)(cl_event /* event */, - cl_event_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_event(CL_API_CALL * - PFNCLCREATEUSEREVENT)(cl_context /* context */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRETAINEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLRELEASEEVENT)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLSETUSEREVENTSTATUS)(cl_event /* event */, - cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLSETEVENTCALLBACK)(cl_event /* event */, - cl_int /* command_exec_callback_type */, - void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; - - /* Profiling APIs */ - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLGETEVENTPROFILINGINFO)(cl_event /* event */, - cl_profiling_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - - // Flush and Finish APIs - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLFLUSH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLFINISH)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - - /* Enqueued Commands APIs */ - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEREADBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - size_t /* offset */, - size_t /* cb */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEREADBUFFERRECT)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - const size_t * /* buffer_origin */, - const size_t * /* host_origin */, - const size_t * /* region */, - size_t /* buffer_row_pitch */, - size_t /* buffer_slice_pitch */, - size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEWRITEBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - size_t /* offset */, - size_t /* cb */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEWRITEBUFFERRECT)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - const size_t * /* buffer_origin */, - const size_t * /* host_origin */, - const size_t * /* region */, - size_t /* buffer_row_pitch */, - size_t /* buffer_slice_pitch */, - size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUECOPYBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - size_t /* src_offset */, - size_t /* dst_offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUECOPYBUFFERRECT)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin */, - const size_t * /* dst_origin */, - const size_t * /* region */, - size_t /* src_row_pitch */, - size_t /* src_slice_pitch */, - size_t /* dst_row_pitch */, - size_t /* dst_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEREADIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_read */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* row_pitch */, - size_t /* slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEWRITEIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_write */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUECOPYIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUECOPYIMAGETOBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, - const size_t * /* region[3] */, - size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUECOPYBUFFERTOIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_image */, - size_t /* src_offset */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY void *(CL_API_CALL * - PFNCLENQUEUEMAPBUFFER)(cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - size_t /* offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY void *(CL_API_CALL * - PFNCLENQUEUEMAPIMAGE)(cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t * /* image_row_pitch */, - size_t * /* image_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEUNMAPMEMOBJECT)(cl_command_queue /* command_queue */, - cl_mem /* memobj */, - void * /* mapped_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUENDRANGEKERNEL)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* work_dim */, - const size_t * /* global_work_offset */, - const size_t * /* global_work_size */, - const size_t * /* local_work_size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUETASK)(cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUENATIVEKERNEL)(cl_command_queue /* command_queue */, - void (*user_func)(void *), - void * /* args */, - size_t /* cb_args */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, - const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEMARKER)(cl_command_queue /* command_queue */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEWAITFOREVENTS)(cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - - typedef CL_API_ENTRY cl_int(CL_API_CALL * - PFNCLENQUEUEBARRIER)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - - // Extension function access - // - // Returns the extension function address for the given function name, - // or NULL if a valid function can not be found. The client must - // check to make sure the address is not NULL, before using or - // calling the returned function address. - // - typedef CL_API_ENTRY void *(CL_API_CALL *PFNCLGETEXTENSIONFUNCTIONADDRESS)(const char * /* func_name */)CL_API_SUFFIX__VERSION_1_0; - -#define CLEW_STATIC - -#ifdef CLEW_STATIC -#define CLEWAPI extern -#else -#ifdef CLEW_BUILD -#define CLEWAPI extern __declspec(dllexport) -#else -#define CLEWAPI extern __declspec(dllimport) -#endif -#endif - -#if defined(_WIN32) -#define CLEW_FUN_EXPORT extern -#else -#define CLEW_FUN_EXPORT CLEWAPI -#endif - -#define CLEW_GET_FUN(x) x - - // Variables holding function entry points - CLEW_FUN_EXPORT PFNCLGETPLATFORMIDS __clewGetPlatformIDs; - CLEW_FUN_EXPORT PFNCLGETPLATFORMINFO __clewGetPlatformInfo; - CLEW_FUN_EXPORT PFNCLGETDEVICEIDS __clewGetDeviceIDs; - CLEW_FUN_EXPORT PFNCLGETDEVICEINFO __clewGetDeviceInfo; - CLEW_FUN_EXPORT PFNCLCREATECONTEXT __clewCreateContext; - CLEW_FUN_EXPORT PFNCLCREATECONTEXTFROMTYPE __clewCreateContextFromType; - CLEW_FUN_EXPORT PFNCLRETAINCONTEXT __clewRetainContext; - CLEW_FUN_EXPORT PFNCLRELEASECONTEXT __clewReleaseContext; - CLEW_FUN_EXPORT PFNCLGETCONTEXTINFO __clewGetContextInfo; - CLEW_FUN_EXPORT PFNCLCREATECOMMANDQUEUE __clewCreateCommandQueue; - CLEW_FUN_EXPORT PFNCLRETAINCOMMANDQUEUE __clewRetainCommandQueue; - CLEW_FUN_EXPORT PFNCLRELEASECOMMANDQUEUE __clewReleaseCommandQueue; - CLEW_FUN_EXPORT PFNCLGETCOMMANDQUEUEINFO __clewGetCommandQueueInfo; -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS - CLEW_FUN_EXPORT PFNCLSETCOMMANDQUEUEPROPERTY __clewSetCommandQueueProperty; -#endif - CLEW_FUN_EXPORT PFNCLCREATEBUFFER __clewCreateBuffer; - CLEW_FUN_EXPORT PFNCLCREATESUBBUFFER __clewCreateSubBuffer; - CLEW_FUN_EXPORT PFNCLCREATEIMAGE2D __clewCreateImage2D; - CLEW_FUN_EXPORT PFNCLCREATEIMAGE3D __clewCreateImage3D; - CLEW_FUN_EXPORT PFNCLRETAINMEMOBJECT __clewRetainMemObject; - CLEW_FUN_EXPORT PFNCLRELEASEMEMOBJECT __clewReleaseMemObject; - CLEW_FUN_EXPORT PFNCLGETSUPPORTEDIMAGEFORMATS __clewGetSupportedImageFormats; - CLEW_FUN_EXPORT PFNCLGETMEMOBJECTINFO __clewGetMemObjectInfo; - CLEW_FUN_EXPORT PFNCLGETIMAGEINFO __clewGetImageInfo; - CLEW_FUN_EXPORT PFNCLSETMEMOBJECTDESTRUCTORCALLBACK __clewSetMemObjectDestructorCallback; - CLEW_FUN_EXPORT PFNCLCREATESAMPLER __clewCreateSampler; - CLEW_FUN_EXPORT PFNCLRETAINSAMPLER __clewRetainSampler; - CLEW_FUN_EXPORT PFNCLRELEASESAMPLER __clewReleaseSampler; - CLEW_FUN_EXPORT PFNCLGETSAMPLERINFO __clewGetSamplerInfo; - CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHSOURCE __clewCreateProgramWithSource; - CLEW_FUN_EXPORT PFNCLCREATEPROGRAMWITHBINARY __clewCreateProgramWithBinary; - CLEW_FUN_EXPORT PFNCLRETAINPROGRAM __clewRetainProgram; - CLEW_FUN_EXPORT PFNCLRELEASEPROGRAM __clewReleaseProgram; - CLEW_FUN_EXPORT PFNCLBUILDPROGRAM __clewBuildProgram; - CLEW_FUN_EXPORT PFNCLUNLOADCOMPILER __clewUnloadCompiler; - CLEW_FUN_EXPORT PFNCLGETPROGRAMINFO __clewGetProgramInfo; - CLEW_FUN_EXPORT PFNCLGETPROGRAMBUILDINFO __clewGetProgramBuildInfo; - CLEW_FUN_EXPORT PFNCLCREATEKERNEL __clewCreateKernel; - CLEW_FUN_EXPORT PFNCLCREATEKERNELSINPROGRAM __clewCreateKernelsInProgram; - CLEW_FUN_EXPORT PFNCLRETAINKERNEL __clewRetainKernel; - CLEW_FUN_EXPORT PFNCLRELEASEKERNEL __clewReleaseKernel; - CLEW_FUN_EXPORT PFNCLSETKERNELARG __clewSetKernelArg; - CLEW_FUN_EXPORT PFNCLGETKERNELINFO __clewGetKernelInfo; - CLEW_FUN_EXPORT PFNCLGETKERNELWORKGROUPINFO __clewGetKernelWorkGroupInfo; - CLEW_FUN_EXPORT PFNCLWAITFOREVENTS __clewWaitForEvents; - CLEW_FUN_EXPORT PFNCLGETEVENTINFO __clewGetEventInfo; - CLEW_FUN_EXPORT PFNCLCREATEUSEREVENT __clewCreateUserEvent; - CLEW_FUN_EXPORT PFNCLRETAINEVENT __clewRetainEvent; - CLEW_FUN_EXPORT PFNCLRELEASEEVENT __clewReleaseEvent; - CLEW_FUN_EXPORT PFNCLSETUSEREVENTSTATUS __clewSetUserEventStatus; - CLEW_FUN_EXPORT PFNCLSETEVENTCALLBACK __clewSetEventCallback; - CLEW_FUN_EXPORT PFNCLGETEVENTPROFILINGINFO __clewGetEventProfilingInfo; - CLEW_FUN_EXPORT PFNCLFLUSH __clewFlush; - CLEW_FUN_EXPORT PFNCLFINISH __clewFinish; - CLEW_FUN_EXPORT PFNCLENQUEUEREADBUFFER __clewEnqueueReadBuffer; - CLEW_FUN_EXPORT PFNCLENQUEUEREADBUFFERRECT __clewEnqueueReadBufferRect; - CLEW_FUN_EXPORT PFNCLENQUEUEWRITEBUFFER __clewEnqueueWriteBuffer; - CLEW_FUN_EXPORT PFNCLENQUEUEWRITEBUFFERRECT __clewEnqueueWriteBufferRect; - CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFER __clewEnqueueCopyBuffer; - CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFERRECT __clewEnqueueCopyBufferRect; - CLEW_FUN_EXPORT PFNCLENQUEUEREADIMAGE __clewEnqueueReadImage; - CLEW_FUN_EXPORT PFNCLENQUEUEWRITEIMAGE __clewEnqueueWriteImage; - CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGE __clewEnqueueCopyImage; - CLEW_FUN_EXPORT PFNCLENQUEUECOPYIMAGETOBUFFER __clewEnqueueCopyImageToBuffer; - CLEW_FUN_EXPORT PFNCLENQUEUECOPYBUFFERTOIMAGE __clewEnqueueCopyBufferToImage; - CLEW_FUN_EXPORT PFNCLENQUEUEMAPBUFFER __clewEnqueueMapBuffer; - CLEW_FUN_EXPORT PFNCLENQUEUEMAPIMAGE __clewEnqueueMapImage; - CLEW_FUN_EXPORT PFNCLENQUEUEUNMAPMEMOBJECT __clewEnqueueUnmapMemObject; - CLEW_FUN_EXPORT PFNCLENQUEUENDRANGEKERNEL __clewEnqueueNDRangeKernel; - CLEW_FUN_EXPORT PFNCLENQUEUETASK __clewEnqueueTask; - CLEW_FUN_EXPORT PFNCLENQUEUENATIVEKERNEL __clewEnqueueNativeKernel; - CLEW_FUN_EXPORT PFNCLENQUEUEMARKER __clewEnqueueMarker; - CLEW_FUN_EXPORT PFNCLENQUEUEWAITFOREVENTS __clewEnqueueWaitForEvents; - CLEW_FUN_EXPORT PFNCLENQUEUEBARRIER __clewEnqueueBarrier; - CLEW_FUN_EXPORT PFNCLGETEXTENSIONFUNCTIONADDRESS __clewGetExtensionFunctionAddress; - -#define clGetPlatformIDs CLEW_GET_FUN(__clewGetPlatformIDs) -#define clGetPlatformInfo CLEW_GET_FUN(__clewGetPlatformInfo) -#define clGetDeviceIDs CLEW_GET_FUN(__clewGetDeviceIDs) -#define clGetDeviceInfo CLEW_GET_FUN(__clewGetDeviceInfo) -#define clCreateContext CLEW_GET_FUN(__clewCreateContext) -#define clCreateContextFromType CLEW_GET_FUN(__clewCreateContextFromType) -#define clRetainContext CLEW_GET_FUN(__clewRetainContext) -#define clReleaseContext CLEW_GET_FUN(__clewReleaseContext) -#define clGetContextInfo CLEW_GET_FUN(__clewGetContextInfo) -#define clCreateCommandQueue CLEW_GET_FUN(__clewCreateCommandQueue) -#define clRetainCommandQueue CLEW_GET_FUN(__clewRetainCommandQueue) -#define clReleaseCommandQueue CLEW_GET_FUN(__clewReleaseCommandQueue) -#define clGetCommandQueueInfo CLEW_GET_FUN(__clewGetCommandQueueInfo) -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS -#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! -/* - * WARNING: - * This API introduces mutable state into the OpenCL implementation. It has been REMOVED - * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the - * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. - * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. - * - * Software developers previously relying on this API are instructed to set the command queue - * properties when creating the queue, instead. - */ -#define clSetCommandQueueProperty CLEW_GET_FUN(__clewSetCommandQueueProperty) -#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ -#define clCreateBuffer CLEW_GET_FUN(__clewCreateBuffer) -#define clCreateSubBuffer CLEW_GET_FUN(__clewCreateSubBuffer) -#define clCreateImage2D CLEW_GET_FUN(__clewCreateImage2D) -#define clCreateImage3D CLEW_GET_FUN(__clewCreateImage3D) -#define clRetainMemObject CLEW_GET_FUN(__clewRetainMemObject) -#define clReleaseMemObject CLEW_GET_FUN(__clewReleaseMemObject) -#define clGetSupportedImageFormats CLEW_GET_FUN(__clewGetSupportedImageFormats) -#define clGetMemObjectInfo CLEW_GET_FUN(__clewGetMemObjectInfo) -#define clGetImageInfo CLEW_GET_FUN(__clewGetImageInfo) -#define clSetMemObjectDestructorCallback CLEW_GET_FUN(__clewSetMemObjectDestructorCallback) -#define clCreateSampler CLEW_GET_FUN(__clewCreateSampler) -#define clRetainSampler CLEW_GET_FUN(__clewRetainSampler) -#define clReleaseSampler CLEW_GET_FUN(__clewReleaseSampler) -#define clGetSamplerInfo CLEW_GET_FUN(__clewGetSamplerInfo) -#define clCreateProgramWithSource CLEW_GET_FUN(__clewCreateProgramWithSource) -#define clCreateProgramWithBinary CLEW_GET_FUN(__clewCreateProgramWithBinary) -#define clRetainProgram CLEW_GET_FUN(__clewRetainProgram) -#define clReleaseProgram CLEW_GET_FUN(__clewReleaseProgram) -#define clBuildProgram CLEW_GET_FUN(__clewBuildProgram) -#define clUnloadCompiler CLEW_GET_FUN(__clewUnloadCompiler) -#define clGetProgramInfo CLEW_GET_FUN(__clewGetProgramInfo) -#define clGetProgramBuildInfo CLEW_GET_FUN(__clewGetProgramBuildInfo) -#define clCreateKernel CLEW_GET_FUN(__clewCreateKernel) -#define clCreateKernelsInProgram CLEW_GET_FUN(__clewCreateKernelsInProgram) -#define clRetainKernel CLEW_GET_FUN(__clewRetainKernel) -#define clReleaseKernel CLEW_GET_FUN(__clewReleaseKernel) -#define clSetKernelArg CLEW_GET_FUN(__clewSetKernelArg) -#define clGetKernelInfo CLEW_GET_FUN(__clewGetKernelInfo) -#define clGetKernelWorkGroupInfo CLEW_GET_FUN(__clewGetKernelWorkGroupInfo) -#define clWaitForEvents CLEW_GET_FUN(__clewWaitForEvents) -#define clGetEventInfo CLEW_GET_FUN(__clewGetEventInfo) -#define clCreateUserEvent CLEW_GET_FUN(__clewCreateUserEvent) -#define clRetainEvent CLEW_GET_FUN(__clewRetainEvent) -#define clReleaseEvent CLEW_GET_FUN(__clewReleaseEvent) -#define clSetUserEventStatus CLEW_GET_FUN(__clewSetUserEventStatus) -#define clSetEventCallback CLEW_GET_FUN(__clewSetEventCallback) -#define clGetEventProfilingInfo CLEW_GET_FUN(__clewGetEventProfilingInfo) -#define clFlush CLEW_GET_FUN(__clewFlush) -#define clFinish CLEW_GET_FUN(__clewFinish) -#define clEnqueueReadBuffer CLEW_GET_FUN(__clewEnqueueReadBuffer) -#define clEnqueueReadBufferRect CLEW_GET_FUN(__clewEnqueueReadBufferRect) -#define clEnqueueWriteBuffer CLEW_GET_FUN(__clewEnqueueWriteBuffer) -#define clEnqueueWriteBufferRect CLEW_GET_FUN(__clewEnqueueWriteBufferRect) -#define clEnqueueCopyBuffer CLEW_GET_FUN(__clewEnqueueCopyBuffer) -#define clEnqueueCopyBufferRect CLEW_GET_FUN(__clewEnqueueCopyBufferRect) -#define clEnqueueReadImage CLEW_GET_FUN(__clewEnqueueReadImage) -#define clEnqueueWriteImage CLEW_GET_FUN(__clewEnqueueWriteImage) -#define clEnqueueCopyImage CLEW_GET_FUN(__clewEnqueueCopyImage) -#define clEnqueueCopyImageToBuffer CLEW_GET_FUN(__clewEnqueueCopyImageToBuffer) -#define clEnqueueCopyBufferToImage CLEW_GET_FUN(__clewEnqueueCopyBufferToImage) -#define clEnqueueMapBuffer CLEW_GET_FUN(__clewEnqueueMapBuffer) -#define clEnqueueMapImage CLEW_GET_FUN(__clewEnqueueMapImage) -#define clEnqueueUnmapMemObject CLEW_GET_FUN(__clewEnqueueUnmapMemObject) -#define clEnqueueNDRangeKernel CLEW_GET_FUN(__clewEnqueueNDRangeKernel) -#define clEnqueueTask CLEW_GET_FUN(__clewEnqueueTask) -#define clEnqueueNativeKernel CLEW_GET_FUN(__clewEnqueueNativeKernel) -#define clEnqueueMarker CLEW_GET_FUN(__clewEnqueueMarker) -#define clEnqueueWaitForEvents CLEW_GET_FUN(__clewEnqueueWaitForEvents) -#define clEnqueueBarrier CLEW_GET_FUN(__clewEnqueueBarrier) -#define clGetExtensionFunctionAddress CLEW_GET_FUN(__clewGetExtensionFunctionAddress) - -#define CLEW_SUCCESS 0 //!< Success error code -#define CLEW_ERROR_OPEN_FAILED -1 //!< Error code for failing to open the dynamic library -#define CLEW_ERROR_ATEXIT_FAILED -2 //!< Error code for failing to queue the closing of the dynamic library to atexit() - - //! \brief Load OpenCL dynamic library and set function entry points - int clewInit(const char *); - - //! \brief Exit clew and unload OpenCL dynamic library - void clewExit(); - - //! \brief Convert an OpenCL error code to its string equivalent - const char *clewErrorString(cl_int error); - -#ifdef __cplusplus -} -#endif - -#endif // CLEW_HPP_INCLUDED