2017-08-01 14:30:58 +02:00
// File-scope switches for the GPU PGS contact solver. Every switch defaults to false
// except optionalSortContactsDeterminism.
// NOTE(review): judging by the names, most of these select a CPU/host fallback for one
// solver stage; only the two switches annotated below are read in the visible part of
// this file - confirm the others against the code that uses them.
bool gUseLargeBatches = false ;
bool gCpuBatchContacts = false ;
bool gCpuSolveConstraint = false ;
2019-01-03 14:26:51 +01:00
bool gCpuRadixSort = false ;
2017-08-01 14:30:58 +02:00
bool gCpuSetSortData = false ;
// Read by solveContacts: false runs the determinism sort with GPU kernels, true copies the
// contacts to the host and sorts them there with quickSort.
bool gCpuSortContactsDeterminism = false ;
bool gUseCpuCopyConstraints = false ;
bool gUseScanHost = false ;
bool gReorderContactsOnCpu = false ;
// Read by solveContacts: the determinism sort pass only runs when this is true.
bool optionalSortContactsDeterminism = true ;
# include "b3GpuPgsContactSolver.h"
# include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
# include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
# include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h"
# include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
# include <string.h>
# include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
# include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
# include "b3Solver.h"
# define B3_SOLVER_SETUP_KERNEL_PATH "src / Bullet3OpenCL / RigidBody / kernels / solverSetup.cl"
# define B3_SOLVER_SETUP2_KERNEL_PATH "src / Bullet3OpenCL / RigidBody / kernels / solverSetup2.cl"
# define B3_SOLVER_CONTACT_KERNEL_PATH "src / Bullet3OpenCL / RigidBody / kernels / solveContact.cl"
# define B3_SOLVER_FRICTION_KERNEL_PATH "src / Bullet3OpenCL / RigidBody / kernels / solveFriction.cl"
# define B3_BATCHING_PATH "src / Bullet3OpenCL / RigidBody / kernels / batchingKernels.cl"
# define B3_BATCHING_NEW_PATH "src / Bullet3OpenCL / RigidBody / kernels / batchingKernelsNew.cl"
# include "kernels/solverSetup.h"
# include "kernels/solverSetup2.h"
# include "kernels/solveContact.h"
# include "kernels/solveFriction.h"
# include "kernels/batchingKernels.h"
# include "kernels/batchingKernelsNew.h"
2019-01-03 14:26:51 +01:00
struct b3GpuBatchingPgsSolverInternalData
2017-08-01 14:30:58 +02:00
{
cl_context m_context ;
cl_device_id m_device ;
cl_command_queue m_queue ;
int m_pairCapacity ;
int m_nIterations ;
b3OpenCLArray < b3GpuConstraint4 > * m_contactCGPU ;
b3OpenCLArray < unsigned int > * m_numConstraints ;
b3OpenCLArray < unsigned int > * m_offsets ;
2019-01-03 14:26:51 +01:00
b3Solver * m_solverGPU ;
2017-08-01 14:30:58 +02:00
cl_kernel m_batchingKernel ;
cl_kernel m_batchingKernelNew ;
cl_kernel m_solveContactKernel ;
cl_kernel m_solveSingleContactKernel ;
cl_kernel m_solveSingleFrictionKernel ;
cl_kernel m_solveFrictionKernel ;
cl_kernel m_contactToConstraintKernel ;
cl_kernel m_setSortDataKernel ;
cl_kernel m_reorderContactKernel ;
cl_kernel m_copyConstraintKernel ;
2019-01-03 14:26:51 +01:00
cl_kernel m_setDeterminismSortDataBodyAKernel ;
cl_kernel m_setDeterminismSortDataBodyBKernel ;
cl_kernel m_setDeterminismSortDataChildShapeAKernel ;
cl_kernel m_setDeterminismSortDataChildShapeBKernel ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
class b3RadixSort32CL * m_sort32 ;
class b3BoundSearchCL * m_search ;
class b3PrefixScanCL * m_scan ;
2017-08-01 14:30:58 +02:00
b3OpenCLArray < b3SortData > * m_sortDataBuffer ;
b3OpenCLArray < b3Contact4 > * m_contactBuffer ;
b3OpenCLArray < b3RigidBodyData > * m_bodyBufferGPU ;
b3OpenCLArray < b3InertiaData > * m_inertiaBufferGPU ;
b3OpenCLArray < b3Contact4 > * m_pBufContactOutGPU ;
2019-01-03 14:26:51 +01:00
b3OpenCLArray < b3Contact4 > * m_pBufContactOutGPUCopy ;
b3OpenCLArray < b3SortData > * m_contactKeyValues ;
2017-08-01 14:30:58 +02:00
b3AlignedObjectArray < unsigned int > m_idxBuffer ;
b3AlignedObjectArray < b3SortData > m_sortData ;
b3AlignedObjectArray < b3Contact4 > m_old ;
2019-01-03 14:26:51 +01:00
b3AlignedObjectArray < int > m_batchSizes ;
b3OpenCLArray < int > * m_batchSizesGpu ;
2017-08-01 14:30:58 +02:00
} ;
2019-01-03 14:26:51 +01:00
b3GpuPgsContactSolver : : b3GpuPgsContactSolver ( cl_context ctx , cl_device_id device , cl_command_queue q , int pairCapacity )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
m_debugOutput = 0 ;
2017-08-01 14:30:58 +02:00
m_data = new b3GpuBatchingPgsSolverInternalData ;
m_data - > m_context = ctx ;
m_data - > m_device = device ;
m_data - > m_queue = q ;
m_data - > m_pairCapacity = pairCapacity ;
m_data - > m_nIterations = 4 ;
2019-01-03 14:26:51 +01:00
m_data - > m_batchSizesGpu = new b3OpenCLArray < int > ( ctx , q ) ;
m_data - > m_bodyBufferGPU = new b3OpenCLArray < b3RigidBodyData > ( ctx , q ) ;
m_data - > m_inertiaBufferGPU = new b3OpenCLArray < b3InertiaData > ( ctx , q ) ;
m_data - > m_pBufContactOutGPU = new b3OpenCLArray < b3Contact4 > ( ctx , q ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
m_data - > m_pBufContactOutGPUCopy = new b3OpenCLArray < b3Contact4 > ( ctx , q ) ;
m_data - > m_contactKeyValues = new b3OpenCLArray < b3SortData > ( ctx , q ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU = new b3Solver ( ctx , device , q , 512 * 1024 ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
m_data - > m_sort32 = new b3RadixSort32CL ( ctx , device , m_data - > m_queue ) ;
m_data - > m_scan = new b3PrefixScanCL ( ctx , device , m_data - > m_queue , B3_SOLVER_N_CELLS ) ;
m_data - > m_search = new b3BoundSearchCL ( ctx , device , m_data - > m_queue , B3_SOLVER_N_CELLS ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
const int sortSize = B3NEXTMULTIPLEOF ( pairCapacity , 512 ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
m_data - > m_sortDataBuffer = new b3OpenCLArray < b3SortData > ( ctx , m_data - > m_queue , sortSize ) ;
m_data - > m_contactBuffer = new b3OpenCLArray < b3Contact4 > ( ctx , m_data - > m_queue ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
m_data - > m_numConstraints = new b3OpenCLArray < unsigned int > ( ctx , m_data - > m_queue , B3_SOLVER_N_CELLS ) ;
2017-08-01 14:30:58 +02:00
m_data - > m_numConstraints - > resize ( B3_SOLVER_N_CELLS ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_contactCGPU = new b3OpenCLArray < b3GpuConstraint4 > ( ctx , q , pairCapacity ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
m_data - > m_offsets = new b3OpenCLArray < unsigned int > ( ctx , m_data - > m_queue , B3_SOLVER_N_CELLS ) ;
2017-08-01 14:30:58 +02:00
m_data - > m_offsets - > resize ( B3_SOLVER_N_CELLS ) ;
const char * additionalMacros = " " ;
//const char* srcFileNameForCaching="";
cl_int pErrNum ;
const char * batchKernelSource = batchingKernelsCL ;
const char * batchKernelNewSource = batchingKernelsNewCL ;
const char * solverSetupSource = solverSetupCL ;
const char * solverSetup2Source = solverSetup2CL ;
const char * solveContactSource = solveContactCL ;
const char * solveFrictionSource = solveFrictionCL ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
cl_program solveContactProg = b3OpenCLUtils : : compileCLProgramFromString ( ctx , device , solveContactSource , & pErrNum , additionalMacros , B3_SOLVER_CONTACT_KERNEL_PATH ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( solveContactProg ) ;
2019-01-03 14:26:51 +01:00
cl_program solveFrictionProg = b3OpenCLUtils : : compileCLProgramFromString ( ctx , device , solveFrictionSource , & pErrNum , additionalMacros , B3_SOLVER_FRICTION_KERNEL_PATH ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( solveFrictionProg ) ;
2019-01-03 14:26:51 +01:00
cl_program solverSetup2Prog = b3OpenCLUtils : : compileCLProgramFromString ( ctx , device , solverSetup2Source , & pErrNum , additionalMacros , B3_SOLVER_SETUP2_KERNEL_PATH ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( solverSetup2Prog ) ;
2019-01-03 14:26:51 +01:00
cl_program solverSetupProg = b3OpenCLUtils : : compileCLProgramFromString ( ctx , device , solverSetupSource , & pErrNum , additionalMacros , B3_SOLVER_SETUP_KERNEL_PATH ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( solverSetupProg ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solveFrictionKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solveFrictionSource , " BatchSolveKernelFriction " , & pErrNum , solveFrictionProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_solveFrictionKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solveContactKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solveContactSource , " BatchSolveKernelContact " , & pErrNum , solveContactProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_solveContactKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solveSingleContactKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solveContactSource , " solveSingleContactKernel " , & pErrNum , solveContactProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_solveSingleContactKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solveSingleFrictionKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solveFrictionSource , " solveSingleFrictionKernel " , & pErrNum , solveFrictionProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_solveSingleFrictionKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_contactToConstraintKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetupSource , " ContactToConstraintKernel " , & pErrNum , solverSetupProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_contactToConstraintKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_setSortDataKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " SetSortDataKernel " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_setSortDataKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_setDeterminismSortDataBodyAKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " SetDeterminismSortDataBodyA " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_setDeterminismSortDataBodyAKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_setDeterminismSortDataBodyBKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " SetDeterminismSortDataBodyB " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_setDeterminismSortDataBodyBKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_setDeterminismSortDataChildShapeAKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " SetDeterminismSortDataChildShapeA " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_setDeterminismSortDataChildShapeAKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_setDeterminismSortDataChildShapeBKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " SetDeterminismSortDataChildShapeB " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_setDeterminismSortDataChildShapeBKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_reorderContactKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " ReorderContactKernel " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_reorderContactKernel ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_copyConstraintKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , solverSetup2Source , " CopyConstraintKernel " , & pErrNum , solverSetup2Prog , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_copyConstraintKernel ) ;
}
{
2019-01-03 14:26:51 +01:00
cl_program batchingProg = b3OpenCLUtils : : compileCLProgramFromString ( ctx , device , batchKernelSource , & pErrNum , additionalMacros , B3_BATCHING_PATH ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( batchingProg ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_batchingKernel = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , batchKernelSource , " CreateBatches " , & pErrNum , batchingProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_batchingKernel ) ;
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
cl_program batchingNewProg = b3OpenCLUtils : : compileCLProgramFromString ( ctx , device , batchKernelNewSource , & pErrNum , additionalMacros , B3_BATCHING_NEW_PATH ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( batchingNewProg ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_batchingKernelNew = b3OpenCLUtils : : compileCLKernelFromString ( ctx , device , batchKernelNewSource , " CreateBatchesNew " , & pErrNum , batchingNewProg , additionalMacros ) ;
2017-08-01 14:30:58 +02:00
b3Assert ( m_data - > m_batchingKernelNew ) ;
}
}
/// Tears down the private state: deletes the owned device arrays and helper objects,
/// releases every kernel handle, then deletes the state struct itself.
b3GpuPgsContactSolver::~b3GpuPgsContactSolver()
{
	b3GpuBatchingPgsSolverInternalData* state = m_data;

	// Device arrays.
	delete state->m_batchSizesGpu;
	delete state->m_bodyBufferGPU;
	delete state->m_inertiaBufferGPU;
	delete state->m_pBufContactOutGPU;
	delete state->m_pBufContactOutGPUCopy;
	delete state->m_contactKeyValues;
	delete state->m_contactCGPU;
	delete state->m_numConstraints;
	delete state->m_offsets;
	delete state->m_sortDataBuffer;
	delete state->m_contactBuffer;

	// Parallel primitives and the nested solver.
	delete state->m_sort32;
	delete state->m_scan;
	delete state->m_search;
	delete state->m_solverGPU;

	// Kernel handles, released in the same order as before.
	cl_kernel ownedKernels[] = {
		state->m_batchingKernel,
		state->m_batchingKernelNew,
		state->m_solveSingleContactKernel,
		state->m_solveSingleFrictionKernel,
		state->m_solveContactKernel,
		state->m_solveFrictionKernel,
		state->m_contactToConstraintKernel,
		state->m_setSortDataKernel,
		state->m_reorderContactKernel,
		state->m_copyConstraintKernel,
		state->m_setDeterminismSortDataBodyAKernel,
		state->m_setDeterminismSortDataBodyBKernel,
		state->m_setDeterminismSortDataChildShapeAKernel,
		state->m_setDeterminismSortDataChildShapeBKernel};
	const int ownedKernelCount = (int)(sizeof(ownedKernels) / sizeof(ownedKernels[0]));
	for (int k = 0; k < ownedKernelCount; ++k)
	{
		clReleaseKernel(ownedKernels[k]);
	}

	delete m_data;
}
/// Tuning parameters handed to the contact-constraint setup.
struct b3ConstraintCfg
{
	/// @param dt  Time step stored in m_dt (defaults to 0).
	/// Every member is given a defined value; previously m_enableParallelSolve and
	/// m_batchCellSize were left indeterminate until a caller assigned them.
	b3ConstraintCfg(float dt = 0.f)
		: m_positionDrift(0.005f),
		  m_positionConstraintCoeff(0.2f),
		  m_dt(dt),
		  m_enableParallelSolve(false),
		  m_batchCellSize(0.f),
		  m_staticIdx(0)
	{
	}

	float m_positionDrift;
	float m_positionConstraintCoeff;
	float m_dt;
	bool m_enableParallelSolve;  // false until a caller enables it
	float m_batchCellSize;       // 0 until a caller sets it
	int m_staticIdx;
};
2019-01-03 14:26:51 +01:00
void b3GpuPgsContactSolver : : solveContactConstraintBatchSizes ( const b3OpenCLArray < b3RigidBodyData > * bodyBuf , const b3OpenCLArray < b3InertiaData > * shapeBuf ,
b3OpenCLArray < b3GpuConstraint4 > * constraint , void * additionalData , int n , int maxNumBatches , int numIterations , const b3AlignedObjectArray < int > * batchSizes ) //const b3OpenCLArray<int>* gpuBatchSizes)
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " solveContactConstraintBatchSizes " ) ;
2019-01-03 14:26:51 +01:00
int numBatches = batchSizes - > size ( ) / B3_MAX_NUM_BATCHES ;
for ( int iter = 0 ; iter < numIterations ; iter + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
for ( int cellId = 0 ; cellId < numBatches ; cellId + + )
2017-08-01 14:30:58 +02:00
{
int offset = 0 ;
2019-01-03 14:26:51 +01:00
for ( int ii = 0 ; ii < B3_MAX_NUM_BATCHES ; ii + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
int numInBatch = batchSizes - > at ( cellId * B3_MAX_NUM_BATCHES + ii ) ;
2017-08-01 14:30:58 +02:00
if ( ! numInBatch )
break ;
{
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solveSingleContactKernel , " m_solveSingleContactKernel " ) ;
launcher . setBuffer ( bodyBuf - > getBufferCL ( ) ) ;
launcher . setBuffer ( shapeBuf - > getBufferCL ( ) ) ;
launcher . setBuffer ( constraint - > getBufferCL ( ) ) ;
2017-08-01 14:30:58 +02:00
launcher . setConst ( cellId ) ;
launcher . setConst ( offset ) ;
launcher . setConst ( numInBatch ) ;
launcher . launch1D ( numInBatch ) ;
2019-01-03 14:26:51 +01:00
offset + = numInBatch ;
2017-08-01 14:30:58 +02:00
}
}
}
}
2019-01-03 14:26:51 +01:00
for ( int iter = 0 ; iter < numIterations ; iter + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
for ( int cellId = 0 ; cellId < numBatches ; cellId + + )
2017-08-01 14:30:58 +02:00
{
int offset = 0 ;
2019-01-03 14:26:51 +01:00
for ( int ii = 0 ; ii < B3_MAX_NUM_BATCHES ; ii + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
int numInBatch = batchSizes - > at ( cellId * B3_MAX_NUM_BATCHES + ii ) ;
2017-08-01 14:30:58 +02:00
if ( ! numInBatch )
break ;
{
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solveSingleFrictionKernel , " m_solveSingleFrictionKernel " ) ;
launcher . setBuffer ( bodyBuf - > getBufferCL ( ) ) ;
launcher . setBuffer ( shapeBuf - > getBufferCL ( ) ) ;
launcher . setBuffer ( constraint - > getBufferCL ( ) ) ;
2017-08-01 14:30:58 +02:00
launcher . setConst ( cellId ) ;
launcher . setConst ( offset ) ;
launcher . setConst ( numInBatch ) ;
launcher . launch1D ( numInBatch ) ;
2019-01-03 14:26:51 +01:00
offset + = numInBatch ;
2017-08-01 14:30:58 +02:00
}
}
}
}
}
2019-01-03 14:26:51 +01:00
void b3GpuPgsContactSolver : : solveContactConstraint ( const b3OpenCLArray < b3RigidBodyData > * bodyBuf , const b3OpenCLArray < b3InertiaData > * shapeBuf ,
b3OpenCLArray < b3GpuConstraint4 > * constraint , void * additionalData , int n , int maxNumBatches , int numIterations , const b3AlignedObjectArray < int > * batchSizes ) //,const b3OpenCLArray<int>* gpuBatchSizes)
2017-08-01 14:30:58 +02:00
{
//sort the contacts
2019-01-03 14:26:51 +01:00
b3Int4 cdata = b3MakeInt4 ( n , 0 , 0 , 0 ) ;
2017-08-01 14:30:58 +02:00
{
const int nn = B3_SOLVER_N_CELLS ;
cdata . x = 0 ;
2019-01-03 14:26:51 +01:00
cdata . y = maxNumBatches ; //250;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES ;
2017-08-01 14:30:58 +02:00
# ifdef DEBUG_ME
2019-01-03 14:26:51 +01:00
SolverDebugInfo * debugInfo = new SolverDebugInfo [ numWorkItems ] ;
adl : : b3OpenCLArray < SolverDebugInfo > gpuDebugInfo ( data - > m_device , numWorkItems ) ;
2017-08-01 14:30:58 +02:00
# endif
{
B3_PROFILE ( " m_batchSolveKernel iterations " ) ;
2019-01-03 14:26:51 +01:00
for ( int iter = 0 ; iter < numIterations ; iter + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
for ( int ib = 0 ; ib < B3_SOLVER_N_BATCHES ; ib + + )
2017-08-01 14:30:58 +02:00
{
# ifdef DEBUG_ME
2019-01-03 14:26:51 +01:00
memset ( debugInfo , 0 , sizeof ( SolverDebugInfo ) * numWorkItems ) ;
gpuDebugInfo . write ( debugInfo , numWorkItems ) ;
2017-08-01 14:30:58 +02:00
# endif
cdata . z = ib ;
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solveContactKernel , " m_solveContactKernel " ) ;
2017-08-01 14:30:58 +02:00
# if 1
2019-01-03 14:26:51 +01:00
b3BufferInfoCL bInfo [ ] = {
b3BufferInfoCL ( bodyBuf - > getBufferCL ( ) ) ,
b3BufferInfoCL ( shapeBuf - > getBufferCL ( ) ) ,
b3BufferInfoCL ( constraint - > getBufferCL ( ) ) ,
b3BufferInfoCL ( m_data - > m_solverGPU - > m_numConstraints - > getBufferCL ( ) ) ,
b3BufferInfoCL ( m_data - > m_solverGPU - > m_offsets - > getBufferCL ( ) )
2017-08-01 14:30:58 +02:00
# ifdef DEBUG_ME
2019-01-03 14:26:51 +01:00
,
b3BufferInfoCL ( & gpuDebugInfo )
2017-08-01 14:30:58 +02:00
# endif
2019-01-03 14:26:51 +01:00
} ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
launcher . setBuffers ( bInfo , sizeof ( bInfo ) / sizeof ( b3BufferInfoCL ) ) ;
launcher . setBuffer ( m_data - > m_solverGPU - > m_batchSizes . getBufferCL ( ) ) ;
2017-08-01 14:30:58 +02:00
//launcher.setConst( cdata.x );
2019-01-03 14:26:51 +01:00
launcher . setConst ( cdata . y ) ;
launcher . setConst ( cdata . z ) ;
2017-08-01 14:30:58 +02:00
b3Int4 nSplit ;
nSplit . x = B3_SOLVER_N_SPLIT_X ;
nSplit . y = B3_SOLVER_N_SPLIT_Y ;
nSplit . z = B3_SOLVER_N_SPLIT_Z ;
2019-01-03 14:26:51 +01:00
launcher . setConst ( nSplit ) ;
launcher . launch1D ( numWorkItems , 64 ) ;
2017-08-01 14:30:58 +02:00
# else
2019-01-03 14:26:51 +01:00
const char * fileName = " m_batchSolveKernel.bin " ;
FILE * f = fopen ( fileName , " rb " ) ;
if ( f )
{
int sizeInBytes = 0 ;
if ( fseek ( f , 0 , SEEK_END ) | | ( sizeInBytes = ftell ( f ) ) = = EOF | | fseek ( f , 0 , SEEK_SET ) )
{
printf ( " error, cannot get file size \n " ) ;
exit ( 0 ) ;
}
unsigned char * buf = ( unsigned char * ) malloc ( sizeInBytes ) ;
fread ( buf , sizeInBytes , 1 , f ) ;
int serializedBytes = launcher . deserializeArgs ( buf , sizeInBytes , m_context ) ;
int num = * ( int * ) & buf [ serializedBytes ] ;
launcher . launch1D ( num ) ;
//this clFinish is for testing on errors
clFinish ( m_queue ) ;
}
2017-08-01 14:30:58 +02:00
# endif
# ifdef DEBUG_ME
clFinish ( m_queue ) ;
2019-01-03 14:26:51 +01:00
gpuDebugInfo . read ( debugInfo , numWorkItems ) ;
2017-08-01 14:30:58 +02:00
clFinish ( m_queue ) ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < numWorkItems ; i + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
if ( debugInfo [ i ] . m_valInt2 > 0 )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
printf ( " debugInfo[i].m_valInt2 = %d \n " , i , debugInfo [ i ] . m_valInt2 ) ;
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
if ( debugInfo [ i ] . m_valInt3 > 0 )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
printf ( " debugInfo[i].m_valInt3 = %d \n " , i , debugInfo [ i ] . m_valInt3 ) ;
2017-08-01 14:30:58 +02:00
}
}
2019-01-03 14:26:51 +01:00
# endif //DEBUG_ME
2017-08-01 14:30:58 +02:00
}
}
2019-01-03 14:26:51 +01:00
clFinish ( m_data - > m_queue ) ;
2017-08-01 14:30:58 +02:00
}
cdata . x = 1 ;
2019-01-03 14:26:51 +01:00
bool applyFriction = true ;
2017-08-01 14:30:58 +02:00
if ( applyFriction )
2019-01-03 14:26:51 +01:00
{
2017-08-01 14:30:58 +02:00
B3_PROFILE ( " m_batchSolveKernel iterations2 " ) ;
2019-01-03 14:26:51 +01:00
for ( int iter = 0 ; iter < numIterations ; iter + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
for ( int ib = 0 ; ib < B3_SOLVER_N_BATCHES ; ib + + )
2017-08-01 14:30:58 +02:00
{
cdata . z = ib ;
2019-01-03 14:26:51 +01:00
b3BufferInfoCL bInfo [ ] = {
b3BufferInfoCL ( bodyBuf - > getBufferCL ( ) ) ,
b3BufferInfoCL ( shapeBuf - > getBufferCL ( ) ) ,
b3BufferInfoCL ( constraint - > getBufferCL ( ) ) ,
b3BufferInfoCL ( m_data - > m_solverGPU - > m_numConstraints - > getBufferCL ( ) ) ,
b3BufferInfoCL ( m_data - > m_solverGPU - > m_offsets - > getBufferCL ( ) )
2017-08-01 14:30:58 +02:00
# ifdef DEBUG_ME
2019-01-03 14:26:51 +01:00
,
b3BufferInfoCL ( & gpuDebugInfo )
# endif //DEBUG_ME
2017-08-01 14:30:58 +02:00
} ;
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solveFrictionKernel , " m_solveFrictionKernel " ) ;
launcher . setBuffers ( bInfo , sizeof ( bInfo ) / sizeof ( b3BufferInfoCL ) ) ;
launcher . setBuffer ( m_data - > m_solverGPU - > m_batchSizes . getBufferCL ( ) ) ;
2017-08-01 14:30:58 +02:00
//launcher.setConst( cdata.x );
2019-01-03 14:26:51 +01:00
launcher . setConst ( cdata . y ) ;
launcher . setConst ( cdata . z ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
b3Int4 nSplit ;
2017-08-01 14:30:58 +02:00
nSplit . x = B3_SOLVER_N_SPLIT_X ;
nSplit . y = B3_SOLVER_N_SPLIT_Y ;
nSplit . z = B3_SOLVER_N_SPLIT_Z ;
2019-01-03 14:26:51 +01:00
launcher . setConst ( nSplit ) ;
launcher . launch1D ( 64 * nn / B3_SOLVER_N_BATCHES , 64 ) ;
2017-08-01 14:30:58 +02:00
}
}
clFinish ( m_data - > m_queue ) ;
}
# ifdef DEBUG_ME
delete [ ] debugInfo ;
2019-01-03 14:26:51 +01:00
# endif //DEBUG_ME
2017-08-01 14:30:58 +02:00
}
}
2019-01-03 14:26:51 +01:00
static bool sortfnc ( const b3SortData & a , const b3SortData & b )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
return ( a . m_key < b . m_key ) ;
2017-08-01 14:30:58 +02:00
}
static bool b3ContactCmp ( const b3Contact4 & p , const b3Contact4 & q )
{
2019-01-03 14:26:51 +01:00
return ( ( p . m_bodyAPtrAndSignBit < q . m_bodyAPtrAndSignBit ) | |
( ( p . m_bodyAPtrAndSignBit = = q . m_bodyAPtrAndSignBit ) & & ( p . m_bodyBPtrAndSignBit < q . m_bodyBPtrAndSignBit ) ) | |
( ( p . m_bodyAPtrAndSignBit = = q . m_bodyAPtrAndSignBit ) & & ( p . m_bodyBPtrAndSignBit = = q . m_bodyBPtrAndSignBit ) & & p . m_childIndexA < q . m_childIndexA ) | |
( ( p . m_bodyAPtrAndSignBit = = q . m_bodyAPtrAndSignBit ) & & ( p . m_bodyBPtrAndSignBit = = q . m_bodyBPtrAndSignBit ) & & p . m_childIndexA < q . m_childIndexA ) | |
( ( p . m_bodyAPtrAndSignBit = = q . m_bodyAPtrAndSignBit ) & & ( p . m_bodyBPtrAndSignBit = = q . m_bodyBPtrAndSignBit ) & & p . m_childIndexA = = q . m_childIndexA & & p . m_childIndexB < q . m_childIndexB ) ) ;
2017-08-01 14:30:58 +02:00
}
// Compile-time selection of how SetSortDataCPU assigns a contact to a cell:
//   USE_SPATIAL_BATCHING != 0 : cell derived from a body position (no lookup table needed)
//   USE_SPATIAL_BATCHING == 0 : cell looked up in one of the tables below, 4x4 or 8x8
//                               depending on USE_4x4_GRID
#define USE_SPATIAL_BATCHING 1
#define USE_4x4_GRID 1

// The tables are tested by value, matching the `#if USE_SPATIAL_BATCHING` in SetSortDataCPU.
// With the former `#ifndef`, setting the macro to 0 selected the table path while leaving
// the tables themselves compiled out.
#if !USE_SPATIAL_BATCHING
static const int gridTable4x4[] =
	{
		0, 1, 17, 16,
		1, 2, 18, 19,
		17, 18, 32, 3,
		16, 19, 3, 34};

static const int gridTable8x8[] =
	{
		0, 2, 3, 16, 17, 18, 19, 1,
		66, 64, 80, 67, 82, 81, 65, 83,
		131, 144, 128, 130, 147, 129, 145, 146,
		208, 195, 194, 192, 193, 211, 210, 209,
		21, 22, 23, 5, 4, 6, 7, 20,
		86, 85, 69, 87, 70, 68, 84, 71,
		151, 133, 149, 150, 135, 148, 132, 134,
		197, 27, 214, 213, 212, 199, 198, 196

};
#endif  // !USE_SPATIAL_BATCHING
2019-01-03 14:26:51 +01:00
/// Host-side computation of the sort key for every contact.
///
/// For contact `gIdx` the output entry gets `x` = cell index and `y` = `gIdx`.
/// With USE_SPATIAL_BATCHING the cell comes from the position of body A, or of body B
/// when A is static (negative body index, or index equal to `staticIdx`); otherwise it is
/// looked up in the 4x4 or 8x8 grid table from the low bits of both body indices.
///
/// The previous version re-tested `gIdx < nContacts` inside the loop; that test is the
/// loop condition itself, so its `else` branch (writing 0xffffffff) could never run and
/// has been removed.
///
/// @param gContact     Contacts to classify (read only).
/// @param gBodies      Rigid bodies indexed by the absolute body index stored in the contacts.
/// @param gSortDataOut Receives one entry per contact; must hold at least `nContacts` entries.
/// @param nContacts    Number of contacts to process.
/// @param scale        Multiplier applied to the body position before truncating to a cell coordinate.
/// @param nSplit       Cell counts per axis; the `& (nSplit - 1)` wrap equals a modulo only
///                     for power-of-two counts.
/// @param staticIdx    Body index that is treated as static.
void SetSortDataCPU(b3Contact4* gContact, b3RigidBodyData* gBodies, b3SortData* gSortDataOut, int nContacts, float scale, const b3Int4& nSplit, int staticIdx)
{
	for (int gIdx = 0; gIdx < nContacts; gIdx++)
	{
		int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;
		int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;

		// The sign bit only flags the body; the magnitude is the body index.
		int aIdx = abs(aPtrAndSignBit);
		int bIdx = abs(bPtrAndSignBit);

		bool aStatic = (aPtrAndSignBit < 0) || (aPtrAndSignBit == staticIdx);

#if USE_SPATIAL_BATCHING
		int idx = (aStatic) ? bIdx : aIdx;
		b3Vector3 p = gBodies[idx].m_pos;
		// Negative coordinates are shifted by one before truncation.
		int xIdx = (int)((p.x - ((p.x < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.x - 1);
		int yIdx = (int)((p.y - ((p.y < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.y - 1);
		int zIdx = (int)((p.z - ((p.z < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.z - 1);
		int newIndex = (xIdx + yIdx * nSplit.x + zIdx * nSplit.x * nSplit.y);
#else   //USE_SPATIAL_BATCHING
		bool bStatic = (bPtrAndSignBit < 0) || (bPtrAndSignBit == staticIdx);

#if USE_4x4_GRID
		int aa = aIdx & 3;
		int bb = bIdx & 3;
		// A static body takes the other body's coordinate.
		if (aStatic)
			aa = bb;
		if (bStatic)
			bb = aa;

		int gridIndex = aa + bb * 4;
		int newIndex = gridTable4x4[gridIndex];
#else   //USE_4x4_GRID
		int aa = aIdx & 7;
		int bb = bIdx & 7;
		// A static body takes the other body's coordinate.
		if (aStatic)
			aa = bb;
		if (bStatic)
			bb = aa;

		int gridIndex = aa + bb * 8;
		int newIndex = gridTable8x8[gridIndex];
#endif  //USE_4x4_GRID
#endif  //USE_SPATIAL_BATCHING

		gSortDataOut[gIdx].x = newIndex;
		gSortDataOut[gIdx].y = gIdx;
	}
}
void b3GpuPgsContactSolver : : solveContacts ( int numBodies , cl_mem bodyBuf , cl_mem inertiaBuf , int numContacts , cl_mem contactBuf , const b3Config & config , int static0Index )
{
B3_PROFILE ( " solveContacts " ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_bodyBufferGPU - > setFromOpenCLBuffer ( bodyBuf , numBodies ) ;
m_data - > m_inertiaBufferGPU - > setFromOpenCLBuffer ( inertiaBuf , numBodies ) ;
m_data - > m_pBufContactOutGPU - > setFromOpenCLBuffer ( contactBuf , numContacts ) ;
2017-08-01 14:30:58 +02:00
if ( optionalSortContactsDeterminism )
{
if ( ! gCpuSortContactsDeterminism )
{
B3_PROFILE ( " GPU Sort contact constraints (determinism) " ) ;
m_data - > m_pBufContactOutGPUCopy - > resize ( numContacts ) ;
m_data - > m_contactKeyValues - > resize ( numContacts ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_pBufContactOutGPU - > copyToCL ( m_data - > m_pBufContactOutGPUCopy - > getBufferCL ( ) , numContacts , 0 , 0 ) ;
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_setDeterminismSortDataChildShapeBKernel , " m_setDeterminismSortDataChildShapeBKernel " ) ;
2017-08-01 14:30:58 +02:00
launcher . setBuffer ( m_data - > m_pBufContactOutGPUCopy - > getBufferCL ( ) ) ;
launcher . setBuffer ( m_data - > m_contactKeyValues - > getBufferCL ( ) ) ;
launcher . setConst ( numContacts ) ;
2019-01-03 14:26:51 +01:00
launcher . launch1D ( numContacts , 64 ) ;
2017-08-01 14:30:58 +02:00
}
m_data - > m_solverGPU - > m_sort32 - > execute ( * m_data - > m_contactKeyValues ) ;
{
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_setDeterminismSortDataChildShapeAKernel , " m_setDeterminismSortDataChildShapeAKernel " ) ;
2017-08-01 14:30:58 +02:00
launcher . setBuffer ( m_data - > m_pBufContactOutGPUCopy - > getBufferCL ( ) ) ;
launcher . setBuffer ( m_data - > m_contactKeyValues - > getBufferCL ( ) ) ;
launcher . setConst ( numContacts ) ;
2019-01-03 14:26:51 +01:00
launcher . launch1D ( numContacts , 64 ) ;
2017-08-01 14:30:58 +02:00
}
m_data - > m_solverGPU - > m_sort32 - > execute ( * m_data - > m_contactKeyValues ) ;
{
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_setDeterminismSortDataBodyBKernel , " m_setDeterminismSortDataBodyBKernel " ) ;
2017-08-01 14:30:58 +02:00
launcher . setBuffer ( m_data - > m_pBufContactOutGPUCopy - > getBufferCL ( ) ) ;
launcher . setBuffer ( m_data - > m_contactKeyValues - > getBufferCL ( ) ) ;
launcher . setConst ( numContacts ) ;
2019-01-03 14:26:51 +01:00
launcher . launch1D ( numContacts , 64 ) ;
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
m_data - > m_solverGPU - > m_sort32 - > execute ( * m_data - > m_contactKeyValues ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_setDeterminismSortDataBodyAKernel , " m_setDeterminismSortDataBodyAKernel " ) ;
2017-08-01 14:30:58 +02:00
launcher . setBuffer ( m_data - > m_pBufContactOutGPUCopy - > getBufferCL ( ) ) ;
launcher . setBuffer ( m_data - > m_contactKeyValues - > getBufferCL ( ) ) ;
launcher . setConst ( numContacts ) ;
2019-01-03 14:26:51 +01:00
launcher . launch1D ( numContacts , 64 ) ;
2017-08-01 14:30:58 +02:00
}
m_data - > m_solverGPU - > m_sort32 - > execute ( * m_data - > m_contactKeyValues ) ;
{
B3_PROFILE ( " gpu reorderContactKernel (determinism) " ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
b3Int4 cdata ;
cdata . x = numContacts ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
// , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solverGPU - > m_reorderContactKernel , " m_reorderContactKernel " ) ;
2017-08-01 14:30:58 +02:00
launcher . setBuffer ( m_data - > m_pBufContactOutGPUCopy - > getBufferCL ( ) ) ;
launcher . setBuffer ( m_data - > m_pBufContactOutGPU - > getBufferCL ( ) ) ;
launcher . setBuffer ( m_data - > m_contactKeyValues - > getBufferCL ( ) ) ;
2019-01-03 14:26:51 +01:00
launcher . setConst ( cdata ) ;
launcher . launch1D ( numContacts , 64 ) ;
}
}
else
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " CPU Sort contact constraints (determinism) " ) ;
b3AlignedObjectArray < b3Contact4 > cpuConstraints ;
m_data - > m_pBufContactOutGPU - > copyToHost ( cpuConstraints ) ;
bool sort = true ;
if ( sort )
{
cpuConstraints . quickSort ( b3ContactCmp ) ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < cpuConstraints . size ( ) ; i + + )
2017-08-01 14:30:58 +02:00
{
cpuConstraints [ i ] . m_batchIdx = i ;
}
}
m_data - > m_pBufContactOutGPU - > copyFromHost ( cpuConstraints ) ;
2019-01-03 14:26:51 +01:00
if ( m_debugOutput = = 100 )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < cpuConstraints . size ( ) ; i + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
printf ( " c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d \n " , i , cpuConstraints [ i ] . m_bodyAPtrAndSignBit , cpuConstraints [ i ] . m_bodyBPtrAndSignBit , cpuConstraints [ i ] . m_batchIdx ) ;
2017-08-01 14:30:58 +02:00
}
}
m_debugOutput + + ;
}
}
int nContactOut = m_data - > m_pBufContactOutGPU - > size ( ) ;
bool useSolver = true ;
2019-01-03 14:26:51 +01:00
if ( useSolver )
{
float dt = 1. / 60. ;
b3ConstraintCfg csCfg ( dt ) ;
csCfg . m_enableParallelSolve = true ;
csCfg . m_batchCellSize = 6 ;
csCfg . m_staticIdx = static0Index ;
b3OpenCLArray < b3RigidBodyData > * bodyBuf = m_data - > m_bodyBufferGPU ;
void * additionalData = 0 ; //m_data->m_frictionCGPU;
const b3OpenCLArray < b3InertiaData > * shapeBuf = m_data - > m_inertiaBufferGPU ;
b3OpenCLArray < b3GpuConstraint4 > * contactConstraintOut = m_data - > m_contactCGPU ;
int nContacts = nContactOut ;
2017-08-01 14:30:58 +02:00
int maxNumBatches = 0 ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
if ( ! gUseLargeBatches )
2019-01-03 14:26:51 +01:00
{
if ( m_data - > m_solverGPU - > m_contactBuffer2 )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > m_contactBuffer2 - > resize ( nContacts ) ;
}
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
if ( m_data - > m_solverGPU - > m_contactBuffer2 = = 0 )
{
m_data - > m_solverGPU - > m_contactBuffer2 = new b3OpenCLArray < b3Contact4 > ( m_data - > m_context , m_data - > m_queue , nContacts ) ;
m_data - > m_solverGPU - > m_contactBuffer2 - > resize ( nContacts ) ;
}
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
//clFinish(m_data->m_queue);
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
{
B3_PROFILE ( " batching " ) ;
//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
//const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf;
2017-08-01 14:30:58 +02:00
{
//b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
//b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
2019-01-03 14:26:51 +01:00
const int sortAlignment = 512 ; // todo. get this out of sort
if ( csCfg . m_enableParallelSolve )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
int sortSize = B3NEXTMULTIPLEOF ( nContacts , sortAlignment ) ;
2017-08-01 14:30:58 +02:00
b3OpenCLArray < unsigned int > * countsNative = m_data - > m_solverGPU - > m_numConstraints ;
b3OpenCLArray < unsigned int > * offsetsNative = m_data - > m_solverGPU - > m_offsets ;
if ( ! gCpuSetSortData )
2019-01-03 14:26:51 +01:00
{ // 2. set cell idx
2017-08-01 14:30:58 +02:00
B3_PROFILE ( " GPU set cell idx " ) ;
struct CB
{
int m_nContacts ;
int m_staticIdx ;
float m_scale ;
b3Int4 m_nSplit ;
} ;
2019-01-03 14:26:51 +01:00
b3Assert ( sortSize % 64 = = 0 ) ;
2017-08-01 14:30:58 +02:00
CB cdata ;
cdata . m_nContacts = nContacts ;
cdata . m_staticIdx = csCfg . m_staticIdx ;
2019-01-03 14:26:51 +01:00
cdata . m_scale = 1.f / csCfg . m_batchCellSize ;
2017-08-01 14:30:58 +02:00
cdata . m_nSplit . x = B3_SOLVER_N_SPLIT_X ;
cdata . m_nSplit . y = B3_SOLVER_N_SPLIT_Y ;
cdata . m_nSplit . z = B3_SOLVER_N_SPLIT_Z ;
m_data - > m_solverGPU - > m_sortDataBuffer - > resize ( nContacts ) ;
2019-01-03 14:26:51 +01:00
b3BufferInfoCL bInfo [ ] = { b3BufferInfoCL ( m_data - > m_pBufContactOutGPU - > getBufferCL ( ) ) , b3BufferInfoCL ( bodyBuf - > getBufferCL ( ) ) , b3BufferInfoCL ( m_data - > m_solverGPU - > m_sortDataBuffer - > getBufferCL ( ) ) } ;
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solverGPU - > m_setSortDataKernel , " m_setSortDataKernel " ) ;
launcher . setBuffers ( bInfo , sizeof ( bInfo ) / sizeof ( b3BufferInfoCL ) ) ;
launcher . setConst ( cdata . m_nContacts ) ;
launcher . setConst ( cdata . m_scale ) ;
2017-08-01 14:30:58 +02:00
launcher . setConst ( cdata . m_nSplit ) ;
launcher . setConst ( cdata . m_staticIdx ) ;
2019-01-03 14:26:51 +01:00
launcher . launch1D ( sortSize , 64 ) ;
}
else
2017-08-01 14:30:58 +02:00
{
m_data - > m_solverGPU - > m_sortDataBuffer - > resize ( nContacts ) ;
b3AlignedObjectArray < b3SortData > sortDataCPU ;
m_data - > m_solverGPU - > m_sortDataBuffer - > copyToHost ( sortDataCPU ) ;
b3AlignedObjectArray < b3Contact4 > contactCPU ;
m_data - > m_pBufContactOutGPU - > copyToHost ( contactCPU ) ;
b3AlignedObjectArray < b3RigidBodyData > bodiesCPU ;
bodyBuf - > copyToHost ( bodiesCPU ) ;
2019-01-03 14:26:51 +01:00
float scale = 1.f / csCfg . m_batchCellSize ;
2017-08-01 14:30:58 +02:00
b3Int4 nSplit ;
nSplit . x = B3_SOLVER_N_SPLIT_X ;
nSplit . y = B3_SOLVER_N_SPLIT_Y ;
nSplit . z = B3_SOLVER_N_SPLIT_Z ;
2019-01-03 14:26:51 +01:00
SetSortDataCPU ( & contactCPU [ 0 ] , & bodiesCPU [ 0 ] , & sortDataCPU [ 0 ] , nContacts , scale , nSplit , csCfg . m_staticIdx ) ;
2017-08-01 14:30:58 +02:00
m_data - > m_solverGPU - > m_sortDataBuffer - > copyFromHost ( sortDataCPU ) ;
}
if ( ! gCpuRadixSort )
2019-01-03 14:26:51 +01:00
{ // 3. sort by cell idx
2017-08-01 14:30:58 +02:00
B3_PROFILE ( " gpuRadixSort " ) ;
//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
//int sortBit = 32;
//if( n <= 0xffff ) sortBit = 16;
//if( n <= 0xff ) sortBit = 8;
//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
b3OpenCLArray < b3SortData > & keyValuesInOut = * ( m_data - > m_solverGPU - > m_sortDataBuffer ) ;
this - > m_data - > m_solverGPU - > m_sort32 - > execute ( keyValuesInOut ) ;
2019-01-03 14:26:51 +01:00
}
else
2017-08-01 14:30:58 +02:00
{
b3OpenCLArray < b3SortData > & keyValuesInOut = * ( m_data - > m_solverGPU - > m_sortDataBuffer ) ;
b3AlignedObjectArray < b3SortData > hostValues ;
keyValuesInOut . copyToHost ( hostValues ) ;
hostValues . quickSort ( sortfnc ) ;
keyValuesInOut . copyFromHost ( hostValues ) ;
}
if ( gUseScanHost )
{
// 4. find entries
B3_PROFILE ( " cpuBoundSearch " ) ;
b3AlignedObjectArray < unsigned int > countsHost ;
countsNative - > copyToHost ( countsHost ) ;
b3AlignedObjectArray < b3SortData > sortDataHost ;
m_data - > m_solverGPU - > m_sortDataBuffer - > copyToHost ( sortDataHost ) ;
//m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > m_search - > executeHost ( sortDataHost , nContacts , countsHost , B3_SOLVER_N_CELLS , b3BoundSearchCL : : COUNT ) ;
2017-08-01 14:30:58 +02:00
countsNative - > copyFromHost ( countsHost ) ;
//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
// B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );
//unsigned int sum;
//m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
b3AlignedObjectArray < unsigned int > offsetsHost ;
offsetsHost . resize ( offsetsNative - > size ( ) ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > m_scan - > executeHost ( countsHost , offsetsHost , B3_SOLVER_N_CELLS ) ; //,&sum );
2017-08-01 14:30:58 +02:00
offsetsNative - > copyFromHost ( offsetsHost ) ;
//printf("sum = %d\n",sum);
2019-01-03 14:26:51 +01:00
}
else
2017-08-01 14:30:58 +02:00
{
// 4. find entries
B3_PROFILE ( " gpuBoundSearch " ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > m_search - > execute ( * m_data - > m_solverGPU - > m_sortDataBuffer , nContacts , * countsNative , B3_SOLVER_N_CELLS , b3BoundSearchCL : : COUNT ) ;
m_data - > m_solverGPU - > m_scan - > execute ( * countsNative , * offsetsNative , B3_SOLVER_N_CELLS ) ; //,&sum );
}
2017-08-01 14:30:58 +02:00
if ( nContacts )
2019-01-03 14:26:51 +01:00
{ // 5. sort constraints by cellIdx
2017-08-01 14:30:58 +02:00
if ( gReorderContactsOnCpu )
{
B3_PROFILE ( " cpu m_reorderContactKernel " ) ;
b3AlignedObjectArray < b3SortData > sortDataHost ;
m_data - > m_solverGPU - > m_sortDataBuffer - > copyToHost ( sortDataHost ) ;
b3AlignedObjectArray < b3Contact4 > inContacts ;
b3AlignedObjectArray < b3Contact4 > outContacts ;
m_data - > m_pBufContactOutGPU - > copyToHost ( inContacts ) ;
outContacts . resize ( inContacts . size ( ) ) ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < nContacts ; i + + )
2017-08-01 14:30:58 +02:00
{
int srcIdx = sortDataHost [ i ] . y ;
outContacts [ i ] = inContacts [ srcIdx ] ;
}
m_data - > m_solverGPU - > m_contactBuffer2 - > copyFromHost ( outContacts ) ;
/* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
" { \n "
" int nContacts = cb.x; \n "
" int gIdx = GET_GLOBAL_IDX; \n "
" if( gIdx < nContacts ) \n "
" { \n "
" int srcIdx = sortData[gIdx].y; \n "
" out[gIdx] = in[srcIdx]; \n "
" } \n "
" } \n "
*/
2019-01-03 14:26:51 +01:00
}
else
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " gpu m_reorderContactKernel " ) ;
b3Int4 cdata ;
cdata . x = nContacts ;
2019-01-03 14:26:51 +01:00
b3BufferInfoCL bInfo [ ] = {
b3BufferInfoCL ( m_data - > m_pBufContactOutGPU - > getBufferCL ( ) ) ,
b3BufferInfoCL ( m_data - > m_solverGPU - > m_contactBuffer2 - > getBufferCL ( ) ) , b3BufferInfoCL ( m_data - > m_solverGPU - > m_sortDataBuffer - > getBufferCL ( ) ) } ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solverGPU - > m_reorderContactKernel , " m_reorderContactKernel " ) ;
launcher . setBuffers ( bInfo , sizeof ( bInfo ) / sizeof ( b3BufferInfoCL ) ) ;
launcher . setConst ( cdata ) ;
launcher . launch1D ( nContacts , 64 ) ;
2017-08-01 14:30:58 +02:00
}
}
}
}
//clFinish(m_data->m_queue);
// {
// b3AlignedObjectArray<unsigned int> histogram;
// m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
// printf(",,,\n");
// }
if ( nContacts )
{
if ( gUseCpuCopyConstraints )
{
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < nContacts ; i + + )
2017-08-01 14:30:58 +02:00
{
m_data - > m_pBufContactOutGPU - > copyFromOpenCLArray ( * m_data - > m_solverGPU - > m_contactBuffer2 ) ;
2019-01-03 14:26:51 +01:00
// m_data->m_solverGPU->m_contactBuffer2->getBufferCL();
// m_data->m_pBufContactOutGPU->getBufferCL()
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
}
else
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " gpu m_copyConstraintKernel " ) ;
2019-01-03 14:26:51 +01:00
b3Int4 cdata ;
cdata . x = nContacts ;
b3BufferInfoCL bInfo [ ] = {
b3BufferInfoCL ( m_data - > m_solverGPU - > m_contactBuffer2 - > getBufferCL ( ) ) ,
b3BufferInfoCL ( m_data - > m_pBufContactOutGPU - > getBufferCL ( ) ) } ;
b3LauncherCL launcher ( m_data - > m_queue , m_data - > m_solverGPU - > m_copyConstraintKernel , " m_copyConstraintKernel " ) ;
launcher . setBuffers ( bInfo , sizeof ( bInfo ) / sizeof ( b3BufferInfoCL ) ) ;
launcher . setConst ( cdata ) ;
launcher . launch1D ( nContacts , 64 ) ;
2017-08-01 14:30:58 +02:00
//we use the clFinish for proper benchmark/profile
clFinish ( m_data - > m_queue ) ;
}
}
2019-01-03 14:26:51 +01:00
// bool compareGPU = false;
2017-08-01 14:30:58 +02:00
if ( nContacts )
{
if ( ! gCpuBatchContacts )
{
B3_PROFILE ( " gpu batchContacts " ) ;
2019-01-03 14:26:51 +01:00
maxNumBatches = 250 ; //250;
m_data - > m_solverGPU - > batchContacts ( m_data - > m_pBufContactOutGPU , nContacts , m_data - > m_solverGPU - > m_numConstraints , m_data - > m_solverGPU - > m_offsets , csCfg . m_staticIdx ) ;
2017-08-01 14:30:58 +02:00
clFinish ( m_data - > m_queue ) ;
2019-01-03 14:26:51 +01:00
}
else
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " cpu batchContacts " ) ;
static b3AlignedObjectArray < b3Contact4 > cpuContacts ;
b3OpenCLArray < b3Contact4 > * contactsIn = m_data - > m_solverGPU - > m_contactBuffer2 ;
{
B3_PROFILE ( " copyToHost " ) ;
contactsIn - > copyToHost ( cpuContacts ) ;
}
b3OpenCLArray < unsigned int > * countsNative = m_data - > m_solverGPU - > m_numConstraints ;
b3OpenCLArray < unsigned int > * offsetsNative = m_data - > m_solverGPU - > m_offsets ;
b3AlignedObjectArray < unsigned int > nNativeHost ;
b3AlignedObjectArray < unsigned int > offsetsNativeHost ;
{
B3_PROFILE ( " countsNative/offsetsNative copyToHost " ) ;
countsNative - > copyToHost ( nNativeHost ) ;
offsetsNative - > copyToHost ( offsetsNativeHost ) ;
}
2019-01-03 14:26:51 +01:00
int numNonzeroGrid = 0 ;
2017-08-01 14:30:58 +02:00
if ( gUseLargeBatches )
{
m_data - > m_batchSizes . resize ( B3_MAX_NUM_BATCHES ) ;
int totalNumConstraints = cpuContacts . size ( ) ;
//int simdWidth =numBodies+1;//-1;//64;//-1;//32;
2019-01-03 14:26:51 +01:00
int numBatches = sortConstraintByBatch3 ( & cpuContacts [ 0 ] , totalNumConstraints , totalNumConstraints + 1 , csCfg . m_staticIdx , numBodies , & m_data - > m_batchSizes [ 0 ] ) ; // on GPU
maxNumBatches = b3Max ( numBatches , maxNumBatches ) ;
2017-08-01 14:30:58 +02:00
static int globalMaxBatch = 0 ;
2019-01-03 14:26:51 +01:00
if ( maxNumBatches > globalMaxBatch )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
globalMaxBatch = maxNumBatches ;
b3Printf ( " maxNumBatches = %d \n " , maxNumBatches ) ;
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
}
else
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
m_data - > m_batchSizes . resize ( B3_SOLVER_N_CELLS * B3_MAX_NUM_BATCHES ) ;
2017-08-01 14:30:58 +02:00
B3_PROFILE ( " cpu batch grid " ) ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < B3_SOLVER_N_CELLS ; i + + )
2017-08-01 14:30:58 +02:00
{
int n = ( nNativeHost ) [ i ] ;
int offset = ( offsetsNativeHost ) [ i ] ;
2019-01-03 14:26:51 +01:00
if ( n )
2017-08-01 14:30:58 +02:00
{
numNonzeroGrid + + ;
2019-01-03 14:26:51 +01:00
int simdWidth = numBodies + 1 ; //-1;//64;//-1;//32;
int numBatches = sortConstraintByBatch3 ( & cpuContacts [ 0 ] + offset , n , simdWidth , csCfg . m_staticIdx , numBodies , & m_data - > m_batchSizes [ i * B3_MAX_NUM_BATCHES ] ) ; // on GPU
maxNumBatches = b3Max ( numBatches , maxNumBatches ) ;
2017-08-01 14:30:58 +02:00
static int globalMaxBatch = 0 ;
2019-01-03 14:26:51 +01:00
if ( maxNumBatches > globalMaxBatch )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
globalMaxBatch = maxNumBatches ;
b3Printf ( " maxNumBatches = %d \n " , maxNumBatches ) ;
2017-08-01 14:30:58 +02:00
}
//we use the clFinish for proper benchmark/profile
}
}
//clFinish(m_data->m_queue);
}
{
B3_PROFILE ( " m_contactBuffer->copyFromHost " ) ;
m_data - > m_solverGPU - > m_contactBuffer2 - > copyFromHost ( ( b3AlignedObjectArray < b3Contact4 > & ) cpuContacts ) ;
}
2019-01-03 14:26:51 +01:00
}
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
}
}
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
//printf("maxNumBatches = %d\n", maxNumBatches);
2017-08-01 14:30:58 +02:00
if ( gUseLargeBatches )
{
if ( nContacts )
{
B3_PROFILE ( " cpu batchContacts " ) ;
static b3AlignedObjectArray < b3Contact4 > cpuContacts ;
2019-01-03 14:26:51 +01:00
// b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " copyToHost " ) ;
m_data - > m_pBufContactOutGPU - > copyToHost ( cpuContacts ) ;
}
2019-01-03 14:26:51 +01:00
// b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
// b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
// int numNonzeroGrid=0;
2017-08-01 14:30:58 +02:00
{
m_data - > m_batchSizes . resize ( B3_MAX_NUM_BATCHES ) ;
int totalNumConstraints = cpuContacts . size ( ) ;
2019-01-03 14:26:51 +01:00
// int simdWidth =numBodies+1;//-1;//64;//-1;//32;
int numBatches = sortConstraintByBatch3 ( & cpuContacts [ 0 ] , totalNumConstraints , totalNumConstraints + 1 , csCfg . m_staticIdx , numBodies , & m_data - > m_batchSizes [ 0 ] ) ; // on GPU
maxNumBatches = b3Max ( numBatches , maxNumBatches ) ;
2017-08-01 14:30:58 +02:00
static int globalMaxBatch = 0 ;
2019-01-03 14:26:51 +01:00
if ( maxNumBatches > globalMaxBatch )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
globalMaxBatch = maxNumBatches ;
b3Printf ( " maxNumBatches = %d \n " , maxNumBatches ) ;
2017-08-01 14:30:58 +02:00
}
}
{
B3_PROFILE ( " m_contactBuffer->copyFromHost " ) ;
m_data - > m_solverGPU - > m_contactBuffer2 - > copyFromHost ( ( b3AlignedObjectArray < b3Contact4 > & ) cpuContacts ) ;
}
2019-01-03 14:26:51 +01:00
}
2017-08-01 14:30:58 +02:00
}
if ( nContacts )
{
B3_PROFILE ( " gpu convertToConstraints " ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > convertToConstraints ( bodyBuf ,
shapeBuf , m_data - > m_solverGPU - > m_contactBuffer2 ,
contactConstraintOut ,
additionalData , nContacts ,
( b3SolverBase : : ConstraintCfg & ) csCfg ) ;
2017-08-01 14:30:58 +02:00
clFinish ( m_data - > m_queue ) ;
}
if ( 1 )
{
int numIter = 4 ;
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > m_nIterations = numIter ; //10
2017-08-01 14:30:58 +02:00
if ( ! gCpuSolveConstraint )
{
B3_PROFILE ( " GPU solveContactConstraint " ) ;
/*m_data->m_solverGPU->solveContactConstraint(
m_data - > m_bodyBufferGPU ,
m_data - > m_inertiaBufferGPU ,
m_data - > m_contactCGPU , 0 ,
nContactOut ,
maxNumBatches ) ;
*/
//m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes);
if ( gUseLargeBatches )
{
2019-01-03 14:26:51 +01:00
solveContactConstraintBatchSizes ( m_data - > m_bodyBufferGPU ,
m_data - > m_inertiaBufferGPU ,
m_data - > m_contactCGPU , 0 ,
nContactOut ,
maxNumBatches , numIter , & m_data - > m_batchSizes ) ;
}
else
2017-08-01 14:30:58 +02:00
{
solveContactConstraint (
2019-01-03 14:26:51 +01:00
m_data - > m_bodyBufferGPU ,
2017-08-01 14:30:58 +02:00
m_data - > m_inertiaBufferGPU ,
2019-01-03 14:26:51 +01:00
m_data - > m_contactCGPU , 0 ,
nContactOut ,
maxNumBatches , numIter , & m_data - > m_batchSizes ) ; //m_data->m_batchSizesGpu);
2017-08-01 14:30:58 +02:00
}
}
else
{
B3_PROFILE ( " Host solveContactConstraint " ) ;
2019-01-03 14:26:51 +01:00
m_data - > m_solverGPU - > solveContactConstraintHost ( m_data - > m_bodyBufferGPU , m_data - > m_inertiaBufferGPU , m_data - > m_contactCGPU , 0 , nContactOut , maxNumBatches , & m_data - > m_batchSizes ) ;
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
}
2017-08-01 14:30:58 +02:00
#if 0
if ( 0 )
{
B3_PROFILE ( " read body velocities back to CPU " ) ;
//read body updated linear/angular velocities back to CPU
m_data - > m_bodyBufferGPU - > read (
m_data - > m_bodyBufferCPU - > m_ptr , numOfConvexRBodies ) ;
adl : : DeviceUtils : : waitForCompletion ( m_data - > m_deviceCL ) ;
}
# endif
2019-01-03 14:26:51 +01:00
}
2017-08-01 14:30:58 +02:00
}
2019-01-03 14:26:51 +01:00
void b3GpuPgsContactSolver : : batchContacts ( b3OpenCLArray < b3Contact4 > * contacts , int nContacts , b3OpenCLArray < unsigned int > * n , b3OpenCLArray < unsigned int > * offsets , int staticIdx )
2017-08-01 14:30:58 +02:00
{
}
// Scratch storage used by sortConstraintByBatch, which resizes all three
// arrays to its constraint count on every call.
// NOTE(review): these are file-scope objects, so every call shares the same
// storage -- confirm that callers never run concurrently.

// Indices of the constraints that still have to be assigned to a batch.
b3AlignedObjectArray < unsigned int > idxBuffer ;
// Per constraint: m_key receives the batch index, m_value the constraint index.
b3AlignedObjectArray < b3SortData > sortData ;
// Copy of the constraint array taken just before it is rewritten in sorted order.
b3AlignedObjectArray < b3Contact4 > old ;
2019-01-03 14:26:51 +01:00
inline int b3GpuPgsContactSolver : : sortConstraintByBatch ( b3Contact4 * cs , int n , int simdWidth , int staticIdx , int numBodies )
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " sortConstraintByBatch " ) ;
int numIter = 0 ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
sortData . resize ( n ) ;
idxBuffer . resize ( n ) ;
old . resize ( n ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
unsigned int * idxSrc = & idxBuffer [ 0 ] ;
unsigned int * idxDst = & idxBuffer [ 0 ] ;
int nIdxSrc , nIdxDst ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
const int N_FLG = 256 ;
2019-01-03 14:26:51 +01:00
const int FLG_MASK = N_FLG - 1 ;
unsigned int flg [ N_FLG / 32 ] ;
2017-08-01 14:30:58 +02:00
# if defined(_DEBUG)
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < n ; i + + )
2017-08-01 14:30:58 +02:00
cs [ i ] . getBatchIdx ( ) = - 1 ;
# endif
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < n ; i + + )
2017-08-01 14:30:58 +02:00
idxSrc [ i ] = i ;
nIdxSrc = n ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
int batchIdx = 0 ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " cpu batch innerloop " ) ;
2019-01-03 14:26:51 +01:00
while ( nIdxSrc )
2017-08-01 14:30:58 +02:00
{
numIter + + ;
nIdxDst = 0 ;
int nCurrentBatch = 0 ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
// clear flag
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < N_FLG / 32 ; i + + ) flg [ i ] = 0 ;
for ( int i = 0 ; i < nIdxSrc ; i + + )
2017-08-01 14:30:58 +02:00
{
int idx = idxSrc [ i ] ;
2019-01-03 14:26:51 +01:00
b3Assert ( idx < n ) ;
2017-08-01 14:30:58 +02:00
// check if it can go
int bodyAS = cs [ idx ] . m_bodyAPtrAndSignBit ;
int bodyBS = cs [ idx ] . m_bodyBPtrAndSignBit ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
int bodyA = abs ( bodyAS ) ;
int bodyB = abs ( bodyBS ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
int aIdx = bodyA & FLG_MASK ;
int bIdx = bodyB & FLG_MASK ;
2019-01-03 14:26:51 +01:00
unsigned int aUnavailable = flg [ aIdx / 32 ] & ( 1 < < ( aIdx & 31 ) ) ;
unsigned int bUnavailable = flg [ bIdx / 32 ] & ( 1 < < ( bIdx & 31 ) ) ;
bool aIsStatic = ( bodyAS < 0 ) | | bodyAS = = staticIdx ;
bool bIsStatic = ( bodyBS < 0 ) | | bodyBS = = staticIdx ;
//use inv_mass!
aUnavailable = ! aIsStatic ? aUnavailable : 0 ; //
bUnavailable = ! bIsStatic ? bUnavailable : 0 ;
if ( aUnavailable = = 0 & & bUnavailable = = 0 ) // ok
2017-08-01 14:30:58 +02:00
{
if ( ! aIsStatic )
2019-01-03 14:26:51 +01:00
flg [ aIdx / 32 ] | = ( 1 < < ( aIdx & 31 ) ) ;
2017-08-01 14:30:58 +02:00
if ( ! bIsStatic )
2019-01-03 14:26:51 +01:00
flg [ bIdx / 32 ] | = ( 1 < < ( bIdx & 31 ) ) ;
2017-08-01 14:30:58 +02:00
cs [ idx ] . getBatchIdx ( ) = batchIdx ;
sortData [ idx ] . m_key = batchIdx ;
sortData [ idx ] . m_value = idx ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
{
nCurrentBatch + + ;
2019-01-03 14:26:51 +01:00
if ( nCurrentBatch = = simdWidth )
2017-08-01 14:30:58 +02:00
{
nCurrentBatch = 0 ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < N_FLG / 32 ; i + + ) flg [ i ] = 0 ;
2017-08-01 14:30:58 +02:00
}
}
}
else
{
idxDst [ nIdxDst + + ] = idx ;
}
}
2019-01-03 14:26:51 +01:00
b3Swap ( idxSrc , idxDst ) ;
b3Swap ( nIdxSrc , nIdxDst ) ;
batchIdx + + ;
2017-08-01 14:30:58 +02:00
}
}
{
B3_PROFILE ( " quickSort " ) ;
sortData . quickSort ( sortfnc ) ;
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
B3_PROFILE ( " reorder " ) ;
2017-08-01 14:30:58 +02:00
// reorder
2019-01-03 14:26:51 +01:00
memcpy ( & old [ 0 ] , cs , sizeof ( b3Contact4 ) * n ) ;
for ( int i = 0 ; i < n ; i + + )
2017-08-01 14:30:58 +02:00
{
int idx = sortData [ i ] . m_value ;
cs [ i ] = old [ idx ] ;
}
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
# if defined(_DEBUG)
2019-01-03 14:26:51 +01:00
// debugPrintf( "nBatches: %d\n", batchIdx );
for ( int i = 0 ; i < n ; i + + )
{
b3Assert ( cs [ i ] . getBatchIdx ( ) ! = - 1 ) ;
}
2017-08-01 14:30:58 +02:00
# endif
return batchIdx ;
}
// Scratch list used by sortConstraintByBatch2: the body indices already
// claimed while the current batch is being filled. That function resizes it
// to 2 * simdWidth and zeroes it on every call.
b3AlignedObjectArray < int > bodyUsed2 ;
2019-01-03 14:26:51 +01:00
inline int b3GpuPgsContactSolver : : sortConstraintByBatch2 ( b3Contact4 * cs , int numConstraints , int simdWidth , int staticIdx , int numBodies )
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " sortConstraintByBatch2 " ) ;
2019-01-03 14:26:51 +01:00
bodyUsed2 . resize ( 2 * simdWidth ) ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
for ( int q = 0 ; q < 2 * simdWidth ; q + + )
bodyUsed2 [ q ] = 0 ;
2017-08-01 14:30:58 +02:00
int curBodyUsed = 0 ;
int numIter = 0 ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
m_data - > m_sortData . resize ( numConstraints ) ;
m_data - > m_idxBuffer . resize ( numConstraints ) ;
m_data - > m_old . resize ( numConstraints ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
unsigned int * idxSrc = & m_data - > m_idxBuffer [ 0 ] ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
# if defined(_DEBUG)
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < numConstraints ; i + + )
2017-08-01 14:30:58 +02:00
cs [ i ] . getBatchIdx ( ) = - 1 ;
# endif
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < numConstraints ; i + + )
2017-08-01 14:30:58 +02:00
idxSrc [ i ] = i ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
int numValidConstraints = 0 ;
2019-01-03 14:26:51 +01:00
// int unprocessedConstraintIndex = 0;
2017-08-01 14:30:58 +02:00
int batchIdx = 0 ;
{
B3_PROFILE ( " cpu batch innerloop " ) ;
2019-01-03 14:26:51 +01:00
while ( numValidConstraints < numConstraints )
2017-08-01 14:30:58 +02:00
{
numIter + + ;
int nCurrentBatch = 0 ;
// clear flag
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < curBodyUsed ; i + + )
2017-08-01 14:30:58 +02:00
bodyUsed2 [ i ] = 0 ;
2019-01-03 14:26:51 +01:00
curBodyUsed = 0 ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
for ( int i = numValidConstraints ; i < numConstraints ; i + + )
2017-08-01 14:30:58 +02:00
{
int idx = idxSrc [ i ] ;
2019-01-03 14:26:51 +01:00
b3Assert ( idx < numConstraints ) ;
2017-08-01 14:30:58 +02:00
// check if it can go
int bodyAS = cs [ idx ] . m_bodyAPtrAndSignBit ;
int bodyBS = cs [ idx ] . m_bodyBPtrAndSignBit ;
int bodyA = abs ( bodyAS ) ;
int bodyB = abs ( bodyBS ) ;
2019-01-03 14:26:51 +01:00
bool aIsStatic = ( bodyAS < 0 ) | | bodyAS = = staticIdx ;
bool bIsStatic = ( bodyBS < 0 ) | | bodyBS = = staticIdx ;
2017-08-01 14:30:58 +02:00
int aUnavailable = 0 ;
int bUnavailable = 0 ;
if ( ! aIsStatic )
{
2019-01-03 14:26:51 +01:00
for ( int j = 0 ; j < curBodyUsed ; j + + )
2017-08-01 14:30:58 +02:00
{
if ( bodyA = = bodyUsed2 [ j ] )
{
2019-01-03 14:26:51 +01:00
aUnavailable = 1 ;
2017-08-01 14:30:58 +02:00
break ;
}
}
}
if ( ! aUnavailable )
2019-01-03 14:26:51 +01:00
if ( ! bIsStatic )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
for ( int j = 0 ; j < curBodyUsed ; j + + )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
if ( bodyB = = bodyUsed2 [ j ] )
{
bUnavailable = 1 ;
break ;
}
2017-08-01 14:30:58 +02:00
}
}
2019-01-03 14:26:51 +01:00
if ( aUnavailable = = 0 & & bUnavailable = = 0 ) // ok
2017-08-01 14:30:58 +02:00
{
if ( ! aIsStatic )
{
bodyUsed2 [ curBodyUsed + + ] = bodyA ;
}
if ( ! bIsStatic )
{
bodyUsed2 [ curBodyUsed + + ] = bodyB ;
}
cs [ idx ] . getBatchIdx ( ) = batchIdx ;
m_data - > m_sortData [ idx ] . m_key = batchIdx ;
m_data - > m_sortData [ idx ] . m_value = idx ;
2019-01-03 14:26:51 +01:00
if ( i ! = numValidConstraints )
2017-08-01 14:30:58 +02:00
{
b3Swap ( idxSrc [ i ] , idxSrc [ numValidConstraints ] ) ;
}
numValidConstraints + + ;
{
nCurrentBatch + + ;
2019-01-03 14:26:51 +01:00
if ( nCurrentBatch = = simdWidth )
2017-08-01 14:30:58 +02:00
{
nCurrentBatch = 0 ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < curBodyUsed ; i + + )
2017-08-01 14:30:58 +02:00
bodyUsed2 [ i ] = 0 ;
curBodyUsed = 0 ;
}
}
}
}
2019-01-03 14:26:51 +01:00
batchIdx + + ;
2017-08-01 14:30:58 +02:00
}
}
{
B3_PROFILE ( " quickSort " ) ;
//m_data->m_sortData.quickSort(sortfnc);
}
{
2019-01-03 14:26:51 +01:00
B3_PROFILE ( " reorder " ) ;
2017-08-01 14:30:58 +02:00
// reorder
2019-01-03 14:26:51 +01:00
memcpy ( & m_data - > m_old [ 0 ] , cs , sizeof ( b3Contact4 ) * numConstraints ) ;
for ( int i = 0 ; i < numConstraints ; i + + )
2017-08-01 14:30:58 +02:00
{
b3Assert ( m_data - > m_sortData [ idxSrc [ i ] ] . m_value = = idxSrc [ i ] ) ;
int idx = m_data - > m_sortData [ idxSrc [ i ] ] . m_value ;
cs [ i ] = m_data - > m_old [ idx ] ;
}
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
# if defined(_DEBUG)
2019-01-03 14:26:51 +01:00
// debugPrintf( "nBatches: %d\n", batchIdx );
for ( int i = 0 ; i < numConstraints ; i + + )
{
b3Assert ( cs [ i ] . getBatchIdx ( ) ! = - 1 ) ;
}
2017-08-01 14:30:58 +02:00
# endif
return batchIdx ;
}
// Scratch storage used by sortConstraintByBatch3.
// bodyUsed is a bit set with one bit per body index (numBodies / 32 + 1 words)
// marking the bodies already claimed by the batch being filled.
b3AlignedObjectArray < int > bodyUsed ;
// curUsed lists the body indices whose bit was set, so that the matching words
// of bodyUsed can be cleared again; it is resized to 2 * simdWidth per call.
b3AlignedObjectArray < int > curUsed ;
2019-01-03 14:26:51 +01:00
inline int b3GpuPgsContactSolver : : sortConstraintByBatch3 ( b3Contact4 * cs , int numConstraints , int simdWidth , int staticIdx , int numBodies , int * batchSizes )
2017-08-01 14:30:58 +02:00
{
B3_PROFILE ( " sortConstraintByBatch3 " ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
static int maxSwaps = 0 ;
int numSwaps = 0 ;
2019-01-03 14:26:51 +01:00
curUsed . resize ( 2 * simdWidth ) ;
2017-08-01 14:30:58 +02:00
static int maxNumConstraints = 0 ;
2019-01-03 14:26:51 +01:00
if ( maxNumConstraints < numConstraints )
2017-08-01 14:30:58 +02:00
{
maxNumConstraints = numConstraints ;
//printf("maxNumConstraints = %d\n",maxNumConstraints );
}
2019-01-03 14:26:51 +01:00
int numUsedArray = numBodies / 32 + 1 ;
2017-08-01 14:30:58 +02:00
bodyUsed . resize ( numUsedArray ) ;
2019-01-03 14:26:51 +01:00
for ( int q = 0 ; q < numUsedArray ; q + + )
bodyUsed [ q ] = 0 ;
2017-08-01 14:30:58 +02:00
int curBodyUsed = 0 ;
int numIter = 0 ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
m_data - > m_sortData . resize ( 0 ) ;
m_data - > m_idxBuffer . resize ( 0 ) ;
m_data - > m_old . resize ( 0 ) ;
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
# if defined(_DEBUG)
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < numConstraints ; i + + )
2017-08-01 14:30:58 +02:00
cs [ i ] . getBatchIdx ( ) = - 1 ;
# endif
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
int numValidConstraints = 0 ;
2019-01-03 14:26:51 +01:00
// int unprocessedConstraintIndex = 0;
2017-08-01 14:30:58 +02:00
int batchIdx = 0 ;
{
B3_PROFILE ( " cpu batch innerloop " ) ;
2019-01-03 14:26:51 +01:00
while ( numValidConstraints < numConstraints )
2017-08-01 14:30:58 +02:00
{
numIter + + ;
int nCurrentBatch = 0 ;
batchSizes [ batchIdx ] = 0 ;
// clear flag
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < curBodyUsed ; i + + )
bodyUsed [ curUsed [ i ] / 32 ] = 0 ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
curBodyUsed = 0 ;
2017-08-01 14:30:58 +02:00
2019-01-03 14:26:51 +01:00
for ( int i = numValidConstraints ; i < numConstraints ; i + + )
2017-08-01 14:30:58 +02:00
{
int idx = i ;
2019-01-03 14:26:51 +01:00
b3Assert ( idx < numConstraints ) ;
2017-08-01 14:30:58 +02:00
// check if it can go
int bodyAS = cs [ idx ] . m_bodyAPtrAndSignBit ;
int bodyBS = cs [ idx ] . m_bodyBPtrAndSignBit ;
int bodyA = abs ( bodyAS ) ;
int bodyB = abs ( bodyBS ) ;
2019-01-03 14:26:51 +01:00
bool aIsStatic = ( bodyAS < 0 ) | | bodyAS = = staticIdx ;
bool bIsStatic = ( bodyBS < 0 ) | | bodyBS = = staticIdx ;
2017-08-01 14:30:58 +02:00
int aUnavailable = 0 ;
int bUnavailable = 0 ;
if ( ! aIsStatic )
{
2019-01-03 14:26:51 +01:00
aUnavailable = bodyUsed [ bodyA / 32 ] & ( 1 < < ( bodyA & 31 ) ) ;
2017-08-01 14:30:58 +02:00
}
if ( ! aUnavailable )
2019-01-03 14:26:51 +01:00
if ( ! bIsStatic )
{
bUnavailable = bodyUsed [ bodyB / 32 ] & ( 1 < < ( bodyB & 31 ) ) ;
}
if ( aUnavailable = = 0 & & bUnavailable = = 0 ) // ok
2017-08-01 14:30:58 +02:00
{
if ( ! aIsStatic )
{
2019-01-03 14:26:51 +01:00
bodyUsed [ bodyA / 32 ] | = ( 1 < < ( bodyA & 31 ) ) ;
curUsed [ curBodyUsed + + ] = bodyA ;
2017-08-01 14:30:58 +02:00
}
if ( ! bIsStatic )
{
2019-01-03 14:26:51 +01:00
bodyUsed [ bodyB / 32 ] | = ( 1 < < ( bodyB & 31 ) ) ;
curUsed [ curBodyUsed + + ] = bodyB ;
2017-08-01 14:30:58 +02:00
}
cs [ idx ] . getBatchIdx ( ) = batchIdx ;
2019-01-03 14:26:51 +01:00
if ( i ! = numValidConstraints )
2017-08-01 14:30:58 +02:00
{
2019-01-03 14:26:51 +01:00
b3Swap ( cs [ i ] , cs [ numValidConstraints ] ) ;
2017-08-01 14:30:58 +02:00
numSwaps + + ;
}
numValidConstraints + + ;
{
nCurrentBatch + + ;
2019-01-03 14:26:51 +01:00
if ( nCurrentBatch = = simdWidth )
2017-08-01 14:30:58 +02:00
{
batchSizes [ batchIdx ] + = simdWidth ;
nCurrentBatch = 0 ;
2019-01-03 14:26:51 +01:00
for ( int i = 0 ; i < curBodyUsed ; i + + )
bodyUsed [ curUsed [ i ] / 32 ] = 0 ;
2017-08-01 14:30:58 +02:00
curBodyUsed = 0 ;
}
}
}
}
2019-01-03 14:26:51 +01:00
if ( batchIdx > = B3_MAX_NUM_BATCHES )
2017-08-01 14:30:58 +02:00
{
b3Error ( " batchIdx>=B3_MAX_NUM_BATCHES " ) ;
b3Assert ( 0 ) ;
break ;
}
batchSizes [ batchIdx ] + = nCurrentBatch ;
2019-01-03 14:26:51 +01:00
batchIdx + + ;
2017-08-01 14:30:58 +02:00
}
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
# if defined(_DEBUG)
2019-01-03 14:26:51 +01:00
// debugPrintf( "nBatches: %d\n", batchIdx );
for ( int i = 0 ; i < numConstraints ; i + + )
{
b3Assert ( cs [ i ] . getBatchIdx ( ) ! = - 1 ) ;
}
2017-08-01 14:30:58 +02:00
# endif
2019-01-03 14:26:51 +01:00
batchSizes [ batchIdx ] = 0 ;
if ( maxSwaps < numSwaps )
2017-08-01 14:30:58 +02:00
{
maxSwaps = numSwaps ;
//printf("maxSwaps = %d\n", maxSwaps);
}
2019-01-03 14:26:51 +01:00
2017-08-01 14:30:58 +02:00
return batchIdx ;
}