Remove m_localPosA for now (it breaks NVIDIA OpenCL, which has problems copying structs larger than 128 bytes)

This commit is contained in:
erwincoumans 2013-08-08 13:28:23 -07:00
parent 3bf003ace1
commit d158507c03
12 changed files with 17 additions and 95 deletions

View File

@ -117,7 +117,7 @@ if not _OPTIONS["ios"] then
include "../test/OpenCL/BasicInitialize" include "../test/OpenCL/BasicInitialize"
include "../test/OpenCL/KernelLaunch"-- include "../test/OpenCL/KernelLaunch"--
include "../test/OpenCL/BroadphaseCollision" -- include "../test/OpenCL/BroadphaseCollision"
-- include "../test/OpenCL/NarrowphaseCollision" -- include "../test/OpenCL/NarrowphaseCollision"
include "../test/OpenCL/ParallelPrimitives" include "../test/OpenCL/ParallelPrimitives"
include "../test/OpenCL/RadixSortBenchmark" include "../test/OpenCL/RadixSortBenchmark"

View File

@ -21,7 +21,7 @@ struct b3Contact4Data
int m_unused1; int m_unused1;
int m_unused2; int m_unused2;
b3Float4 m_localPosA; // b3Float4 m_localPosA;
}; };
inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact) inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)

View File

@ -31,7 +31,7 @@ static const char* primitiveContactsKernelsCL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"

View File

@ -63,7 +63,7 @@ static const char* satClipKernelsCL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"

View File

@ -886,7 +886,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (b3GpuBatchContacts) if (b3GpuBatchContacts)
{ {
B3_PROFILE("gpu batchContacts"); B3_PROFILE("gpu batchContacts");
maxNumBatches = 250;//250; maxNumBatches = 150;//250;
m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
} else } else
{ {

View File

@ -17,7 +17,7 @@ subject to the following restrictions:
#include "b3Solver.h" #include "b3Solver.h"
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
bool useNewBatchingKernel = true; bool useNewBatchingKernel = false;
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"

View File

@ -43,7 +43,7 @@ static const char* batchingKernelsCL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"

View File

@ -201,50 +201,10 @@ __kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __
if (i!=numValidConstraints) if (i!=numValidConstraints)
{ {
// tmp = cs[i]; tmp = cs[i];
// cs[i] = cs[numValidConstraints]; cs[i] = cs[numValidConstraints];
// cs[numValidConstraints] = tmp; cs[numValidConstraints] = tmp;
#ifdef CHECK_SIZE
tmp.m_worldPos[0] = cs[i].m_worldPos[0];
tmp.m_worldPos[1] = cs[i].m_worldPos[1];
tmp.m_worldPos[2] = cs[i].m_worldPos[2];
tmp.m_worldPos[3] = cs[i].m_worldPos[3];
tmp.m_worldNormal = cs[i].m_worldNormal;
tmp.m_restituitionCoeffCmp = cs[i].m_restituitionCoeffCmp;
tmp.m_frictionCoeffCmp = cs[i].m_frictionCoeffCmp;
tmp.m_batchIdx = cs[i].m_batchIdx;
tmp.m_bodyAPtrAndSignBit = cs[i].m_bodyAPtrAndSignBit;
tmp.m_bodyBPtrAndSignBit = cs[i].m_bodyBPtrAndSignBit;
tmp.m_childIndexA = cs[i].m_childIndexA;
tmp.m_childIndexB = cs[i].m_childIndexB;
cs[i].m_worldPos[0] = cs[numValidConstraints].m_worldPos[0];
cs[i].m_worldPos[1] = cs[numValidConstraints].m_worldPos[1];
cs[i].m_worldPos[2] = cs[numValidConstraints].m_worldPos[2];
cs[i].m_worldPos[3] = cs[numValidConstraints].m_worldPos[3];
cs[i].m_worldNormal = cs[numValidConstraints].m_worldNormal;
cs[i].m_restituitionCoeffCmp = cs[numValidConstraints].m_restituitionCoeffCmp;
cs[i].m_frictionCoeffCmp = cs[numValidConstraints].m_frictionCoeffCmp;
cs[i].m_batchIdx = cs[numValidConstraints].m_batchIdx;
cs[i].m_bodyAPtrAndSignBit = cs[numValidConstraints].m_bodyAPtrAndSignBit;
cs[i].m_bodyBPtrAndSignBit = cs[numValidConstraints].m_bodyBPtrAndSignBit;
cs[i].m_childIndexA = cs[numValidConstraints].m_childIndexA;
cs[i].m_childIndexB = cs[numValidConstraints].m_childIndexB;
cs[numValidConstraints].m_worldPos[0] = tmp.m_worldPos[0];
cs[numValidConstraints].m_worldPos[1] = tmp.m_worldPos[1];
cs[numValidConstraints].m_worldPos[2] = tmp.m_worldPos[2];
cs[numValidConstraints].m_worldPos[3] = tmp.m_worldPos[3];
cs[numValidConstraints].m_worldNormal = tmp.m_worldNormal;
cs[numValidConstraints].m_restituitionCoeffCmp = tmp.m_restituitionCoeffCmp;
cs[numValidConstraints].m_frictionCoeffCmp = tmp.m_frictionCoeffCmp;
cs[numValidConstraints].m_batchIdx = tmp.m_batchIdx;
cs[numValidConstraints].m_bodyAPtrAndSignBit = tmp.m_bodyAPtrAndSignBit;
cs[numValidConstraints].m_bodyBPtrAndSignBit = tmp.m_bodyBPtrAndSignBit;
cs[numValidConstraints].m_childIndexA = tmp.m_childIndexA;
cs[numValidConstraints].m_childIndexB = tmp.m_childIndexB;
#endif
} }

View File

@ -43,7 +43,7 @@ static const char* batchingKernelsNewCL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"
@ -202,47 +202,9 @@ static const char* batchingKernelsNewCL= \
" cs[i].m_batchIdx = batchIdx;\n" " cs[i].m_batchIdx = batchIdx;\n"
" if (i!=numValidConstraints)\n" " if (i!=numValidConstraints)\n"
" {\n" " {\n"
"// tmp = cs[i];\n" " tmp = cs[i];\n"
"// cs[i] = cs[numValidConstraints];\n" " cs[i] = cs[numValidConstraints];\n"
"// cs[numValidConstraints] = tmp;\n" " cs[numValidConstraints] = tmp;\n"
"#ifdef CHECK_SIZE\n"
" tmp.m_worldPos[0] = cs[i].m_worldPos[0];\n"
" tmp.m_worldPos[1] = cs[i].m_worldPos[1];\n"
" tmp.m_worldPos[2] = cs[i].m_worldPos[2];\n"
" tmp.m_worldPos[3] = cs[i].m_worldPos[3];\n"
" tmp.m_worldNormal = cs[i].m_worldNormal;\n"
" tmp.m_restituitionCoeffCmp = cs[i].m_restituitionCoeffCmp;\n"
" tmp.m_frictionCoeffCmp = cs[i].m_frictionCoeffCmp;\n"
" tmp.m_batchIdx = cs[i].m_batchIdx;\n"
" tmp.m_bodyAPtrAndSignBit = cs[i].m_bodyAPtrAndSignBit;\n"
" tmp.m_bodyBPtrAndSignBit = cs[i].m_bodyBPtrAndSignBit;\n"
" tmp.m_childIndexA = cs[i].m_childIndexA;\n"
" tmp.m_childIndexB = cs[i].m_childIndexB;\n"
" cs[i].m_worldPos[0] = cs[numValidConstraints].m_worldPos[0];\n"
" cs[i].m_worldPos[1] = cs[numValidConstraints].m_worldPos[1];\n"
" cs[i].m_worldPos[2] = cs[numValidConstraints].m_worldPos[2];\n"
" cs[i].m_worldPos[3] = cs[numValidConstraints].m_worldPos[3];\n"
" cs[i].m_worldNormal = cs[numValidConstraints].m_worldNormal;\n"
" cs[i].m_restituitionCoeffCmp = cs[numValidConstraints].m_restituitionCoeffCmp;\n"
" cs[i].m_frictionCoeffCmp = cs[numValidConstraints].m_frictionCoeffCmp;\n"
" cs[i].m_batchIdx = cs[numValidConstraints].m_batchIdx;\n"
" cs[i].m_bodyAPtrAndSignBit = cs[numValidConstraints].m_bodyAPtrAndSignBit;\n"
" cs[i].m_bodyBPtrAndSignBit = cs[numValidConstraints].m_bodyBPtrAndSignBit;\n"
" cs[i].m_childIndexA = cs[numValidConstraints].m_childIndexA;\n"
" cs[i].m_childIndexB = cs[numValidConstraints].m_childIndexB;\n"
" cs[numValidConstraints].m_worldPos[0] = tmp.m_worldPos[0];\n"
" cs[numValidConstraints].m_worldPos[1] = tmp.m_worldPos[1];\n"
" cs[numValidConstraints].m_worldPos[2] = tmp.m_worldPos[2];\n"
" cs[numValidConstraints].m_worldPos[3] = tmp.m_worldPos[3];\n"
" cs[numValidConstraints].m_worldNormal = tmp.m_worldNormal;\n"
" cs[numValidConstraints].m_restituitionCoeffCmp = tmp.m_restituitionCoeffCmp;\n"
" cs[numValidConstraints].m_frictionCoeffCmp = tmp.m_frictionCoeffCmp;\n"
" cs[numValidConstraints].m_batchIdx = tmp.m_batchIdx;\n"
" cs[numValidConstraints].m_bodyAPtrAndSignBit = tmp.m_bodyAPtrAndSignBit;\n"
" cs[numValidConstraints].m_bodyBPtrAndSignBit = tmp.m_bodyBPtrAndSignBit;\n"
" cs[numValidConstraints].m_childIndexA = tmp.m_childIndexA;\n"
" cs[numValidConstraints].m_childIndexB = tmp.m_childIndexB;\n"
"#endif\n"
" }\n" " }\n"
" numValidConstraints++;\n" " numValidConstraints++;\n"
" \n" " \n"

View File

@ -43,7 +43,7 @@ static const char* solverSetupCL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"

View File

@ -43,7 +43,7 @@ static const char* solverSetup2CL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"

View File

@ -43,7 +43,7 @@ static const char* solverUtilsCL= \
" int m_childIndexB;\n" " int m_childIndexB;\n"
" int m_unused1;\n" " int m_unused1;\n"
" int m_unused2;\n" " int m_unused2;\n"
" b3Float4 m_localPosA;\n" "// b3Float4 m_localPosA;\n"
"};\n" "};\n"
"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n"
"{\n" "{\n"