Fix OpenCL and CUDA EvalPatches kernels.

This commit is contained in:
Takahito Tejima 2015-05-26 11:13:30 -07:00
parent 541aeddd3a
commit 749bbf4271
5 changed files with 112 additions and 71 deletions

View File

@ -219,17 +219,16 @@ CLEvaluator::EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_patchKernel, 4, sizeof(int), &numPatchCoords);
// clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
// clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
// clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
// clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
// clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
// clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_patchKernel, 5, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 6, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 8, sizeof(cl_mem), &patchParamBuffer);
clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 11, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 12, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &patchParamBuffer);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _patchKernel, 1, NULL,

View File

@ -128,37 +128,85 @@ static void getBSplineWeights(float t, float *point, float *deriv) {
deriv[3] = 0.5f*t2;
}
static void adjustBoundaryWeights(uint bits, float *sWeights, float *tWeights) {
int boundary = ((bits >> 4) & 0xf);
if (boundary & 1) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if (boundary & 2) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if (boundary & 4) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if (boundary & 8) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
static int getDepth(uint patchBits) {
return (patchBits & 0x7);
}
static float getParamFraction(uint patchBits) {
bool nonQuadRoot = (patchBits >> 3) & 0x1;
int depth = getDepth(patchBits);
if (nonQuadRoot) {
return 1.0f / (float)( 1 << (depth-1) );
} else {
return 1.0f / (float)( 1 << depth );
}
}
static void normalizePatchCoord(uint patchBits, float *uv) {
float frac = getParamFraction(patchBits);
int iu = (patchBits >> 22) & 0x3ff;
int iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = (float)iu*frac;
float pv = (float)iv*frac;
// normalize u,v coordinates
uv[0] = (uv[0] - pu) / frac;
uv[1] = (uv[1] - pv) / frac;
}
__kernel void computePatches(__global float *src, int srcOffset,
__global float *dst, int dstOffset,
// __global float *du, int duOffset, int duStride,
// __global float *dv, int dvOffset, int dvStride,
int numPatchCoords,
__global float *du, int duOffset, int duStride,
__global float *dv, int dvOffset, int dvStride,
__global struct PatchCoord *patchCoords,
__global struct PatchArray *patchArrayBuffer,
__global int *patchIndexBuffer,
__global struct PatchParam *patchParamBuffer) {
int current = get_global_id(0);
if (current > numPatchCoords) return;
src += srcOffset;
dst += dstOffset;
// du += duOffset;
// dv += dvOffset;
if (src) src += srcOffset;
if (dst) dst += dstOffset;
if (du) du += duOffset;
if (dv) dv += dvOffset;
struct PatchCoord coord = patchCoords[current];
int patchIndex = coord.patchIndex;
// struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
struct PatchArray array = patchArrayBuffer[0];
struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
int patchType = 6; // array.x XXX: REGULAR only for now.
int patchType = 6; // array.patchType XXX: REGULAR only for now.
int numControlVertices = 16;
uint patchBits = patchParamBuffer[patchIndex].patchBits;
// vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
float dScale = 1.0f;//float(1 << getDepth(patchBits));
uint patchBits = patchParamBuffer[coord.patchIndex].patchBits;
float uv[2] = {coord.s, coord.t};
normalizePatchCoord(patchBits, uv);
float dScale = (float)(1 << getDepth(patchBits));
float wP[20], wDs[20], wDt[20];
if (patchType == 6) { // REGULAR
@ -166,8 +214,8 @@ __kernel void computePatches(__global float *src, int srcOffset,
getBSplineWeights(uv[0], sWeights, dsWeights);
getBSplineWeights(uv[1], tWeights, dtWeights);
// adjustBoundaryWeights(patchBits, sWeights, tWeights);
// adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
adjustBoundaryWeights(patchBits, sWeights, tWeights);
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
@ -180,23 +228,33 @@ __kernel void computePatches(__global float *src, int srcOffset,
// TODO: GREGORY BASIS
}
int indexBase = array.indexBase + coord.vertIndex;
struct Vertex v;
clear(&v);
#if 1
// debug
v.v[0] = uv[0];
v.v[1] = uv[1];
v.v[2] = patchIndexBuffer[current] * 0.1;
writeVertex(dst, current, &v);
return;
#endif
int indexBase = array.indexBase + coord.vertIndex;
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
if (index < 0) index = 0;
addWithWeight(&v, src, index, wP[i]);
}
writeVertex(dst, current, &v);
if (du) {
struct Vertex vdu;
clear(&vdu);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vdu, src, index, wDs[i]);
}
writeVertex(du, current, &vdu);
}
if (dv) {
struct Vertex vdv;
clear(&vdv);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vdv, src, index, wDt[i]);
}
writeVertex(dv, current, &vdv);
}
}

View File

@ -62,7 +62,8 @@ CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContex
size_t patchParamSize = patchTable.GetPatchParamSize();
cl_int err = 0;
_patchArrays = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
_patchArrays = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
numPatchArrays * sizeof(Osd::PatchArray),
(void*)patchTable.GetPatchArrayBuffer(),
&err);
@ -71,7 +72,8 @@ CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContex
return false;
}
_indexBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
_indexBuffer = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
indexSize * sizeof(int),
(void*)patchTable.GetPatchIndexBuffer(),
&err);
@ -80,7 +82,8 @@ CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContex
return false;
}
_patchParamBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
_patchParamBuffer = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
patchParamSize * sizeof(Osd::PatchParam),
(void*)patchTable.GetPatchParamBuffer(),
&err);

View File

@ -194,9 +194,8 @@ CudaEvaluator::EvalPatches(const float *src,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
src += srcDesc.offset;
if (dst)
dst += dstDesc.offset;
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
CudaEvalPatches(src, dst,
srcDesc.length, srcDesc.stride, dstDesc.stride,

View File

@ -334,24 +334,10 @@ void normalizePatchCoord(unsigned int patchBits, float *u, float *v) {
float pv = (float)iv*frac;
// normalize u,v coordinates
*u = (*u - pu) / frac,
*u = (*u - pu) / frac;
*v = (*v - pv) / frac;
}
// Far::PatchDescriptor::Type
enum Type {
NON_PATCH = 0, ///< undefined
POINTS, ///< points (useful for cage drawing)
LINES, ///< lines (useful for cage drawing)
QUADS, ///< bilinear quads-only patches
TRIANGLES, ///< bilinear triangles-only mesh
LOOP, ///< Loop patch
REGULAR, ///< feature-adaptive bicubic patches
GREGORY,
GREGORY_BOUNDARY,
GREGORY_BASIS
};
__global__ void
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
@ -371,7 +357,7 @@ computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrayBuffer[coord.arrayIndex];
int patchType = array.patchType;
int patchType = 6; // array.patchType XXX: REGULAR only for now.
int numControlVertices = 16;
// note: patchIndex is absolute.
unsigned int patchBits = patchParamBuffer[coord.patchIndex].bitField;
@ -382,7 +368,7 @@ computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
normalizePatchCoord(patchBits, &s, &t);
float dScale = (float)(1 << getDepth(patchBits));
if (patchType == REGULAR) {
if (patchType == 6) {
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
getBSplineWeights(s, sWeights, dsWeights);
getBSplineWeights(t, tWeights, dtWeights);
@ -399,15 +385,11 @@ computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else if (patchType == GREGORY_BASIS) {
// XXX: not yet implemented.
continue;
} else {
// unknown patchType
// TODO: Gregory Basis.
continue;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.vertIndex];
const int *cvs = patchIndexBuffer + array.indexBase + coord.vertIndex;
float * dstVert = dst + i * dstStride;
clear(dstVert, length);