Merge pull request #781 from davidgyu/perfFix

Tess shader performance fix
This commit is contained in:
Takahito Tejima 2016-02-05 17:00:26 -08:00
commit 4edcdbc1b7
2 changed files with 162 additions and 127 deletions

View File

@ -1092,19 +1092,27 @@ OsdFlipMatrix(mat4 m)
m[0][3], m[0][2], m[0][1], m[0][0]);
}
// Regular BSpline to Bezier
uniform mat4 Q = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
);
// Infinitely Sharp (boundary)
uniform mat4 Mi = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
);
// convert BSpline cv to Bezier cv
void
OsdComputePerPatchVertexBSpline(ivec3 patchParam, int ID, vec3 cv[16],
out OsdPerPatchVertexBezier result)
{
// Regular BSpline to Bezier
mat4 Q = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
);
result.patchParam = patchParam;
int i = ID%4;
@ -1112,15 +1120,10 @@ OsdComputePerPatchVertexBSpline(ivec3 patchParam, int ID, vec3 cv[16],
#if defined OSD_PATCH_ENABLE_SINGLE_CREASE
// Infinitely Sharp (boundary)
mat4 Mi = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
);
vec3 P = vec3(0); // 0 to 1-2^(-Sf)
vec3 P1 = vec3(0); // 1-2^(-Sf) to 1-2^(-Sc)
vec3 P2 = vec3(0); // 1-2^(-Sc) to 1
mat4 Mj, Ms;
float sharpness = OsdGetPatchSharpness(patchParam);
if (sharpness > 0) {
float Sf = floor(sharpness);
@ -1128,59 +1131,74 @@ OsdComputePerPatchVertexBSpline(ivec3 patchParam, int ID, vec3 cv[16],
float Sr = fract(sharpness);
mat4 Mf = OsdComputeMs(Sf);
mat4 Mc = OsdComputeMs(Sc);
Mj = (1-Sr) * Mf + Sr * Mi;
Ms = (1-Sr) * Mf + Sr * Mc;
mat4 Mj = (1-Sr) * Mf + Sr * Mi;
mat4 Ms = (1-Sr) * Mf + Sr * Mc;
float s0 = 1 - pow(2, -floor(sharpness));
float s1 = 1 - pow(2, -ceil(sharpness));
result.vSegments = vec2(s0, s1);
} else {
Mj = Ms = Mi;
result.vSegments = vec2(0);
}
result.P = vec3(0); // 0 to 1-2^(-Sf)
result.P1 = vec3(0); // 1-2^(-Sf) to 1-2^(-Sc)
result.P2 = vec3(0); // 1-2^(-Sc) to 1
mat4 MUi, MUj, MUs;
mat4 MVi, MVj, MVs;
MUi = MUj = MUs = Q;
MVi = MVj = MVs = Q;
mat4 MUi = Q, MUj = Q, MUs = Q;
mat4 MVi = Q, MVj = Q, MVs = Q;
int boundaryMask = OsdGetPatchBoundaryMask(patchParam);
if ((boundaryMask & 1) != 0) {
MVi = OsdFlipMatrix(Mi);
MVj = OsdFlipMatrix(Mj);
MVs = OsdFlipMatrix(Ms);
}
if ((boundaryMask & 2) != 0) {
MUi = Mi;
MUj = Mj;
MUs = Ms;
}
if ((boundaryMask & 4) != 0) {
MVi = Mi;
MVj = Mj;
MVs = Ms;
}
if ((boundaryMask & 8) != 0) {
MUi = OsdFlipMatrix(Mi);
MUj = OsdFlipMatrix(Mj);
MUs = OsdFlipMatrix(Ms);
}
vec3 Hi[4], Hj[4], Hs[4];
for (int l=0; l<4; ++l) {
Hi[l] = Hj[l] = Hs[l] = vec3(0);
for (int k=0; k<4; ++k) {
Hi[l] += MUi[i][k] * cv[l*4 + k];
Hj[l] += MUj[i][k] * cv[l*4 + k];
Hs[l] += MUs[i][k] * cv[l*4 + k];
int boundaryMask = OsdGetPatchBoundaryMask(patchParam);
if ((boundaryMask & 1) != 0) {
MVi = OsdFlipMatrix(Mi);
MVj = OsdFlipMatrix(Mj);
MVs = OsdFlipMatrix(Ms);
}
}
for (int k=0; k<4; ++k) {
result.P += MVi[j][k]*Hi[k];
result.P1 += MVj[j][k]*Hj[k];
result.P2 += MVs[j][k]*Hs[k];
if ((boundaryMask & 2) != 0) {
MUi = Mi;
MUj = Mj;
MUs = Ms;
}
if ((boundaryMask & 4) != 0) {
MVi = Mi;
MVj = Mj;
MVs = Ms;
}
if ((boundaryMask & 8) != 0) {
MUi = OsdFlipMatrix(Mi);
MUj = OsdFlipMatrix(Mj);
MUs = OsdFlipMatrix(Ms);
}
vec3 Hi[4], Hj[4], Hs[4];
for (int l=0; l<4; ++l) {
Hi[l] = Hj[l] = Hs[l] = vec3(0);
for (int k=0; k<4; ++k) {
Hi[l] += MUi[i][k] * cv[l*4 + k];
Hj[l] += MUj[i][k] * cv[l*4 + k];
Hs[l] += MUs[i][k] * cv[l*4 + k];
}
}
for (int k=0; k<4; ++k) {
P += MVi[j][k]*Hi[k];
P1 += MVj[j][k]*Hj[k];
P2 += MVs[j][k]*Hs[k];
}
result.P = P;
result.P1 = P1;
result.P2 = P2;
} else {
result.vSegments = vec2(0);
OsdComputeBSplineBoundaryPoints(cv, patchParam);
vec3 Hi[4];
for (int l=0; l<4; ++l) {
Hi[l] = vec3(0);
for (int k=0; k<4; ++k) {
Hi[l] += Q[i][k] * cv[l*4 + k];
}
}
for (int k=0; k<4; ++k) {
P += Q[j][k]*Hi[k];
}
result.P = P;
result.P1 = P;
result.P2 = P;
}
#else
OsdComputeBSplineBoundaryPoints(cv, patchParam);

View File

@ -966,19 +966,27 @@ OsdFlipMatrix(float4x4 m)
m[0][3], m[0][2], m[0][1], m[0][0]);
}
// Regular BSpline to Bezier
static float4x4 Q = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
};
// Infinitely Sharp (boundary)
static float4x4 Mi = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
};
// convert BSpline cv to Bezier cv
void
OsdComputePerPatchVertexBSpline(int3 patchParam, int ID, float3 cv[16],
out OsdPerPatchVertexBezier result)
{
// Regular BSpline to Bezier
float4x4 Q = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
};
result.patchParam = patchParam;
int i = ID%4;
@ -986,15 +994,10 @@ OsdComputePerPatchVertexBSpline(int3 patchParam, int ID, float3 cv[16],
#if defined OSD_PATCH_ENABLE_SINGLE_CREASE
// Infinitely Sharp (boundary)
float4x4 Mi = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
};
float3 P = float3(0,0,0); // 0 to 1-2^(-Sf)
float3 P1 = float3(0,0,0); // 1-2^(-Sf) to 1-2^(-Sc)
float3 P2 = float3(0,0,0); // 1-2^(-Sc) to 1
float4x4 Mj, Ms;
float sharpness = OsdGetPatchSharpness(patchParam);
if (sharpness > 0) {
float Sf = floor(sharpness);
@ -1002,60 +1005,74 @@ OsdComputePerPatchVertexBSpline(int3 patchParam, int ID, float3 cv[16],
float Sr = frac(sharpness);
float4x4 Mf = OsdComputeMs(Sf);
float4x4 Mc = OsdComputeMs(Sc);
Mj = (1-Sr) * Mf + Sr * Mi;
Ms = (1-Sr) * Mf + Sr * Mc;
float4x4 Mj = (1-Sr) * Mf + Sr * Mi;
float4x4 Ms = (1-Sr) * Mf + Sr * Mc;
float s0 = 1 - pow(2, -floor(sharpness));
float s1 = 1 - pow(2, -ceil(sharpness));
result.vSegments = float2(s0, s1);
} else {
Mj = Ms = Mi;
result.vSegments = float2(0, 0);
}
result.P = float3(0,0,0); // 0 to 1-2^(-Sf)
result.P1 = float3(0,0,0); // 1-2^(-Sf) to 1-2^(-Sc)
result.P2 = float3(0,0,0); // 1-2^(-Sc) to 1
float4x4 MUi = Q, MUj = Q, MUs = Q;
float4x4 MVi = Q, MVj = Q, MVs = Q;
float4x4 MUi, MUj, MUs;
float4x4 MVi, MVj, MVs;
MUi = MUj = MUs = Q;
MVi = MVj = MVs = Q;
int boundaryMask = OsdGetPatchBoundaryMask(patchParam);
if ((boundaryMask & 1) != 0) {
MVi = OsdFlipMatrix(Mi);
MVj = OsdFlipMatrix(Mj);
MVs = OsdFlipMatrix(Ms);
}
if ((boundaryMask & 2) != 0) {
MUi = Mi;
MUj = Mj;
MUs = Ms;
}
if ((boundaryMask & 4) != 0) {
MVi = Mi;
MVj = Mj;
MVs = Ms;
}
if ((boundaryMask & 8) != 0) {
MUi = OsdFlipMatrix(Mi);
MUj = OsdFlipMatrix(Mj);
MUs = OsdFlipMatrix(Ms);
}
float3 Hi[4], Hj[4], Hs[4];
for (int l=0; l<4; ++l) {
Hi[l] = Hj[l] = Hs[l] = float3(0,0,0);
for (int k=0; k<4; ++k) {
Hi[l] += MUi[i][k] * cv[l*4 + k];
Hj[l] += MUj[i][k] * cv[l*4 + k];
Hs[l] += MUs[i][k] * cv[l*4 + k];
int boundaryMask = OsdGetPatchBoundaryMask(patchParam);
if ((boundaryMask & 1) != 0) {
MVi = OsdFlipMatrix(Mi);
MVj = OsdFlipMatrix(Mj);
MVs = OsdFlipMatrix(Ms);
}
}
for (int k=0; k<4; ++k) {
result.P += MVi[j][k]*Hi[k];
result.P1 += MVj[j][k]*Hj[k];
result.P2 += MVs[j][k]*Hs[k];
if ((boundaryMask & 2) != 0) {
MUi = Mi;
MUj = Mj;
MUs = Ms;
}
if ((boundaryMask & 4) != 0) {
MVi = Mi;
MVj = Mj;
MVs = Ms;
}
if ((boundaryMask & 8) != 0) {
MUi = OsdFlipMatrix(Mi);
MUj = OsdFlipMatrix(Mj);
MUs = OsdFlipMatrix(Ms);
}
float3 Hi[4], Hj[4], Hs[4];
for (int l=0; l<4; ++l) {
Hi[l] = Hj[l] = Hs[l] = float3(0,0,0);
for (int k=0; k<4; ++k) {
Hi[l] += MUi[i][k] * cv[l*4 + k];
Hj[l] += MUj[i][k] * cv[l*4 + k];
Hs[l] += MUs[i][k] * cv[l*4 + k];
}
}
for (int k=0; k<4; ++k) {
P += MVi[j][k]*Hi[k];
P1 += MVj[j][k]*Hj[k];
P2 += MVs[j][k]*Hs[k];
}
result.P = P;
result.P1 = P1;
result.P2 = P2;
} else {
result.vSegments = float2(0, 0);
OsdComputeBSplineBoundaryPoints(cv, patchParam);
float3 Hi[4];
for (int l=0; l<4; ++l) {
Hi[l] = float3(0,0,0);
for (int k=0; k<4; ++k) {
Hi[l] += Q[i][k] * cv[l*4 + k];
}
}
for (int k=0; k<4; ++k) {
P += Q[j][k]*Hi[k];
}
result.P = P;
result.P1 = P;
result.P2 = P;
}
#else
OsdComputeBSplineBoundaryPoints(cv, patchParam);