Shader performance workaround

Changed a couple of local 4x4 matrices to global uniforms to
work around a performance problem on recent GL drivers.

There are two local 4x4 float matrices with constant initializers
in the function OsdComputePerPatchVertexBSpline(...). Changing
these from local variables to global initialized uniforms improves
performance dramatically on recent NVIDIA drivers (e.g. 361.48 on Windows).

There is no such difference with Direct3D, but this change updates
the shader code for both implementations for consistency.
This commit is contained in:
David G. Yu 2016-01-21 17:55:09 -08:00
parent 423d7eff79
commit 86026a9fe8
2 changed files with 32 additions and 32 deletions

View File

@ -1092,19 +1092,27 @@ OsdFlipMatrix(mat4 m)
m[0][3], m[0][2], m[0][1], m[0][0]);
}
// Regular BSpline to Bezier
//
// Conversion matrix used by OsdComputePerPatchVertexBSpline().
// Deliberately declared as a global initialized uniform rather than as a
// local inside that function: the local form was dramatically slower on
// recent NVIDIA GL drivers (e.g. 361.48 on Windows).
uniform mat4 Q = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
);
// Infinitely Sharp (boundary)
//
// Identical to the regular BSpline-to-Bezier matrix except for the final
// four values (0, 0, 1, 0). Referenced by OsdComputePerPatchVertexBSpline();
// presumably only on its OSD_PATCH_ENABLE_SINGLE_CREASE path -- confirm
// against the full function body.
// Deliberately declared as a global initialized uniform rather than as a
// function local to avoid a performance problem on recent NVIDIA GL drivers.
uniform mat4 Mi = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
);
// convert BSpline cv to Bezier cv
void
OsdComputePerPatchVertexBSpline(ivec3 patchParam, int ID, vec3 cv[16],
out OsdPerPatchVertexBezier result)
{
// Regular BSpline to Bezier
mat4 Q = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
);
result.patchParam = patchParam;
int i = ID%4;
@ -1112,14 +1120,6 @@ OsdComputePerPatchVertexBSpline(ivec3 patchParam, int ID, vec3 cv[16],
#if defined OSD_PATCH_ENABLE_SINGLE_CREASE
// Infinitely Sharp (boundary)
mat4 Mi = mat4(
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
);
mat4 Mj, Ms;
float sharpness = OsdGetPatchSharpness(patchParam);
if (sharpness > 0) {

View File

@ -966,19 +966,27 @@ OsdFlipMatrix(float4x4 m)
m[0][3], m[0][2], m[0][1], m[0][0]);
}
// Regular BSpline to Bezier
//
// Conversion matrix used by OsdComputePerPatchVertexBSpline().
// Declared at global scope (instead of as a function local) to stay
// consistent with the GLSL version of this shader code, where the global
// form works around a GL driver performance problem. No such performance
// difference was observed with Direct3D.
static float4x4 Q = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
};
// Infinitely Sharp (boundary)
//
// Identical to the regular BSpline-to-Bezier matrix except for the final
// four values (0, 0, 1, 0). Referenced by OsdComputePerPatchVertexBSpline();
// presumably only on its OSD_PATCH_ENABLE_SINGLE_CREASE path -- confirm
// against the full function body.
// Declared at global scope (instead of as a function local) to stay
// consistent with the GLSL version of this shader code; Direct3D itself
// showed no performance difference.
static float4x4 Mi = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
};
// convert BSpline cv to Bezier cv
void
OsdComputePerPatchVertexBSpline(int3 patchParam, int ID, float3 cv[16],
out OsdPerPatchVertexBezier result)
{
// Regular BSpline to Bezier
float4x4 Q = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 1.f/6.f, 4.f/6.f, 1.f/6.f
};
result.patchParam = patchParam;
int i = ID%4;
@ -986,14 +994,6 @@ OsdComputePerPatchVertexBSpline(int3 patchParam, int ID, float3 cv[16],
#if defined OSD_PATCH_ENABLE_SINGLE_CREASE
// Infinitely Sharp (boundary)
float4x4 Mi = {
1.f/6.f, 4.f/6.f, 1.f/6.f, 0.f,
0.f, 4.f/6.f, 2.f/6.f, 0.f,
0.f, 2.f/6.f, 4.f/6.f, 0.f,
0.f, 0.f, 1.f, 0.f
};
float4x4 Mj, Ms;
float sharpness = OsdGetPatchSharpness(patchParam);
if (sharpness > 0) {