1
0
mirror of https://github.com/microsoft/DirectXTex synced 2024-11-24 21:30:14 +00:00

BC7 GPU codec optimization of parity bit calculation

This commit is contained in:
Chuck Walbourn 2018-07-03 18:32:23 -07:00
parent 707d9acc49
commit 34c6849d74
10 changed files with 3436 additions and 2952 deletions

View File

@ -680,16 +680,16 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
if (1 == g_mode_id)
{
// in mode 1, there is only one p bit per subset
max_p = 4;
max_p = 2;
}
else
{
// in modes 3 and 7, there are two p bits per subset, one for each endpoint
max_p = 16;
max_p = 4;
}
uint rotation = 0;
uint error = MAX_UINT;
uint final_p[2] = { 0, 0 };
uint error[2] = { MAX_UINT, MAX_UINT };
for ( uint p = 0; p < max_p; p ++ )
{
endPoint[0] = endPointBackup[0];
@ -699,15 +699,15 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
{
if (g_mode_id == 1)
{
compress_endpoints1( endPoint[i], (p >> i) & 1 );
compress_endpoints1( endPoint[i], p );
}
else if (g_mode_id == 3)
{
compress_endpoints3( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
compress_endpoints3( endPoint[i], uint2(p, p >> 1) & 1 );
}
else if (g_mode_id == 7)
{
compress_endpoints7( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
compress_endpoints7( endPoint[i], uint2(p, p >> 1) & 1 );
}
}
@ -748,10 +748,12 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
step_selector = 1; // mode 1 has 3 bit index
}
uint p_error = 0;
uint p_error[2] = { 0, 0 };
for ( i = 0; i < 16; i ++ )
{
if (((bits >> i) & 0x01) == 1)
uint subset_index = (bits >> i) & 0x01;
if (subset_index == 1)
{
dotProduct = dot( span[1], shared_temp[threadBase + i].pixel - endPoint[1][0] );
color_index = (span_norm_sqr[1] <= 0 || dotProduct <= 0) ? 0
@ -764,8 +766,6 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
: ((dotProduct < span_norm_sqr[0]) ? aStep[step_selector][uint(dotProduct * 63.49999 / span_norm_sqr[0])] : aStep[step_selector][63]);
}
uint subset_index = (bits >> i) & 0x01;
pixel_r = ((64 - aWeight[step_selector][color_index]) * endPoint[subset_index][0]
+ aWeight[step_selector][color_index] * endPoint[subset_index][1] + 32) >> 6;
if (g_mode_id != 7)
@ -776,20 +776,32 @@ void TryMode137CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
uint4 pixel = shared_temp[threadBase + i].pixel;
Ensure_A_Is_Larger( pixel_r, pixel );
pixel_r -= pixel;
p_error += ComputeError(pixel_r, pixel_r);
uint pixel_error = ComputeError(pixel_r, pixel_r);
if ( subset_index == 1 )
p_error[1] += pixel_error;
else
p_error[0] += pixel_error;
}
if (p_error < error)
for ( i = 0; i < 2; i++ )
{
error = p_error;
rotation = p;
if (p_error[i] < error[i])
{
error[i] = p_error[i];
final_p[i] = p;
}
}
}
shared_temp[GI].error = error;
shared_temp[GI].error = error[0] + error[1];
shared_temp[GI].mode = g_mode_id;
shared_temp[GI].partition = partition;
shared_temp[GI].rotation = rotation; // mode 1 3 7 don't have rotation, we use rotation for p bits
// modes 1, 3 and 7 have no rotation, so the rotation field is reused to hold the selected p bits
if ( g_mode_id == 1 )
shared_temp[GI].rotation = (final_p[1] << 1) | final_p[0];
else
shared_temp[GI].rotation = (final_p[1] << 2) | final_p[0];
}
GroupMemoryBarrierWithGroupSync();
@ -955,15 +967,15 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
uint max_p;
if (0 == g_mode_id)
{
max_p = 64; // changed from 32 to 64
max_p = 4;
}
else
{
max_p = 1;
}
uint rotation = 0;
uint error = MAX_UINT;
uint final_p[3] = { 0, 0, 0 };
uint error[3] = { MAX_UINT, MAX_UINT, MAX_UINT };
for ( uint p = 0; p < max_p; p ++ )
{
endPoint[0] = endPointBackup[0];
@ -974,7 +986,7 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
{
if (0 == g_mode_id)
{
compress_endpoints0( endPoint[i], uint2(p >> (i * 2 + 0), p >> (i * 2 + 1)) & 1 );
compress_endpoints0( endPoint[i], uint2(p, p >> 1) & 1 );
}
else
{
@ -1006,7 +1018,7 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
}
}
uint p_error = 0;
uint p_error[3] = { 0, 0, 0 };
for ( i = 0; i < 16; i ++ )
{
uint subset_index = ( bits2 >> ( i * 2 ) ) & 0x03;
@ -1036,19 +1048,30 @@ void TryMode02CS( uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID ) // mode
uint4 pixel = shared_temp[threadBase + i].pixel;
Ensure_A_Is_Larger( pixel_r, pixel );
pixel_r -= pixel;
p_error += ComputeError(pixel_r, pixel_r);
uint pixel_error = ComputeError(pixel_r, pixel_r);
if ( subset_index == 2 )
p_error[2] += pixel_error;
else if ( subset_index == 1 )
p_error[1] += pixel_error;
else
p_error[0] += pixel_error;
}
if (p_error < error)
for ( i = 0; i < 3; i++ )
{
error = p_error;
rotation = p; // Borrow rotation for p
if (p_error[i] < error[i])
{
error[i] = p_error[i];
final_p[i] = p; // Borrow rotation for p
}
}
}
shared_temp[GI].error = error;
shared_temp[GI].error = error[0] + error[1] + error[2];
shared_temp[GI].partition = partition;
shared_temp[GI].rotation = rotation;
shared_temp[GI].rotation = (final_p[2] << 4) | (final_p[1] << 2) | final_p[0];
}
GroupMemoryBarrierWithGroupSync();

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff