[smooth] Faster bitmap sweeping.
Selecting the fill rule or checking the direct mode each time we call `gray_hline' is sub-optimal. This effectively splits the direct mode into a separate code path while inlining `gray_hline' and saving 5-7% of rendering time.

* src/smooth/ftgrays.c (gray_hline): Eliminated in favor of...
(FT_FILL_RULE, FT_GRAY_SET): ... these new macros...
(gray_sweep): ... inlined here.
(gray_sweep_direct): New function that handles the direct span buffer.
(gray_TWorker): Remove the span buffer.
(gray_raster_render, gray_convert_glyph): Updated.
This commit is contained in:
parent
b070264521
commit
8f43d324d1
16
ChangeLog
16
ChangeLog
@ -1,3 +1,19 @@
|
||||
2021-05-07 Alexei Podtelezhnikov <apodtele@gmail.com>
|
||||
|
||||
[smooth] Faster bitmap sweeping.
|
||||
|
||||
Selecting the fill rule or checking the direct mode each time we call
|
||||
`gray_hline' is sub-optimal. This effectively splits the direct mode
|
||||
into a separate code path while inlining `gray_hline' and saving 5-7%
|
||||
of rendering time.
|
||||
|
||||
* src/smooth/ftgrays.c (gray_hline): Eliminated in favor of...
|
||||
(FT_FILL_RULE, FT_GRAY_SET): ... these new macros...
|
||||
(gray_sweep): ... inlined here.
|
||||
(gray_sweep_direct): New function that handles the direct span buffer.
|
||||
(gray_TWorker): Remove the span buffer.
|
||||
(gray_raster_render, gray_convert_glyph): Updated.
|
||||
|
||||
2021-05-10 Alexei Podtelezhnikov <apodtele@gmail.com>
|
||||
|
||||
* src/smooth/ftgrays.c (gray_hline): Simplify even-odd computations.
|
||||
|
@ -392,6 +392,42 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
( sizeof( long ) * FT_CHAR_BIT - PIXEL_BITS ) )
|
||||
|
||||
|
||||
/* Scale area and apply fill rule to calculate the coverage byte. */
|
||||
/* The top fill bit is used for the non-zero rule. The eighth */
|
||||
/* fill bit is used for the even-odd rule. The higher coverage */
|
||||
/* bytes are either clamped for the non-zero rule or discarded */
|
||||
/* later for the even-odd rule. */
|
||||
#define FT_FILL_RULE( coverage, area, fill ) \
|
||||
FT_BEGIN_STMNT \
|
||||
coverage = (int)( area >> ( PIXEL_BITS * 2 + 1 - 8 ) ); \
|
||||
if ( coverage & fill ) \
|
||||
coverage = ~coverage; \
|
||||
if ( coverage > 255 && fill & INT_MIN ) \
|
||||
coverage = 255; \
|
||||
FT_END_STMNT
|
||||
|
||||
|
||||
/* It is faster to write small spans byte-by-byte than calling */
|
||||
/* `memset'. This is mainly due to the cost of the function call. */
|
||||
#define FT_GRAY_SET( d, s, count ) \
|
||||
FT_BEGIN_STMNT \
|
||||
unsigned char* q = d; \
|
||||
unsigned char c = (unsigned char)s; \
|
||||
switch ( count ) \
|
||||
{ \
|
||||
case 7: *q++ = c; /* fall through */ \
|
||||
case 6: *q++ = c; /* fall through */ \
|
||||
case 5: *q++ = c; /* fall through */ \
|
||||
case 4: *q++ = c; /* fall through */ \
|
||||
case 3: *q++ = c; /* fall through */ \
|
||||
case 2: *q++ = c; /* fall through */ \
|
||||
case 1: *q = c; /* fall through */ \
|
||||
case 0: break; \
|
||||
default: FT_MEM_SET( d, s, count ); \
|
||||
} \
|
||||
FT_END_STMNT
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* TYPE DEFINITIONS
|
||||
@ -463,8 +499,6 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
|
||||
FT_Raster_Span_Func render_span;
|
||||
void* render_span_data;
|
||||
FT_Span spans[FT_MAX_GRAY_SPANS];
|
||||
int num_spans;
|
||||
|
||||
} gray_TWorker, *gray_PWorker;
|
||||
|
||||
@ -1171,94 +1205,62 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
|
||||
|
||||
static void
|
||||
gray_hline( RAS_ARG_ TCoord x,
|
||||
TCoord y,
|
||||
TArea coverage,
|
||||
TCoord acount )
|
||||
gray_sweep( RAS_ARG )
|
||||
{
|
||||
/* scale the coverage from 0..(ONE_PIXEL*ONE_PIXEL*2) to 0..256 */
|
||||
coverage >>= PIXEL_BITS * 2 + 1 - 8;
|
||||
int fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN;
|
||||
int coverage;
|
||||
int y;
|
||||
|
||||
/* compute the line's coverage depending on the outline fill rule */
|
||||
if ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL )
|
||||
|
||||
for ( y = ras.min_ey; y < ras.max_ey; y++ )
|
||||
{
|
||||
if ( coverage & 256 ) /* odd bit */
|
||||
coverage = ~coverage;
|
||||
PCell cell = ras.ycells[y - ras.min_ey];
|
||||
TCoord x = ras.min_ex;
|
||||
TArea cover = 0;
|
||||
TArea area;
|
||||
|
||||
/* higher bits discarded below */
|
||||
}
|
||||
else /* default non-zero winding rule */
|
||||
{
|
||||
if ( coverage < 0 )
|
||||
coverage = ~coverage; /* the same as -coverage - 1 */
|
||||
|
||||
if ( coverage >= 256 )
|
||||
coverage = 255;
|
||||
}
|
||||
|
||||
if ( ras.num_spans >= 0 ) /* for FT_RASTER_FLAG_DIRECT only */
|
||||
{
|
||||
FT_Span* span = ras.spans + ras.num_spans++;
|
||||
unsigned char* line = ras.target.origin - ras.target.pitch * y;
|
||||
|
||||
|
||||
span->x = (short)x;
|
||||
span->len = (unsigned short)acount;
|
||||
span->coverage = (unsigned char)coverage;
|
||||
|
||||
if ( ras.num_spans == FT_MAX_GRAY_SPANS )
|
||||
for ( ; cell != NULL; cell = cell->next )
|
||||
{
|
||||
/* flush the span buffer and reset the count */
|
||||
ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data );
|
||||
ras.num_spans = 0;
|
||||
if ( cover != 0 && cell->x > x )
|
||||
{
|
||||
FT_FILL_RULE( coverage, cover, fill );
|
||||
FT_GRAY_SET( line + x, coverage, cell->x - x );
|
||||
}
|
||||
|
||||
cover += (TArea)cell->cover * ( ONE_PIXEL * 2 );
|
||||
area = cover - cell->area;
|
||||
|
||||
if ( area != 0 && cell->x >= ras.min_ex )
|
||||
{
|
||||
FT_FILL_RULE( coverage, area, fill );
|
||||
line[cell->x] = (unsigned char)coverage;
|
||||
}
|
||||
|
||||
x = cell->x + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned char* q = ras.target.origin - ras.target.pitch * y + x;
|
||||
unsigned char c = (unsigned char)coverage;
|
||||
|
||||
|
||||
/* For small-spans it is faster to do it by ourselves than
|
||||
* calling `memset'. This is mainly due to the cost of the
|
||||
* function call.
|
||||
*/
|
||||
switch ( acount )
|
||||
if ( cover != 0 ) /* only if cropped */
|
||||
{
|
||||
case 7:
|
||||
*q++ = c;
|
||||
/* fall through */
|
||||
case 6:
|
||||
*q++ = c;
|
||||
/* fall through */
|
||||
case 5:
|
||||
*q++ = c;
|
||||
/* fall through */
|
||||
case 4:
|
||||
*q++ = c;
|
||||
/* fall through */
|
||||
case 3:
|
||||
*q++ = c;
|
||||
/* fall through */
|
||||
case 2:
|
||||
*q++ = c;
|
||||
/* fall through */
|
||||
case 1:
|
||||
*q = c;
|
||||
/* fall through */
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
FT_MEM_SET( q, c, acount );
|
||||
FT_FILL_RULE( coverage, cover, fill );
|
||||
FT_GRAY_SET( line + x, coverage, ras.max_ex - x );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
gray_sweep( RAS_ARG )
|
||||
gray_sweep_direct( RAS_ARG )
|
||||
{
|
||||
int fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN;
|
||||
int coverage;
|
||||
int y;
|
||||
|
||||
FT_Span span[FT_MAX_GRAY_SPANS];
|
||||
int n = 0;
|
||||
|
||||
|
||||
for ( y = ras.min_ey; y < ras.max_ey; y++ )
|
||||
{
|
||||
@ -1271,25 +1273,59 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
for ( ; cell != NULL; cell = cell->next )
|
||||
{
|
||||
if ( cover != 0 && cell->x > x )
|
||||
gray_hline( RAS_VAR_ x, y, cover, cell->x - x );
|
||||
{
|
||||
FT_FILL_RULE( coverage, cover, fill );
|
||||
|
||||
span[n].coverage = (unsigned char)coverage;
|
||||
span[n].x = (short)x;
|
||||
span[n].len = (unsigned short)( cell->x - x );
|
||||
|
||||
if ( ++n == FT_MAX_GRAY_SPANS )
|
||||
{
|
||||
/* flush the span buffer and reset the count */
|
||||
ras.render_span( y, n, span, ras.render_span_data );
|
||||
n = 0;
|
||||
}
|
||||
}
|
||||
|
||||
cover += (TArea)cell->cover * ( ONE_PIXEL * 2 );
|
||||
area = cover - cell->area;
|
||||
|
||||
if ( area != 0 && cell->x >= ras.min_ex )
|
||||
gray_hline( RAS_VAR_ cell->x, y, area, 1 );
|
||||
{
|
||||
FT_FILL_RULE( coverage, area, fill );
|
||||
|
||||
span[n].coverage = (unsigned char)coverage;
|
||||
span[n].x = (short)cell->x;
|
||||
span[n].len = 1;
|
||||
|
||||
if ( ++n == FT_MAX_GRAY_SPANS )
|
||||
{
|
||||
/* flush the span buffer and reset the count */
|
||||
ras.render_span( y, n, span, ras.render_span_data );
|
||||
n = 0;
|
||||
}
|
||||
}
|
||||
|
||||
x = cell->x + 1;
|
||||
}
|
||||
|
||||
if ( cover != 0 )
|
||||
gray_hline( RAS_VAR_ x, y, cover, ras.max_ex - x );
|
||||
if ( cover != 0 ) /* only if cropped */
|
||||
{
|
||||
FT_FILL_RULE( coverage, cover, fill );
|
||||
|
||||
if ( ras.num_spans > 0 ) /* for FT_RASTER_FLAG_DIRECT only */
|
||||
span[n].coverage = (unsigned char)coverage;
|
||||
span[n].x = (short)x;
|
||||
span[n].len = (unsigned short)( ras.max_ex - x );
|
||||
|
||||
++n;
|
||||
}
|
||||
|
||||
if ( n )
|
||||
{
|
||||
/* flush the span buffer and reset the count */
|
||||
ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data );
|
||||
ras.num_spans = 0;
|
||||
ras.render_span( y, n, span, ras.render_span_data );
|
||||
n = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1688,7 +1724,10 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
|
||||
if ( !error )
|
||||
{
|
||||
gray_sweep( RAS_VAR );
|
||||
if ( ras.render_span ) /* for FT_RASTER_FLAG_DIRECT only */
|
||||
gray_sweep_direct( RAS_VAR );
|
||||
else
|
||||
gray_sweep( RAS_VAR );
|
||||
band--;
|
||||
continue;
|
||||
}
|
||||
@ -1757,7 +1796,6 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
|
||||
ras.render_span = (FT_Raster_Span_Func)params->gray_spans;
|
||||
ras.render_span_data = params->user;
|
||||
ras.num_spans = 0;
|
||||
|
||||
ras.min_ex = params->clip_box.xMin;
|
||||
ras.min_ey = params->clip_box.yMin;
|
||||
@ -1787,7 +1825,6 @@ typedef ptrdiff_t FT_PtrDist;
|
||||
|
||||
ras.render_span = (FT_Raster_Span_Func)NULL;
|
||||
ras.render_span_data = NULL;
|
||||
ras.num_spans = -1; /* invalid */
|
||||
|
||||
ras.min_ex = 0;
|
||||
ras.min_ey = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user