Modify sample buffer size for larger displays.

Increases the intermediate buffer size for sample pixel indexes,
used in the sample proc function calls. If the operation is bigger
than the buffer, it's split into multiple calls, creating overhead.
This would especially impact the performance of SIMD optimizations.
Also, aligns the start address of the buffer to 16 bytes, to enable
more efficient SIMD optimizations.

Author: henrik.smiding@intel.com

Signed-off-by: Henrik Smiding <henrik.smiding@intel.com>

Committed: http://code.google.com/p/skia/source/detail?r=14825

R=reed@google.com, mtklein@google.com, tomhudson@google.com, djsollen@google.com, joakim.landberg@intel.com, scroggo@google.com, bsalomon@chromium.org, bsalomon@google.com

Author: henrik.smiding@intel.com

Review URL: https://codereview.chromium.org/240433002

git-svn-id: http://skia.googlecode.com/svn/trunk@14872 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
commit-bot@chromium.org 2014-05-23 16:05:43 +00:00
parent a3746a0f41
commit 0533146d00
3 changed files with 57 additions and 4 deletions

View File

@ -72,3 +72,34 @@ stroketext
# yunchao: https://codereview.chromium.org/292943002/
# changed the rendering result of this gm case to respect paint's alpha
vertices_80
# https://codereview.chromium.org/240433002/
# scroggo will rebaseline this test
shadertext3
pictureshader
giantbitmap_mirror_bilerp_rotate
giantbitmap_repeat_bilerp_rotate
filterbitmap_image_mandrill_512.png
filterbitmap_image_mandrill_256.png
filterbitmap_image_mandrill_128.png
filterbitmap_image_mandrill_64.png
filterbitmap_image_mandrill_32.png
filterbitmap_image_mandrill_16.png
filterbitmap_checkerboard_192_192
filterbitmap_checkerboard_32_2
filterbitmap_checkerboard_32_8
filterbitmap_checkerboard_32_32
filterbitmap_checkerboard_4_4
filterbitmap_text_10.00pt
filterbitmap_text_7.00pt
filterbitmap_text_3.00pt
downsamplebitmap_image_none_mandrill_512.png
downsamplebitmap_checkerboard_none_512_256
downsamplebitmap_text_none_72.00pt
downsamplebitmap_image_low_mandrill_512.png
downsamplebitmap_checkerboard_low_512_256
downsamplebitmap_text_low_72.00pt
downsamplebitmap_image_medium_mandrill_512.png
downsamplebitmap_checkerboard_medium_512_256
downsamplebitmap_text_medium_72.00pt
drawbitmapmatrix

View File

@ -68,6 +68,16 @@
# endif
#endif
/**
 *  SK_ALIGN(x) requests x-byte alignment for the variable or buffer it is
 *  applied to (for example a stack allocated array).
 *  The spelling is compiler specific: MSVC uses __declspec(align(x)), every
 *  other compiler is given the __attribute__((aligned(x))) form.
 */
#if !defined(_MSC_VER)
# define SK_ALIGN(x) __attribute__((aligned(x)))
#else
# define SK_ALIGN(x) __declspec(align(x))
#endif
#if !defined(SK_SUPPORT_GPU)
# define SK_SUPPORT_GPU 1
#endif

View File

@ -183,13 +183,21 @@ SkBitmapProcShader::BitmapProcShaderContext::~BitmapProcShaderContext() {
fState->~SkBitmapProcState();
}
#define BUF_MAX 128
/* Defines the buffer size for sample pixel indexes, used in the sample proc function calls.
* If the operation is bigger than the buffer, it's split into multiple calls. This split is bad
* for the performance of SIMD optimizations.
* A display in portrait mode, with a width of 720 pixels, requires a buffer size of at least 721
* to run uninterrupted in the more basic operations.
* (Formula: Width + 1 for 'scale/translate with filter' procs.
* See description of SkBitmapProcState::maxCountForBufferSize for more information.)
*/
#define BUF_MAX 1081
#define TEST_BUFFER_OVERRITEx
#ifdef TEST_BUFFER_OVERRITE
#define TEST_BUFFER_EXTRA 32
#define TEST_PATTERN 0x88888888
#define TEST_PATTERN 0x88888888
#else
#define TEST_BUFFER_EXTRA 0
#endif
@ -202,7 +210,9 @@ void SkBitmapProcShader::BitmapProcShaderContext::shadeSpan(int x, int y, SkPMCo
return;
}
uint32_t buffer[BUF_MAX + TEST_BUFFER_EXTRA];
// Align buffer to 16 bytes to enable more efficient SIMD optimizations.
uint32_t SK_ALIGN(16) buffer[BUF_MAX + TEST_BUFFER_EXTRA];
SkBitmapProcState::MatrixProc mproc = state.getMatrixProc();
SkBitmapProcState::SampleProc32 sproc = state.getSampleProc32();
int max = state.maxCountForBufferSize(sizeof(buffer[0]) * BUF_MAX);
@ -255,7 +265,9 @@ void SkBitmapProcShader::BitmapProcShaderContext::shadeSpan16(int x, int y, uint
return;
}
uint32_t buffer[BUF_MAX];
// Align buffer to 16 bytes to enable more efficient SIMD optimizations.
uint32_t SK_ALIGN(16) buffer[BUF_MAX];
SkBitmapProcState::MatrixProc mproc = state.getMatrixProc();
SkBitmapProcState::SampleProc16 sproc = state.getSampleProc16();
int max = state.maxCountForBufferSize(sizeof(buffer));