add dumbest possible 32-bit SkJumper backend

Everything uses a ton of stack, nothing tail-calls, and for now this is
non-Windows only.  But it does run faster than the portable serial code.

On my trashcan, running `monobench SkRasterPipeline_compile`:
   - Normal 64-bit AVX build:  43.6ns
   - Before this CL, 32-bit:  707.9ns
   - This CL:                 147.5ns

Change-Id: I4a8929570ace47193ed8925c58b70bb22d6b1447
Reviewed-on: https://skia-review.googlesource.com/20964
Reviewed-by: Mike Reed <reed@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Mike Klein 2017-06-27 10:48:20 -04:00 committed by Skia Commit-Bot
parent ed50200682
commit ca0b43803b
3 changed files with 12078 additions and 0 deletions

View File

@ -155,6 +155,14 @@ extern "C" {
#define M(st) StageFn ASM(st,ssse3_lowp);
LOWP_STAGES(M)
#undef M
#elif defined(__i386__)
StartPipelineFn ASM(start_pipeline,sse2);
StageFn ASM(just_return,sse2);
#define M(st) StageFn ASM(st,sse2);
SK_RASTER_PIPELINE_STAGES(M)
#undef M
#endif
// Portable, single-pixel stages.
@ -256,6 +264,17 @@ static SkJumper_Engine choose_engine() {
#undef M
};
}
#elif defined(__i386__)
if (1 && SkCpu::Supports(SkCpu::SSE2)) {
return {
#define M(stage) ASM(stage, sse2),
{ SK_RASTER_PIPELINE_STAGES(M) },
M(start_pipeline) M(just_return)
#undef M
};
}
#endif
return kPortable;
}

File diff suppressed because it is too large. Load Diff

View File

@ -20,6 +20,7 @@ cflags = ['-std=c++11', '-Os', '-DJUMPER',
'-momit-leaf-frame-pointer', '-ffp-contract=fast',
'-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']
x86 = [ '-m32' ]
win = ['-DWIN', '-mno-red-zone']
sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
subprocess.check_call(clang + cflags + sse2 +
@ -28,6 +29,9 @@ subprocess.check_call(clang + cflags + sse2 +
subprocess.check_call(clang + cflags + sse2 + win +
['-c', 'src/jumper/SkJumper_stages.cpp'] +
['-o', 'win_sse2.o'])
subprocess.check_call(clang + cflags + sse2 + x86 +
['-c', 'src/jumper/SkJumper_stages.cpp'] +
['-o', 'x86_sse2.o'])
ssse3 = ['-mssse3', '-mno-sse4.1']
subprocess.check_call(clang + cflags + ssse3 +
@ -206,6 +210,10 @@ parse_object_file('lowp_hsw.o', '.byte')
print 'BALIGN32'
parse_object_file('lowp_ssse3.o', '.byte')
print '#elif defined(__i386__)'
print 'BALIGN32'
parse_object_file('x86_sse2.o', '.byte')
print '#endif'
sys.stdout = open('src/jumper/SkJumper_generated_win.S', 'w')