add dumbest possible 32-bit SkJumper backend
Everything uses a ton of stack, nothing tail calls, and for now this is non-Windows only. But it does run faster than the portable serial code.

On my trashcan, running `monobench SkRasterPipeline_compile`:
  - Normal 64-bit AVX build: 43.6ns
  - Before this CL, 32-bit: 707.9ns
  - This CL: 147.5ns

Change-Id: I4a8929570ace47193ed8925c58b70bb22d6b1447
Reviewed-on: https://skia-review.googlesource.com/20964
Reviewed-by: Mike Reed <reed@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
ed50200682
commit
ca0b43803b
@ -155,6 +155,14 @@ extern "C" {
|
|||||||
#define M(st) StageFn ASM(st,ssse3_lowp);
|
#define M(st) StageFn ASM(st,ssse3_lowp);
|
||||||
LOWP_STAGES(M)
|
LOWP_STAGES(M)
|
||||||
#undef M
|
#undef M
|
||||||
|
|
||||||
|
#elif defined(__i386__)
|
||||||
|
StartPipelineFn ASM(start_pipeline,sse2);
|
||||||
|
StageFn ASM(just_return,sse2);
|
||||||
|
#define M(st) StageFn ASM(st,sse2);
|
||||||
|
SK_RASTER_PIPELINE_STAGES(M)
|
||||||
|
#undef M
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Portable, single-pixel stages.
|
// Portable, single-pixel stages.
|
||||||
@ -256,6 +264,17 @@ static SkJumper_Engine choose_engine() {
|
|||||||
#undef M
|
#undef M
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(__i386__)
|
||||||
|
if (1 && SkCpu::Supports(SkCpu::SSE2)) {
|
||||||
|
return {
|
||||||
|
#define M(stage) ASM(stage, sse2),
|
||||||
|
{ SK_RASTER_PIPELINE_STAGES(M) },
|
||||||
|
M(start_pipeline) M(just_return)
|
||||||
|
#undef M
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
return kPortable;
|
return kPortable;
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -20,6 +20,7 @@ cflags = ['-std=c++11', '-Os', '-DJUMPER',
|
|||||||
'-momit-leaf-frame-pointer', '-ffp-contract=fast',
|
'-momit-leaf-frame-pointer', '-ffp-contract=fast',
|
||||||
'-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']
|
'-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']
|
||||||
|
|
||||||
|
x86 = [ '-m32' ]
|
||||||
win = ['-DWIN', '-mno-red-zone']
|
win = ['-DWIN', '-mno-red-zone']
|
||||||
sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
|
sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
|
||||||
subprocess.check_call(clang + cflags + sse2 +
|
subprocess.check_call(clang + cflags + sse2 +
|
||||||
@ -28,6 +29,9 @@ subprocess.check_call(clang + cflags + sse2 +
|
|||||||
subprocess.check_call(clang + cflags + sse2 + win +
|
subprocess.check_call(clang + cflags + sse2 + win +
|
||||||
['-c', 'src/jumper/SkJumper_stages.cpp'] +
|
['-c', 'src/jumper/SkJumper_stages.cpp'] +
|
||||||
['-o', 'win_sse2.o'])
|
['-o', 'win_sse2.o'])
|
||||||
|
subprocess.check_call(clang + cflags + sse2 + x86 +
|
||||||
|
['-c', 'src/jumper/SkJumper_stages.cpp'] +
|
||||||
|
['-o', 'x86_sse2.o'])
|
||||||
|
|
||||||
ssse3 = ['-mssse3', '-mno-sse4.1']
|
ssse3 = ['-mssse3', '-mno-sse4.1']
|
||||||
subprocess.check_call(clang + cflags + ssse3 +
|
subprocess.check_call(clang + cflags + ssse3 +
|
||||||
@ -206,6 +210,10 @@ parse_object_file('lowp_hsw.o', '.byte')
|
|||||||
print 'BALIGN32'
|
print 'BALIGN32'
|
||||||
parse_object_file('lowp_ssse3.o', '.byte')
|
parse_object_file('lowp_ssse3.o', '.byte')
|
||||||
|
|
||||||
|
print '#elif defined(__i386__)'
|
||||||
|
print 'BALIGN32'
|
||||||
|
parse_object_file('x86_sse2.o', '.byte')
|
||||||
|
|
||||||
print '#endif'
|
print '#endif'
|
||||||
|
|
||||||
sys.stdout = open('src/jumper/SkJumper_generated_win.S', 'w')
|
sys.stdout = open('src/jumper/SkJumper_generated_win.S', 'w')
|
||||||
|
Loading…
Reference in New Issue
Block a user