diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index b84e48dad6..6bedeb7eeb 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -723,8 +723,6 @@ namespace SK_OPTS_NS { return {p[ix[0]], p[ix[1]], p[ix[2]], p[ix[3]]}; } - // TODO: these loads and stores are incredibly difficult to follow. - SI void load2(const uint16_t* ptr, size_t tail, U16* r, U16* g) { __m128i _01; if (__builtin_expect(tail,0)) { @@ -1034,11 +1032,6 @@ static const size_t N = sizeof(F) / sizeof(float); // We can still only pass 16 floats, so best as 4x {r,g,b,a}. #define ABI __attribute__((pcs("aapcs-vfp"))) #define JUMPER_NARROW_STAGES 1 -#elif 0 && defined(_MSC_VER) && defined(__clang__) && defined(__x86_64__) - // SysV ABI makes it very sensible to use wide stages with clang-cl. - // TODO: crashes during compilation :( - #define ABI __attribute__((sysv_abi)) - #define JUMPER_NARROW_STAGES 0 #elif defined(_MSC_VER) // Even if not vectorized, this lets us pass {r,g,b,a} as registers, // instead of {b,a} on the stack. Narrow stages work best for __vectorcall. @@ -1254,7 +1247,6 @@ SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) { // // You can adjust the expected input to [0,bias] by tweaking that parameter. SI U32 to_unorm(F v, F scale, F bias = 1.0f) { - // TODO: platform-specific implementations to to_unorm(), removing round() entirely? // Any time we use round() we probably want to use to_unorm(). return round(min(max(0, v), bias), scale); } @@ -1810,7 +1802,7 @@ STAGE(emboss, const SkRasterPipeline_EmbossCtx* ctx) { b = mad(b, mul, add); } -STAGE(byte_tables, const void* ctx) { // TODO: rename Tables SkRasterPipeline_ByteTablesCtx +STAGE(byte_tables, const void* ctx) { struct Tables { const uint8_t *r, *g, *b, *a; }; auto tables = (const Tables*)ctx; @@ -2450,7 +2442,6 @@ STAGE(gradient, const SkRasterPipeline_GradientCtx* c) { } STAGE(evenly_spaced_2_stop_gradient, const void* ctx) { - // TODO: Rename Ctx SkRasterPipeline_EvenlySpaced2StopGradientCtx. struct Ctx { float f[4], b[4]; }; auto c = (const Ctx*)ctx; @@ -2686,7 +2677,7 @@ STAGE(gauss_a_to_rgba, Ctx::None) { SI F tile(F v, SkTileMode mode, float limit, float invLimit) { // The ix_and_ptr() calls in sample() will clamp tile()'s output, so no need to clamp here. switch (mode) { - case SkTileMode::kDecal: // TODO, for now fallthrough to clamp + case SkTileMode::kDecal: case SkTileMode::kClamp: return v; case SkTileMode::kRepeat: return v - floor_(v*invLimit)*limit; case SkTileMode::kMirror: @@ -2701,7 +2692,7 @@ SI void sample(const SkRasterPipeline_SamplerCtx2* ctx, F x, F y, y = tile(y, ctx->tileY, ctx->height, ctx->invHeight); switch (ctx->ct) { - default: *r = *g = *b = *a = 0; // TODO + default: *r = *g = *b = *a = 0; break; case kRGBA_8888_SkColorType: @@ -4006,150 +3997,6 @@ STAGE_PP(srcover_rgba_8888, const SkRasterPipeline_MemoryCtx* ctx) { store_8888_(ptr, tail, r,g,b,a); } -#if defined(SK_DISABLE_LOWP_BILERP_CLAMP_CLAMP_STAGE) - static void(*bilerp_clamp_8888)(void) = nullptr; - static void(*bilinear)(void) = nullptr; -#else -STAGE_GP(bilerp_clamp_8888, const SkRasterPipeline_GatherCtx* ctx) { - // (cx,cy) are the center of our sample. - F cx = x, - cy = y; - - // All sample points are at the same fractional offset (fx,fy). - // They're the 4 corners of a logical 1x1 pixel surrounding (x,y) at (0.5,0.5) offsets. - F fx = fract(cx + 0.5f), - fy = fract(cy + 0.5f); - - // We'll accumulate the color of all four samples into {r,g,b,a} directly. - r = g = b = a = 0; - - // The first three sample points will calculate their area using math - // just like in the float code above, but the fourth will take up all the rest. - // - // Logically this is the same as doing the math for the fourth pixel too, - // but rounding error makes this a better strategy, keeping opaque opaque, etc. - // - // We can keep up to 8 bits of fractional precision without overflowing 16-bit, - // so our "1.0" area is 256. - const uint16_t bias = 256; - U16 remaining = bias; - - for (float dy = -0.5f; dy <= +0.5f; dy += 1.0f) - for (float dx = -0.5f; dx <= +0.5f; dx += 1.0f) { - // (x,y) are the coordinates of this sample point. - F x = cx + dx, - y = cy + dy; - - // ix_and_ptr() will clamp to the image's bounds for us. - const uint32_t* ptr; - U32 ix = ix_and_ptr(&ptr, ctx, x,y); - - U16 sr,sg,sb,sa; - from_8888(gather(ptr, ix), &sr,&sg,&sb,&sa); - - // In bilinear interpolation, the 4 pixels at +/- 0.5 offsets from the sample pixel center - // are combined in direct proportion to their area overlapping that logical query pixel. - // At positive offsets, the x-axis contribution to that rectangle is fx, - // or (1-fx) at negative x. Same deal for y. - F sx = (dx > 0) ? fx : 1.0f - fx, - sy = (dy > 0) ? fy : 1.0f - fy; - - U16 area = (dy == 0.5f && dx == 0.5f) ? remaining - : cast(sx * sy * bias); - for (size_t i = 0; i < N; i++) { - SkASSERT(remaining[i] >= area[i]); - } - remaining -= area; - - r += sr * area; - g += sg * area; - b += sb * area; - a += sa * area; - } - - r = (r + bias/2) / bias; - g = (g + bias/2) / bias; - b = (b + bias/2) / bias; - a = (a + bias/2) / bias; -} - -// TODO: lowp::tile() is identical to the highp tile()... share? -SI F tile(F v, SkTileMode mode, float limit, float invLimit) { - // After ix_and_ptr() will clamp the output of tile(), so we need not clamp here. - switch (mode) { - case SkTileMode::kDecal: // TODO, for now fallthrough to clamp - case SkTileMode::kClamp: return v; - case SkTileMode::kRepeat: return v - floor_(v*invLimit)*limit; - case SkTileMode::kMirror: - return abs_( (v-limit) - (limit+limit)*floor_((v-limit)*(invLimit*0.5f)) - limit ); - } - SkUNREACHABLE; -} - -SI void sample(const SkRasterPipeline_SamplerCtx2* ctx, F x, F y, - U16* r, U16* g, U16* b, U16* a) { - x = tile(x, ctx->tileX, ctx->width , ctx->invWidth ); - y = tile(y, ctx->tileY, ctx->height, ctx->invHeight); - - switch (ctx->ct) { - default: *r = *g = *b = *a = 0; // TODO - break; - - case kRGBA_8888_SkColorType: - case kBGRA_8888_SkColorType: { - const uint32_t* ptr; - U32 ix = ix_and_ptr(&ptr, ctx, x,y); - from_8888(gather(ptr, ix), r,g,b,a); - if (ctx->ct == kBGRA_8888_SkColorType) { - std::swap(*r,*b); - } - } break; - } -} - -template -SI void sampler(const SkRasterPipeline_SamplerCtx2* ctx, - F cx, F cy, const F (&wx)[D], const F (&wy)[D], - U16* r, U16* g, U16* b, U16* a) { - - float start = -0.5f*(D-1); - - const uint16_t bias = 256; - U16 remaining = bias; - - *r = *g = *b = *a = 0; - F y = cy + start; - for (int j = 0; j < D; j++, y += 1.0f) { - F x = cx + start; - for (int i = 0; i < D; i++, x += 1.0f) { - U16 R,G,B,A; - sample(ctx, x,y, &R,&G,&B,&A); - - U16 w = (i == D-1 && j == D-1) ? remaining - : cast(wx[i]*wy[j]*bias); - remaining -= w; - *r += w*R; - *g += w*G; - *b += w*B; - *a += w*A; - } - } - *r = (*r + bias/2) / bias; - *g = (*g + bias/2) / bias; - *b = (*b + bias/2) / bias; - *a = (*a + bias/2) / bias; -} - -STAGE_GP(bilinear, const SkRasterPipeline_SamplerCtx2* ctx) { - F fx = fract(x + 0.5f), - fy = fract(y + 0.5f); - const F wx[] = {1.0f - fx, fx}; - const F wy[] = {1.0f - fy, fy}; - - sampler(ctx, x,y, wx,wy, &r,&g,&b,&a); -} -#endif - // ~~~~~~ GrSwizzle stage ~~~~~~ // STAGE_PP(swizzle, void* ctx) { @@ -4179,7 +4026,7 @@ STAGE_PP(swizzle, void* ctx) { NOT_IMPLEMENTED(unbounded_set_rgb) NOT_IMPLEMENTED(unbounded_uniform_color) NOT_IMPLEMENTED(unpremul) - NOT_IMPLEMENTED(dither) // TODO + NOT_IMPLEMENTED(dither) NOT_IMPLEMENTED(load_16161616) NOT_IMPLEMENTED(load_16161616_dst) NOT_IMPLEMENTED(store_16161616) @@ -4215,7 +4062,7 @@ STAGE_PP(swizzle, void* ctx) { NOT_IMPLEMENTED(store_1010102) NOT_IMPLEMENTED(gather_1010102) NOT_IMPLEMENTED(store_u16_be) - NOT_IMPLEMENTED(byte_tables) // TODO + NOT_IMPLEMENTED(byte_tables) NOT_IMPLEMENTED(colorburn) NOT_IMPLEMENTED(colordodge) NOT_IMPLEMENTED(softlight) @@ -4225,8 +4072,8 @@ STAGE_PP(swizzle, void* ctx) { NOT_IMPLEMENTED(luminosity) NOT_IMPLEMENTED(matrix_3x3) NOT_IMPLEMENTED(matrix_3x4) - NOT_IMPLEMENTED(matrix_4x5) // TODO - NOT_IMPLEMENTED(matrix_4x3) // TODO + NOT_IMPLEMENTED(matrix_4x5) + NOT_IMPLEMENTED(matrix_4x3) NOT_IMPLEMENTED(parametric) NOT_IMPLEMENTED(gamma_) NOT_IMPLEMENTED(PQish) @@ -4234,28 +4081,30 @@ STAGE_PP(swizzle, void* ctx) { NOT_IMPLEMENTED(HLGinvish) NOT_IMPLEMENTED(rgb_to_hsl) NOT_IMPLEMENTED(hsl_to_rgb) - NOT_IMPLEMENTED(gauss_a_to_rgba) // TODO - NOT_IMPLEMENTED(mirror_x) // TODO - NOT_IMPLEMENTED(repeat_x) // TODO - NOT_IMPLEMENTED(mirror_y) // TODO - NOT_IMPLEMENTED(repeat_y) // TODO + NOT_IMPLEMENTED(gauss_a_to_rgba) + NOT_IMPLEMENTED(mirror_x) + NOT_IMPLEMENTED(repeat_x) + NOT_IMPLEMENTED(mirror_y) + NOT_IMPLEMENTED(repeat_y) NOT_IMPLEMENTED(negate_x) - NOT_IMPLEMENTED(bicubic) // TODO if I can figure out negative weights + NOT_IMPLEMENTED(bilinear) + NOT_IMPLEMENTED(bilerp_clamp_8888) + NOT_IMPLEMENTED(bicubic) NOT_IMPLEMENTED(bicubic_clamp_8888) - NOT_IMPLEMENTED(bilinear_nx) // TODO - NOT_IMPLEMENTED(bilinear_ny) // TODO - NOT_IMPLEMENTED(bilinear_px) // TODO - NOT_IMPLEMENTED(bilinear_py) // TODO - NOT_IMPLEMENTED(bicubic_n3x) // TODO - NOT_IMPLEMENTED(bicubic_n1x) // TODO - NOT_IMPLEMENTED(bicubic_p1x) // TODO - NOT_IMPLEMENTED(bicubic_p3x) // TODO - NOT_IMPLEMENTED(bicubic_n3y) // TODO - NOT_IMPLEMENTED(bicubic_n1y) // TODO - NOT_IMPLEMENTED(bicubic_p1y) // TODO - NOT_IMPLEMENTED(bicubic_p3y) // TODO - NOT_IMPLEMENTED(save_xy) // TODO - NOT_IMPLEMENTED(accumulate) // TODO + NOT_IMPLEMENTED(bilinear_nx) + NOT_IMPLEMENTED(bilinear_ny) + NOT_IMPLEMENTED(bilinear_px) + NOT_IMPLEMENTED(bilinear_py) + NOT_IMPLEMENTED(bicubic_n3x) + NOT_IMPLEMENTED(bicubic_n1x) + NOT_IMPLEMENTED(bicubic_p1x) + NOT_IMPLEMENTED(bicubic_p3x) + NOT_IMPLEMENTED(bicubic_n3y) + NOT_IMPLEMENTED(bicubic_n1y) + NOT_IMPLEMENTED(bicubic_p1y) + NOT_IMPLEMENTED(bicubic_p3y) + NOT_IMPLEMENTED(save_xy) + NOT_IMPLEMENTED(accumulate) NOT_IMPLEMENTED(xy_to_2pt_conical_well_behaved) NOT_IMPLEMENTED(xy_to_2pt_conical_strip) NOT_IMPLEMENTED(xy_to_2pt_conical_focal_on_circle)