If we swap its arguments, SkTaskGroup::batch() _is_ sk_parallel_for.
Why have two names if we can get away with one? This kills off sk_parallel_for_thread_count(), which was only used to avoid forcing a deadlock in OncePtrTest on multicore machines in single-threaded mode... a really niche use case. Instead, just don't explicitly force a race. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1552093002 Review URL: https://codereview.chromium.org/1552093002
This commit is contained in:
parent
c4a0d73e9a
commit
279c786409
@ -95,7 +95,7 @@ protected:
|
||||
sk_tool_utils::create_portable_typeface("sans-serif", SkTypeface::kItalic)};
|
||||
|
||||
for (int work = 0; work < loops; work++) {
|
||||
sk_parallel_for(16, [&](int threadIndex) {
|
||||
SkTaskGroup().batch(16, [&](int threadIndex) {
|
||||
SkPaint paint;
|
||||
paint.setAntiAlias(true);
|
||||
paint.setSubpixelText(true);
|
||||
|
@ -1093,12 +1093,13 @@ int dm_main() {
|
||||
}
|
||||
|
||||
SkTaskGroup tg;
|
||||
tg.batch([](int i){ run_test(&gThreadedTests[i]); }, gThreadedTests.count());
|
||||
tg.batch(gThreadedTests.count(), [](int i){ run_test(&gThreadedTests[i]); });
|
||||
for (int i = 0; i < kNumEnclaves; i++) {
|
||||
SkTArray<Task>* currentEnclave = &enclaves[i];
|
||||
switch(i) {
|
||||
case kAnyThread_Enclave:
|
||||
tg.batch([currentEnclave](int j) { Task::Run(&(*currentEnclave)[j]); }, currentEnclave->count());
|
||||
tg.batch(currentEnclave->count(),
|
||||
[currentEnclave](int j) { Task::Run(&(*currentEnclave)[j]); });
|
||||
break;
|
||||
case kGPU_Enclave:
|
||||
tg.add([currentEnclave](){ run_enclave_and_gpu_tests(currentEnclave); });
|
||||
|
@ -619,7 +619,7 @@ static bool contains_only_moveTo(const SkPath& path) {
|
||||
#include "SkTDArray.h"
|
||||
|
||||
static void path_fuzz_stroker(SkBitmap* bitmap, int seed) {
|
||||
sk_parallel_for(100, [&](int i) {
|
||||
SkTaskGroup().batch(100, [&](int i) {
|
||||
int localSeed = seed + i;
|
||||
|
||||
FuzzPath fuzzPath;
|
||||
|
@ -94,7 +94,7 @@ void SkMultiPictureDraw::draw(bool flush) {
|
||||
fThreadSafeDrawData[i].draw();
|
||||
}
|
||||
#else
|
||||
sk_parallel_for(fThreadSafeDrawData.count(), [&](int i) {
|
||||
SkTaskGroup().batch(fThreadSafeDrawData.count(), [&](int i) {
|
||||
fThreadSafeDrawData[i].draw();
|
||||
});
|
||||
#endif
|
||||
|
@ -54,12 +54,12 @@ public:
|
||||
gGlobal->add(fn, pending);
|
||||
}
|
||||
|
||||
static void Batch(std::function<void(int)> fn, int N, SkAtomic<int32_t>* pending) {
|
||||
static void Batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
|
||||
if (!gGlobal) {
|
||||
for (int i = 0; i < N; i++) { fn(i); }
|
||||
return;
|
||||
}
|
||||
gGlobal->batch(fn, N, pending);
|
||||
gGlobal->batch(N, fn, pending);
|
||||
}
|
||||
|
||||
static void Wait(SkAtomic<int32_t>* pending) {
|
||||
@ -142,7 +142,7 @@ private:
|
||||
fWorkAvailable.signal(1);
|
||||
}
|
||||
|
||||
void batch(std::function<void(int)> fn, int N, SkAtomic<int32_t>* pending) {
|
||||
void batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
|
||||
pending->fetch_add(+N, sk_memory_order_relaxed); // No barrier needed.
|
||||
{
|
||||
AutoLock lock(&fWorkLock);
|
||||
@ -196,7 +196,6 @@ private:
|
||||
static ThreadPool* gGlobal;
|
||||
|
||||
friend struct SkTaskGroup::Enabler;
|
||||
friend int ::sk_parallel_for_thread_count();
|
||||
};
|
||||
ThreadPool* ThreadPool::gGlobal = nullptr;
|
||||
|
||||
@ -216,13 +215,7 @@ SkTaskGroup::SkTaskGroup() : fPending(0) {}
|
||||
void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
|
||||
void SkTaskGroup::add(SkRunnable* task) { ThreadPool::Add(task, &fPending); }
|
||||
void SkTaskGroup::add(std::function<void(void)> fn) { ThreadPool::Add(fn, &fPending); }
|
||||
void SkTaskGroup::batch (std::function<void(int)> fn, int N) {
|
||||
ThreadPool::Batch(fn, N, &fPending);
|
||||
void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
|
||||
ThreadPool::Batch(N, fn, &fPending);
|
||||
}
|
||||
|
||||
int sk_parallel_for_thread_count() {
|
||||
if (ThreadPool::gGlobal != nullptr) {
|
||||
return ThreadPool::gGlobal->fThreads.count();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
void add(std::function<void(void)> fn);
|
||||
|
||||
// Add a batch of N tasks, all calling fn with different arguments.
|
||||
void batch(std::function<void(int)> fn, int N);
|
||||
void batch(int N, std::function<void(int)> fn);
|
||||
|
||||
// Block until all Tasks previously add()ed to this SkTaskGroup have run.
|
||||
// You may safely reuse this SkTaskGroup after wait() returns.
|
||||
@ -47,48 +47,4 @@ private:
|
||||
// Returns best estimate of number of CPU cores available to use.
|
||||
int sk_num_cores();
|
||||
|
||||
int sk_parallel_for_thread_count();
|
||||
|
||||
// Call f(i) for i in [0, end).
|
||||
template <typename Func>
|
||||
void sk_parallel_for(int end, const Func& f) {
|
||||
if (end <= 0) { return; }
|
||||
|
||||
struct Chunk {
|
||||
const Func* f;
|
||||
int start, end;
|
||||
};
|
||||
|
||||
// TODO(mtklein): this chunking strategy could probably use some tuning.
|
||||
int max_chunks = sk_num_cores() * 2,
|
||||
stride = (end + max_chunks - 1 ) / max_chunks,
|
||||
nchunks = (end + stride - 1 ) / stride;
|
||||
SkASSERT(nchunks <= max_chunks);
|
||||
|
||||
#if defined(GOOGLE3)
|
||||
// Stack frame size is limited in GOOGLE3.
|
||||
SkAutoSTMalloc<512, Chunk> chunks(nchunks);
|
||||
#else
|
||||
// With the chunking strategy above this won't malloc until we have a machine with >512 cores.
|
||||
SkAutoSTMalloc<1024, Chunk> chunks(nchunks);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < nchunks; i++) {
|
||||
Chunk& c = chunks[i];
|
||||
c.f = &f;
|
||||
c.start = i * stride;
|
||||
c.end = SkTMin(c.start + stride, end);
|
||||
SkASSERT(c.start < c.end); // Nothing will break if start >= end, but it's a wasted chunk.
|
||||
}
|
||||
|
||||
Chunk* chunkBase = chunks.get();
|
||||
auto run_chunk = [chunkBase](int i) {
|
||||
Chunk& c = chunkBase[i];
|
||||
for (int i = c.start; i < c.end; i++) {
|
||||
(*c.f)(i);
|
||||
}
|
||||
};
|
||||
SkTaskGroup().batch(run_chunk, nchunks);
|
||||
}
|
||||
|
||||
#endif//SkTaskGroup_DEFINED
|
||||
|
@ -98,5 +98,5 @@ DEF_TEST(Blend_premul_begets_premul, r) {
|
||||
};
|
||||
|
||||
// Parallelism helps speed things up on my desktop from ~725s to ~50s.
|
||||
sk_parallel_for(SkXfermode::kLastMode, test_mode);
|
||||
SkTaskGroup().batch(SkXfermode::kLastMode, test_mode);
|
||||
}
|
||||
|
@ -18,14 +18,7 @@ DEF_TEST(OncePtr, r) {
|
||||
return new int(5);
|
||||
};
|
||||
|
||||
SkAtomic<int> force_a_race(sk_parallel_for_thread_count());
|
||||
if (force_a_race < 1) {
|
||||
return;
|
||||
}
|
||||
sk_parallel_for(sk_num_cores()*4, [&](size_t) {
|
||||
force_a_race.fetch_add(-1);
|
||||
while (force_a_race.load() > 0);
|
||||
|
||||
SkTaskGroup().batch(sk_num_cores()*4, [&](size_t) {
|
||||
int* n = once.get(create);
|
||||
REPORTER_ASSERT(r, *n == 5);
|
||||
});
|
||||
@ -39,7 +32,7 @@ DEF_TEST(OnceNoPtr, r) {
|
||||
static SkAtomic<int> calls(0);
|
||||
|
||||
SkAtomic<int> force_a_race(sk_num_cores());
|
||||
sk_parallel_for(sk_num_cores()*4, [&](size_t) {
|
||||
SkTaskGroup().batch(sk_num_cores()*4, [&](size_t) {
|
||||
force_a_race.fetch_add(-1);
|
||||
while (force_a_race.load() > 0);
|
||||
|
||||
|
@ -32,7 +32,7 @@ SK_DECLARE_STATIC_ONCE(mt_once);
|
||||
DEF_TEST(SkOnce_Multithreaded, r) {
|
||||
int x = 0;
|
||||
// Run a bunch of tasks to be the first to add six to x.
|
||||
sk_parallel_for(1021, [&](int) {
|
||||
SkTaskGroup().batch(1021, [&](int) {
|
||||
void(*add_six)(int*) = [](int* p) { *p += 6; };
|
||||
SkOnce(&mt_once, add_six, &x);
|
||||
});
|
||||
|
@ -307,7 +307,7 @@ TestRunner::~TestRunner() {
|
||||
void TestRunner::render() {
|
||||
// TODO: this doesn't really need to use SkRunnables any more.
|
||||
// We can just write the code to run in the for-loop directly.
|
||||
sk_parallel_for(fRunnables.count(), [&](int i) {
|
||||
SkTaskGroup().batch(fRunnables.count(), [&](int i) {
|
||||
fRunnables[i]->run();
|
||||
});
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ PathOpsThreadedTestRunner::~PathOpsThreadedTestRunner() {
|
||||
}
|
||||
|
||||
void PathOpsThreadedTestRunner::render() {
|
||||
sk_parallel_for(fRunnables.count(), [&](int i) {
|
||||
SkTaskGroup().batch(fRunnables.count(), [&](int i) {
|
||||
fRunnables[i]->run();
|
||||
});
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ DEF_TEST(SkSharedMutexMultiThreaded, r) {
|
||||
for (int i = 0; i < kSharedSize; ++i) {
|
||||
shared[i] = 0;
|
||||
}
|
||||
sk_parallel_for(8, [&](int threadIndex) {
|
||||
SkTaskGroup().batch(8, [&](int threadIndex) {
|
||||
if (threadIndex % 4 != 0) {
|
||||
for (int c = 0; c < 100000; ++c) {
|
||||
sm.acquireShared();
|
||||
|
@ -171,7 +171,7 @@ SkpSkGrThreadedTestRunner::~SkpSkGrThreadedTestRunner() {
|
||||
void SkpSkGrThreadedTestRunner::render() {
|
||||
// TODO: we don't really need to be using SkRunnables here anymore.
|
||||
// We can just write the code we'd run right in the for loop.
|
||||
sk_parallel_for(fRunnables.count(), [&](int i) {
|
||||
SkTaskGroup().batch(fRunnables.count(), [&](int i) {
|
||||
fRunnables[i]->run();
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user