2012-06-19 15:40:27 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2012 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
2019-04-23 17:05:21 +00:00
|
|
|
#include "bench/Benchmark.h"
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
#include "include/private/GrTypesPriv.h"
|
2019-04-23 17:05:21 +00:00
|
|
|
#include "include/utils/SkRandom.h"
|
|
|
|
#include "src/gpu/GrMemoryPool.h"
|
2018-09-19 15:31:27 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// sizeof is a multiple of GrMemoryPool::kAlignment
|
|
|
|
struct Aligned {
|
|
|
|
int fStuff[12]; // Will align on 4, 8, or 16 alignment
|
|
|
|
};
|
|
|
|
static_assert(sizeof(Aligned) % GrMemoryPool::kAlignment == 0);
|
2012-06-19 15:40:27 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// sizeof is not a multiple of GrMemoryPool::kAlignment
|
|
|
|
struct Unaligned {
|
|
|
|
int fStuff[9]; // Will not align on 8 or 16, but will on 4...
|
2012-06-19 15:40:27 +00:00
|
|
|
};
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
static_assert(sizeof(Unaligned) % GrMemoryPool::kAlignment != 0);
|
|
|
|
|
|
|
|
// All benchmarks create and delete the same number of objects. The key difference is the order
|
|
|
|
// of operations, the size of the objects being allocated, and the size of the pool.
|
|
|
|
typedef void (*RunBenchProc)(GrMemoryPool*, int);
|
|
|
|
|
|
|
|
// N objects are created, and then destroyed in reverse order (fully unwinding the cursor within
|
|
|
|
// each block of the memory pool).
|
|
|
|
template <typename T>
|
|
|
|
static void run_stack(GrMemoryPool* pool, int loops) {
|
|
|
|
static const int kMaxObjects = 4 * (1 << 10);
|
|
|
|
T* objs[kMaxObjects];
|
|
|
|
for (int i = 0; i < loops; ++i) {
|
|
|
|
// Push N objects into the pool (or heap if pool is null)
|
|
|
|
for (int j = 0; j < kMaxObjects; ++j) {
|
|
|
|
objs[j] = pool ? (T*) pool->allocate(sizeof(T)) : new T;
|
|
|
|
}
|
|
|
|
// Pop N objects off in LIFO order
|
|
|
|
for (int j = kMaxObjects - 1; j >= 0; --j) {
|
|
|
|
if (pool) {
|
|
|
|
pool->release(objs[j]);
|
|
|
|
} else {
|
|
|
|
delete objs[j];
|
|
|
|
}
|
|
|
|
}
|
2012-06-19 15:40:27 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// Everything has been cleaned up for the next loop
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// N objects are created, and then destroyed in creation order (is not able to unwind the cursor
|
|
|
|
// within each block, but can reclaim the block once everything is destroyed).
|
|
|
|
template <typename T>
|
|
|
|
static void run_queue(GrMemoryPool* pool, int loops) {
|
|
|
|
static const int kMaxObjects = 4 * (1 << 10);
|
|
|
|
T* objs[kMaxObjects];
|
|
|
|
for (int i = 0; i < loops; ++i) {
|
|
|
|
// Push N objects into the pool (or heap if pool is null)
|
|
|
|
for (int j = 0; j < kMaxObjects; ++j) {
|
|
|
|
objs[j] = pool ? (T*) pool->allocate(sizeof(T)) : new T;
|
|
|
|
}
|
|
|
|
// Pop N objects off in FIFO order
|
|
|
|
for (int j = 0; j < kMaxObjects; ++j) {
|
|
|
|
if (pool) {
|
|
|
|
pool->release(objs[j]);
|
|
|
|
} else {
|
|
|
|
delete objs[j];
|
|
|
|
}
|
|
|
|
}
|
2013-11-21 06:21:58 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// Everything has been cleaned up for the next loop
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// N objects are created and immediately destroyed, so space at the start of the pool should be
|
|
|
|
// immediately reclaimed.
|
|
|
|
template <typename T>
|
|
|
|
static void run_pushpop(GrMemoryPool* pool, int loops) {
|
|
|
|
static const int kMaxObjects = 4 * (1 << 10);
|
|
|
|
T* objs[kMaxObjects];
|
|
|
|
for (int i = 0; i < loops; ++i) {
|
|
|
|
// Push N objects into the pool (or heap if pool is null)
|
|
|
|
for (int j = 0; j < kMaxObjects; ++j) {
|
|
|
|
if (pool) {
|
|
|
|
objs[j] = (T*) pool->allocate(sizeof(T));
|
|
|
|
pool->release(objs[j]);
|
2012-06-19 15:40:27 +00:00
|
|
|
} else {
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
objs[j] = new T;
|
|
|
|
delete objs[j];
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// Everything has been cleaned up for the next loop
|
2019-12-12 15:58:47 +00:00
|
|
|
}
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// N object creations and destructions are invoked in random order.
|
|
|
|
template <typename T>
|
|
|
|
static void run_random(GrMemoryPool* pool, int loops) {
|
|
|
|
static const int kMaxObjects = 4 * (1 << 10);
|
|
|
|
T* objs[kMaxObjects];
|
|
|
|
for (int i = 0; i < kMaxObjects; ++i) {
|
|
|
|
objs[i] = nullptr;
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
2013-11-21 06:21:58 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
auto del = [&](int j) {
|
|
|
|
// Delete
|
|
|
|
if (pool) {
|
|
|
|
pool->release(objs[j]);
|
|
|
|
} else {
|
|
|
|
delete objs[j];
|
|
|
|
}
|
|
|
|
objs[j] = nullptr;
|
|
|
|
};
|
2012-06-19 15:40:27 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
SkRandom r;
|
|
|
|
for (int i = 0; i < loops; ++i) {
|
|
|
|
// Execute 2*kMaxObjects operations, which should average to N create and N destroy,
|
|
|
|
// followed by a small number of remaining deletions.
|
|
|
|
for (int j = 0; j < 2 * kMaxObjects; ++j) {
|
|
|
|
int k = r.nextRangeU(0, kMaxObjects-1);
|
|
|
|
if (objs[k]) {
|
|
|
|
del(k);
|
2012-06-19 15:40:27 +00:00
|
|
|
} else {
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// Create
|
|
|
|
objs[k] = pool ? (T*) pool->allocate(sizeof(T)) : new T;
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
// Ensure everything is null for the next loop
|
|
|
|
for (int j = 0; j < kMaxObjects; ++j) {
|
|
|
|
if (objs[j]) {
|
|
|
|
del(j);
|
|
|
|
}
|
2019-12-12 15:58:47 +00:00
|
|
|
}
|
|
|
|
}
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
}
|
2014-02-26 23:01:57 +00:00
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
class GrMemoryPoolBench : public Benchmark {
|
2012-06-19 15:40:27 +00:00
|
|
|
public:
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
GrMemoryPoolBench(const char* name, RunBenchProc proc, int poolSize)
|
|
|
|
: fPoolSize(poolSize)
|
|
|
|
, fProc(proc) {
|
|
|
|
fName.printf("grmemorypool_%s", name);
|
|
|
|
}
|
|
|
|
|
2015-03-26 01:17:31 +00:00
|
|
|
bool isSuitableFor(Backend backend) override {
|
2013-11-21 06:21:58 +00:00
|
|
|
return backend == kNonRendering_Backend;
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
2013-11-21 06:21:58 +00:00
|
|
|
|
2012-06-19 15:40:27 +00:00
|
|
|
protected:
|
2015-07-13 13:18:39 +00:00
|
|
|
const char* onGetName() override {
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
return fName.c_str();
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
|
|
|
|
2015-10-01 16:43:39 +00:00
|
|
|
void onDraw(int loops, SkCanvas*) override {
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
std::unique_ptr<GrMemoryPool> pool;
|
|
|
|
if (fPoolSize > 0) {
|
|
|
|
pool = GrMemoryPool::Make(fPoolSize, fPoolSize);
|
|
|
|
} // else keep it null to test regular new/delete performance
|
|
|
|
|
|
|
|
fProc(pool.get(), loops);
|
2012-06-19 15:40:27 +00:00
|
|
|
}
|
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
SkString fName;
|
|
|
|
int fPoolSize;
|
|
|
|
RunBenchProc fProc;
|
|
|
|
|
2014-06-19 19:32:29 +00:00
|
|
|
typedef Benchmark INHERITED;
|
2012-06-19 15:40:27 +00:00
|
|
|
};
|
|
|
|
|
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code
changes. Previously, a change to the tuned loop count could radically
change the allocation behavior within the loop's iteration and lead to
unfair comparisons.
In addition, this separates the stack allocation pattern into N allocations
followed by N LIFO releases, and a push-pop alternating pattern of N
allocates and releases (so still LIFO, but reuses the memory at the start
of a block).
In later CLs experimenting on the memory pool, I found that there were
surprising effects on performance linked to the specific interaction between
the allocation size, per-allocation metadata, and per-block metadata. To
help differentiate these coincidences, this adds two modes of allocation
where one should already be aligned.
It also moves away from a global pool, so that it's possible to benchmark
on different block sizes and factor in the allocation/release cost of the
actual blocks (vs. the cursor management of a larger sized pool). As part
of this, the new/delete reference operator is added as an explicit benchmark.
Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
2020-01-07 21:29:14 +00:00
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
static const int kLargePool = 10 * (1 << 10);
|
|
|
|
static const int kSmallPool = GrMemoryPool::kMinAllocationSize;
|
|
|
|
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("stack_aligned_lg", run_stack<Aligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("stack_aligned_sm", run_stack<Aligned>, kSmallPool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("stack_aligned_ref", run_stack<Aligned>, 0); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("stack_unaligned_lg", run_stack<Unaligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("stack_unaligned_sm", run_stack<Unaligned>, kSmallPool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("stack_unaligned_ref", run_stack<Unaligned>, 0); )
|
|
|
|
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("queue_aligned_lg", run_queue<Aligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("queue_aligned_sm", run_queue<Aligned>, kSmallPool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("queue_aligned_ref", run_queue<Aligned>, 0); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("queue_unaligned_lg", run_queue<Unaligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("queue_unaligned_sm", run_queue<Unaligned>, kSmallPool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("queue_unaligned_ref", run_queue<Unaligned>, 0); )
|
|
|
|
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("pushpop_aligned_lg", run_pushpop<Aligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("pushpop_aligned_sm", run_pushpop<Aligned>, kSmallPool); )
|
|
|
|
// DEF_BENCH( return new GrMemoryPoolBench("pushpop_aligned_ref", run_pushpop<Aligned>, 0); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("pushpop_unaligned_lg", run_pushpop<Unaligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("pushpop_unaligned_sm", run_pushpop<Unaligned>, kSmallPool); )
|
|
|
|
// DEF_BENCH( return new GrMemoryPoolBench("pushpop_unaligned_ref", run_pushpop<Unaligned>, 0); )
|
|
|
|
// pushpop_x_ref are not meaningful because the compiler completely optimizes away new T; delete *.
|
|
|
|
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("random_aligned_lg", run_random<Aligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("random_aligned_sm", run_random<Aligned>, kSmallPool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("random_aligned_ref", run_random<Aligned>, 0); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("random_unaligned_lg", run_random<Unaligned>, kLargePool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("random_unaligned_sm", run_random<Unaligned>, kSmallPool); )
|
|
|
|
DEF_BENCH( return new GrMemoryPoolBench("random_unaligned_ref", run_random<Unaligned>, 0); )
|