Speed up {MemCopy} and {MemMove} for small sizes

The same change on the {CopyChars} function gave an unexpected 10-20% speedup on microbenchmarks across platforms (ia32, x64, Atom_x64). This CL explores whether a similar change generally speeds up {MemCopy} and {MemMove} (only on x64 for now). If this is the case, we might be able to carefully extend the same pattern to other platforms and remove custom assembly implementations without too much regression.

R=leszeks@chromium.org

Bug: v8:9810
Change-Id: Ib9674807b67cd2a463680b97a91ae1b41c3be65b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1871607
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64498}
2019-10-22 18:44:32 +02:00 · 2019-10-22 18:44:32 +02:00 · 50784597e8
commit 50784597e8
parent 720961bb39
1 changed files with 61 additions and 5 deletions
--- a/src/utils/memcopy.h
+++ b/src/utils/memcopy.h
@@ -78,12 +78,68 @@ V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
 }
 #else
 // Copy memory area to disjoint memory area.
-V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
-  memcpy(dest, src, size);
+inline void MemCopy(void* dest, const void* src, size_t size) {
+  // Fast path for small sizes. The compiler will expand the {memcpy} for small
+  // fixed sizes to a sequence of move instructions. This avoids the overhead of
+  // the general {memcpy} function.
+  switch (size) {
+#define CASE(N)           \
+  case N:                 \
+    memcpy(dest, src, N); \
+    return;
+    CASE(1)
+    CASE(2)
+    CASE(3)
+    CASE(4)
+    CASE(5)
+    CASE(6)
+    CASE(7)
+    CASE(8)
+    CASE(9)
+    CASE(10)
+    CASE(11)
+    CASE(12)
+    CASE(13)
+    CASE(14)
+    CASE(15)
+    CASE(16)
+#undef CASE
+    default:
+      memcpy(dest, src, size);
+      return;
+  }
 }
-V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
-                                         size_t size) {
-  memmove(dest, src, size);
+V8_EXPORT_PRIVATE inline void MemMove(void* dest, const void* src,
+                                      size_t size) {
+  // Fast path for small sizes. The compiler will expand the {memmove} for small
+  // fixed sizes to a sequence of move instructions. This avoids the overhead of
+  // the general {memmove} function.
+  switch (size) {
+#define CASE(N)            \
+  case N:                  \
+    memmove(dest, src, N); \
+    return;
+    CASE(1)
+    CASE(2)
+    CASE(3)
+    CASE(4)
+    CASE(5)
+    CASE(6)
+    CASE(7)
+    CASE(8)
+    CASE(9)
+    CASE(10)
+    CASE(11)
+    CASE(12)
+    CASE(13)
+    CASE(14)
+    CASE(15)
+    CASE(16)
+#undef CASE
+    default:
+      memmove(dest, src, size);
+      return;
+  }
 }
 const size_t kMinComplexMemCopy = 8;
 #endif  // V8_TARGET_ARCH_IA32