From d0ecfe25d607412e7ab85eac2e9d714466eea668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Z=C3=BCnd?= Date: Thu, 3 May 2018 09:35:25 +0200 Subject: [PATCH] [typedarray] Change Torque sort implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL changes how TypedArray.p.sort is implemented in Torque, mainly to address the binary memory size of the builtin. With this CL the memory comes down from 53611 to 4215 (as reported by --print-builtin-size on an x64.release build). The performance impact on the relevant benchmarks is as follows: Benchmark Original (JS) Torque (initial) This CL IntTypes 83.9 263.7 202.3 BigIntTypes 32.1 54.6 47.2 FloatTypes 99.3 138.7 109.3 This is achieved by pushing the Load/Store dispatch based on the elements kind into separate builtins that are executed for each load/store. This results in only one version of the sorting algorithm instead of one version per elements kind. R=jgruber@chromium.org Bug: chromium:837282 Change-Id: I7fe2da3cbfd01531d070128126a0d56d3dd6bdcc Reviewed-on: https://chromium-review.googlesource.com/1033744 Commit-Queue: Simon Zünd Reviewed-by: Jakob Gruber Cr-Commit-Position: refs/heads/master@{#52937} --- src/builtins/base.tq | 8 +- src/builtins/builtins-definitions.h | 2 + src/builtins/builtins-typed-array-gen.cc | 36 ++++++ src/builtins/typed-array.tq | 144 ++++++++--------------- 4 files changed, 91 insertions(+), 99 deletions(-) diff --git a/src/builtins/base.tq b/src/builtins/base.tq index 9e520ed6cc..d1324e8ed3 100644 --- a/src/builtins/base.tq +++ b/src/builtins/base.tq @@ -219,6 +219,7 @@ extern implicit operator 'convert<>' macro NumberConstant(const_int32): Number; extern operator 'convert<>' macro ChangeInt32ToTagged(int32): Number; extern operator 'convert<>' macro TruncateWordToWord32(intptr): int32; extern operator 'convert<>' macro SmiTag(intptr): Smi; +extern operator 'convert<>' macro SmiFromInt32(int32): Smi; extern operator 'convert<>' 
macro SmiUntag(Smi): intptr; @@ -290,10 +291,3 @@ macro HasPropertyObject( extern macro IsCallable(HeapObject): bit; extern macro TaggedIsCallable(Object): bit; extern macro IsDetachedBuffer(JSArrayBuffer): bit; - -type ParameterMode; -const kSmiParameters: ParameterMode = 'ParameterMode::SMI_PARAMETERS'; -extern macro LoadFixedTypedArrayElementAsTagged( - RawPtr, Smi, ElementsKind, ParameterMode): Object; -extern macro StoreFixedTypedArrayElementFromTagged( - Context, RawPtr, Smi, Object, ElementsKind, ParameterMode); diff --git a/src/builtins/builtins-definitions.h b/src/builtins/builtins-definitions.h index 013ee92e35..33bc8476ed 100644 --- a/src/builtins/builtins-definitions.h +++ b/src/builtins/builtins-definitions.h @@ -1175,6 +1175,8 @@ namespace internal { TFJ(TypedArrayOf, SharedFunctionInfo::kDontAdaptArgumentsSentinel) \ /* ES6 %TypedArray%.from */ \ TFJ(TypedArrayFrom, SharedFunctionInfo::kDontAdaptArgumentsSentinel) \ + TFS(TypedArrayLoadElementAsTagged, kArray, kKind, kIndex) \ + TFS(TypedArrayStoreElementFromTagged, kArray, kKind, kIndex, kValue) \ \ /* Wasm */ \ ASM(WasmCompileLazy) \ diff --git a/src/builtins/builtins-typed-array-gen.cc b/src/builtins/builtins-typed-array-gen.cc index e3c6e875d2..8e8a75627c 100644 --- a/src/builtins/builtins-typed-array-gen.cc +++ b/src/builtins/builtins-typed-array-gen.cc @@ -858,6 +858,42 @@ TNode TypedArrayBuiltinsAssembler::GetTypedArrayElementSize( return element_size.value(); } +TF_BUILTIN(TypedArrayLoadElementAsTagged, TypedArrayBuiltinsAssembler) { + TVARIABLE(Object, result); + TNode array = CAST(Parameter(Descriptor::kArray)); + TNode kind = CAST(Parameter(Descriptor::kKind)); + TNode index_node = CAST(Parameter(Descriptor::kIndex)); + + TNode data_pointer = UncheckedCast(LoadDataPtr(array)); + TNode elements_kind = SmiToInt32(kind); + + DispatchTypedArrayByElementsKind( + elements_kind, [&](ElementsKind el_kind, int, int) { + result = CAST(LoadFixedTypedArrayElementAsTagged( + data_pointer, index_node, 
el_kind, SMI_PARAMETERS)); + }); + + Return(result.value()); +} + +TF_BUILTIN(TypedArrayStoreElementFromTagged, TypedArrayBuiltinsAssembler) { + TNode context = CAST(Parameter(Descriptor::kContext)); + TNode array = CAST(Parameter(Descriptor::kArray)); + TNode kind = CAST(Parameter(Descriptor::kKind)); + TNode index_node = CAST(Parameter(Descriptor::kIndex)); + TNode value = CAST(Parameter(Descriptor::kValue)); + + TNode data_pointer = UncheckedCast(LoadDataPtr(array)); + TNode elements_kind = SmiToInt32(kind); + + DispatchTypedArrayByElementsKind( + elements_kind, [&](ElementsKind el_kind, int, int) { + StoreFixedTypedArrayElementFromTagged(context, data_pointer, index_node, + value, el_kind, SMI_PARAMETERS); + }); + Return(UndefinedConstant()); +} + TNode TypedArrayBuiltinsAssembler::GetDefaultConstructor( TNode context, TNode exemplar) { TVARIABLE(IntPtrT, context_slot); diff --git a/src/builtins/typed-array.tq b/src/builtins/typed-array.tq index f05c3e50ad..3ba5410ef6 100644 --- a/src/builtins/typed-array.tq +++ b/src/builtins/typed-array.tq @@ -9,6 +9,11 @@ module typed_array { const kTypedArrayProtoSort: MethodName = '\"%TypedArray%.prototype.sort\"'; extern macro ValidateTypedArray(Context, Object, MethodName): JSTypedArray; + extern builtin TypedArrayLoadElementAsTagged( + Context, JSTypedArray, Smi, Smi): Object; + extern builtin TypedArrayStoreElementFromTagged( + Context, JSTypedArray, Smi, Smi, Object); + extern macro AssertUnreachable(); extern macro NumberIsNaN(Number): bit; @@ -31,50 +36,55 @@ module typed_array { // Wrapped CSA macro for better readability. Ideally we want to map this // as the array operator "[]". - macro Load(backing_store: RawPtr, index: Smi, kind: ElementsKind): Object { - return LoadFixedTypedArrayElementAsTagged( - backing_store, index, kind, kSmiParameters); + // TODO(szuend): Change Load/Store macros so they use function pointers to + // the correct builtins as soon as they are available in Torque. 
+ // + // Currently the dispatch to the correct load/store instruction + // is done during runtime in a builtin. This costs around 20% + // performance in relevant benchmarks, but greatly reduces the + // code size - compared to sort macro "copies" for each + // ElementsKind that inline the correct load/store. + macro Load( + context: Context, array: JSTypedArray, kind: Smi, index: Smi): Object { + return TypedArrayLoadElementAsTagged(context, array, kind, index); } // Wrapped array store CSA macro for better readability. macro Store( - context: Context, backing_store: RawPtr, index: Smi, value: Object, - kind: ElementsKind) { - StoreFixedTypedArrayElementFromTagged( - context, backing_store, index, value, kind, kSmiParameters); + context: Context, array: JSTypedArray, kind: Smi, index: Smi, + value: Object) { + TypedArrayStoreElementFromTagged(context, array, kind, index, value); } // InsertionSort is used for smaller arrays. macro TypedArrayInsertionSort( - context: Context, array: JSTypedArray, kind: ElementsKind, from_arg: Smi, + context: Context, array: JSTypedArray, kind: Smi, from_arg: Smi, to_arg: Smi, comparefn: Callable) labels Detached { let from: Smi = from_arg; let to: Smi = to_arg; - let backing_store: RawPtr = array.data_ptr; if (IsDetachedBuffer(array.buffer)) goto Detached; for (let i: Smi = from + 1; i < to; ++i) { - let element: Object = Load(backing_store, i, kind); + let element: Object = Load(context, array, kind, i); let j: Smi = i - 1; for (; j >= from; --j) { - let tmp: Object = Load(backing_store, j, kind); + let tmp: Object = Load(context, array, kind, j); let order: Number = CallCompareWithDetachedCheck( context, array, comparefn, tmp, element) otherwise Detached; - backing_store = array.data_ptr; // Force reload. 
if (order > 0) { - Store(context, backing_store, j + 1, tmp, kind); + Store(context, array, kind, j + 1, tmp); } else { break; } } - Store(context, backing_store, j + 1, element, kind); + Store(context, array, kind, j + 1, element); } } macro TypedArrayQuickSortImpl( - context: Context, array: JSTypedArray, kind: ElementsKind, from_arg: Smi, + context: Context, array: JSTypedArray, kind: Smi, from_arg: Smi, to_arg: Smi, comparefn: Callable) labels Detached { let from: Smi = from_arg; @@ -94,15 +104,12 @@ module typed_array { // worth it for very large arrays. let third_index: Smi = from + ((to - from) >>> 1); - // TODO(szuend): Investigate possible performance impact by caching the - // backing_store ptr for off-heap buffers. - let backing_store: RawPtr = array.data_ptr; if (IsDetachedBuffer(array.buffer)) goto Detached; // Find a pivot as the median of first, last and middle element. - let v0: Object = Load(backing_store, from, kind); - let v1: Object = Load(backing_store, to - 1, kind); - let v2: Object = Load(backing_store, third_index, kind); + let v0: Object = Load(context, array, kind, from); + let v1: Object = Load(context, array, kind, to - 1); + let v2: Object = Load(context, array, kind, third_index); let c01: Number = CallCompareWithDetachedCheck( context, array, comparefn, v0, v1) otherwise Detached; @@ -134,30 +141,28 @@ module typed_array { } // v0 <= v1 <= v2. - backing_store = array.data_ptr; // Force reload. - Store(context, backing_store, from, v0, kind); - Store(context, backing_store, to - 1, v2, kind); + Store(context, array, kind, from, v0); + Store(context, array, kind, to - 1, v2); let pivot: Object = v1; let low_end: Smi = from + 1; // Upper bound of elems lower than pivot. let high_start: Smi = to - 1; // Lower bound of elems greater than pivot. 
- let low_end_value: Object = Load(backing_store, low_end, kind); - Store(context, backing_store, third_index, low_end_value, kind); - Store(context, backing_store, low_end, pivot, kind); + let low_end_value: Object = Load(context, array, kind, low_end); + Store(context, array, kind, third_index, low_end_value); + Store(context, array, kind, low_end, pivot); // From low_end to idx are elements equal to pivot. // From idx to high_start are elements that haven"t been compared yet. for (let idx: Smi = low_end + 1; idx < high_start; idx++) { - let element: Object = Load(backing_store, idx, kind); + let element: Object = Load(context, array, kind, idx); let order: Number = CallCompareWithDetachedCheck( context, array, comparefn, element, pivot) otherwise Detached; - backing_store = array.data_ptr; // Force reload. if (order < 0) { - low_end_value = Load(backing_store, low_end, kind); - Store(context, backing_store, idx, low_end_value, kind); - Store(context, backing_store, low_end, element, kind); + low_end_value = Load(context, array, kind, low_end); + Store(context, array, kind, idx, low_end_value); + Store(context, array, kind, low_end, element); low_end++; } else if (order > 0) { let break_for: bit = no; @@ -169,92 +174,46 @@ module typed_array { break; } - let top_elem: Object = Load(backing_store, high_start, kind); + let top_elem: Object = Load(context, array, kind, high_start); order = CallCompareWithDetachedCheck( context, array, comparefn, top_elem, pivot) otherwise Detached; - backing_store = array.data_ptr; // Force reload. 
} if (break_for) { break; } - let high_start_value: Object = Load(backing_store, high_start, kind); - Store(context, backing_store, idx, high_start_value, kind); - Store(context, backing_store, high_start, element, kind); + let high_start_value: Object = Load(context, array, kind, high_start); + Store(context, array, kind, idx, high_start_value); + Store(context, array, kind, high_start, element); if (order < 0) { - element = Load(backing_store, idx, kind); + element = Load(context, array, kind, idx); - low_end_value = Load(backing_store, low_end, kind); - Store(context, backing_store, idx, low_end_value, kind); - Store(context, backing_store, low_end, element, kind); + low_end_value = Load(context, array, kind, low_end); + Store(context, array, kind, idx, low_end_value); + Store(context, array, kind, low_end, element); low_end++; } } } if ((to - high_start) < (low_end - from)) { - TypedArrayQuickSort(context, array, high_start, to, comparefn); + TypedArrayQuickSort(context, array, kind, high_start, to, comparefn); to = low_end; } else { - TypedArrayQuickSort(context, array, from, low_end, comparefn); + TypedArrayQuickSort(context, array, kind, from, low_end, comparefn); from = high_start; } } } builtin TypedArrayQuickSort( - context: Context, array: JSTypedArray, from: Smi, to: Smi, + context: Context, array: JSTypedArray, kind: Smi, from: Smi, to: Smi, comparefn: Callable): JSTypedArray { - let element_kind: int32 = array.elements_kind; - try { - if (element_kind == convert(UINT8_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, UINT8_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(INT8_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, INT8_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(UINT16_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, UINT16_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(INT16_ELEMENTS)) { 
- TypedArrayQuickSortImpl( - context, array, INT16_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(UINT32_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, UINT32_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(INT32_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, INT32_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(UINT8_CLAMPED_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, UINT8_CLAMPED_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(FLOAT32_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, FLOAT32_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(FLOAT64_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, FLOAT64_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(BIGUINT64_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, BIGUINT64_ELEMENTS, from, to, comparefn) - otherwise Detached; - } else if (element_kind == convert(BIGINT64_ELEMENTS)) { - TypedArrayQuickSortImpl( - context, array, BIGINT64_ELEMENTS, from, to, comparefn) - otherwise Detached; - } + TypedArrayQuickSortImpl(context, array, kind, from, to, comparefn) + otherwise Detached; } label Detached { ThrowTypeError( @@ -292,7 +251,8 @@ module typed_array { try { let comparefn: Callable = cast(comparefn_obj) otherwise CastError; - TypedArrayQuickSort(context, array, 0, len, comparefn); + let elements_kind: Smi = convert(array.elements_kind); + TypedArrayQuickSort(context, array, elements_kind, 0, len, comparefn); } label CastError { // TODO(szuend): Replace with Unreachable() when its supported in Torque.