6627638a9d
If a stack overflow occurs inside the regexp parser, propagate that information to the parser. Bug: v8:896,chromium:1243989 Change-Id: I5ced27ff968ad97764e156643e1980b3a722af1a Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3127717 Commit-Queue: Leszek Swirski <leszeks@chromium.org> Auto-Submit: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Leszek Swirski <leszeks@chromium.org> Cr-Commit-Position: refs/heads/main@{#76568}
236 lines
9.0 KiB
C++
236 lines
9.0 KiB
C++
// Copyright 2012 the V8 project authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#ifndef V8_REGEXP_REGEXP_H_
|
|
#define V8_REGEXP_REGEXP_H_
|
|
|
|
#include "src/common/assert-scope.h"
|
|
#include "src/handles/handles.h"
|
|
#include "src/regexp/regexp-error.h"
|
|
#include "src/regexp/regexp-flags.h"
|
|
#include "src/zone/zone-containers.h"
|
|
|
|
namespace v8 {
|
|
namespace internal {
|
|
|
|
class JSRegExp;
|
|
class RegExpCapture;
|
|
class RegExpMatchInfo;
|
|
class RegExpNode;
|
|
class RegExpTree;
|
|
|
|
// The kind of output a regexp compilation produces: interpreter bytecode or
// native code.
enum class RegExpCompilationTarget : int { kBytecode, kNative };
|
|
|
|
// TODO(jgruber): Do not expose in regexp.h.
|
|
// TODO(jgruber): Consider splitting between ParseData and CompileData.
|
|
struct RegExpCompileData {
|
|
// The parsed AST as produced by the RegExpParser.
|
|
RegExpTree* tree = nullptr;
|
|
|
|
// The compiled Node graph as produced by RegExpTree::ToNode methods.
|
|
RegExpNode* node = nullptr;
|
|
|
|
// Either the generated code as produced by the compiler or a trampoline
|
|
// to the interpreter.
|
|
Handle<Object> code;
|
|
|
|
// True, iff the pattern is a 'simple' atom with zero captures. In other
|
|
// words, the pattern consists of a string with no metacharacters and special
|
|
// regexp features, and can be implemented as a standard string search.
|
|
bool simple = true;
|
|
|
|
// True, iff the pattern is anchored at the start of the string with '^'.
|
|
bool contains_anchor = false;
|
|
|
|
// Only set if the pattern contains named captures.
|
|
// Note: the lifetime equals that of the parse/compile zone.
|
|
ZoneVector<RegExpCapture*>* named_captures = nullptr;
|
|
|
|
// The error message. Only used if an error occurred during parsing or
|
|
// compilation.
|
|
RegExpError error = RegExpError::kNone;
|
|
|
|
// The position at which the error was detected. Only used if an
|
|
// error occurred.
|
|
int error_pos = 0;
|
|
|
|
// The number of capture groups, without the global capture \0.
|
|
int capture_count = 0;
|
|
|
|
// The number of registers used by the generated code.
|
|
int register_count = 0;
|
|
|
|
// The compilation target (bytecode or native code).
|
|
RegExpCompilationTarget compilation_target;
|
|
};
|
|
|
|
class RegExp final : public AllStatic {
|
|
public:
|
|
// Whether the irregexp engine generates interpreter bytecode.
|
|
static bool CanGenerateBytecode();
|
|
|
|
// Verify the given pattern, i.e. check that parsing succeeds. If
|
|
// verification fails, `regexp_error_out` is set.
|
|
template <class CharT>
|
|
static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
|
|
const CharT* input, int input_length,
|
|
RegExpFlags flags, RegExpError* regexp_error_out,
|
|
const DisallowGarbageCollection& no_gc);
|
|
|
|
// Parses the RegExp pattern and prepares the JSRegExp object with
|
|
// generic data and choice of implementation - as well as what
|
|
// the implementation wants to store in the data field.
|
|
// Returns false if compilation fails.
|
|
V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
|
|
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
|
|
RegExpFlags flags, uint32_t backtrack_limit);
|
|
|
|
// Ensures that a regexp is fully compiled and ready to be executed on a
|
|
// subject string. Returns true on success. Return false on failure, and
|
|
// then an exception will be pending.
|
|
V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
|
|
Handle<JSRegExp> re,
|
|
Handle<String> subject);
|
|
|
|
enum CallOrigin : int {
|
|
kFromRuntime = 0,
|
|
kFromJs = 1,
|
|
};
|
|
|
|
enum class ExecQuirks {
|
|
kNone,
|
|
// Used to work around an issue in the RegExpPrototypeSplit fast path,
|
|
// which diverges from the spec by not creating a sticky copy of the RegExp
|
|
// instance and calling `exec` in a loop. If called in this context, we
|
|
// must not update the last_match_info on a successful match at the subject
|
|
// string end. See crbug.com/1075514 for more information.
|
|
kTreatMatchAtEndAsFailure,
|
|
};
|
|
|
|
// See ECMA-262 section 15.10.6.2.
|
|
// This function calls the garbage collector if necessary.
|
|
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
|
|
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
|
|
int index, Handle<RegExpMatchInfo> last_match_info,
|
|
ExecQuirks exec_quirks = ExecQuirks::kNone);
|
|
|
|
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
|
|
ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
|
|
Handle<String> subject, int index,
|
|
Handle<RegExpMatchInfo> last_match_info,
|
|
ExecQuirks exec_quirks = ExecQuirks::kNone);
|
|
|
|
// Integral return values used throughout regexp code layers.
|
|
static constexpr int kInternalRegExpFailure = 0;
|
|
static constexpr int kInternalRegExpSuccess = 1;
|
|
static constexpr int kInternalRegExpException = -1;
|
|
static constexpr int kInternalRegExpRetry = -2;
|
|
static constexpr int kInternalRegExpFallbackToExperimental = -3;
|
|
static constexpr int kInternalRegExpSmallestResult = -3;
|
|
|
|
enum IrregexpResult : int32_t {
|
|
RE_FAILURE = kInternalRegExpFailure,
|
|
RE_SUCCESS = kInternalRegExpSuccess,
|
|
RE_EXCEPTION = kInternalRegExpException,
|
|
RE_RETRY = kInternalRegExpRetry,
|
|
RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
|
|
};
|
|
|
|
// Set last match info. If match is nullptr, then setting captures is
|
|
// omitted.
|
|
static Handle<RegExpMatchInfo> SetLastMatchInfo(
|
|
Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
|
|
Handle<String> subject, int capture_count, int32_t* match);
|
|
|
|
V8_EXPORT_PRIVATE static bool CompileForTesting(
|
|
Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags,
|
|
Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte);
|
|
|
|
V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
|
|
RegExpNode* node);
|
|
|
|
static const int kRegExpTooLargeToOptimize = 20 * KB;
|
|
|
|
V8_WARN_UNUSED_RESULT
|
|
static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
|
|
Handle<JSRegExp> re,
|
|
Handle<String> pattern,
|
|
RegExpError error);
|
|
static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
|
|
RegExpError error_text);
|
|
|
|
static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);
|
|
|
|
static Handle<FixedArray> CreateCaptureNameMap(
|
|
Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures);
|
|
};
|
|
|
|
// Uses a special global mode of irregexp-generated code to perform a global
|
|
// search and return multiple results at once. As such, this is essentially an
|
|
// iterator over multiple results (retrieved batch-wise in advance).
|
|
class RegExpGlobalCache final {
|
|
public:
|
|
RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
|
|
Isolate* isolate);
|
|
|
|
~RegExpGlobalCache();
|
|
|
|
// Fetch the next entry in the cache for global regexp match results.
|
|
// This does not set the last match info. Upon failure, nullptr is
|
|
// returned. The cause can be checked with Result(). The previous result is
|
|
// still in available in memory when a failure happens.
|
|
int32_t* FetchNext();
|
|
|
|
int32_t* LastSuccessfulMatch();
|
|
|
|
bool HasException() { return num_matches_ < 0; }
|
|
|
|
private:
|
|
int AdvanceZeroLength(int last_index);
|
|
|
|
int num_matches_;
|
|
int max_matches_;
|
|
int current_match_index_;
|
|
int registers_per_match_;
|
|
// Pointer to the last set of captures.
|
|
int32_t* register_array_;
|
|
int register_array_size_;
|
|
Handle<JSRegExp> regexp_;
|
|
Handle<String> subject_;
|
|
Isolate* isolate_;
|
|
};
|
|
|
|
// Caches results for specific regexp queries on the isolate. At the time of
|
|
// writing, this is used during global calls to RegExp.prototype.exec and
|
|
// @@split.
|
|
class RegExpResultsCache final : public AllStatic {
|
|
public:
|
|
enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
|
|
|
|
// Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
|
|
// On success, the returned result is guaranteed to be a COW-array.
|
|
static Object Lookup(Heap* heap, String key_string, Object key_pattern,
|
|
FixedArray* last_match_out, ResultsCacheType type);
|
|
// Attempt to add value_array to the cache specified by type. On success,
|
|
// value_array is turned into a COW-array.
|
|
static void Enter(Isolate* isolate, Handle<String> key_string,
|
|
Handle<Object> key_pattern, Handle<FixedArray> value_array,
|
|
Handle<FixedArray> last_match_cache, ResultsCacheType type);
|
|
static void Clear(FixedArray cache);
|
|
|
|
static constexpr int kRegExpResultsCacheSize = 0x100;
|
|
|
|
private:
|
|
static constexpr int kStringOffset = 0;
|
|
static constexpr int kPatternOffset = 1;
|
|
static constexpr int kArrayOffset = 2;
|
|
static constexpr int kLastMatchOffset = 3;
|
|
static constexpr int kArrayEntriesPerCacheEntry = 4;
|
|
};
|
|
|
|
} // namespace internal
|
|
} // namespace v8
|
|
|
|
#endif // V8_REGEXP_REGEXP_H_
|