54a60fe6f4
Compiled regular expression patterns make use of several shared common UnicodeSets. This change simplifies the creation and use of these static UnicodeSets. - Pointer fields to the static sets are removed from the compiled patterns, and the static variables are accessed directly. The deleted pointers were a hold-over from earlier code that did not use shared statics. - The UnicodeSet pattern literals are changed from hex constants to u"string literals". - The size of fRuleSets (from regexst.h) is changed from a hard-coded 10 to the number of UnicodeSets actually required. Doing this required a change to regexcst.pl to export the required size. Changing and rerunning this perl code resulted in massive but benign changes to the generated file regexcst.h, the result of perl having changed its order of enumeration of hashes since the file was last regenerated. - UnicodeSets are frozen when possible. Should result in faster matching.
61 lines
2.0 KiB
C++
61 lines
2.0 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
//
|
|
// regexst.h
|
|
//
|
|
// Copyright (C) 2003-2010, International Business Machines Corporation and others.
|
|
// All Rights Reserved.
|
|
//
|
|
// This file contains declarations for the class RegexStaticSets
|
|
//
|
|
// This class is internal to the regular expression implementation.
|
|
// For the public Regular Expression API, see the file "unicode/regex.h"
|
|
//
|
|
// RegexStaticSets groups together the common UnicodeSets that are needed
|
|
// for compiling or executing RegularExpressions. This grouping simplifies
|
|
// the thread-safe lazy creation and sharing of these sets across
|
|
// all instances of regular expressions.
|
|
//
|
|
|
|
#ifndef REGEXST_H
|
|
#define REGEXST_H
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/utext.h"
|
|
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
|
|
|
#include "regeximp.h"
|
|
#include "regexcst.h"
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
class UnicodeSet;
|
|
|
|
|
|
// RegexStaticSets bundles the UnicodeSets (and related constant data) that the
// regular expression implementation shares, so that they can be reached through
// the single static pointer gStaticSets.
//
// All data members are public and carry default member initializers ({}).
// Only declarations appear here; the constructor, destructor and initGlobals()
// are defined elsewhere.
class RegexStaticSets : public UMemory {
|
|
public:
|
|
static RegexStaticSets *gStaticSets; // Ptr to all lazily initialized constant
|
|
// shared sets.
|
|
|
|
// Constructs the object; takes a pointer to a UErrorCode.
// NOTE(review): presumably builds every set below and reports failure through
// *status - confirm against the definition.
RegexStaticSets(UErrorCode *status);
|
|
~RegexStaticSets();
|
|
// Static entry point taking a UErrorCode pointer.
// NOTE(review): presumably performs the lazy creation of gStaticSets - confirm
// against the definition; nothing in this header establishes it.
static void initGlobals(UErrorCode *status);
|
|
|
|
// fPropSets and fPropSets8 are both dimensioned by URX_LAST_SET, so they hold
// one entry per set index.
UnicodeSet fPropSets[URX_LAST_SET] {}; // The sets for common regex items, e.g. \s
|
|
Regex8BitSet fPropSets8[URX_LAST_SET] {}; // Fast bitmap sets for latin-1 range for above.
|
|
|
|
// Dimensioned by kRuleSet_count rather than a literal size.
// NOTE(review): kRuleSet_count is not declared in this header; presumably it is
// supplied by the included regexcst.h - confirm.
UnicodeSet fRuleSets[kRuleSet_count] {}; // Sets used while parsing regexp patterns.
|
|
UnicodeSet fUnescapeCharSet {}; // Set of chars handled by unescape when
|
|
// encountered with a \ in a pattern.
|
|
// NOTE(review): undocumented in the original. Judging by the name only, this is
// presumably a non-owning alias to the digits set held elsewhere in this object
// - confirm against the constructor. Default-initialized to null by the {}.
UnicodeSet *fRuleDigitsAlias {};
|
|
UText *fEmptyText {}; // An empty string, to be used when a matcher
|
|
// is created with no input.
|
|
|
|
|
|
};
|
|
|
|
|
|
U_NAMESPACE_END
|
|
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
|
|
#endif // REGEXST_H
|
|
|