// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
2008-07-03 15:10:15 +00:00
|
|
|
|
|
|
|
#ifndef V8_DATEPARSER_H_
|
|
|
|
#define V8_DATEPARSER_H_
|
|
|
|
|
2014-06-03 08:12:43 +00:00
|
|
|
#include "src/allocation.h"
|
2015-08-12 07:32:36 +00:00
|
|
|
#include "src/char-predicates.h"
|
|
|
|
#include "src/scanner.h"
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2009-05-25 10:05:56 +00:00
|
|
|
namespace v8 {
|
|
|
|
namespace internal {
|
2008-07-03 15:10:15 +00:00
|
|
|
|
|
|
|
class DateParser : public AllStatic {
|
|
|
|
public:
|
|
|
|
// Parse the string as a date. If parsing succeeds, return true after
|
|
|
|
// filling out the output array as follows (all integers are Smis):
|
|
|
|
// [0]: year
|
|
|
|
// [1]: month (0 = Jan, 1 = Feb, ...)
|
|
|
|
// [2]: day
|
|
|
|
// [3]: hour
|
|
|
|
// [4]: minute
|
|
|
|
// [5]: second
|
2010-05-03 06:43:25 +00:00
|
|
|
// [6]: millisecond
|
|
|
|
// [7]: UTC offset in seconds, or null value if no timezone specified
|
2008-07-03 15:10:15 +00:00
|
|
|
// If parsing fails, return false (content of output array is not defined).
|
2009-03-17 13:27:21 +00:00
|
|
|
template <typename Char>
|
2011-04-12 08:27:38 +00:00
|
|
|
static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2009-03-19 09:40:38 +00:00
|
|
|
// Names for the slots of the output array; OUTPUT_SIZE is the slot count.
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE
};
|
2008-07-03 15:10:15 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
// Range testing.
// Returns true iff lo <= x <= hi. Callers must pass lo <= hi.
//
// The test is done with a single unsigned comparison: if x < lo, the
// difference wraps around to a value larger than any possible hi - lo.
// Both differences are computed in unsigned arithmetic (casting before
// subtracting) so that extreme arguments cannot trigger signed overflow.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
|
2011-07-01 11:41:45 +00:00
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// Indicates a missing value.
|
|
|
|
static const int kNone = kMaxInt;
|
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
// Upper bound on how many leading digits of a numeral contribute to its
// value; digits beyond this count are ignored.
static const int kMaxSignificantDigits = 9;
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// InputReader provides basic string parsing and character classification.
|
2009-03-17 13:27:21 +00:00
|
|
|
template <typename Char>
|
2008-07-03 15:10:15 +00:00
|
|
|
class InputReader BASE_EMBEDDED {
|
|
|
|
public:
|
2011-04-12 08:27:38 +00:00
|
|
|
InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
|
2009-03-17 13:27:21 +00:00
|
|
|
: index_(0),
|
|
|
|
buffer_(s),
|
2011-04-12 08:27:38 +00:00
|
|
|
unicode_cache_(unicode_cache) {
|
2008-07-03 15:10:15 +00:00
|
|
|
Next();
|
|
|
|
}
|
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
int position() { return index_; }
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// Advance to the next character of the string.
|
2011-07-01 11:41:45 +00:00
|
|
|
void Next() {
|
|
|
|
ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
|
|
|
|
index_++;
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
// Read a string of digits as an unsigned number. Cap value at
|
|
|
|
// kMaxSignificantDigits, but skip remaining digits if the numeral
|
|
|
|
// is longer.
|
|
|
|
int ReadUnsignedNumeral() {
|
2010-11-26 11:48:35 +00:00
|
|
|
int n = 0;
|
2011-07-01 11:41:45 +00:00
|
|
|
int i = 0;
|
|
|
|
while (IsAsciiDigit()) {
|
|
|
|
if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
|
|
|
|
i++;
|
|
|
|
Next();
|
2010-11-26 11:48:35 +00:00
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// Read a word (sequence of chars. >= 'A'), fill the given buffer with a
|
|
|
|
// lower-case prefix, and pad any remainder of the buffer with zeroes.
|
|
|
|
// Return word length.
|
|
|
|
int ReadWord(uint32_t* prefix, int prefix_size) {
|
|
|
|
int len;
|
|
|
|
for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
|
2010-09-03 12:59:52 +00:00
|
|
|
if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
for (int i = len; i < prefix_size; i++) prefix[i] = 0;
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The skip methods return whether they actually skipped something.
|
2010-11-16 08:01:45 +00:00
|
|
|
bool Skip(uint32_t c) {
|
|
|
|
if (ch_ == c) {
|
|
|
|
Next();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2008-07-03 15:10:15 +00:00
|
|
|
|
|
|
|
bool SkipWhiteSpace() {
|
2014-02-10 12:43:10 +00:00
|
|
|
if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
|
2010-11-16 08:01:45 +00:00
|
|
|
Next();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool SkipParentheses() {
|
|
|
|
if (ch_ != '(') return false;
|
|
|
|
int balance = 0;
|
|
|
|
do {
|
|
|
|
if (ch_ == ')') --balance;
|
|
|
|
else if (ch_ == '(') ++balance;
|
|
|
|
Next();
|
|
|
|
} while (balance > 0 && ch_);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Character testing/classification. Non-ASCII digits are not supported.
|
|
|
|
bool Is(uint32_t c) const { return ch_ == c; }
|
|
|
|
bool IsEnd() const { return ch_ == 0; }
|
|
|
|
bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
|
|
|
|
bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
|
|
|
|
bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
|
|
|
|
|
|
|
|
// Return 1 for '+' and -1 for '-'.
|
|
|
|
int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
|
|
|
|
|
|
|
|
private:
|
2009-03-17 13:27:21 +00:00
|
|
|
int index_;
|
|
|
|
Vector<Char> buffer_;
|
2008-07-03 15:10:15 +00:00
|
|
|
uint32_t ch_;
|
2011-04-12 08:27:38 +00:00
|
|
|
UnicodeCache* unicode_cache_;
|
2008-07-03 15:10:15 +00:00
|
|
|
};
|
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
// Categories of keywords recognized in a date string. The enumerators are
// non-negative because DateToken stores a keyword's type directly as its tag.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
|
|
|
|
|
|
|
|
struct DateToken {
|
|
|
|
public:
|
|
|
|
bool IsInvalid() { return tag_ == kInvalidTokenTag; }
|
|
|
|
bool IsUnknown() { return tag_ == kUnknownTokenTag; }
|
|
|
|
bool IsNumber() { return tag_ == kNumberTag; }
|
|
|
|
bool IsSymbol() { return tag_ == kSymbolTag; }
|
|
|
|
bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
|
|
|
|
bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
|
|
|
|
bool IsKeyword() { return tag_ >= kKeywordTagStart; }
|
|
|
|
|
|
|
|
int length() { return length_; }
|
|
|
|
|
|
|
|
int number() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(IsNumber());
|
2011-07-01 11:41:45 +00:00
|
|
|
return value_;
|
|
|
|
}
|
|
|
|
KeywordType keyword_type() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(IsKeyword());
|
2011-07-01 11:41:45 +00:00
|
|
|
return static_cast<KeywordType>(tag_);
|
|
|
|
}
|
|
|
|
int keyword_value() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(IsKeyword());
|
2011-07-01 11:41:45 +00:00
|
|
|
return value_;
|
|
|
|
}
|
|
|
|
char symbol() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(IsSymbol());
|
2011-07-01 11:41:45 +00:00
|
|
|
return static_cast<char>(value_);
|
|
|
|
}
|
|
|
|
bool IsSymbol(char symbol) {
|
|
|
|
return IsSymbol() && this->symbol() == symbol;
|
|
|
|
}
|
|
|
|
bool IsKeywordType(KeywordType tag) {
|
|
|
|
return tag_ == tag;
|
|
|
|
}
|
|
|
|
bool IsFixedLengthNumber(int length) {
|
|
|
|
return IsNumber() && length_ == length;
|
|
|
|
}
|
|
|
|
bool IsAsciiSign() {
|
|
|
|
return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
|
|
|
|
}
|
|
|
|
int ascii_sign() {
|
2014-08-04 11:34:54 +00:00
|
|
|
DCHECK(IsAsciiSign());
|
2011-07-01 11:41:45 +00:00
|
|
|
return 44 - value_;
|
|
|
|
}
|
|
|
|
bool IsKeywordZ() {
|
|
|
|
return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
|
|
|
|
}
|
|
|
|
bool IsUnknown(int character) {
|
|
|
|
return IsUnknown() && value_ == character;
|
|
|
|
}
|
|
|
|
// Factory functions.
|
|
|
|
static DateToken Keyword(KeywordType tag, int value, int length) {
|
|
|
|
return DateToken(tag, length, value);
|
|
|
|
}
|
|
|
|
static DateToken Number(int value, int length) {
|
|
|
|
return DateToken(kNumberTag, length, value);
|
|
|
|
}
|
|
|
|
static DateToken Symbol(char symbol) {
|
|
|
|
return DateToken(kSymbolTag, 1, symbol);
|
|
|
|
}
|
|
|
|
static DateToken EndOfInput() {
|
|
|
|
return DateToken(kEndOfInputTag, 0, -1);
|
|
|
|
}
|
|
|
|
static DateToken WhiteSpace(int length) {
|
|
|
|
return DateToken(kWhiteSpaceTag, length, -1);
|
|
|
|
}
|
|
|
|
static DateToken Unknown() {
|
|
|
|
return DateToken(kUnknownTokenTag, 1, -1);
|
|
|
|
}
|
|
|
|
static DateToken Invalid() {
|
|
|
|
return DateToken(kInvalidTokenTag, 0, -1);
|
|
|
|
}
|
2011-09-08 19:57:14 +00:00
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
private:
|
|
|
|
enum TagType {
|
|
|
|
kInvalidTokenTag = -6,
|
|
|
|
kUnknownTokenTag = -5,
|
|
|
|
kWhiteSpaceTag = -4,
|
|
|
|
kNumberTag = -3,
|
|
|
|
kSymbolTag = -2,
|
|
|
|
kEndOfInputTag = -1,
|
|
|
|
kKeywordTagStart = 0
|
|
|
|
};
|
|
|
|
DateToken(int tag, int length, int value)
|
|
|
|
: tag_(tag),
|
|
|
|
length_(length),
|
|
|
|
value_(value) { }
|
|
|
|
|
|
|
|
int tag_;
|
|
|
|
int length_; // Number of characters.
|
|
|
|
int value_;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename Char>
|
|
|
|
class DateStringTokenizer {
|
|
|
|
public:
|
|
|
|
explicit DateStringTokenizer(InputReader<Char>* in)
|
|
|
|
: in_(in), next_(Scan()) { }
|
|
|
|
DateToken Next() {
|
|
|
|
DateToken result = next_;
|
|
|
|
next_ = Scan();
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
DateToken Peek() {
|
|
|
|
return next_;
|
|
|
|
}
|
|
|
|
bool SkipSymbol(char symbol) {
|
|
|
|
if (next_.IsSymbol(symbol)) {
|
|
|
|
next_ = Scan();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2011-09-08 19:57:14 +00:00
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
private:
|
|
|
|
DateToken Scan();
|
|
|
|
|
|
|
|
InputReader<Char>* in_;
|
|
|
|
DateToken next_;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Declaration only; the definition is not part of this header section.
// NOTE(review): by its name this derives a millisecond value from a number
// token — confirm the exact rounding/truncation against the definition.
static int ReadMilliseconds(DateToken number);
|
2008-07-03 15:10:15 +00:00
|
|
|
|
|
|
|
// KeywordTable maps names of months, time zones, am/pm to numbers.
|
|
|
|
class KeywordTable : public AllStatic {
|
|
|
|
public:
|
|
|
|
// Look up a word in the keyword table and return an index.
|
|
|
|
// 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
|
|
|
|
// and 'len' is the word length.
|
|
|
|
static int Lookup(const uint32_t* pre, int len);
|
|
|
|
// Get the type of the keyword at index i.
|
|
|
|
static KeywordType GetType(int i) {
|
|
|
|
return static_cast<KeywordType>(array[i][kTypeOffset]);
|
|
|
|
}
|
|
|
|
// Get the value of the keyword at index i.
|
|
|
|
static int GetValue(int i) { return array[i][kValueOffset]; }
|
|
|
|
|
|
|
|
static const int kPrefixLength = 3;
|
|
|
|
static const int kTypeOffset = kPrefixLength;
|
|
|
|
static const int kValueOffset = kTypeOffset + 1;
|
|
|
|
static const int kEntrySize = kValueOffset + 1;
|
|
|
|
static const int8_t array[][kEntrySize];
|
|
|
|
};
|
|
|
|
|
|
|
|
class TimeZoneComposer BASE_EMBEDDED {
|
|
|
|
public:
|
|
|
|
TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
|
|
|
|
void Set(int offset_in_hours) {
|
|
|
|
sign_ = offset_in_hours < 0 ? -1 : 1;
|
|
|
|
hour_ = offset_in_hours * sign_;
|
|
|
|
minute_ = 0;
|
|
|
|
}
|
|
|
|
void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
|
|
|
|
void SetAbsoluteHour(int hour) { hour_ = hour; }
|
|
|
|
void SetAbsoluteMinute(int minute) { minute_ = minute; }
|
|
|
|
bool IsExpecting(int n) const {
|
|
|
|
return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
|
|
|
|
}
|
|
|
|
bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
|
|
|
|
bool Write(FixedArray* output);
|
2011-07-01 11:41:45 +00:00
|
|
|
bool IsEmpty() { return hour_ == kNone; }
|
2008-07-03 15:10:15 +00:00
|
|
|
private:
|
|
|
|
int sign_;
|
|
|
|
int hour_;
|
|
|
|
int minute_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class TimeComposer BASE_EMBEDDED {
|
|
|
|
public:
|
|
|
|
TimeComposer() : index_(0), hour_offset_(kNone) {}
|
|
|
|
bool IsEmpty() const { return index_ == 0; }
|
|
|
|
bool IsExpecting(int n) const {
|
2010-05-03 06:43:25 +00:00
|
|
|
return (index_ == 1 && IsMinute(n)) ||
|
|
|
|
(index_ == 2 && IsSecond(n)) ||
|
|
|
|
(index_ == 3 && IsMillisecond(n));
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
bool Add(int n) {
|
|
|
|
return index_ < kSize ? (comp_[index_++] = n, true) : false;
|
|
|
|
}
|
|
|
|
bool AddFinal(int n) {
|
|
|
|
if (!Add(n)) return false;
|
|
|
|
while (index_ < kSize) comp_[index_++] = 0;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
void SetHourOffset(int n) { hour_offset_ = n; }
|
|
|
|
bool Write(FixedArray* output);
|
|
|
|
|
|
|
|
static bool IsMinute(int x) { return Between(x, 0, 59); }
|
|
|
|
static bool IsHour(int x) { return Between(x, 0, 23); }
|
|
|
|
static bool IsSecond(int x) { return Between(x, 0, 59); }
|
2011-09-08 19:57:14 +00:00
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
private:
|
|
|
|
static bool IsHour12(int x) { return Between(x, 0, 12); }
|
2010-05-03 06:43:25 +00:00
|
|
|
static bool IsMillisecond(int x) { return Between(x, 0, 999); }
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2010-05-03 06:43:25 +00:00
|
|
|
static const int kSize = 4;
|
2008-07-03 15:10:15 +00:00
|
|
|
int comp_[kSize];
|
|
|
|
int index_;
|
|
|
|
int hour_offset_;
|
|
|
|
};
|
|
|
|
|
|
|
|
class DayComposer BASE_EMBEDDED {
|
|
|
|
public:
|
2011-07-01 11:41:45 +00:00
|
|
|
DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
|
2008-07-03 15:10:15 +00:00
|
|
|
bool IsEmpty() const { return index_ == 0; }
|
|
|
|
bool Add(int n) {
|
2011-07-01 11:41:45 +00:00
|
|
|
if (index_ < kSize) {
|
|
|
|
comp_[index_] = n;
|
|
|
|
index_++;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
void SetNamedMonth(int n) { named_month_ = n; }
|
|
|
|
bool Write(FixedArray* output);
|
2011-07-01 11:41:45 +00:00
|
|
|
void set_iso_date() { is_iso_date_ = true; }
|
2008-07-03 15:10:15 +00:00
|
|
|
static bool IsMonth(int x) { return Between(x, 1, 12); }
|
|
|
|
static bool IsDay(int x) { return Between(x, 1, 31); }
|
|
|
|
|
2011-07-01 11:41:45 +00:00
|
|
|
private:
|
2008-07-03 15:10:15 +00:00
|
|
|
static const int kSize = 3;
|
|
|
|
int comp_[kSize];
|
|
|
|
int index_;
|
|
|
|
int named_month_;
|
2011-07-01 11:41:45 +00:00
|
|
|
// If set, ensures that data is always parsed in year-month-date order.
|
|
|
|
bool is_iso_date_;
|
2008-07-03 15:10:15 +00:00
|
|
|
};
|
2011-07-01 11:41:45 +00:00
|
|
|
|
2015-07-24 17:19:33 +00:00
|
|
|
// Tries to parse an ES6 Date Time String. Returns the next token
|
2011-07-01 11:41:45 +00:00
|
|
|
// to continue with in the legacy date string parser. If parsing is
|
|
|
|
// complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
|
|
|
|
// returns DateToken::Invalid(). Otherwise parsing continues in the
|
|
|
|
// legacy parser.
|
|
|
|
template <typename Char>
|
2015-07-24 17:19:33 +00:00
|
|
|
static DateParser::DateToken ParseES6DateTime(
|
2011-07-01 11:41:45 +00:00
|
|
|
DateStringTokenizer<Char>* scanner,
|
|
|
|
DayComposer* day,
|
|
|
|
TimeComposer* time,
|
|
|
|
TimeZoneComposer* tz);
|
2008-07-03 15:10:15 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
} } // namespace v8::internal
|
|
|
|
|
|
|
|
#endif // V8_DATEPARSER_H_
|