StringToInt rewritten. This version doesn't allocate memory for long decimals and uses percise rounding if radix 10 or a power of 2 (in other cases rounding error still may occur). Handling special values moved from Runtime_StringParseInt into StringToInt in order to make it consistent with StringToDouble.

Review URL: http://codereview.chromium.org/1529004

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4329 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
serya@chromium.org 2010-03-31 10:11:33 +00:00
parent 22025291da
commit dcd96d1fa0
4 changed files with 221 additions and 203 deletions

View File

@ -48,51 +48,6 @@ int HexValue(uc32 c) {
return -1;
}
// Provide a common interface to getting a character at a certain
// index from a char* or a String object.
static inline int GetChar(const char* str, int index) {
ASSERT(index >= 0 && index < StrLength(str));
return str[index];
}
static inline int GetChar(String* str, int index) {
return str->Get(index);
}
static inline int GetLength(const char* str) {
return StrLength(str);
}
static inline int GetLength(String* str) {
return str->length();
}
static inline const char* GetCString(const char* str, int index) {
return str + index;
}
static inline const char* GetCString(String* str, int index) {
int length = str->length();
char* result = NewArray<char>(length + 1);
for (int i = index; i < length; i++) {
uc16 c = str->Get(i);
if (c <= 127) {
result[i - index] = static_cast<char>(c);
} else {
result[i - index] = 127; // Force number parsing to fail.
}
}
result[length - index] = '\0';
return result;
}
namespace {
// C++-style iterator adaptor for StringInputBuffer
@ -134,15 +89,6 @@ void StringInputBufferIterator::operator++() {
}
static inline void ReleaseCString(const char* original, const char* str) {
}
static inline void ReleaseCString(String* original, const char* str) {
DeleteArray(const_cast<char *>(str));
}
template <class Iterator, class EndMark>
static bool SubStringEquals(Iterator* current,
EndMark end,
@ -168,97 +114,6 @@ extern "C" double gay_strtod(const char* s00, const char** se);
// we don't need to preserve all the digits.
const int kMaxSignificantDigits = 772;
// Parse an int from a string starting a given index and in a given
// radix. The string can be either a char* or a String*.
template <class S>
static int InternalStringToInt(S* s, int i, int radix, double* value) {
int len = GetLength(s);
// Setup limits for computing the value.
ASSERT(2 <= radix && radix <= 36);
int lim_0 = '0' + (radix < 10 ? radix : 10);
int lim_a = 'a' + (radix - 10);
int lim_A = 'A' + (radix - 10);
// NOTE: The code for computing the value may seem a bit complex at
// first glance. It is structured to use 32-bit multiply-and-add
// loops as long as possible to avoid loosing precision.
double v = 0.0;
int j;
for (j = i; j < len;) {
// Parse the longest part of the string starting at index j
// possible while keeping the multiplier, and thus the part
// itself, within 32 bits.
uint32_t part = 0, multiplier = 1;
int k;
for (k = j; k < len; k++) {
int c = GetChar(s, k);
if (c >= '0' && c < lim_0) {
c = c - '0';
} else if (c >= 'a' && c < lim_a) {
c = c - 'a' + 10;
} else if (c >= 'A' && c < lim_A) {
c = c - 'A' + 10;
} else {
break;
}
// Update the value of the part as long as the multiplier fits
// in 32 bits. When we can't guarantee that the next iteration
// will not overflow the multiplier, we stop parsing the part
// by leaving the loop.
static const uint32_t kMaximumMultiplier = 0xffffffffU / 36;
uint32_t m = multiplier * radix;
if (m > kMaximumMultiplier) break;
part = part * radix + c;
multiplier = m;
ASSERT(multiplier > part);
}
// Compute the number of part digits. If no digits were parsed;
// we're done parsing the entire string.
int digits = k - j;
if (digits == 0) break;
// Update the value and skip the part in the string.
ASSERT(multiplier ==
pow(static_cast<double>(radix), static_cast<double>(digits)));
v = v * multiplier + part;
j = k;
}
// If the resulting value is larger than 2^53 the value does not fit
// in the mantissa of the double and there is a loss of precision.
// When the value is larger than 2^53 the rounding depends on the
// code generation. If the code generator spills the double value
// it uses 64 bits and if it does not it uses 80 bits.
//
// If there is a potential for overflow we resort to strtod for
// radix 10 numbers to get higher precision. For numbers in another
// radix we live with the loss of precision.
static const double kPreciseConversionLimit = 9007199254740992.0;
if (radix == 10 && v > kPreciseConversionLimit) {
const char* cstr = GetCString(s, i);
const char* end;
v = gay_strtod(cstr, &end);
ReleaseCString(s, cstr);
}
*value = v;
return j;
}
int StringToInt(String* str, int index, int radix, double* value) {
return InternalStringToInt(str, index, radix, value);
}
int StringToInt(const char* str, int index, int radix, double* value) {
return InternalStringToInt(const_cast<char*>(str), index, radix, value);
}
static const double JUNK_STRING_VALUE = OS::nan_value();
@ -281,18 +136,23 @@ static bool isDigit(int x, int radix) {
}
static double SignedZero(bool sign) {
return sign ? -0.0 : 0.0;
}
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
template <int radix_log_2, class Iterator, class EndMark>
static double InternalStringToIntDouble(Iterator current,
EndMark end,
bool sign,
bool allow_trailing_junk) {
static double InternalStringToIntDouble(Iterator current,
EndMark end,
bool sign,
bool allow_trailing_junk) {
ASSERT(current != end);
// Skip leading 0s.
while (*current == '0') {
++current;
if (current == end) return sign ? -0.0 : 0.0;
if (current == end) return SignedZero(sign);
}
int64_t number = 0;
@ -382,6 +242,183 @@ template <int radix_log_2, class Iterator, class EndMark>
}
template <class Iterator, class EndMark>
static double InternalStringToInt(Iterator current, EndMark end, int radix) {
const bool allow_trailing_junk = true;
const double empty_string_val = JUNK_STRING_VALUE;
if (!AdvanceToNonspace(&current, end)) return empty_string_val;
bool sign = false;
bool leading_zero = false;
if (*current == '+') {
// Ignore leading sign; skip following spaces.
++current;
if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
} else if (*current == '-') {
++current;
if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
sign = true;
}
if (radix == 0) {
// Radix detection.
if (*current == '0') {
++current;
if (current == end) return SignedZero(sign);
if (*current == 'x' || *current == 'X') {
radix = 16;
++current;
if (current == end) return JUNK_STRING_VALUE;
} else {
radix = 8;
leading_zero = true;
}
} else {
radix = 10;
}
} else if (radix == 16) {
if (*current == '0') {
// Allow "0x" prefix.
++current;
if (current == end) return SignedZero(sign);
if (*current == 'x' || *current == 'X') {
++current;
if (current == end) return JUNK_STRING_VALUE;
} else {
leading_zero = true;
}
}
}
if (radix < 2 || radix > 36) return JUNK_STRING_VALUE;
// Skip leading zeros.
while (*current == '0') {
leading_zero = true;
++current;
if (current == end) return SignedZero(sign);
}
if (!leading_zero && !isDigit(*current, radix)) {
return JUNK_STRING_VALUE;
}
if (IsPowerOf2(radix)) {
switch (radix) {
case 2:
return InternalStringToIntDouble<1>(
current, end, sign, allow_trailing_junk);
case 4:
return InternalStringToIntDouble<2>(
current, end, sign, allow_trailing_junk);
case 8:
return InternalStringToIntDouble<3>(
current, end, sign, allow_trailing_junk);
case 16:
return InternalStringToIntDouble<4>(
current, end, sign, allow_trailing_junk);
case 32:
return InternalStringToIntDouble<5>(
current, end, sign, allow_trailing_junk);
default:
UNREACHABLE();
}
}
if (radix == 10) {
// Parsing with strtod.
const int kMaxSignificantDigits = 308; // Doubles are less than 1.8e308.
// The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero
// end.
const int kBufferSize = kMaxSignificantDigits + 2;
char buffer[kBufferSize];
int buffer_pos = 0;
while (*current >= '0' && *current <= '9') {
if (buffer_pos <= kMaxSignificantDigits) {
// If the number has more than kMaxSignificantDigits it will be parsed
// as infinity.
ASSERT(buffer_pos < kBufferSize);
buffer[buffer_pos++] = static_cast<char>(*current);
}
++current;
if (current == end) break;
}
if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
return JUNK_STRING_VALUE;
}
ASSERT(buffer_pos < kBufferSize);
buffer[buffer_pos++] = '\0';
return sign ? -gay_strtod(buffer, NULL) : gay_strtod(buffer, NULL);
}
// TODO(serya): The following legacy code causes accumulating rounding
// error for number greater than ~2^56. It should be rewritten using long
// arithmetic.
int lim_0 = '0' + (radix < 10 ? radix : 10);
int lim_a = 'a' + (radix - 10);
int lim_A = 'A' + (radix - 10);
// NOTE: The code for computing the value may seem a bit complex at
// first glance. It is structured to use 32-bit multiply-and-add
// loops as long as possible to avoid loosing precision.
double v = 0.0;
bool done = false;
do {
// Parse the longest part of the string starting at index j
// possible while keeping the multiplier, and thus the part
// itself, within 32 bits.
unsigned int part = 0, multiplier = 1;
while (true) {
int d;
if (*current >= '0' && *current < lim_0) {
d = *current - '0';
} else if (*current >= 'a' && *current < lim_a) {
d = *current - 'a' + 10;
} else if (*current >= 'A' && *current < lim_A) {
d = *current - 'A' + 10;
} else {
done = true;
break;
}
// Update the value of the part as long as the multiplier fits
// in 32 bits. When we can't guarantee that the next iteration
// will not overflow the multiplier, we stop parsing the part
// by leaving the loop.
const unsigned int kMaximumMultiplier = 0xffffffffU / 36;
uint32_t m = multiplier * radix;
if (m > kMaximumMultiplier) break;
part = part * radix + d;
multiplier = m;
ASSERT(multiplier > part);
++current;
if (current == end) {
done = true;
break;
}
}
// Update the value and skip the part in the string.
v = v * multiplier + part;
} while (!done);
if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
return JUNK_STRING_VALUE;
}
return sign ? -v : v;
}
// Converts a string to a double value. Assumes the Iterator supports
// the following operations:
// 1. current == end (other ops are not allowed), current != end.
@ -417,7 +454,7 @@ static double InternalStringToDouble(Iterator current,
bool nonzero_digit_dropped = false;
bool fractional_part = false;
double signed_zero = 0.0;
bool sign = false;
if (*current == '+') {
// Ignore leading sign; skip following spaces.
@ -427,7 +464,7 @@ static double InternalStringToDouble(Iterator current,
buffer[buffer_pos++] = '-';
++current;
if (!AdvanceToNonspace(&current, end)) return JUNK_STRING_VALUE;
signed_zero = -0.0;
sign = true;
}
static const char kInfinitySymbol[] = "Infinity";
@ -447,11 +484,11 @@ static double InternalStringToDouble(Iterator current,
bool leading_zero = false;
if (*current == '0') {
++current;
if (current == end) return signed_zero;
if (current == end) return SignedZero(sign);
leading_zero = true;
// It could be hexadecimal value.
// It could be hexadecimal value.
if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
++current;
if (current == end) return JUNK_STRING_VALUE; // "0x".
@ -466,7 +503,7 @@ static double InternalStringToDouble(Iterator current,
// Ignore leading zeros in the integer part.
while (*current == '0') {
++current;
if (current == end) return signed_zero;
if (current == end) return SignedZero(sign);
}
}
@ -508,7 +545,7 @@ static double InternalStringToDouble(Iterator current,
// leading zeros (if any).
while (*current == '0') {
++current;
if (current == end) return signed_zero;
if (current == end) return SignedZero(sign);
exponent--; // Move this 0 into the exponent.
}
}
@ -635,7 +672,7 @@ static double InternalStringToDouble(Iterator current,
ASSERT(exponent == 0);
buffer_pos += exp_digits;
} else if (!fractional_part && significant_digits <= kMaxDigitsInInt) {
if (significant_digits == 0) return signed_zero;
if (significant_digits == 0) return SignedZero(sign);
ASSERT(buffer_pos > 0);
int num = 0;
int start_pos = (buffer[0] == '-' ? 1 : 0);
@ -672,6 +709,25 @@ double StringToDouble(String* str, int flags, double empty_string_val) {
}
double StringToInt(String* str, int radix) {
StringShape shape(str);
if (shape.IsSequentialAscii()) {
const char* begin = SeqAsciiString::cast(str)->GetChars();
const char* end = begin + str->length();
return InternalStringToInt(begin, end, radix);
} else if (shape.IsSequentialTwoByte()) {
const uc16* begin = SeqTwoByteString::cast(str)->GetChars();
const uc16* end = begin + str->length();
return InternalStringToInt(begin, end, radix);
} else {
StringInputBuffer buffer(str);
return InternalStringToInt(StringInputBufferIterator(&buffer),
StringInputBufferIterator::EndMarker(),
radix);
}
}
double StringToDouble(const char* str, int flags, double empty_string_val) {
const char* end = str + StrLength(str);

View File

@ -100,8 +100,7 @@ double StringToDouble(const char* str, int flags, double empty_string_val = 0);
double StringToDouble(String* str, int flags, double empty_string_val = 0);
// Converts a string into an integer.
int StringToInt(String* str, int index, int radix, double* value);
int StringToInt(const char* str, int index, int radix, double* value);
double StringToInt(String* str, int radix);
// Converts a double to a string value according to ECMA-262 9.8.1.
// The buffer should be large enough for any floating point number.

View File

@ -4686,49 +4686,9 @@ static Object* Runtime_StringParseInt(Arguments args) {
s->TryFlatten();
int len = s->length();
int i;
// Skip leading white space.
for (i = 0; i < len && Scanner::kIsWhiteSpace.get(s->Get(i)); i++) ;
if (i == len) return Heap::nan_value();
// Compute the sign (default to +).
int sign = 1;
if (s->Get(i) == '-') {
sign = -1;
i++;
} else if (s->Get(i) == '+') {
i++;
}
// Compute the radix if 0.
if (radix == 0) {
radix = 10;
if (i < len && s->Get(i) == '0') {
radix = 8;
if (i + 1 < len) {
int c = s->Get(i + 1);
if (c == 'x' || c == 'X') {
radix = 16;
i += 2;
}
}
}
} else if (radix == 16) {
// Allow 0x or 0X prefix if radix is 16.
if (i + 1 < len && s->Get(i) == '0') {
int c = s->Get(i + 1);
if (c == 'x' || c == 'X') i += 2;
}
}
RUNTIME_ASSERT(2 <= radix && radix <= 36);
double value;
int end_index = StringToInt(s, i, radix, &value);
if (end_index != i) {
return Heap::NumberFromDouble(sign * value);
}
RUNTIME_ASSERT(radix == 0 || (2 <= radix && radix <= 36));
double value = StringToInt(s, radix);
return Heap::NumberFromDouble(value);
return Heap::nan_value();
}

View File

@ -42,7 +42,10 @@ assertEquals(0x12, parseInt('0x12'));
assertEquals(0x12, parseInt('0x12', 16));
assertEquals(0x12, parseInt('0x12', 16.1));
assertEquals(0x12, parseInt('0x12', NaN));
assertTrue(isNaN(parseInt('0x ')));
assertTrue(isNaN(parseInt('0x')));
assertTrue(isNaN(parseInt('0x ', 16)));
assertTrue(isNaN(parseInt('0x', 16)));
assertEquals(12, parseInt('12aaa'));
assertEquals(0.1, parseFloat('0.1'));