Percise rounding parsing octal and hexadecimal strings.

Rounding happens when the number exceeds 53 bits of floating point mantissa. Current implemetation ignores digits after some limits. 0x1000000000000081 was rounded to 0x1000000000000100 while 0x100000000000008000001 was rounded to 0x100000000000000000000.

Review URL: http://codereview.chromium.org/1374005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4309 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
serya@chromium.org 2010-03-29 15:46:58 +00:00
parent 08ed4504f6
commit 9e76d99fb9
3 changed files with 129 additions and 77 deletions

View File

@ -274,73 +274,111 @@ static inline bool AdvanceToNonspace(Iterator* current, EndMark end) {
}
template <class Iterator, class EndMark>
static double InternalHexadecimalStringToDouble(Iterator current,
EndMark end,
char* buffer,
bool allow_trailing_junk) {
static bool isDigit(int x, int radix) {
return (x >= '0' && x <= '9' && x < '0' + radix)
|| (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
|| (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
}
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
template <int radix_log_2, class Iterator, class EndMark>
static double InternalStringToIntDouble(Iterator current,
EndMark end,
bool sign,
bool allow_trailing_junk) {
ASSERT(current != end);
const int max_hex_significant_digits = 52 / 4 + 2;
// We reuse the buffer of InternalStringToDouble. Since hexadecimal
// numbers may have much less digits than decimal the buffer won't overflow.
ASSERT(max_hex_significant_digits < kMaxSignificantDigits);
int significant_digits = 0;
int insignificant_digits = 0;
bool leading_zero = false;
// A double has a 53bit significand (once the hidden bit has been added).
// Halfway cases thus have at most 54bits. Therefore 54/4 + 1 digits are
// sufficient to represent halfway cases. By adding another digit we can keep
// track of dropped digits.
int buffer_pos = 0;
bool nonzero_digit_dropped = false;
// Skip leading 0s.
while (*current == '0') {
leading_zero = true;
++current;
if (current == end) return 0;
if (current == end) return sign ? -0.0 : 0.0;
}
int begin_pos = buffer_pos;
while ((*current >= '0' && *current <= '9')
|| (*current >= 'a' && *current <= 'f')
|| (*current >= 'A' && *current <= 'F')) {
if (significant_digits <= max_hex_significant_digits) {
buffer[buffer_pos++] = static_cast<char>(*current);
significant_digits++;
int64_t number = 0;
int exponent = 0;
const int radix = (1 << radix_log_2);
do {
int digit;
if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
digit = static_cast<char>(*current) - '0';
} else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
digit = static_cast<char>(*current) - 'a' + 10;
} else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
digit = static_cast<char>(*current) - 'A' + 10;
} else {
insignificant_digits++;
nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
break;
} else {
return JUNK_STRING_VALUE;
}
}
number = number * radix + digit;
int overflow = number >> 53;
if (overflow != 0) {
// Overflow occurred. Need to determine which direction to round the
// result.
int overflow_bits_count = 1;
while (overflow > 1) {
overflow_bits_count++;
overflow >>= 1;
}
int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
int dropped_bits = number & dropped_bits_mask;
number >>= overflow_bits_count;
exponent = overflow_bits_count;
bool zero_tail = true;
while (true) {
++current;
if (current == end || !isDigit(*current, radix)) break;
zero_tail = zero_tail && *current == '0';
exponent += radix_log_2;
}
if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
return JUNK_STRING_VALUE;
}
int middle_value = (1 << (overflow_bits_count - 1));
if (dropped_bits > middle_value) {
number++; // Rounding up.
} else if (dropped_bits == middle_value) {
// Rounding to even to consistency with decimals: half-way case rounds
// up if significant part is odd and down otherwise.
if ((number & 1) != 0 || !zero_tail) {
number++; // Rounding up.
}
}
// Rounding up may cause overflow.
if ((number & ((int64_t)1 << 53)) != 0) {
exponent++;
number >>= 1;
}
break;
}
++current;
if (current == end) break;
} while (current != end);
ASSERT(number < ((int64_t)1 << 53));
ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
if (exponent == 0) {
if (sign) {
if (number == 0) return -0.0;
number = -number;
}
return static_cast<double>(number);
}
if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
return JUNK_STRING_VALUE;
}
if (significant_digits == 0) {
return leading_zero ? 0 : JUNK_STRING_VALUE;
}
if (nonzero_digit_dropped) {
ASSERT(insignificant_digits > 0);
insignificant_digits--;
buffer[buffer_pos++] = '1';
}
buffer[buffer_pos] = '\0';
double result;
StringToInt(buffer, begin_pos, 16, &result);
if (insignificant_digits > 0) {
// Multiplying by a power of 2 doesn't cause a loss of precision.
result *= pow(16.0, insignificant_digits);
}
return result;
ASSERT(number != 0);
// The double could be constructed faster from number (mantissa), exponent
// and sign. Assuming it's a rare case more simple code is used.
return static_cast<double>(sign ? -number : number) * pow(2.0, exponent);
}
@ -413,16 +451,16 @@ static double InternalStringToDouble(Iterator current,
leading_zero = true;
// It could be hexadecimal value.
// It could be hexadecimal value.
if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
++current;
if (current == end) return JUNK_STRING_VALUE; // "0x".
double result = InternalHexadecimalStringToDouble(current,
end,
buffer + buffer_pos,
allow_trailing_junk);
return (buffer_pos > 0 && buffer[0] == '-') ? -result : result;
bool sign = (buffer_pos > 0 && buffer[0] == '-');
return InternalStringToIntDouble<4>(current,
end,
sign,
allow_trailing_junk);
}
// Ignore leading zeros in the integer part.
@ -560,22 +598,13 @@ static double InternalStringToDouble(Iterator current,
exponent += insignificant_digits;
if (octal) {
buffer[buffer_pos] = '\0';
// ALLOW_OCTALS is set and there is no '8' or '9' in insignificant
// digits. Check significant digits now.
char sign = '+';
const char* s = buffer;
if (*s == '-' || *s == '+') sign = *s++;
bool sign = buffer[0] == '-';
int start_pos = (sign ? 1 : 0);
double result;
s += StringToInt(s, 0, 8, &result);
if (!allow_trailing_junk && *s != '\0') return JUNK_STRING_VALUE;
if (sign == '-') result = -result;
if (insignificant_digits > 0) {
result *= pow(8.0, insignificant_digits);
}
return result;
return InternalStringToIntDouble<3>(buffer + start_pos,
buffer + buffer_pos,
sign,
allow_trailing_junk);
}
if (nonzero_digit_dropped) {

View File

@ -122,6 +122,8 @@ assertEquals("100000000000000000000000000000000", Math.pow(2,32).toString(2));
assertEquals("100000000000000000000000000000001", (Math.pow(2,32) + 1).toString(2));
assertEquals("100000000000080", (0x100000000000081).toString(16));
assertEquals("1000000000000100", (-(-'0x1000000000000081')).toString(16));
assertEquals("1000000000000000", (-(-'0x1000000000000080')).toString(16));
assertEquals("1000000000000000", (-(-'0x100000000000007F')).toString(16));
assertEquals("100000000000000000000000000000000000000000000000010000000", (0x100000000000081).toString(2));
assertEquals("-11111111111111111111111111111111", (-(Math.pow(2,32)-1)).toString(2));
assertEquals("-5yc1z", (-10000007).toString(36));

View File

@ -44,7 +44,6 @@ assertEquals('0000000000', repeat('0', 10));
// assertEquals(, toNumber());
assertEquals(123, toNumber(" 123"));
assertEquals(123, toNumber("\n123"));
assertEquals(123, toNumber("\r123"));
@ -151,10 +150,32 @@ assertEquals(10, toNumber("0x00a"));
assertEquals(10, toNumber("0x00A"));
assertEquals(15, toNumber("0x00f"));
assertEquals(15, toNumber("0x00F"));
assertEquals(15, toNumber("0x00F "));
assertEquals(Infinity, toNumber("0x" + repeat('0', 1000) + '1'
+ repeat('0', 1000)));
assertEquals(-Infinity, toNumber("-0x1" + repeat('0', 1000)));
assertEquals(0x1000000 * 0x10000000, toNumber("0x10000000000000"));
assertEquals(0x1000000 * 0x10000000 + 1, toNumber("0x10000000000001"));
assertEquals(0x10 * 0x1000000 * 0x10000000, toNumber("0x100000000000000"));
assertEquals(0x10 * 0x1000000 * 0x10000000, toNumber("0x100000000000001"));
assertEquals(0x10 * 0x1000000 * 0x10000000, toNumber("0x100000000000007"));
assertEquals(0x10 * 0x1000000 * 0x10000000, toNumber("0x100000000000008"));
assertEquals(0x10 * (0x1000000 * 0x10000000 + 1),
toNumber("0x100000000000009"));
assertEquals(0x10 * (0x1000000 * 0x10000000 + 1),
toNumber("0x10000000000000F"));
assertEquals(0x10 * (0x1000000 * 0x10000000 + 1),
toNumber("0x100000000000010"));
assertEquals(0x100000000000 * 0x1000000 * 0x10000000,
toNumber("0x1000000000000000000000000"));
assertEquals(0x100000000000 * 0x1000000 * 0x10000000,
toNumber("0x1000000000000080000000000"));
assertEquals(0x100000000000 * (0x1000000 * 0x10000000 + 1),
toNumber("0x1000000000000080000000001"));
assertEquals(0x100000000000 * 0x1000000 * 0x10000000,
toNumber(" 0x1000000000000000000000000 "));
assertEquals(0, toNumber("00"));
assertEquals(1, toNumber("01"));
assertEquals(2, toNumber("02"));