d0f57e1195
\u0085 (NEL) is now considered a whitespace in accordance to http://www.unicode.org/Public/6.3.0/ucd/PropList.txt R=mstarzinger@chromium.org BUG=v8:3109 LOG=Y Review URL: https://codereview.chromium.org/146983007 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19196 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
135 lines
4.3 KiB
JavaScript
135 lines
4.3 KiB
JavaScript
// Copyright 2014 the V8 project authors. All rights reserved.
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following
|
|
// disclaimer in the documentation and/or other materials provided
|
|
// with the distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
var whitespaces = [
|
|
// Whitespaces defined in ECMA-262 5.1, 7.2
|
|
0x0009, // Tab TAB
|
|
0x000B, // Vertical Tab VT
|
|
0x000C, // Form Feed FF
|
|
0x0020, // Space SP
|
|
0x00A0, // No-break space NBSP
|
|
0xFEFF, // Byte Order Mark BOM
|
|
// Unicode whitespaces
|
|
0x000A, // Line Feed LF
|
|
0x000D, // Carriage Return CR
|
|
0x0085, // Next Line NEL
|
|
0x1680, // Ogham Space Mark
|
|
0x180E, // Mongolian Vowel Separator
|
|
0x2000, // EN QUAD
|
|
0x2001, // EM QUAD
|
|
0x2002, // EN SPACE
|
|
0x2003, // EM SPACE
|
|
0x2004, // THREE-PER-EM SPACE
|
|
0x2005, // FOUR-PER-EM SPACE
|
|
0x2006, // SIX-PER-EM SPACE
|
|
0x2007, // FIGURE SPACE
|
|
0x2008, // PUNCTUATION SPACE
|
|
0x2009, // THIN SPACE
|
|
0x200A, // HAIR SPACE
|
|
0x2028, // LINE SEPARATOR
|
|
0x2029, // PARAGRAPH SEPARATOR
|
|
0x202F, // NARROW NO-BREAK SPACE
|
|
0x205F, // MEDIUM MATHEMATICAL SPACE
|
|
0x3000, // IDEOGRAPHIC SPACE
|
|
];
|
|
|
|
// Add single twobyte char to force twobyte representation.
|
|
// Interestingly, snowman is not "white" space :)
|
|
var twobyte = "\u2603";
|
|
var onebyte = "\u007E";
|
|
var twobytespace = "\u2000";
|
|
var onebytespace = "\u0020";
|
|
|
|
function is_whitespace(c) {
|
|
return whitespaces.indexOf(c.charCodeAt(0)) > -1;
|
|
}
|
|
|
|
function test_regexp(str) {
|
|
var pos_match = str.match(/\s/);
|
|
var neg_match = str.match(/\S/);
|
|
var test_char = str[0];
|
|
var postfix = str[1];
|
|
if (is_whitespace(test_char)) {
|
|
assertEquals(test_char, pos_match[0]);
|
|
assertEquals(postfix, neg_match[0]);
|
|
} else {
|
|
assertEquals(test_char, neg_match[0]);
|
|
assertNull(pos_match);
|
|
}
|
|
}
|
|
|
|
function test_trim(c, infix) {
|
|
var str = c + c + c + infix + c;
|
|
if (is_whitespace(c)) {
|
|
assertEquals(infix, str.trim());
|
|
} else {
|
|
assertEquals(str, str.trim());
|
|
}
|
|
}
|
|
|
|
function test_parseInt(c, postfix) {
|
|
// Skip if prefix is a digit.
|
|
if (c >= "0" && c <= 9) return;
|
|
var str = c + c + "123" + postfix;
|
|
if (is_whitespace(c)) {
|
|
assertEquals(123, parseInt(str));
|
|
} else {
|
|
assertEquals(NaN, parseInt(str));
|
|
}
|
|
}
|
|
|
|
function test_eval(c, content) {
|
|
if (!is_whitespace(c)) return;
|
|
var str = c + c + "'" + content + "'" + c + c;
|
|
assertEquals(content, eval(str));
|
|
}
|
|
|
|
function test_stringtonumber(c, postfix) {
|
|
// Skip if prefix is a digit.
|
|
if (c >= "0" && c <= 9) return;
|
|
var result = 1 + Number(c + "123" + c + postfix);
|
|
if (is_whitespace(c)) {
|
|
assertEquals(124, result);
|
|
} else {
|
|
assertEquals(NaN, result);
|
|
}
|
|
}
|
|
|
|
for (var i = 0; i < 0x10000; i++) {
|
|
c = String.fromCharCode(i);
|
|
test_regexp(c + onebyte);
|
|
test_regexp(c + twobyte);
|
|
test_trim(c, onebyte + "trim");
|
|
test_trim(c, twobyte + "trim");
|
|
test_parseInt(c, onebyte);
|
|
test_parseInt(c, twobyte);
|
|
test_eval(c, onebyte);
|
|
test_eval(c, twobyte);
|
|
test_stringtonumber(c, onebytespace);
|
|
test_stringtonumber(c, twobytespace);
|
|
}
|