Fix Chromium issue 176 by treating byte-order marks as whitespace.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@765 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2008-11-17 06:56:28 +00:00 · 2008-11-17 06:56:28 +00:00 · d24556d1c5
commit d24556d1c5
parent 7940adb1ec
2 changed files with 16 additions and 5 deletions
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -234,11 +234,25 @@ void Scanner::PushBack(uc32 ch) {
 }


+static inline bool IsByteOrderMark(uc32 c) {
+  // Returns true if c is U+FEFF (the byte-order mark) or U+FFFE.
+  // The Unicode value U+FFFE is guaranteed never to be assigned as a
+  // Unicode character, so in a Unicode context the 0xFF, 0xFE byte
+  // pattern can only be the U+FEFF character expressed in
+  // little-endian byte order; it cannot be a U+FFFE character in
+  // big-endian byte order. Nevertheless, we also accept U+FFFE here
+  // to be compatible with SpiderMonkey.
+  return c == 0xFEFF || c == 0xFFFE;
+}
+
+
 void Scanner::SkipWhiteSpace(bool initial) {
  has_line_terminator_before_next_ = initial;

  while (true) {
-    while (kIsWhiteSpace.get(c0_)) {
+    // We treat byte-order marks (BOMs) as whitespace for better
+    // compatibility with SpiderMonkey and other JavaScript engines.
+    while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
      // IsWhiteSpace() includes line terminators!
      if (kIsLineTerminator.get(c0_))
        // Ignore line terminators, but remember them. This is necessary
--- a/test/mozilla/mozilla.status
+++ b/test/mozilla/mozilla.status
@@ -570,10 +570,7 @@ js1_5/Regress/regress-306633: FAIL
 js1_5/Regress/regress-303213: FAIL


-# Bug 1193440: Ignore Unicode BOM characters when scanning.
-ecma_3/extensions/regress-368516: FAIL
-
-# Bug 1202592:New ecma_3/String/15.5.4.11 is failing.
+# Bug 1202592: New ecma_3/String/15.5.4.11 is failing.
 ecma_3/String/15.5.4.11: FAIL

 # Bug 1202597: New js1_5/Expressions/regress-394673 is failing.