From a0f0faa8e4aadb34823ca664a7dce85b789a5565 Mon Sep 17 00:00:00 2001
From: "cira@chromium.org"
Date: Thu, 3 Mar 2011 17:32:18 +0000
Subject: [PATCH] Adding break iterator support to the i18n api extension.

This is vendor specific, and is prefixed by v8.

WebKit layout tests will be added in a separate CL.

Review URL: http://codereview.chromium.org/6610006

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7051 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 src/extensions/experimental/break-iterator.cc | 229 ++++++++++++++++++
 src/extensions/experimental/break-iterator.h  |  83 +++++++
 src/extensions/experimental/experimental.gyp  |   2 +
 src/extensions/experimental/i18n-extension.cc |  20 ++
 4 files changed, 334 insertions(+)
 create mode 100644 src/extensions/experimental/break-iterator.cc
 create mode 100644 src/extensions/experimental/break-iterator.h

diff --git a/src/extensions/experimental/break-iterator.cc b/src/extensions/experimental/break-iterator.cc
new file mode 100644
index 0000000000..ea218679bd
--- /dev/null
+++ b/src/extensions/experimental/break-iterator.cc
@@ -0,0 +1,229 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "break-iterator.h"
+
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/rbbi.h"
+
+namespace v8 {
+namespace internal {
+
+v8::Persistent<v8::FunctionTemplate> BreakIterator::break_iterator_template_;
+
+icu::BreakIterator* BreakIterator::UnpackBreakIterator(
+    v8::Handle<v8::Object> obj) {
+  if (break_iterator_template_->HasInstance(obj)) {
+    return static_cast<icu::BreakIterator*>(
+        obj->GetPointerFromInternalField(0));
+  }
+
+  return NULL;
+}
+
+void BreakIterator::DeleteBreakIterator(v8::Persistent<v8::Value> object,
+                                        void* param) {
+  v8::Persistent<v8::Object> persistent_object =
+      v8::Persistent<v8::Object>::Cast(object);
+
+  // First delete the hidden C++ object.
+  // Unpacking should never return NULL here. That would only happen if
+  // this method is used as the weak callback for persistent handles not
+  // pointing to a break iterator.
+  delete UnpackBreakIterator(persistent_object);
+
+  // Then dispose of the persistent handle to JS object.
+  persistent_object.Dispose();
+}
+
+// Throws a JavaScript exception.
+static v8::Handle<v8::Value> ThrowUnexpectedObjectError() {
+  // Returns undefined, and schedules an exception to be thrown.
+  return v8::ThrowException(v8::Exception::Error(
+      v8::String::New("BreakIterator method called on an object "
+                      "that is not a BreakIterator.")));
+}
+
+v8::Handle<v8::Value> BreakIterator::BreakIteratorAdoptText(
+    const v8::Arguments& args) {
+  if (args.Length() != 1 || !args[0]->IsString()) {
+    return v8::ThrowException(v8::Exception::SyntaxError(
+        v8::String::New("Text input is required.")));
+  }
+
+  icu::BreakIterator* break_iterator = UnpackBreakIterator(args.Holder());
+  if (!break_iterator) {
+    return ThrowUnexpectedObjectError();
+  }
+
+  v8::String::Value text_value(args[0]);
+  UnicodeString text(
+      reinterpret_cast<const UChar*>(*text_value), text_value.length());
+
+  break_iterator->setText(text);
+
+  return v8::Undefined();
+}
+
+v8::Handle<v8::Value> BreakIterator::BreakIteratorFirst(
+    const v8::Arguments& args) {
+  icu::BreakIterator* break_iterator = UnpackBreakIterator(args.Holder());
+  if (!break_iterator) {
+    return ThrowUnexpectedObjectError();
+  }
+
+  return v8::Int32::New(break_iterator->first());
+}
+
+v8::Handle<v8::Value> BreakIterator::BreakIteratorNext(
+    const v8::Arguments& args) {
+  icu::BreakIterator* break_iterator = UnpackBreakIterator(args.Holder());
+  if (!break_iterator) {
+    return ThrowUnexpectedObjectError();
+  }
+
+  return v8::Int32::New(break_iterator->next());
+}
+
+v8::Handle<v8::Value> BreakIterator::BreakIteratorCurrent(
+    const v8::Arguments& args) {
+  icu::BreakIterator* break_iterator = UnpackBreakIterator(args.Holder());
+  if (!break_iterator) {
+    return ThrowUnexpectedObjectError();
+  }
+
+  return v8::Int32::New(break_iterator->current());
+}
+
+v8::Handle<v8::Value> BreakIterator::BreakIteratorBreakType(
+    const v8::Arguments& args) {
+  icu::BreakIterator* break_iterator = UnpackBreakIterator(args.Holder());
+  if (!break_iterator) {
+    return ThrowUnexpectedObjectError();
+  }
+
+  // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
+  int32_t status =
+      static_cast<RuleBasedBreakIterator*>(break_iterator)->getRuleStatus();
+  // Keep return values in sync with JavaScript BreakType enum.
+  if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
+    return v8::Int32::New(UBRK_WORD_NONE);
+  } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
+    return v8::Int32::New(UBRK_WORD_NUMBER);
+  } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
+    return v8::Int32::New(UBRK_WORD_LETTER);
+  } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
+    return v8::Int32::New(UBRK_WORD_KANA);
+  } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
+    return v8::Int32::New(UBRK_WORD_IDEO);
+  } else {
+    return v8::Int32::New(-1);
+  }
+}
+
+v8::Handle<v8::Value> BreakIterator::JSBreakIterator(
+    const v8::Arguments& args) {
+  v8::HandleScope handle_scope;
+
+  if (args.Length() != 2 || !args[0]->IsString() || !args[1]->IsString()) {
+    return v8::ThrowException(v8::Exception::SyntaxError(
+        v8::String::New("Locale and iterator type are required.")));
+  }
+
+  v8::String::Utf8Value locale(args[0]);
+  icu::Locale icu_locale(*locale);
+
+  UErrorCode status = U_ZERO_ERROR;
+  icu::BreakIterator* break_iterator = NULL;
+  v8::String::Utf8Value type(args[1]);
+  if (!strcmp(*type, "character")) {
+    break_iterator =
+        icu::BreakIterator::createCharacterInstance(icu_locale, status);
+  } else if (!strcmp(*type, "word")) {
+    break_iterator =
+        icu::BreakIterator::createWordInstance(icu_locale, status);
+  } else if (!strcmp(*type, "sentence")) {
+    break_iterator =
+        icu::BreakIterator::createSentenceInstance(icu_locale, status);
+  } else if (!strcmp(*type, "line")) {
+    break_iterator =
+        icu::BreakIterator::createLineInstance(icu_locale, status);
+  } else {
+    return v8::ThrowException(v8::Exception::SyntaxError(
+        v8::String::New("Invalid iterator type.")));
+  }
+
+  if (U_FAILURE(status)) {
+    delete break_iterator;
+    return v8::ThrowException(v8::Exception::Error(
+        v8::String::New("Failed to create break iterator.")));
+  }
+
+  if (break_iterator_template_.IsEmpty()) {
+    v8::Local<v8::FunctionTemplate> raw_template(v8::FunctionTemplate::New());
+
+    raw_template->SetClassName(v8::String::New("v8Locale.v8BreakIterator"));
+
+    // Define internal field count on instance template.
+    v8::Local<v8::ObjectTemplate> object_template =
+        raw_template->InstanceTemplate();
+    object_template->SetInternalFieldCount(1);
+
+    // Define all of the prototype methods on prototype template.
+    v8::Local<v8::ObjectTemplate> proto = raw_template->PrototypeTemplate();
+    proto->Set(v8::String::New("adoptText"),
+               v8::FunctionTemplate::New(BreakIteratorAdoptText));
+    proto->Set(v8::String::New("first"),
+               v8::FunctionTemplate::New(BreakIteratorFirst));
+    proto->Set(v8::String::New("next"),
+               v8::FunctionTemplate::New(BreakIteratorNext));
+    proto->Set(v8::String::New("current"),
+               v8::FunctionTemplate::New(BreakIteratorCurrent));
+    proto->Set(v8::String::New("breakType"),
+               v8::FunctionTemplate::New(BreakIteratorBreakType));
+
+    break_iterator_template_ =
+        v8::Persistent<v8::FunctionTemplate>::New(raw_template);
+  }
+
+  // Create an empty object wrapper.
+  v8::Local<v8::Object> local_object =
+      break_iterator_template_->GetFunction()->NewInstance();
+  v8::Persistent<v8::Object> wrapper =
+      v8::Persistent<v8::Object>::New(local_object);
+
+  // Set break iterator as internal field of the resulting JS object.
+  wrapper->SetPointerInInternalField(0, break_iterator);
+
+  // Make object handle weak so we can delete iterator once GC kicks in.
+  wrapper.MakeWeak(NULL, DeleteBreakIterator);
+
+  return wrapper;
+}
+
+} }  // namespace v8::internal
diff --git a/src/extensions/experimental/break-iterator.h b/src/extensions/experimental/break-iterator.h
new file mode 100644
index 0000000000..318b1534c2
--- /dev/null
+++ b/src/extensions/experimental/break-iterator.h
@@ -0,0 +1,83 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_EXTENSIONS_EXPERIMENTAL_BREAK_ITERATOR_H_
+#define V8_EXTENSIONS_EXPERIMENTAL_BREAK_ITERATOR_H_
+
+#include <v8.h>
+
+#include "unicode/uversion.h"
+
+namespace U_ICU_NAMESPACE {
+class BreakIterator;
+}
+
+namespace v8 {
+namespace internal {
+
+class BreakIterator {
+ public:
+  static v8::Handle<v8::Value> JSBreakIterator(const v8::Arguments& args);
+
+  // Helper methods for various bindings.
+
+  // Unpacks break iterator object from corresponding JavaScript object.
+  static icu::BreakIterator* UnpackBreakIterator(v8::Handle<v8::Object> obj);
+
+  // Release memory we allocated for the BreakIterator once the JS object that
+  // holds the pointer gets garbage collected.
+  static void DeleteBreakIterator(v8::Persistent<v8::Value> object,
+                                  void* param);
+
+  // Assigns new text to the iterator.
+  static v8::Handle<v8::Value> BreakIteratorAdoptText(
+      const v8::Arguments& args);
+
+  // Moves iterator to the beginning of the string and returns new position.
+  static v8::Handle<v8::Value> BreakIteratorFirst(const v8::Arguments& args);
+
+  // Moves iterator to the next position and returns it.
+  static v8::Handle<v8::Value> BreakIteratorNext(const v8::Arguments& args);
+
+  // Returns current iterator's current position.
+  static v8::Handle<v8::Value> BreakIteratorCurrent(
+      const v8::Arguments& args);
+
+  // Returns type of the item from current position.
+  // This call is only valid for word break iterators. Others just return 0.
+  static v8::Handle<v8::Value> BreakIteratorBreakType(
+      const v8::Arguments& args);
+
+ private:
+  BreakIterator() {}
+
+  static v8::Persistent<v8::FunctionTemplate> break_iterator_template_;
+};
+
+} }  // namespace v8::internal
+
+#endif  // V8_EXTENSIONS_EXPERIMENTAL_BREAK_ITERATOR_H_
diff --git a/src/extensions/experimental/experimental.gyp b/src/extensions/experimental/experimental.gyp
index 4d7a9363bf..761f4c7926 100644
--- a/src/extensions/experimental/experimental.gyp
+++ b/src/extensions/experimental/experimental.gyp
@@ -37,6 +37,8 @@
       'target_name': 'i18n_api',
       'type': 'static_library',
       'sources': [
+        'break-iterator.cc',
+        'break-iterator.h',
         'i18n-extension.cc',
         'i18n-extension.h',
       ],
diff --git a/src/extensions/experimental/i18n-extension.cc b/src/extensions/experimental/i18n-extension.cc
index a721ba5ec5..e65fdcc846 100644
--- a/src/extensions/experimental/i18n-extension.cc
+++ b/src/extensions/experimental/i18n-extension.cc
@@ -30,6 +30,7 @@
 #include <algorithm>
 #include <string>
 
+#include "break-iterator.h"
 #include "unicode/locid.h"
 #include "unicode/uloc.h"
 
@@ -87,6 +88,23 @@ const char* const I18NExtension::kSource =
   " var displayLocale = this.displayLocale_(optDisplayLocale);"
   " native function NativeJSDisplayName();"
   " return NativeJSDisplayName(this.locale, displayLocale);"
+  "};"
+  "v8Locale.v8BreakIterator = function(locale, type) {"
+  " native function NativeJSBreakIterator();"
+  " var iterator = NativeJSBreakIterator(locale, type);"
+  " iterator.type = type;"
+  " return iterator;"
+  "};"
+  "v8Locale.v8BreakIterator.BreakType = {"
+  " 'unknown': -1,"
+  " 'none': 0,"
+  " 'number': 100,"
+  " 'word': 200,"
+  " 'kana': 300,"
+  " 'ideo': 400"
+  "};"
+  "v8Locale.prototype.v8CreateBreakIterator = function(type) {"
+  " return new v8Locale.v8BreakIterator(this.locale, type);"
   "};";
 
 v8::Handle<v8::FunctionTemplate> I18NExtension::GetNativeFunction(
@@ -107,6 +125,8 @@ v8::Handle<v8::FunctionTemplate> I18NExtension::GetNativeFunction(
     return v8::FunctionTemplate::New(JSDisplayRegion);
   } else if (name->Equals(v8::String::New("NativeJSDisplayName"))) {
     return v8::FunctionTemplate::New(JSDisplayName);
+  } else if (name->Equals(v8::String::New("NativeJSBreakIterator"))) {
+    return v8::FunctionTemplate::New(BreakIterator::JSBreakIterator);
   }
 
   return v8::Handle<v8::FunctionTemplate>();