ICU-13574 Merging trunk to branch

X-SVN-Rev: 40993
This commit is contained in:
Shane Carr 2018-02-27 07:32:38 +00:00
commit 9759fc597f
3079 changed files with 355350 additions and 11709 deletions

13
.gitattributes vendored
View File

@ -50,6 +50,7 @@ README text !eol
icu4c/icu4c.css -text
icu4c/source/aclocal.m4 -text
icu4c/source/allinone/Windows.CopyUnicodeHeaderFiles.targets -text
icu4c/source/config/m4/icu-conditional.m4 -text
icu4c/source/data/curr/pool.res -text
icu4c/source/data/in/coll/ucadata-implicithan.icu -text
@ -601,6 +602,18 @@ tools/unicodetools/com/ibm/rbm/gui/images/tree_icon_item.gif -text
tools/unicodetools/com/ibm/rbm/gui/images/tree_icon_language.gif -text
tools/unicodetools/com/ibm/rbm/gui/images/tree_icon_project.gif -text
tools/unicodetools/com/ibm/rbm/gui/images/tree_icon_variant.gif -text
vendor/double-conversion/UPDATING.md -text
vendor/double-conversion/upstream/AUTHORS -text
vendor/double-conversion/upstream/BUILD -text
vendor/double-conversion/upstream/COPYING -text
vendor/double-conversion/upstream/Changelog -text
vendor/double-conversion/upstream/LICENSE -text
vendor/double-conversion/upstream/README.md -text
vendor/double-conversion/upstream/SConstruct -text
vendor/double-conversion/upstream/WORKSPACE -text
vendor/double-conversion/upstream/double-conversion/SConscript -text
vendor/double-conversion/upstream/msvc/testrunner.cmd -text
vendor/double-conversion/upstream/test/cctest/SConscript -text
# The following file types are stored in Git-LFS.
*.jar filter=lfs diff=lfs merge=lfs -text

12
.gitignore vendored
View File

@ -87,6 +87,7 @@ icu4c/source/extra/scrptrun/srtest.d
icu4c/source/extra/uconv/*.d
icu4c/source/extra/uconv/*.o
icu4c/source/extra/uconv/*.vcxproj.user
icu4c/source/extra/uconv/.vs
icu4c/source/extra/uconv/Debug
icu4c/source/extra/uconv/Makefile
icu4c/source/extra/uconv/Release
@ -139,6 +140,7 @@ icu4c/source/layoutex/*.d
icu4c/source/layoutex/*.o
icu4c/source/layoutex/*.pdb
icu4c/source/layoutex/*.vcxproj.user
icu4c/source/layoutex/.vs
icu4c/source/layoutex/Debug
icu4c/source/layoutex/Makefile
icu4c/source/layoutex/Release
@ -155,6 +157,7 @@ icu4c/source/samples/*.sdf
icu4c/source/samples/Makefile
icu4c/source/samples/all/*.opensdf
icu4c/source/samples/all/*.sdf
icu4c/source/samples/all/.vs
icu4c/source/samples/all/all.ncb
icu4c/source/samples/all/all.suo
icu4c/source/samples/break/*.vcxproj.user
@ -233,6 +236,10 @@ icu4c/source/samples/datefmt/datefmt.vcproj.*.*.user
icu4c/source/samples/datefmt/release
icu4c/source/samples/datefmt/x64
icu4c/source/samples/datefmt/x86
icu4c/source/samples/dtitvfmtsample/x64
icu4c/source/samples/dtitvfmtsample/x86
icu4c/source/samples/dtptngsample/x64
icu4c/source/samples/dtptngsample/x86
icu4c/source/samples/layout/*.d
icu4c/source/samples/layout/*.pdb
icu4c/source/samples/layout/*.vcxproj.user
@ -267,6 +274,8 @@ icu4c/source/samples/numfmt/numfmt.vcproj.*.*.user
icu4c/source/samples/numfmt/release
icu4c/source/samples/numfmt/x64
icu4c/source/samples/numfmt/x86
icu4c/source/samples/plurfmtsample/x64
icu4c/source/samples/plurfmtsample/x86
icu4c/source/samples/props/*.vcxproj.user
icu4c/source/samples/props/Debug
icu4c/source/samples/props/props
@ -635,6 +644,8 @@ icu4c/source/tools/ctestfw/libsicutest*
icu4c/source/tools/ctestfw/release
icu4c/source/tools/ctestfw/x64
icu4c/source/tools/ctestfw/x86
icu4c/source/tools/escapesrc/*.d
icu4c/source/tools/escapesrc/Makefile
icu4c/source/tools/genbrk/*.d
icu4c/source/tools/genbrk/*.o
icu4c/source/tools/genbrk/*.pdb
@ -898,6 +909,7 @@ icu4c/source/tools/toolutil/x64
icu4c/source/tools/toolutil/x86
icu4c/source/tools/tzcode/*.ao
icu4c/source/tools/tzcode/*.o
icu4c/source/tools/tzcode/.vs
icu4c/source/tools/tzcode/Makefile
icu4c/source/uconfig.h.prepend
icu4j/*.jar

View File

@ -383,3 +383,32 @@ Database section 7.
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
6. Google double-conversion
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -6,19 +6,20 @@
# Date: 2017-04-14
#-------------------------
#
# This builds a zipfile containing the *64 bit* Windows binary
# This builds a zipfile containing the *64 bit* Windows binaries.
# (Note: The zipfile does not include the UWP binaries.)
#
# Usage: (after building ICU using MSVC)
# (bring up Powershell ISE)
# cd C:\icu\icu4c\
# Set-ExecutionPolicy -Scope Process AllSigned
# Set-ExecutionPolicy -Scope Process Unrestricted
# .\packaging\distrelease.ps1
#
# Will emit: c:\icu\icu4c\source\dist\icu-windows.zip
#
#
# You will get warnings from the execution policy and the script itself.
# see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
# see https://docs.microsoft.com/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
# for more about execution policies.

View File

@ -3,7 +3,7 @@
<html lang="en-US" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head>
<title>ReadMe for ICU 60.1</title>
<title>ReadMe for ICU 61.1</title>
<meta name="COPYRIGHT" content=
"Copyright (C) 2016 and later: Unicode, Inc. and others. License &amp; terms of use: http://www.unicode.org/copyright.html"/>
<!-- meta name="COPYRIGHT" content=
@ -23,8 +23,8 @@
milestone - if the release note is a milestone release
-->
<body>
<!-- <body> -->
<body class="rc">
<p class="only-draft"><b>Note:</b> This is a draft readme.</p>
<h1>
@ -32,7 +32,7 @@
International Components for Unicode<br/>
<span class="only-rc">Release Candidate</span>
<span class="only-milestone">(Milestone Release)</span>
<abbr title="International Components for Unicode">ICU</abbr> 60.1 ReadMe
<abbr title="International Components for Unicode">ICU</abbr> 61.1 ReadMe
</h1>
<!-- Shouldn't need to comment/uncomment this paragraph, just change the body class -->
@ -44,7 +44,7 @@
<p class="note only-rc">This is a release candidate version of ICU4C.
It is not recommended for production use.</p>
<p>Last updated: 2017-Oct-29<br/>
<p>Last updated: 2018-Feb-16<br/>
Copyright &copy; 2016 and later: Unicode, Inc. and others. License &amp; terms of use:
<a href="http://www.unicode.org/copyright.html">http://www.unicode.org/copyright.html</a><br/>
Copyright &copy; 1997-2016 International Business Machines Corporation and others.
@ -233,7 +233,7 @@
<h2><a name="News" href="#News" id="News">What Is New In This Release?</a></h2>
<p>See the <a href="http://site.icu-project.org/download/60">ICU 60 download page</a>
<p>See the <a href="http://site.icu-project.org/download/61">ICU 61 download page</a>
for an overview of this release, important changes, new features, bug fixes, known issues,
changes to supported platforms and build environments,
and migration issues for existing applications migrating from previous ICU releases.</p>

View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<!--
This file is used to copy all of the header files (*.h) from a project's "unicode" folder to a common output folder.
-->
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<!-- This is the location of the common output folder. -->
<CopyDestionationPath>$(SolutionDir)\..\..\include\unicode</CopyDestionationPath>
<BuildDependsOn>
$(BuildDependsOn);
CopyUnicodeHeaderFiles;
</BuildDependsOn>
</PropertyGroup>
<Target Name="CopyUnicodeHeaderFiles">
<ItemGroup>
<!-- Generate a list of all files that end in .h from the 'unicode' folder, relative to the current project. -->
<OutputFiles Include=".\unicode\**\*.h" />
</ItemGroup>
<!-- This message will be logged in the project's build output. -->
<Message Text="Copying @(OutputFiles->Count()) header files to $(CopyDestionationPath). Files copied: @(OutputFiles)" Importance="high"/>
<!-- Perform the copy. -->
<Copy SourceFiles="@(OutputFiles)"
DestinationFolder="$(CopyDestionationPath)\%(RecursiveDir)"
SkipUnchangedFiles="false"></Copy>
</Target>
</Project>

View File

@ -1,8 +1,12 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
# Visual Studio 15
VisualStudioVersion = 15.0.27130.2026
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
EndProject
@ -11,6 +15,10 @@ EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctestfw", "..\tools\ctestfw\ctestfw.vcxproj", "{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "date", "..\samples\date\date.vcxproj", "{38B5751A-C6F9-4409-950C-F4F9DA17275F}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "derb", "..\tools\genrb\derb.vcxproj", "{D3065ADB-8820-4CC7-9B6C-9510833961A3}"
ProjectSection(ProjectDependencies) = postProject
@ -66,8 +74,14 @@ EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makedata_uwp", "..\data\makedata_uwp.vcxproj", "{B1D53358-37BD-48BC-B27C-68BAF1E78508}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "i18n_uwp", "..\i18n\i18n_uwp.vcxproj", "{6786C051-383B-47E0-9E82-B8B994E06A25}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "common_uwp", "..\common\common_uwp.vcxproj", "{C10CF34B-3F79-430E-AD38-5A32DC0589C2}"
ProjectSection(ProjectDependencies) = postProject
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@ -409,6 +423,9 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A714726C-FE2D-466C-95F3-06C5C4EAE54F}
EndGlobalSection
GlobalSection(SubversionScc) = preSolution
Svn-Managed = True
Manager = AnkhSVN - Subversion Support for Visual Studio

View File

@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() {
******************************************************************
*/
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
fHandled[i] = 0;
}
UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
(void)status;
}
UnhandledEngine::~UnhandledEngine() {
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
if (fHandled[i] != 0) {
delete fHandled[i];
}
}
delete fHandled;
fHandled = nullptr;
}
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)
&& fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
UnhandledEngine::handles(UChar32 c) const {
return fHandled && fHandled->contains(c);
}
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
int32_t breakType,
UVector32 &/*foundBreaks*/ ) const {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
return 0;
}
void
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
if (fHandled[breakType] == 0) {
fHandled[breakType] = new UnicodeSet();
if (fHandled[breakType] == 0) {
return;
}
}
if (!fHandled[breakType]->contains(c)) {
UErrorCode status = U_ZERO_ERROR;
// Apply the entire script of the character.
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
UnhandledEngine::handleCharacter(UChar32 c) {
if (fHandled == nullptr) {
fHandled = new UnicodeSet();
if (fHandled == nullptr) {
return;
}
}
if (!fHandled->contains(c)) {
UErrorCode status = U_ZERO_ERROR;
// Apply the entire script of the character.
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
}
}
/*
@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;
@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
if (lbe != NULL && lbe->handles(c)) {
return lbe;
}
}
}
// We didn't find an engine. Create one.
lbe = loadEngineFor(c, breakType);
lbe = loadEngineFor(c);
if (lbe != NULL) {
fEngines->push((void *)lbe, status);
}
@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
}
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
if (m != NULL) {
const LanguageBreakEngine *engine = NULL;
switch(code) {
@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
}
DictionaryMatcher *
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
UErrorCode status = U_ZERO_ERROR;
// open root from brkitr tree.
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);

View File

@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
virtual UBool handles(UChar32 c) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const = 0;
};
@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
};
@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine {
private:
/**
* The sets of characters handled, for each break type
* The set of characters handled.
* @internal
*/
UnicodeSet *fHandled[4];
UnicodeSet *fHandled;
public:
@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const;
virtual UBool handles(UChar32 c) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character.</p>
*
* @param c A character which the engine should handle
* @param breakType The type of text break for which the engine should handle c
*/
virtual void handleCharacter(UChar32 c, int32_t breakType);
virtual void handleCharacter(UChar32 c);
};
@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
protected:
/**
@ -263,21 +252,17 @@ protected:
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
/**
* <p>Create a DictionaryMatcher for the specified script.</p>
* @param script An ISO 15924 script code that identifies the dictionary to be
* created.
* @param breakType The kind of text break for which a dictionary is
* sought.
* @return A DictionaryMatcher with the desired characteristics, or NULL.
*/
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
};
U_NAMESPACE_END

View File

@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN
// -------------------------------------
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
{
char fnbuff[256];
char ext[4]={'\0'};
@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
result->setBreakType(kind);
}
ures_close(b);
@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
result = BreakIterator::buildInstance(loc, "grapheme", status);
break;
case UBRK_WORD:
result = BreakIterator::buildInstance(loc, "word", kind, status);
result = BreakIterator::buildInstance(loc, "word", status);
break;
case UBRK_LINE:
uprv_strcpy(lbType, "line");
@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
uprv_strcat(lbType, lbKeyValue);
}
}
result = BreakIterator::buildInstance(loc, lbType, kind, status);
result = BreakIterator::buildInstance(loc, lbType, status);
break;
case UBRK_SENTENCE:
result = BreakIterator::buildInstance(loc, "sentence", kind, status);
result = BreakIterator::buildInstance(loc, "sentence", status);
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
{
char ssKeyValue[kKeyValueLenMax] = {0};
@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
#endif
break;
case UBRK_TITLE:
result = BreakIterator::buildInstance(loc, "title", kind, status);
result = BreakIterator::buildInstance(loc, "title", status);
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;

View File

@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
sink.Append(s8, 2);
}
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (length > 0) {
if (edits != nullptr) {
edits->addUnchanged(length);
}
if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
sink.Append(reinterpret_cast<const char *>(s), length);
}
void
ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits) {
U_ASSERT(length > 0);
if (edits != nullptr) {
edits->addUnchanged(length);
}
if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
sink.Append(reinterpret_cast<const char *>(s), length);
}
return TRUE;
}
UBool
@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
}
return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode);
int32_t length = (int32_t)(limit - s);
if (length > 0) {
appendNonEmptyUnchanged(s, length, sink, options, edits);
}
return TRUE;
}
U_NAMESPACE_END

View File

@ -43,11 +43,19 @@ public:
static UBool appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
return TRUE;
}
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};
U_NAMESPACE_END

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
@ -33,7 +33,7 @@
<MinimumVisualStudioVersion>14.0</MinimumVisualStudioVersion>
<AppContainerApplication>true</AppContainerApplication>
<ApplicationType>Windows Store</ApplicationType>
<WindowsTargetPlatformVersion>10.0.10586.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
<ApplicationTypeRevision>10.0</ApplicationTypeRevision>
</PropertyGroup>
@ -42,7 +42,7 @@
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
@ -192,7 +192,7 @@
<ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin32uwp\icuuc60.dll</OutputFile>
<OutputFile>..\..\bin32uwp\icuuc61.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary>
</Link>
@ -208,7 +208,7 @@
<ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin32uwp\icuuc60d.dll</OutputFile>
<OutputFile>..\..\bin32uwp\icuuc61d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary>
</Link>
@ -224,7 +224,7 @@
<ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64uwp\icuuc60.dll</OutputFile>
<OutputFile>..\..\bin64uwp\icuuc61.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary>
</Link>
@ -240,7 +240,7 @@
<ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64uwp\icuuc60d.dll</OutputFile>
<OutputFile>..\..\bin64uwp\icuuc61d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary>
</Link>
@ -256,7 +256,7 @@
<ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\binARMuwp\icuuc60.dll</OutputFile>
<OutputFile>..\..\binARMuwp\icuuc61.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary>
</Link>
@ -272,7 +272,7 @@
<ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\binARMuwp\icuuc60d.dll</OutputFile>
<OutputFile>..\..\binARMuwp\icuuc61d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary>
</Link>
@ -302,9 +302,7 @@
<ClCompile Include="rbbi_cache.cpp" />
<ClCompile Include="dictionarydata.cpp" />
<ClCompile Include="ubrk.cpp" />
<ClCompile Include="ucol_swp.cpp">
<AdditionalIncludeDirectories>..\i18n;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<ClCompile Include="ucol_swp.cpp" />
<ClCompile Include="propsvec.cpp" />
<ClCompile Include="uarrsort.cpp" />
<ClCompile Include="uenum.cpp" />
@ -471,34 +469,14 @@
<ClCompile Include="ulistformatter.cpp" />
</ItemGroup>
<ItemGroup>
<CustomBuild Include="unicode\ubidi.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="localsvc.h" />
<ClInclude Include="msvcres.h" />
<ClInclude Include="pluralmap.h" />
<ClInclude Include="propname_data.h" />
<ClInclude Include="ubidi_props.h" />
<ClInclude Include="ubidiimp.h" />
<CustomBuild Include="unicode\ushape.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="brkeng.h" />
<CustomBuild Include="unicode\brkiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\dbbi.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="dictbe.h" />
<CustomBuild Include="unicode\rbbi.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="rbbidata.h" />
<ClInclude Include="rbbinode.h" />
<ClInclude Include="rbbirb.h" />
@ -508,10 +486,6 @@
<ClInclude Include="rbbitblb.h" />
<ClInclude Include="rbbi_cache.h" />
<ClInclude Include="dictionarydata.h" />
<CustomBuild Include="unicode\ubrk.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ubidi_props_data.h" />
<ClInclude Include="ubrkimpl.h" />
<ClInclude Include="ucase_props_data.h" />
@ -522,23 +496,11 @@
<ClInclude Include="unistrappender.h" />
<ClInclude Include="hash.h" />
<ClInclude Include="propsvec.h" />
<CustomBuild Include="unicode\strenum.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uarrsort.h" />
<CustomBuild Include="unicode\uenum.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uelement.h" />
<ClInclude Include="uenumimp.h" />
<ClInclude Include="uhash.h" />
<ClInclude Include="ulist.h" />
<CustomBuild Include="unicode\enumset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unicode\filteredbrk.h" />
<ClInclude Include="ustrenum.h" />
<ClInclude Include="utrie.h" />
@ -549,396 +511,68 @@
<ClInclude Include="uvectr32.h" />
<ClInclude Include="uvectr64.h" />
<ClInclude Include="cpputils.h" />
<CustomBuild Include="unicode\docmain.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\errorcode.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\icudataver.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="locmap.h" />
<ClInclude Include="mutex.h" />
<CustomBuild Include="unicode\platform.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ptypes.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\putil.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="putilimp.h" />
<CustomBuild Include="unicode\std_string.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uassert.h" />
<CustomBuild Include="unicode\uconfig.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\umachine.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="umutex.h" />
<ClInclude Include="uposixdefs.h" />
<CustomBuild Include="unicode\urename.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utrace.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="utracimp.h" />
<CustomBuild Include="unicode\utypes.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uvernum.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uversion.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="wintz.h" />
<CustomBuild Include="unicode\ucnv.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucnv_bld.h" />
<CustomBuild Include="unicode\ucnv_cb.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucnv_cnv.h" />
<CustomBuild Include="unicode\ucnv_err.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucnv_ext.h" />
<ClInclude Include="ucnv_imp.h" />
<ClInclude Include="ucnv_io.h" />
<ClInclude Include="ucnvmbcs.h" />
<CustomBuild Include="unicode\ucnvsel.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="cmemory.h" />
<CustomBuild Include="unicode\localpointer.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uclean.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucln.h" />
<ClInclude Include="ucln_cmn.h" />
<ClInclude Include="ucln_imp.h" />
<ClInclude Include="ucmndata.h" />
<CustomBuild Include="unicode\udata.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="udatamem.h" />
<ClInclude Include="udataswp.h" />
<ClInclude Include="umapfile.h" />
<CustomBuild Include="unicode\uobject.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\dtintrv.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\parseerr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\parsepos.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\umisc.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" />
<CustomBuild Include="unicode\idna.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="punycode.h" />
<CustomBuild Include="unicode\uidna.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="locbased.h" />
<CustomBuild Include="unicode\locid.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="locutil.h" />
<CustomBuild Include="unicode\resbund.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" />
<CustomBuild Include="unicode\locdspnm.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\simpleformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucat.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\udisplaycontext.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uldnames.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uloc.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ulocimp.h" />
<CustomBuild Include="unicode\ures.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unifiedcache.h" />
<ClInclude Include="uresdata.h" />
<ClInclude Include="uresimp.h" />
<ClInclude Include="ureslocs.h" />
<ClInclude Include="resource.h" />
<CustomBuild Include="unicode\ucurr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucurrimp.h" />
<CustomBuild Include="unicode\caniter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="norm2allmodes.h" />
<CustomBuild Include="unicode\normalizer2.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="normalizer2impl.h" />
<CustomBuild Include="unicode\normlzr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unorm.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unorm2.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unormimp.h" />
<ClInclude Include="bmpset.h" />
<ClInclude Include="messageimpl.h" />
<ClInclude Include="patternprops.h" />
<ClInclude Include="propname.h" />
<ClInclude Include="ruleiter.h" />
<CustomBuild Include="unicode\symtable.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucase.h" />
<CustomBuild Include="unicode\uchar.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unifilt.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unifunct.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unimatch.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uniset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unisetspan.h" />
<ClInclude Include="uprops.h" />
<ClInclude Include="usc_impl.h" />
<CustomBuild Include="unicode\uscript.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uset_imp.h" />
<CustomBuild Include="unicode\usetiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\icuplug.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="icuplugimp.h" />
<ClInclude Include="serv.h" />
<ClInclude Include="servloc.h" />
<ClInclude Include="servnotf.h" />
<ClInclude Include="sprpimpl.h" />
<CustomBuild Include="unicode\usprep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\appendable.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="bytesinkutil.h" />
<CustomBuild Include="unicode\bytestream.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\bytestrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\bytestriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\chariter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="charstr.h" />
<ClInclude Include="cstring.h" />
<ClInclude Include="cstr.h" />
<ClInclude Include="cwchar.h" />
<CustomBuild Include="unicode\messagepattern.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\rep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\schriter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\stringpiece.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\stringtriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucasemap.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucharstrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucharstriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uchriter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uinvchar.h" />
<CustomBuild Include="unicode\uiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unistr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\urep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" />
<CustomBuild Include="unicode\ustring.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ustringtrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utext.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf16.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf32.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf8.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf_old.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\listformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ulistformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\stringoptions.h">
<Command>copy "%(FullPath)" ..\..\include\unicode </Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="common.rc" />

View File

@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN
******************************************************************
*/
DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
fTypes = breakTypes;
DictionaryBreakEngine::DictionaryBreakEngine() {
}
DictionaryBreakEngine::~DictionaryBreakEngine() {
}
UBool
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)
&& fSet.contains(c));
DictionaryBreakEngine::handles(UChar32 c) const {
return fSet.contains(c);
}
int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const {
(void)startPos; // TODO: remove this param?
int32_t result = 0;
@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text,
}
rangeStart = start;
rangeEnd = current;
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current);
}
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current);
return result;
}
@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2;
static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
@ -436,7 +431,7 @@ static const int32_t LAO_MIN_WORD = 2;
static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2;
LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
@ -632,7 +627,7 @@ static const int32_t BURMESE_MIN_WORD = 2;
static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2;
BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
@ -825,7 +820,7 @@ static const int32_t KHMER_MIN_WORD = 2;
static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
@ -1047,7 +1042,7 @@ foundBest:
*/
static const uint32_t kuint32max = 0xFFFFFFFF;
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
@ -1324,8 +1319,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
if (katakanaRunLength < kMaxKatakanaGroupLength) {
uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) {
bestSnlp.setElementAt(newSnlp, j);
if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
}
}

View File

@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
UnicodeSet fSet;
/**
* The set of break types handled by this engine
* @internal
*/
uint32_t fTypes;
/**
* <p>Default constructor.</p>
*
*/
DictionaryBreakEngine();
public:
/**
* <p>Constructor setting the break types handled.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
* <p>Constructor </p>
*/
DictionaryBreakEngine( uint32_t breakTypes );
DictionaryBreakEngine();
/**
* <p>Virtual destructor.</p>
@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character.
*/
virtual UBool handles( UChar32 c, int32_t breakType ) const;
virtual UBool handles(UChar32 c) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* that starts from the first character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param breakType The type of break desired, or -1.
* @param foundBreaks vector of int32_t to receive the break positions
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const;
protected:
@ -107,13 +89,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
*/
virtual void setCharacters( const UnicodeSet &set );
/**
* <p>Set the break types handled by this engine.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
// virtual void setBreakTypes( uint32_t breakTypes );
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*

View File

@ -65,7 +65,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
* tables object that is passed in as a parameter.
*/
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
: fSCharIter(UnicodeString())
: fText(UTEXT_INITIALIZER), fSCharIter(UnicodeString())
{
init(status);
fData = new RBBIDataWrapper(data, status); // status checked in constructor
@ -83,7 +83,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
uint32_t ruleLength,
UErrorCode &status)
: fSCharIter(UnicodeString())
: fText(UTEXT_INITIALIZER), fSCharIter(UnicodeString())
{
init(status);
if (U_FAILURE(status)) {
@ -114,7 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
: fSCharIter(UnicodeString())
: fText(UTEXT_INITIALIZER), fSCharIter(UnicodeString())
{
init(status);
fData = new RBBIDataWrapper(udm, status); // status checked in constructor
@ -135,7 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
UParseError &parseError,
UErrorCode &status)
: fSCharIter(UnicodeString())
: fText(UTEXT_INITIALIZER), fSCharIter(UnicodeString())
{
init(status);
if (U_FAILURE(status)) {return;}
@ -159,7 +159,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
// of rules.
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator()
: fSCharIter(UnicodeString())
: fText(UTEXT_INITIALIZER), fSCharIter(UnicodeString())
{
UErrorCode status = U_ZERO_ERROR;
init(status);
@ -174,7 +174,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator()
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
: BreakIterator(other),
fSCharIter(UnicodeString())
fText(UTEXT_INITIALIZER), fSCharIter(UnicodeString())
{
UErrorCode status = U_ZERO_ERROR;
this->init(status);
@ -222,7 +222,6 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
}
BreakIterator::operator=(that);
fBreakType = that.fBreakType;
if (fLanguageBreakEngines != NULL) {
delete fLanguageBreakEngines;
fLanguageBreakEngines = NULL; // Just rebuild for now
@ -278,18 +277,12 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
//
//-----------------------------------------------------------------------------
void RuleBasedBreakIterator::init(UErrorCode &status) {
fText = UTEXT_INITIALIZER;
fCharIter = NULL;
fData = NULL;
fPosition = 0;
fRuleStatusIndex = 0;
fDone = false;
fDictionaryCharCount = 0;
fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
// dictionary behavior for Break Iterators that are
// built from rules. Even better would be the ability to
// declare the type in the rules.
fLanguageBreakEngines = NULL;
fUnhandledBreakEngine = NULL;
fBreakCache = NULL;
@ -1239,7 +1232,7 @@ static void U_CALLCONV initLanguageFactories() {
static const LanguageBreakEngine*
getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
getLanguageBreakEngineFromFactory(UChar32 c)
{
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
if (gLanguageBreakFactories == NULL) {
@ -1250,7 +1243,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
const LanguageBreakEngine *lbe = NULL;
while (--i >= 0) {
LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
lbe = factory->getEngineFor(c, breakType);
lbe = factory->getEngineFor(c);
if (lbe != NULL) {
break;
}
@ -1282,14 +1275,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
int32_t i = fLanguageBreakEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
if (lbe->handles(c, fBreakType)) {
if (lbe->handles(c)) {
return lbe;
}
}
// No existing dictionary took the character. See if a factory wants to
// give us a new LanguageBreakEngine for this character.
lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
lbe = getLanguageBreakEngineFromFactory(c);
// If we got one, use it and push it on our stack.
if (lbe != NULL) {
@ -1305,6 +1298,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
fUnhandledBreakEngine = new UnhandledEngine(status);
if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
// Put it last so that scripts for which we have an engine get tried
// first.
@ -1319,25 +1313,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
// Tell the reject engine about the character; at its discretion, it may
// add more than just the one character.
fUnhandledBreakEngine->handleCharacter(c, fBreakType);
fUnhandledBreakEngine->handleCharacter(c);
return fUnhandledBreakEngine;
}
/*int32_t RuleBasedBreakIterator::getBreakType() const {
return fBreakType;
}*/
void RuleBasedBreakIterator::setBreakType(int32_t type) {
fBreakType = type;
}
// Forwards to fBreakCache->dumpCache(); presumably a debugging aid.
// NOTE(review): fBreakCache is dereferenced without a null check here --
// confirm it is always non-NULL by the time this can be called.
void RuleBasedBreakIterator::dumpCache() {
fBreakCache->dumpCache();
}
// Forwards to fData->printData(), which prints the RBBI data header and
// rule source via RBBIDebugPrintf.
// NOTE(review): printData's body appears to be compiled only when RBBI_DEBUG
// is defined, so this is presumably a no-op otherwise -- confirm.
// NOTE(review): fData is dereferenced without a null check -- confirm it is
// always set before this can be called.
void RuleBasedBreakIterator::dumpTables() {
fData->printData();
}
/**
* Returns the description used to create this iterator
*/

View File

@ -165,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
if (lbe != NULL) {
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, fBreaks);
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
}
// Reload the loop variables for the next go-round

View File

@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
#endif
#ifdef RBBI_DEBUG
void RBBIDataWrapper::printData() {
#ifdef RBBI_DEBUG
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() {
RBBIDebugPrintf("%c", fRuleSource[c]);
}
RBBIDebugPrintf("\n\n");
}
#endif
}
U_NAMESPACE_END

View File

@ -165,13 +165,8 @@ public:
UBool operator ==(const RBBIDataWrapper &other) const;
int32_t hashCode();
const UnicodeString &getRuleSourceString() const;
#ifdef RBBI_DEBUG
void printData();
void printTable(const char *heading, const RBBIStateTable *table);
#else
#define printData()
#define printTable(heading, table)
#endif
/* */
/* Pointers to items within the data */

View File

@ -282,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
// Generate the mapping tables (TRIE) from input 32-bit characters to
// Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
builder.fSetBuilder->build();
builder.fSetBuilder->buildRanges();
//
@ -317,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
}
#endif
builder.optimizeTables();
builder.fSetBuilder->buildTrie();
//
// Package up the compiled data into a memory image
// in the run-time format.
@ -348,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
return This;
}
// Fold together redundant character classes (state table columns) and then
// redundant states (state table rows) in all four state tables.
void RBBIRuleBuilder::optimizeTables() {
    // Pass 1: duplicate character classes.
    // Duplicates are detected in the forward table only; the search resumes from
    // keepClass on every iteration and starts at class 3.
    int32_t keepClass = 3;
    int32_t dupClass = 0;
    for (;;) {
        if (!fForwardTables->findDuplCharClassFrom(keepClass, dupClass)) {
            break;
        }
        // Re-map the ranges of the duplicate class onto the class being kept,
        // then drop the now-unused column from every table.
        fSetBuilder->mergeCategories(keepClass, dupClass);
        fForwardTables->removeColumn(dupClass);
        fReverseTables->removeColumn(dupClass);
        fSafeFwdTables->removeColumn(dupClass);
        fSafeRevTables->removeColumn(dupClass);
    }

    // Pass 2: duplicate states, handled independently for each table.
    fForwardTables->removeDuplicateStates();
    fReverseTables->removeDuplicateStates();
    fSafeFwdTables->removeDuplicateStates();
    fSafeRevTables->removeDuplicateStates();
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View File

@ -126,6 +126,14 @@ public:
);
virtual ~RBBIRuleBuilder();
/**
* Fold together redundant character classes (table columns) and
* redundant states (table rows). Done after initial table generation,
* before serializing the result.
*/
void optimizeTables();
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere.

View File

@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder()
// from the Unicode Sets.
//
//------------------------------------------------------------------------
void RBBISetBuilder::build() {
void RBBISetBuilder::buildRanges() {
RBBINode *usetNode;
RangeDescriptor *rlRange;
@ -245,11 +245,16 @@ void RBBISetBuilder::build() {
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
}
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number.
//
void RBBISetBuilder::buildTrie() {
RangeDescriptor *rlRange;
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number
//
fTrie = utrie2_open(0, // Initial value for all code points.
0, // Error value for out-of-range input.
fStatus);
@ -265,6 +270,22 @@ void RBBISetBuilder::build() {
}
void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
U_ASSERT(left >= 1);
U_ASSERT(right > left);
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
int32_t rangeNum = rd->fNum & ~DICT_BIT;
int32_t rangeDict = rd->fNum & DICT_BIT;
if (rangeNum == right) {
rd->fNum = left | rangeDict;
} else if (rangeNum > right) {
rd->fNum--;
}
}
--fGroupCount;
}
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() {
lastPrintedGroupNum = groupNum;
RBBIDebugPrintf("%2i ", groupNum);
if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
void RangeDescriptor::setDictionaryFlag() {
int i;
for (i=0; i<this->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
UnicodeString setName;
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
static const char16_t *dictionary = u"dictionary";
for (i=0; i<fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
RBBINode *setRef = usetNode->fParent;
if (setRef != nullptr) {
RBBINode *varRef = setRef->fParent;
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
setName = varRef->fText;
if (varRef && varRef->fType == RBBINode::varRef) {
const UnicodeString *setName = &varRef->fText;
if (setName->compare(dictionary, -1) == 0) {
fNum |= RBBISetBuilder::DICT_BIT;
break;
}
}
}
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
this->fNum |= 0x4000;
break;
}
}
}

View File

@ -82,7 +82,8 @@ public:
RBBISetBuilder(RBBIRuleBuilder *rb);
~RBBISetBuilder();
void build();
void buildRanges();
void buildTrie();
void addValToSets(UVector *sets, uint32_t val);
void addValToSet (RBBINode *usetNode, uint32_t val);
int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
@ -93,6 +94,13 @@ public:
UChar32 getFirstChar(int32_t val) const;
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
// character were encountered.
/** merge two character categories that have been identified as having equivalent behavior.
* The ranges belonging to the right category (table column) will be added to the left.
*/
void mergeCategories(int32_t left, int32_t right);
static constexpr int32_t DICT_BIT = 0x4000;
#ifdef RBBI_DEBUG
void printSets();
void printRanges();

View File

@ -22,6 +22,7 @@
#include "rbbidata.h"
#include "cstring.h"
#include "uassert.h"
#include "uvectr32.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() {
// if sd->fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
// the fLookAhead field or this state also.
// the fLookAhead field for this state also.
if (endMarker->fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
}
#endif
//
//  findDuplCharClassFrom()
//
//      Iterator-style search for two character categories (state table columns)
//      whose transitions, compared as 16 bit values, are identical in every state.
//
//      baseCategory  (in/out) column at which the search resumes; on success,
//                    the first column of the duplicate pair.
//      duplCategory  (out) on success, the later column that duplicates
//                    baseCategory. The incoming value is not used.
//
//      Returns true when a duplicate pair was found, false when no further
//      duplicates exist.
//
bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
    int32_t numStates = fDStates->size();
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();

    for (; baseCategory < numCols-1; ++baseCategory) {
        for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
            // Start from two different values: with an empty state table the loop
            // below never assigns them, and the comparison after it must then
            // report "not a duplicate" instead of reading uninitialized variables.
            uint16_t table_base = 0;
            uint16_t table_dupl = 1;
            for (int32_t state=0; state<numStates; state++) {
                RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
                table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
                table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
                if (table_base != table_dupl) {
                    break;
                }
            }
            // Equal here means the loop ran through every state without a mismatch.
            if (table_base == table_dupl) {
                return true;
            }
        }
    }
    return false;
}
//
// removeColumn()
//
void RBBITableBuilder::removeColumn(int32_t column) {
int32_t numStates = fDStates->size();
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
U_ASSERT(column < sd->fDtran->size());
sd->fDtran->removeElementAt(column);
}
}
/*
* findDuplicateState
*/
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (; firstState<numStates-1; ++firstState) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
for (duplState=firstState+1; duplState<numStates; ++duplState) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
if (firstSD->fAccepting != duplSD->fAccepting ||
firstSD->fLookAhead != duplSD->fLookAhead ||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
continue;
}
bool rowsMatch = true;
for (int32_t col=0; col < numCols; ++col) {
int32_t firstVal = firstSD->fDtran->elementAti(col);
int32_t duplVal = duplSD->fDtran->elementAti(col);
if (!((firstVal == duplVal) ||
((firstVal == firstState || firstVal == duplState) &&
(duplVal == firstState || duplVal == duplState)))) {
rowsMatch = false;
break;
}
}
if (rowsMatch) {
return true;
}
}
}
return false;
}
void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
U_ASSERT(keepState < duplState);
U_ASSERT(duplState < fDStates->size());
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
fDStates->removeElementAt(duplState);
delete duplSD;
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (int32_t state=0; state<numStates; ++state) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
for (int32_t col=0; col<numCols; col++) {
int32_t existingVal = sd->fDtran->elementAti(col);
int32_t newVal = existingVal;
if (existingVal == duplState) {
newVal = keepState;
} else if (existingVal > duplState) {
newVal = existingVal - 1;
}
sd->fDtran->setElementAt(newVal, col);
}
if (sd->fAccepting == duplState) {
sd->fAccepting = keepState;
} else if (sd->fAccepting > duplState) {
sd->fAccepting--;
}
if (sd->fLookAhead == duplState) {
sd->fLookAhead = keepState;
} else if (sd->fLookAhead > duplState) {
sd->fLookAhead--;
}
}
}
/*
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t firstState = 3;
int32_t duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
}
}
//-----------------------------------------------------------------------------
//
@ -1106,7 +1228,6 @@ int32_t RBBITableBuilder::getTableSize() const {
}
//-----------------------------------------------------------------------------
//
// exportTable() export the state transition table in the format required
@ -1256,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
fPositions = NULL;
fDtran = NULL;
fDtran = new UVector(lastInputSymbol+1, *fStatus);
fDtran = new UVector32(lastInputSymbol+1, *fStatus);
if (U_FAILURE(*fStatus)) {
return;
}
@ -1264,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
*fStatus = U_MEMORY_ALLOCATION_ERROR;
return;
}
fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized.
fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized.
// It is indexed by input symbols, and will
// hold the next state number for each
// symbol.

View File

@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
class RBBIRuleScanner;
class RBBIRuleBuilder;
class UVector32;
//
// class RBBITableBuilder is part of the RBBI rule compiler.
@ -42,9 +43,24 @@ public:
void build();
int32_t getTableSize() const; // Return the runtime size in bytes of
// the built state table
void exportTable(void *where); // fill in the runtime state table.
// Sufficient memory must exist at
// the specified location.
/** Fill in the runtime state table. Sufficient memory must exist at the specified location.
*/
void exportTable(void *where);
/** Find duplicate (redundant) character classes within this state table.
 *  This is an iterator-like function, used to identify char classes
 *  (state table columns) that can be eliminated.
 *  @param baseClass (in/out) column at which to resume the search; on success,
 *                   the first column of the duplicate pair.
 *  @param duplClass (out) on success, the column that duplicates baseClass.
 *  @return true if a duplicate pair of columns was found.
 */
bool findDuplCharClassFrom(int32_t &baseClass, int32_t &duplClass);
/** Remove a column from the state table. Used when two character categories
* have been found equivalent, and merged together, to eliminate the unneeded table column.
*/
void removeColumn(int32_t column);
/** Check for, and remove duplicate states (table rows). */
void removeDuplicateStates();
private:
@ -60,8 +76,29 @@ private:
void flagTaggedStates();
void mergeRuleStatusVals();
/**
* Merge redundant state table columns, eliminating character classes with identical behavior.
* Done after the state tables are generated, just before converting to their run-time format.
*/
int32_t mergeColumns();
void addRuleRootNodes(UVector *dest, RBBINode *node);
/** Find the next duplicate state. An iterator function.
* @param firstState (in/out) begin looking at this state, return the first of the
* pair of duplicates.
* @param duplicateState returns the duplicate state of firstState
* @return true if a duplicate pair of states was found.
*/
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
/** Remove a duplicate state.
* @param keepState First of the duplicate pair. Keep it.
* @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
* to refer to keepState instead.
*/
void removeState(int32_t keepState, int32_t duplState);
// Set functions for UVector.
// TODO: make a USet subclass of UVector
@ -112,7 +149,7 @@ public:
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
UVector *fDtran; // Transitions out of this state.
UVector32 *fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates

View File

@ -90,7 +90,6 @@ struct UStringPrepProfile{
UTrie sprepTrie;
const uint16_t* mappingData;
UDataMemory* sprepData;
const UBiDiProps *bdp; /* used only if checkBiDi is set */
int32_t refCount;
UBool isDataLoaded;
UBool doNFKC;

View File

@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode)
/* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
uprv_memset(pBiDi, 0, sizeof(UBiDi));
/* get BiDi properties */
pBiDi->bdp=ubidi_getSingleton();
/* allocate memory for arrays as requested */
if(maxLength>0) {
if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
else
match=0;
if(match!=c && /* has a matching char */
ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
/* special case: process synonyms
create an opening entry for each synonym */
if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
if( pBiDi->fnClassCallback == NULL ||
(dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
{
dir = ubidi_getClass(pBiDi->bdp, c);
dir = ubidi_getClass(c);
}
if(dir >= U_CHAR_DIRECTION_COUNT) {
dir = (UCharDirection)ON;

View File

@ -44,13 +44,6 @@ struct UBiDiProps {
#define INCLUDED_FROM_UBIDI_PROPS_C
#include "ubidi_props_data.h"
/* UBiDiProps singleton ----------------------------------------------------- */
U_CFUNC const UBiDiProps *
ubidi_getSingleton() {
return &ubidi_props_singleton;
}
/* set of property starts for UnicodeSet ------------------------------------ */
static UBool U_CALLCONV
@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32
}
U_CFUNC void
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
int32_t i, length;
UChar32 c, start, limit;
@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
}
/* add the start code point of each same-value range of the trie */
utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
/* add the code points from the bidi mirroring table */
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
for(i=0; i<length; ++i) {
c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]);
sa->addRange(sa->set, c, c+1);
}
/* add the code points from the Joining_Group array where the value changes */
start=bdp->indexes[UBIDI_IX_JG_START];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
jgArray=bdp->jgArray;
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
jgArray=ubidi_props_singleton.jgArray;
for(;;) {
prev=0;
while(start<limit) {
@ -103,11 +96,11 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
/* add the limit code point if the last value was not 0 (it is now start==limit) */
sa->add(sa->set, limit);
}
if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) {
if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) {
/* switch to the second Joining_Group range */
start=bdp->indexes[UBIDI_IX_JG_START2];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
jgArray=bdp->jgArray2;
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
jgArray=ubidi_props_singleton.jgArray2;
} else {
break;
}
@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
/* property access functions ------------------------------------------------ */
U_CFUNC int32_t
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
int32_t max;
if(bdp==NULL) {
return -1;
}
max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
ubidi_getMaxValue(UProperty which) {
int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX];
switch(which) {
case UCHAR_BIDI_CLASS:
return (max&UBIDI_CLASS_MASK);
@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
}
U_CAPI UCharDirection
ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getClass(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UCharDirection)UBIDI_GET_CLASS(props);
}
U_CFUNC UBool
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_isMirrored(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
}
static UChar32
getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
getMirror(UChar32 c, uint16_t props) {
int32_t delta=UBIDI_GET_MIRROR_DELTA(props);
if(delta!=UBIDI_ESC_MIRROR_DELTA) {
return c+delta;
@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
int32_t i, length;
UChar32 c2;
mirrors=bdp->mirrors;
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
mirrors=ubidi_props_singleton.mirrors;
length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
/* linear search */
for(i=0; i<length; ++i) {
@ -188,80 +175,80 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
}
U_CFUNC UChar32
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
return getMirror(bdp, c, props);
ubidi_getMirror(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return getMirror(c, props);
}
U_CFUNC UBool
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_isBidiControl(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
}
U_CFUNC UBool
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_isJoinControl(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
}
U_CFUNC UJoiningType
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getJoiningType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
}
U_CFUNC UJoiningGroup
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
ubidi_getJoiningGroup(UChar32 c) {
UChar32 start, limit;
start=bdp->indexes[UBIDI_IX_JG_START];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
if(start<=c && c<limit) {
return (UJoiningGroup)bdp->jgArray[c-start];
return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start];
}
start=bdp->indexes[UBIDI_IX_JG_START2];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
if(start<=c && c<limit) {
return (UJoiningGroup)bdp->jgArray2[c-start];
return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start];
}
return U_JG_NO_JOINING_GROUP;
}
U_CFUNC UBidiPairedBracketType
ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getPairedBracketType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT);
}
U_CFUNC UChar32
ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getPairedBracket(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
if((props&UBIDI_BPT_MASK)==0) {
return c;
} else {
return getMirror(bdp, c, props);
return getMirror(c, props);
}
}
/* public API (see uchar.h) ------------------------------------------------- */
U_CFUNC UCharDirection
u_charDirection(UChar32 c) {
return ubidi_getClass(&ubidi_props_singleton, c);
u_charDirection(UChar32 c) {
return ubidi_getClass(c);
}
U_CFUNC UBool
u_isMirrored(UChar32 c) {
return ubidi_isMirrored(&ubidi_props_singleton, c);
return ubidi_isMirrored(c);
}
U_CFUNC UChar32
u_charMirror(UChar32 c) {
return ubidi_getMirror(&ubidi_props_singleton, c);
return ubidi_getMirror(c);
}
U_STABLE UChar32 U_EXPORT2
u_getBidiPairedBracket(UChar32 c) {
return ubidi_getPairedBracket(&ubidi_props_singleton, c);
return ubidi_getPairedBracket(c);
}

View File

@ -31,46 +31,40 @@ U_CDECL_BEGIN
/* library API -------------------------------------------------------------- */
struct UBiDiProps;
typedef struct UBiDiProps UBiDiProps;
U_CFUNC const UBiDiProps *
ubidi_getSingleton(void);
U_CFUNC void
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode);
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
/* property access functions */
U_CFUNC int32_t
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which);
ubidi_getMaxValue(UProperty which);
U_CAPI UCharDirection
ubidi_getClass(const UBiDiProps *bdp, UChar32 c);
ubidi_getClass(UChar32 c);
U_CFUNC UBool
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c);
ubidi_isMirrored(UChar32 c);
U_CFUNC UChar32
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c);
ubidi_getMirror(UChar32 c);
U_CFUNC UBool
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c);
ubidi_isBidiControl(UChar32 c);
U_CFUNC UBool
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c);
ubidi_isJoinControl(UChar32 c);
U_CFUNC UJoiningType
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c);
ubidi_getJoiningType(UChar32 c);
U_CFUNC UJoiningGroup
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c);
ubidi_getJoiningGroup(UChar32 c);
U_CFUNC UBidiPairedBracketType
ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c);
ubidi_getPairedBracketType(UChar32 c);
U_CFUNC UChar32
ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c);
ubidi_getPairedBracket(UChar32 c);
/* file definitions --------------------------------------------------------- */

View File

@ -254,8 +254,6 @@ struct UBiDi {
*/
const UBiDi * pParaBiDi;
const UBiDiProps *bdp;
/* alias pointer to the current text */
const UChar *text;

View File

@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
/* data access primitives --------------------------------------------------- */
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
/* Returns a pointer to the UTrie2 stored in the built-in case properties singleton. */
U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie() {
return &ucase_props_singleton.trie;
}
#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
/* number of bits in an 8-bit integer value */
static const uint8_t flagsOffset[256]={
@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={
U_CAPI UChar32 U_EXPORT2
ucase_tolower(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
c+=UCASE_GET_DELTA(props);
}
} else {
@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) {
U_CAPI UChar32 U_EXPORT2
ucase_toupper(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props);
}
@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) {
U_CAPI UChar32 U_EXPORT2
ucase_totitle(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props);
}
@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
}
props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
/* add the one simple case mapping, no matter what type it is */
int32_t delta=UCASE_GET_DELTA(props);
@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) {
return c;
}
namespace LatinCase {
const int8_t TO_LOWER_NORMAL[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};
const int8_t TO_LOWER_TR_LT[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};
const int8_t TO_UPPER_NORMAL[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};
const int8_t TO_UPPER_TR[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};
} // namespace LatinCase
U_NAMESPACE_END
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) {
static inline int32_t
getDotType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
return props&UCASE_DOT_MASK;
} else {
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
result=c+UCASE_GET_DELTA(props);
}
} else {
@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
result=c+UCASE_GET_DELTA(props);
}
@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c,
U_CAPI UChar32 U_EXPORT2
ucase_fold(UChar32 c, uint32_t options) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
c+=UCASE_GET_DELTA(props);
}
} else {
@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
result=c+UCASE_GET_DELTA(props);
}
} else {

View File

@ -26,6 +26,7 @@
#include "putilimp.h"
#include "uset_imp.h"
#include "udataswp.h"
#include "utrie2.h"
#ifdef __cplusplus
U_NAMESPACE_BEGIN
@ -148,6 +149,33 @@ private:
int32_t rowCpIndex;
};
/**
* Fast case mapping data for ASCII/Latin.
* Linear arrays of delta bytes: 0=no mapping; EXC=exception.
* Deltas must not cross the ASCII boundary, or else they cannot be easily used
* in simple UTF-8 code.
*/
namespace LatinCase {
/**
* Case mapping/folding data for code points up to U+017F.
* Exclusive limit: each table below has LIMIT entries and is indexed by code point.
*/
constexpr UChar LIMIT = 0x180;
/** U+017F case-folds and uppercases crossing the ASCII boundary. */
constexpr UChar LONG_S = 0x17f;
/**
* Exception: Complex mapping, or too-large delta.
* Sentinel table value (-128, the minimum int8_t), distinct from every usable delta.
*/
constexpr int8_t EXC = -0x80;
/** Deltas for lowercasing for most locales, and default case folding. */
extern const int8_t TO_LOWER_NORMAL[LIMIT];
/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
extern const int8_t TO_LOWER_TR_LT[LIMIT];
/** Deltas for uppercasing for most locales. */
extern const int8_t TO_UPPER_NORMAL[LIMIT];
/** Deltas for uppercasing for tr/az. */
extern const int8_t TO_UPPER_TR[LIMIT];
} // namespace LatinCase
U_NAMESPACE_END
#endif
@ -308,6 +336,9 @@ enum {
/* definitions for 16-bit case properties word ------------------------------ */
U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie();
/* 2-bit constants for types of cased characters */
#define UCASE_TYPE_MASK 3
enum {
@ -320,10 +351,14 @@ enum {
#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
#define UCASE_IGNORABLE 4
#define UCASE_SENSITIVE 8
#define UCASE_EXCEPTION 0x10
#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
#define UCASE_DOT_MASK 0x60
enum {
UCASE_NO_DOT=0, /* normal characters with cc=0 */

View File

@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s,
// Lead byte of a two-byte UTF-8 sequence: 0xC0 combined with the bits of c above the low six.
inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>(0xc0 | (c >> 6)); }
// Trail byte of a two-byte UTF-8 sequence: 0x80 combined with the low six bits of c.
inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>(0x80 | (c & 0x3f)); }
} // namespace
static UChar32 U_CALLCONV
UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
/*
* Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
/**
* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
* caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
static void
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
const uint8_t *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex=srcStart;
while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
void toLower(int32_t caseLocale, uint32_t options,
const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
const int8_t *latinToLower;
if (caseLocale == UCASE_LOC_ROOT ||
(caseLocale >= 0 ?
!(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
latinToLower = LatinCase::TO_LOWER_NORMAL;
} else {
latinToLower = LatinCase::TO_LOWER_TR_LT;
}
const UTrie2 *trie = ucase_getTrie();
int32_t prev = srcStart;
int32_t srcIndex = srcStart;
for (;;) {
// fast path for simple cases
int32_t cpStart;
csc->cpStart=cpStart=srcIndex;
UChar32 c;
U8_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
if(c<0) {
// Malformed UTF-8.
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
for (;;) {
if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
c = U_SENTINEL;
break;
}
uint8_t lead = src[srcIndex++];
if (lead <= 0x7f) {
int8_t d = latinToLower[lead];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 1;
c = lead;
break;
}
if (d == 0) { continue; }
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
sink, options, edits, errorCode);
char ascii = (char)(lead + d);
sink.Append(&ascii, 1);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
prev = srcIndex;
continue;
} else if (lead < 0xe3) {
uint8_t t;
if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
(t = src[srcIndex] - 0x80) <= 0x3f) {
// U+0080..U+017F
++srcIndex;
c = ((lead - 0xc0) << 6) | t;
int8_t d = latinToLower[c];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 2;
break;
}
if (d == 0) { continue; }
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendTwoBytes(c + d, sink);
if (edits != nullptr) {
edits->addReplace(2, 2);
}
prev = srcIndex;
continue;
}
} else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
(srcIndex + 2) <= srcLimit &&
U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
// most of CJK: no case mappings
srcIndex += 2;
continue;
}
cpStart = --srcIndex;
U8_NEXT(src, srcIndex, srcLimit, c);
if (c < 0) {
// ill-formed UTF-8
continue;
}
uint16_t props = UTRIE2_GET16(trie, c);
if (UCASE_HAS_EXCEPTION(props)) { break; }
int32_t delta;
if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
prev = srcIndex;
}
if (c < 0) {
break;
}
// slow path
const UChar *s;
if (caseLocale >= 0) {
csc->cpStart = cpStart;
csc->cpLimit = srcIndex;
c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
} else {
const UChar *s;
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
c = ucase_toFullFolding(c, &s, options);
}
if (c >= 0) {
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
prev = srcIndex;
}
}
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
sink, options, edits, errorCode);
}
/**
* Uppercases the UTF-8 text src[0..srcLength[ and writes the result to sink.
*
* Input that maps to itself is not copied piece by piece: the function tracks the
* start of the pending unchanged run in `prev` and flushes that run with
* ByteSinkUtil::appendUnchanged() right before each changed code point and once at the end.
*
* @param caseLocale UCASE_LOC_TURKISH selects the TO_UPPER_TR Latin table, any other value
*                   selects TO_UPPER_NORMAL; also forwarded to ucase_toFullUpper().
* @param options    Forwarded to appendUnchanged() and appendResult().
* @param src        UTF-8 input.
* @param csc        Case context; must not be null, because cpStart/cpLimit are written
*                   on the slow path.
* @param srcLength  Number of bytes in src.
* @param sink       Receives the output bytes.
* @param edits      Optional edit log; checked for nullptr before the direct addReplace()
*                   calls here (the helpers presumably do their own check -- TODO confirm).
* @param errorCode  Processing stops as soon as this holds a failure code.
*/
void toUpper(int32_t caseLocale, uint32_t options,
const uint8_t *src, UCaseContext *csc, int32_t srcLength,
icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
const int8_t *latinToUpper;
if (caseLocale == UCASE_LOC_TURKISH) {
latinToUpper = LatinCase::TO_UPPER_TR;
} else {
latinToUpper = LatinCase::TO_UPPER_NORMAL;
}
const UTrie2 *trie = ucase_getTrie();
// prev: start of the input bytes that have not been written to the sink yet.
int32_t prev = 0;
int32_t srcIndex = 0;
for (;;) {
// fast path for simple cases
int32_t cpStart;
UChar32 c;
// The inner loop ends with c<0 when the input is exhausted or an error occurred,
// or with c>=0 and cpStart set when the code point needs the slow path.
for (;;) {
if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
c = U_SENTINEL;
break;
}
uint8_t lead = src[srcIndex++];
if (lead <= 0x7f) {
// ASCII: the byte is its own code point and indexes the Latin table directly.
int8_t d = latinToUpper[lead];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 1;
c = lead;
break;
}
if (d == 0) { continue; }
// srcIndex is already past the byte, so the unchanged run ends at srcIndex - 1.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
sink, options, edits, errorCode);
char ascii = (char)(lead + d);
sink.Append(&ascii, 1);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
prev = srcIndex;
continue;
} else if (lead < 0xe3) {
uint8_t t;
// Lead bytes C2..C5 followed by a trail byte 80..BF; t holds the low six bits.
// Any other sequence in this branch falls through to the generic decoder below.
if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
(t = src[srcIndex] - 0x80) <= 0x3f) {
// U+0080..U+017F
++srcIndex;
c = ((lead - 0xc0) << 6) | t;
int8_t d = latinToUpper[c];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 2;
break;
}
if (d == 0) { continue; }
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendTwoBytes(c + d, sink);
if (edits != nullptr) {
edits->addReplace(2, 2);
}
prev = srcIndex;
continue;
}
} else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
(srcIndex + 2) <= srcLength &&
U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
// most of CJK: no case mappings
srcIndex += 2;
continue;
}
// Generic path: step back onto the lead byte and decode the whole code point.
cpStart = --srcIndex;
U8_NEXT(src, srcIndex, srcLength, c);
if (c < 0) {
// ill-formed UTF-8
// The bytes stay in the pending unchanged run and are copied through as-is.
continue;
}
uint16_t props = UTRIE2_GET16(trie, c);
if (UCASE_HAS_EXCEPTION(props)) { break; }
int32_t delta;
// Only lowercase code points with a non-zero delta change here.
if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
prev = srcIndex;
}
if (c < 0) {
break;
}
// slow path
// Publish the current code point boundaries for utf8_caseContextIterator.
csc->cpStart = cpStart;
csc->cpLimit = srcIndex;
const UChar *s;
c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
// A negative result writes nothing; the code point stays in the pending unchanged run.
if (c >= 0) {
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
prev = srcIndex;
}
}
// Flush whatever unchanged input remains after the last changed code point.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
sink, options, edits, errorCode);
}
} // namespace
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC void U_CALLCONV
@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle(
if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
_caseMap(caseLocale, options, ucase_toFullLower,
src, &csc,
titleLimit, index,
sink, edits, errorCode);
toLower(caseLocale, options,
src, &csc, titleLimit, index,
sink, edits, errorCode);
if(U_FAILURE(errorCode)) {
return;
}
@ -538,8 +726,8 @@ ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
_caseMap(
caseLocale, options, ucase_toFullLower,
toLower(
caseLocale, options,
src, &csc, 0, srcLength,
sink, edits, errorCode);
}
@ -555,9 +743,9 @@ ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
_caseMap(
caseLocale, options, ucase_toFullUpper,
src, &csc, 0, srcLength,
toUpper(
caseLocale, options,
src, &csc, srcLength,
sink, edits, errorCode);
}
}
@ -567,22 +755,10 @@ ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_B
const uint8_t *src, int32_t srcLength,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex = 0;
while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
int32_t cpStart = srcIndex;
UChar32 c;
U8_NEXT(src, srcIndex, srcLength, c);
if(c<0) {
// Malformed UTF-8.
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
sink, options, edits, errorCode);
} else {
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
}
}
toLower(
-1, options,
src, nullptr, 0, srcLength,
sink, edits, errorCode);
}
void

View File

@ -60,15 +60,6 @@ u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
* Generate a debug assertion failure if data is not loaded.
*/
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus
U_NAMESPACE_BEGIN

View File

@ -42,14 +42,6 @@
/* getting a uint32_t properties word from the data */
#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode) {
if(U_FAILURE(*pErrorCode)) {
return FALSE;
}
return TRUE;
}
/* API functions ------------------------------------------------------------ */
/* Gets the Unicode character's general category.*/

View File

@ -60,11 +60,12 @@
* To avoid dependency on other code, this list is hard coded here.
* When an ignorable code point is found and is unmappable, the default callbacks
* will ignore them.
* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
* For a list of the default ignorable code points, use this link:
* https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
*
* This list should be kept in sync with the one in CharsetCallback.java
*/
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
(c == 0x00AD) || \
(c == 0x034F) || \
(c == 0x061C) || \
@ -74,26 +75,15 @@
(0x180B <= c && c <= 0x180E) || \
(0x200B <= c && c <= 0x200F) || \
(0x202A <= c && c <= 0x202E) || \
(c == 0x2060) || \
(0x2066 <= c && c <= 0x2069) || \
(0x2061 <= c && c <= 0x2064) || \
(0x206A <= c && c <= 0x206F) || \
(0x2060 <= c && c <= 0x206F) || \
(c == 0x3164) || \
(0x0FE00 <= c && c <= 0x0FE0F) || \
(c == 0x0FEFF) || \
(c == 0x0FFA0) || \
(0x01BCA0 <= c && c <= 0x01BCA3) || \
(0x01D173 <= c && c <= 0x01D17A) || \
(c == 0x0E0001) || \
(0x0E0020 <= c && c <= 0x0E007F) || \
(0x0E0100 <= c && c <= 0x0E01EF) || \
(c == 0x2065) || \
(0x0FFF0 <= c && c <= 0x0FFF8) || \
(c == 0x0E0000) || \
(0x0E0002 <= c && c <= 0x0E001F) || \
(0x0E0080 <= c && c <= 0x0E00FF) || \
(0x0E01F0 <= c && c <= 0x0E0FFF) \
)
(0xFE00 <= c && c <= 0xFE0F) || \
(c == 0xFEFF) || \
(c == 0xFFA0) || \
(0xFFF0 <= c && c <= 0xFFF8) || \
(0x1BCA0 <= c && c <= 0x1BCA3) || \
(0x1D173 <= c && c <= 0x1D17A) || \
(0xE0000 <= c && c <= 0xE0FFF))
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */

View File

@ -615,7 +615,7 @@ public:
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
private:
static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);

View File

@ -482,9 +482,9 @@
/* Otherwise use the predefined value. */
#elif !defined(__cplusplus)
# define U_CPLUSPLUS_VERSION 0
#elif __cplusplus >= 201402L
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
# define U_CPLUSPLUS_VERSION 14
#elif __cplusplus >= 201103L
#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
# define U_CPLUSPLUS_VERSION 11
#else
// C++98 or C++03
@ -749,8 +749,10 @@ namespace std {
#else
/*
* Notes:
* Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
* does not support u"abc" string literals.
* Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
* and does not support u"abc" string literals.
* Visual Studio 2015 (_MSC_VER>=1900) and above add support for
* both char16_t and u"abc" string literals.
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
* does not support u"abc" string literals.
* C++11 and C11 require support for UTF-16 literals

View File

@ -59,11 +59,16 @@ private:
*/
UText fText;
#ifndef U_HIDE_INTERNAL_API
public:
#endif /* U_HIDE_INTERNAL_API */
/**
* The rule data for this BreakIterator instance
* The rule data for this BreakIterator instance.
* Not for general use; Public only for testing purposes.
* @internal
*/
RBBIDataWrapper *fData;
private:
/**
* The iteration state - current position, rule status for the current position,
@ -114,13 +119,6 @@ private:
*/
UnhandledEngine *fUnhandledBreakEngine;
/**
*
* The type of the break iterator, or -1 if it has not been set.
* @internal
*/
int32_t fBreakType;
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
@ -631,12 +629,6 @@ private:
*/
void reset(void);
/**
* Set the type of the break iterator.
* @internal
*/
void setBreakType(int32_t type);
/**
* Common initialization function, used by constructors and bufferClone.
* @internal
@ -683,6 +675,13 @@ private:
* @internal
*/
void dumpCache();
/**
* Debugging function only.
* @internal
*/
void dumpTables();
#endif /* U_HIDE_INTERNAL_API */
};

View File

@ -299,6 +299,10 @@ typedef int8_t UBool;
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif

View File

@ -1521,6 +1521,7 @@ private:
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec);
//----------------------------------------------------------------

View File

@ -58,7 +58,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 60
#define U_ICU_VERSION_MAJOR_NUM 61
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
@ -84,7 +84,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _60
#define U_ICU_VERSION_SUFFIX _61
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -119,7 +119,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "60.1"
#define U_ICU_VERSION "61.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -132,13 +132,13 @@
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "60"
#define U_ICU_VERSION_SHORT "61"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "60.1"
#define U_ICU_DATA_VERSION "61.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View File

@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
if (U_FAILURE(status)) return *this;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");

View File

@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
ucase_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_BIDI:
ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
ubidi_addPropertyStarts(&sa, &status);
break;
default:
status = U_INTERNAL_PROGRAM_ERROR;
@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
return i.fSet;
}
namespace {
// Cache some sets for other services -------------------------------------- ***
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
// memory leak checker tools
#define _dbgct(me)
} // namespace
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
// Implementation: Pattern parsing
//----------------------------------------------------------------
namespace {
/**
* A small all-inline class to manage a UnicodeSet pointer. Add
* operator->() etc. as needed.
@ -424,6 +429,10 @@ public:
}
};
constexpr int32_t MAX_DEPTH = 100;
} // namespace
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
if (depth > MAX_DEPTH) {
ec = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Syntax characters: [ ] ^ - & { }
@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (setMode) {
case 1:
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
break;
case 2:
chars.skipIgnored(opts);
@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// Property set implementation
//----------------------------------------------------------------
namespace {
// Filter callback: accepts ch when its numeric value equals the double that context points to.
static UBool numericValueFilter(UChar32 ch, void* context) {
    const double wanted = *static_cast<const double*>(context);
    return u_getNumericValue(ch) == wanted;
}
@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
return uscript_hasScript(ch, *(UScriptCode*)context);
}
} // namespace
/**
* Generic filter-based scanning code for UCD property UnicodeSets.
*/
@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
}
}
namespace {
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
/* Note: we use ' ' in compiler code page */
int32_t j = 0;
@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
return TRUE;
}
} // namespace
//----------------------------------------------------------------
// Property set API
//----------------------------------------------------------------

View File

@ -38,8 +38,6 @@
U_NAMESPACE_USE
#define GET_BIDI_PROPS() ubidi_getSingleton()
/* general properties API functions ----------------------------------------- */
struct BinaryProperty;
@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32
}
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_isBidiControl(GET_BIDI_PROPS(), c);
return ubidi_isBidiControl(c);
}
static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_isMirrored(GET_BIDI_PROPS(), c);
return ubidi_isMirrored(c);
}
static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_isJoinControl(GET_BIDI_PROPS(), c);
return ubidi_isJoinControl(c);
}
#if UCONFIG_NO_NORMALIZATION
@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*
}
static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);
return (int32_t)ubidi_getPairedBracketType(c);
}
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
return ubidi_getMaxValue(which);
}
#if UCONFIG_NO_NORMALIZATION
@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope
}
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
return ubidi_getJoiningGroup(c);
}
static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
return ubidi_getJoiningType(c);
}
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {

View File

@ -342,18 +342,16 @@ static void
_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
UChar digitBase,
UBool isLogical, UBool lastStrongWasAL) {
const UBiDiProps *bdp;
int32_t i;
UChar c;
bdp=ubidi_getSingleton();
digitBase-=0x30;
/* the iteration direction depends on the type of input */
if(isLogical) {
for(i=0; i<length; ++i) {
c=s[i];
switch(ubidi_getClass(bdp, c)) {
switch(ubidi_getClass(c)) {
case U_LEFT_TO_RIGHT: /* L */
case U_RIGHT_TO_LEFT: /* R */
lastStrongWasAL=FALSE;
@ -373,7 +371,7 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
} else {
for(i=length; i>0; /* pre-decrement in the body */) {
c=s[--i];
switch(ubidi_getClass(bdp, c)) {
switch(ubidi_getClass(c)) {
case U_LEFT_TO_RIGHT: /* L */
case U_RIGHT_TO_LEFT: /* R */
lastStrongWasAL=FALSE;

View File

@ -347,10 +347,6 @@ usprep_getProfile(const char* path,
newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
if(newProfile->checkBiDi) {
newProfile->bdp = ubidi_getSingleton();
}
LocalMemory<UStringPrepKey> key;
LocalMemory<char> keyName;
LocalMemory<char> keyPath;
@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile,
}
if(profile->checkBiDi) {
direction = ubidi_getClass(profile->bdp, ch);
direction = ubidi_getClass(ch);
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
firstCharDir = direction;
}

View File

@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE
/* string casing ------------------------------------------------------------ */
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
static inline int32_t
inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s,
int32_t cpLength, uint32_t options, icu::Edits *edits) {
@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
return destIndex;
}
static inline int32_t
inline int32_t
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
return destIndex+1;
}
static inline int32_t
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
}
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
u_memcpy(dest+destIndex, s, length);
}
destIndex+=length;
int32_t
appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(edits!=NULL) {
edits->addUnchanged(length);
}
return destIndex;
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
u_memcpy(dest+destIndex, s, length);
}
return destIndex + length;
}
static UChar32 U_CALLCONV
// Inline guard in front of appendNonEmptyUnchanged(): a run of length <= 0
// leaves destIndex untouched and never reaches the out-of-line call.
inline int32_t
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
                const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
    return length > 0
        ? appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits)
        : destIndex;
}
UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
/*
* Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
/**
* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
* caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
static int32_t
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex=srcStart;
int32_t destIndex=0;
while(srcIndex<srcLimit) {
int32_t cpStart;
csc->cpStart=cpStart=srcIndex;
int32_t toLower(int32_t caseLocale, uint32_t options,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
icu::Edits *edits, UErrorCode &errorCode) {
const int8_t *latinToLower;
if (caseLocale == UCASE_LOC_ROOT ||
(caseLocale >= 0 ?
!(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
latinToLower = LatinCase::TO_LOWER_NORMAL;
} else {
latinToLower = LatinCase::TO_LOWER_TR_LT;
}
const UTrie2 *trie = ucase_getTrie();
int32_t destIndex = 0;
int32_t prev = srcStart;
int32_t srcIndex = srcStart;
for (;;) {
// fast path for simple cases
UChar lead;
while (srcIndex < srcLimit) {
lead = src[srcIndex];
int32_t delta;
if (lead < LatinCase::LONG_S) {
int8_t d = latinToLower[lead];
if (d == LatinCase::EXC) { break; }
++srcIndex;
if (d == 0) { continue; }
delta = d;
} else if (lead >= 0xd800) {
break; // surrogate or higher
} else {
uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
if (UCASE_HAS_EXCEPTION(props)) { break; }
++srcIndex;
if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
}
lead += delta;
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - 1 - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendUChar(dest, destIndex, destCapacity, lead);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
if (srcIndex >= srcLimit) {
break;
}
// slow path
int32_t cpStart = srcIndex++;
UChar trail;
UChar32 c;
U16_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
c = U16_GET_SUPPLEMENTARY(lead, trail);
++srcIndex;
} else {
c = lead;
}
const UChar *s;
c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (caseLocale >= 0) {
csc->cpStart = cpStart;
csc->cpLimit = srcIndex;
c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
} else {
c = ucase_toFullFolding(c, &s, options);
}
if (c >= 0) {
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, cpStart - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
}
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - prev, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
return destIndex;
}
/**
* Uppercases the UTF-16 text src[0..srcLength[ into dest.
*
* Input that maps to itself is not copied unit by unit: `prev` marks the start of the
* pending unchanged run, which is flushed with appendUnchanged() right before each
* changed code point and once at the end.
*
* @param caseLocale   UCASE_LOC_TURKISH selects the TO_UPPER_TR Latin table, any other value
*                     selects TO_UPPER_NORMAL; also forwarded to ucase_toFullUpper().
* @param options      Forwarded to appendUnchanged() and appendResult().
* @param dest         Output buffer.
* @param destCapacity Capacity of dest in UChars.
* @param src          UTF-16 input.
* @param csc          Case context; must not be null, because cpStart/cpLimit are written
*                     on the slow path.
* @param srcLength    Number of UChars in src.
* @param edits        Optional edit log; checked for nullptr before the direct addReplace()
*                     call here.
* @param errorCode    Set to U_INDEX_OUTOFBOUNDS_ERROR when an append helper reports a
*                     negative index.
* @return The output index accumulated by the append helpers, or 0 after the error above.
*         NOTE(review): the index can presumably exceed destCapacity (preflighting) -- confirm
*         against the append helpers and the caller.
*/
int32_t toUpper(int32_t caseLocale, uint32_t options,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc, int32_t srcLength,
icu::Edits *edits, UErrorCode &errorCode) {
const int8_t *latinToUpper;
if (caseLocale == UCASE_LOC_TURKISH) {
latinToUpper = LatinCase::TO_UPPER_TR;
} else {
latinToUpper = LatinCase::TO_UPPER_NORMAL;
}
const UTrie2 *trie = ucase_getTrie();
int32_t destIndex = 0;
// prev: start of the input units that have not been written to dest yet.
int32_t prev = 0;
int32_t srcIndex = 0;
for (;;) {
// fast path for simple cases
UChar lead;
// Each `break` below leaves srcIndex on the unit that needs the slow path,
// with `lead` still holding that unit.
while (srcIndex < srcLength) {
lead = src[srcIndex];
int32_t delta;
if (lead < LatinCase::LONG_S) {
// Below U+017F: use the Latin delta table.
int8_t d = latinToUpper[lead];
if (d == LatinCase::EXC) { break; }
++srcIndex;
if (d == 0) { continue; }
delta = d;
} else if (lead >= 0xd800) {
break; // surrogate or higher
} else {
// Other code units below U+D800: look up the case properties in the trie.
uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
if (UCASE_HAS_EXCEPTION(props)) { break; }
++srcIndex;
// Only lowercase units with a non-zero delta change here.
if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
}
lead += delta;
// srcIndex is already past the unit, so the unchanged run ends at srcIndex - 1.
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - 1 - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendUChar(dest, destIndex, destCapacity, lead);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
if (srcIndex >= srcLength) {
break;
}
// slow path
int32_t cpStart;
csc->cpStart = cpStart = srcIndex++;
UChar trail;
UChar32 c;
// Combine a lead surrogate with a following trail surrogate; otherwise use the unit alone.
if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
c = U16_GET_SUPPLEMENTARY(lead, trail);
++srcIndex;
} else {
c = lead;
}
csc->cpLimit = srcIndex;
const UChar *s;
c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
// A negative result writes nothing; the code point stays in the pending unchanged run.
if (c >= 0) {
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, cpStart - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
}
// Flush whatever unchanged input remains after the last changed code point.
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - prev, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV
@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
destIndex+=
_caseMap(
caseLocale, options, ucase_toFullLower,
toLower(
caseLocale, options,
dest+destIndex, destCapacity-destIndex,
src, &csc,
titleLimit, index,
src, &csc, titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
int32_t destIndex = _caseMap(
caseLocale, options, ucase_toFullLower,
int32_t destIndex = toLower(
caseLocale, options,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, errorCode);
@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
destIndex = _caseMap(
caseLocale, options, ucase_toFullUpper,
destIndex = toUpper(
caseLocale, options,
dest, destCapacity,
src, &csc, 0, srcLength,
src, &csc, srcLength,
edits, errorCode);
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex = 0;
int32_t destIndex = 0;
while (srcIndex < srcLength) {
int32_t cpStart = srcIndex;
UChar32 c;
U16_NEXT(src, srcIndex, srcLength, c);
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
int32_t destIndex = toLower(
-1, options,
dest, destCapacity,
src, nullptr, 0, srcLength,
edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}

View File

@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) {
UBool
UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
const UBiDiProps *bdp=ubidi_getSingleton();
// [IDNA2008-Tables]
// 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
for(int32_t i=0; i<labelLength; ++i) {
@ -1148,7 +1147,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
for(;;) {
UJoiningType type=ubidi_getJoiningType(bdp, c);
UJoiningType type=ubidi_getJoiningType(c);
if(type==U_JT_TRANSPARENT) {
if(j==0) {
return FALSE;
@ -1166,7 +1165,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
return FALSE;
}
U16_NEXT_UNSAFE(label, j, c);
UJoiningType type=ubidi_getJoiningType(bdp, c);
UJoiningType type=ubidi_getJoiningType(c);
if(type==U_JT_TRANSPARENT) {
// just skip this character
} else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {

View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for ICU 60.1.
# Generated by GNU Autoconf 2.69 for ICU 61.1.
#
# Report bugs to <http://icu-project.org/bugs>.
#
@ -582,8 +582,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ICU'
PACKAGE_TARNAME='International Components for Unicode'
PACKAGE_VERSION='60.1'
PACKAGE_STRING='ICU 60.1'
PACKAGE_VERSION='61.1'
PACKAGE_STRING='ICU 61.1'
PACKAGE_BUGREPORT='http://icu-project.org/bugs'
PACKAGE_URL='http://icu-project.org'
@ -1368,7 +1368,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ICU 60.1 to adapt to many kinds of systems.
\`configure' configures ICU 61.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1435,7 +1435,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ICU 60.1:";;
short | recursive ) echo "Configuration of ICU 61.1:";;
esac
cat <<\_ACEOF
@ -1571,7 +1571,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ICU configure 60.1
ICU configure 61.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@ -2263,7 +2263,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ICU $as_me 60.1, which was
It was created by ICU $as_me 61.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@ -8409,7 +8409,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ICU $as_me 60.1, which was
This file was extended by ICU $as_me 61.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -8463,7 +8463,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
ICU config.status 60.1
ICU config.status 61.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@ -594,7 +594,7 @@ $(BUILDDIR)/%.cnv: $(UCMSRCDIR)/%.ucm $(TOOLBINDIR)/makeconv$(TOOLEXEEXT)
# RES FILES
### curr res
$(CURRBLDDIR)/%.res: $(CURRSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(CURRBLDDIR)/%.res: $(CURRSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb --usePoolBundle $(GENRBOPTS) -i $(BUILDDIR) -s $(CURRSRCDIR) -d $(CURRBLDDIR) $(<F)
# copy the curr/pool.res file from the source folder to the build output folder
@ -602,7 +602,7 @@ $(CURRBLDDIR)/%.res: $(CURRSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT
$(CURRBLDDIR)/pool.res: $(CURRSRCDIR)/pool.res
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $(CURRSRCDIR)/pool.res $(CURRBLDDIR)/pool.res
$(CURRBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(CURR_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(CURRBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(CURR_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(CURR_TREE) -d $(CURRBLDDIR) $(INDEX_NAME).txt
$(CURR_INDEX_FILE): $(SRCLISTDEPS)
@ -618,7 +618,7 @@ $(CURR_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### lang res
$(LANGBLDDIR)/%.res: $(LANGSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(LANGBLDDIR)/%.res: $(LANGSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb --usePoolBundle $(GENRBOPTS) -i $(BUILDDIR) -s $(LANGSRCDIR) -d $(LANGBLDDIR) $(<F)
# copy the lang/pool.res file from the source folder to the build output folder
@ -626,7 +626,7 @@ $(LANGBLDDIR)/%.res: $(LANGSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT
$(LANGBLDDIR)/pool.res: $(LANGSRCDIR)/pool.res
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $(LANGSRCDIR)/pool.res $(LANGBLDDIR)/pool.res
$(LANGBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(LANG_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(LANGBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(LANG_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(LANG_TREE) -d $(LANGBLDDIR) $(INDEX_NAME).txt
$(LANG_INDEX_FILE): $(SRCLISTDEPS)
@ -642,7 +642,7 @@ $(LANG_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### region res
$(REGIONBLDDIR)/%.res: $(REGIONSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(REGIONBLDDIR)/%.res: $(REGIONSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb --usePoolBundle $(GENRBOPTS) -i $(BUILDDIR) -s $(REGIONSRCDIR) -d $(REGIONBLDDIR) $(<F)
# copy the region/pool.res file from the source folder to the build output folder
@ -650,7 +650,7 @@ $(REGIONBLDDIR)/%.res: $(REGIONSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $
$(REGIONBLDDIR)/pool.res: $(REGIONSRCDIR)/pool.res
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $(REGIONSRCDIR)/pool.res $(REGIONBLDDIR)/pool.res
$(REGIONBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(REGION_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(REGIONBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(REGION_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(REGION_TREE) -d $(REGIONBLDDIR) $(INDEX_NAME).txt
$(REGION_INDEX_FILE): $(SRCLISTDEPS)
@ -666,7 +666,7 @@ $(REGION_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### zone res
$(ZONEBLDDIR)/%.res: $(ZONESRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(ZONEBLDDIR)/%.res: $(ZONESRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb --usePoolBundle $(GENRBOPTS) -i $(BUILDDIR) -s $(ZONESRCDIR) -d $(ZONEBLDDIR) $(<F)
# copy the zone/pool.res file from the source folder to the build output folder
@ -674,7 +674,7 @@ $(ZONEBLDDIR)/%.res: $(ZONESRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT
$(ZONEBLDDIR)/pool.res: $(ZONESRCDIR)/pool.res
$(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $(ZONESRCDIR)/pool.res $(ZONEBLDDIR)/pool.res
$(ZONEBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(ZONE_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(ZONEBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(ZONE_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(ZONE_TREE) -d $(ZONEBLDDIR) $(INDEX_NAME).txt
$(ZONE_INDEX_FILE): $(SRCLISTDEPS)
@ -714,10 +714,10 @@ $(UNIT_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### collation res
$(COLBLDDIR)/%.res: $(COLSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(COLBLDDIR)/%.res: $(COLSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(COLSRCDIR) -d $(COLBLDDIR) $(<F)
$(COLBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(COLLATION_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(COLBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(COLLATION_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(COLLATION_TREE) -d $(COLBLDDIR) $(INDEX_NAME).txt
$(COLLATION_INDEX_FILE): $(SRCLISTDEPS)
@ -733,10 +733,10 @@ $(COLLATION_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### brk res
$(BRKBLDDIR)/%.res: $(BRKSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(BRK_FILES) $(DICT_FILES) $(DAT_FILES)
$(BRKBLDDIR)/%.res: $(BRKSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(BRK_FILES) $(DICT_FILES) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(BRKSRCDIR) -d $(BRKBLDDIR) $(<F)
$(BRKBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(BREAK_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(BRKBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(BREAK_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(BREAK_TREE) -d $(BRKBLDDIR) $(INDEX_NAME).txt
$(BRK_RES_INDEX_FILE): $(SRCLISTDEPS)
@ -752,10 +752,10 @@ $(BRK_RES_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### RBNF res
$(RBNFBLDDIR)/%.res: $(RBNFSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(RBNFBLDDIR)/%.res: $(RBNFSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(RBNFSRCDIR) -d $(RBNFBLDDIR) $(<F)
$(RBNFBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(RBNF_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(RBNFBLDDIR)/$(INDEX_NAME).res: $(OUTTMPDIR)/$(RBNF_TREE)/$(INDEX_NAME).txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(OUTTMPDIR)/$(RBNF_TREE) -d $(RBNFBLDDIR) $(INDEX_NAME).txt
$(RBNF_INDEX_FILE): $(SRCLISTDEPS)
@ -771,13 +771,13 @@ $(RBNF_INDEX_FILE): $(SRCLISTDEPS)
echo "}" >> $@;
### TRANSLIT res
$(TRANSLITBLDDIR)/%.res: $(TRANSLITSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(TRANSLITBLDDIR)/%.res: $(TRANSLITSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -s $(TRANSLITSRCDIR) -d $(TRANSLITBLDDIR) $(<F)
### normal (locale) res
all-RES: $(RES_FILES)
$(BUILDDIR)/%.res: $(LOCSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES)
$(BUILDDIR)/%.res: $(LOCSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) $(DAT_FILES) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb --usePoolBundle $(GENRBOPTS) -i $(BUILDDIR) -s $(LOCSRCDIR) -d $(BUILDDIR) $(<F)
# copy the locales/pool.res file from the source folder to the build output folder
@ -797,7 +797,7 @@ ZONEINFO=$(TZCODE_DIR)/zoneinfo64.txt
# Override the normal genrb for zoneinfo to always pull from
# icu/source/tools/tzcode/zoneinfo64.txt
$(BUILDDIR)/zoneinfo64.res: $(ZONEINFO) $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(BUILDDIR)/zoneinfo64.res: $(ZONEINFO) $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
@echo Note: $(MISCSRCDIR)/zoneinfo.txt is IGNORED because $(TZDATA) is present.
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -q -i $(BUILDDIR) -d $(BUILDDIR) $(ZONEINFO)
@ -810,7 +810,7 @@ $(ZONEINFO): $(TZDATA)
endif
# zoneinfo has some issues. Ignore some warnings with -q
$(BUILDDIR)/%.res: $(MISCSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(BUILDDIR)/%.res: $(MISCSRCDIR)/%.txt $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -q -i $(BUILDDIR) -s $(MISCSRCDIR) -d $(BUILDDIR) $(<F)
@ -831,7 +831,7 @@ $(INDEX_FILE): $(SRCLISTDEPS)
clean-resindex:
-$(RMV) $(BUILDDIR)/$(INDEX_NAME).txt $(PKGDATA_LIST)
$(BUILDDIR)/$(INDEX_NAME).res: $(INDEX_FILE) $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
$(BUILDDIR)/$(INDEX_NAME).res: $(INDEX_FILE) $(TOOLBINDIR)/genrb$(TOOLEXEEXT) | $(BUILDDIR)/cnvalias.icu
$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -d $(BUILDDIR) $(INDEX_FILE)
# The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)

View File

@ -1,6 +1,6 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
BRK_RES_CLDR_VERSION = 32.0.1
BRK_RES_CLDR_VERSION = 33
# A list of txt's to build
# Note:
#

View File

@ -42,6 +42,18 @@
#
#----
#
# IP address whitelisting
#
# Parts of the build process (notably building the new ICU data files in step 4)
# require http: access to files in the CLDR repository; for example, processing
# the files in icu4c/source/data/xml/ may require access to
# http://www.unicode.org/repos/cldr/trunk/common/dtd/ldml.dtd
#
# The IP address of the system requesting such access must be whitelisted with Unicode,
# otherwise there may be timeout failures; contact Rick McGowan.
#
#----
#
# There are several environment variables that need to be defined.
#
# a) Java- and ant-related variables
@ -78,7 +90,7 @@
# files are used in addition to the CLDR files as inputs to the CLDR data build
# process for ICU):
#
# icu/trunk/source/data/icu-config.xml - Update <locales> to add or remove
# icu4c/source/data/icu-config.xml - Update <locales> to add or remove
# CLDR locales for inclusion in ICU. Update <paths> to prefer
# alt forms for certain paths, or to exclude certain paths; note
# that <paths> items can only have draft or alt attributes.
@ -89,11 +101,11 @@
# should also be included in <locales>, per PMC policy decision
# 2012-05-02 (see http://bugs.icu-project.org/trac/ticket/9298).
#
# icu/trunk/source/data/build.xml - If you are adding or removing break
# icu4c/source/data/build.xml - If you are adding or removing break
# iterators, you need to update <fileset id="brkitr" ...> under
# <target name="clean" ...> to clean the correct set of files.
#
# icu/trunk/source/data/xml/ - If you are adding a new locale, break
# icu4c/source/data/xml/ - If you are adding a new locale, break
# iterator, collation tailoring, or rule-based number formatter,
# you may need to add a corresponding xml file in (respectively)
# the main/, brkitr/, collation/, or rbnf/ subdirectory here.
@ -158,6 +170,11 @@ make check 2>&1 | tee /tmp/icu4c-oldData-makeCheck.txt
# necessary CLDR tools including LDML2ICUConverter, ConvertTransforms, etc.
# This process will take several minutes.
# Keep a log so you can investigate anything that looks suspicious.
#
# If you see timeout errors when building the rbnf data, for example, then the
# system you are building on likely does not have its IP address whitelisted with
# Unicode for access to the CLDR repository, see note on "IP address whitelisting"
# near the top of this file.
cd $ICU4C_DIR/source/data
ant clean

View File

@ -5,7 +5,7 @@ af{
collations{
standard{
Sequence{"&N<<<ʼn"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ am{
collations{
standard{
Sequence{"[reorder Ethi]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ ar{
"&ت<<ة<<<ﺔ<<<ﺓ"
"&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -397,7 +397,7 @@ ar{
"&ۓ‎=ﮰ‎=ﮱ"
"&ۀ‎=ﮤ‎=ﮥ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -11,7 +11,7 @@ as{
"&[before 1]ত<ৎ=ত্\u200D"
"&হ<ক্ষ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ az{
"[import az-u-co-standard]"
"[reorder others]"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -26,7 +26,7 @@ az{
"&H<x<<<X"
"&Z<w<<<W"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ be{
"&Е<ё<<<Ё"
"&у<ў<<<Ў"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ bg{
collations{
standard{
Sequence{"[reorder Cyrl]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ bn{
"[reorder Beng Deva Guru Gujr Orya Taml Telu Knda Mlym Sinh]"
"&ঔ<ং<ঃ<ঁ"
}
Version{"32.0.1"}
Version{"33"}
}
traditional{
Sequence{
@ -629,7 +629,7 @@ bn{
"&যৌ<<<য়ৌ"
"&য্<<<য়্"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,11 +5,11 @@ bs{
collations{
search{
Sequence{"[import hr-u-co-search]"}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{"[import hr]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ bs_Cyrl{
collations{
standard{
Sequence{"[import sr]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ ca{
"[import und-u-co-search]"
"&L<ŀ=l·<<<Ŀ=L·"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ chr{
collations{
standard{
Sequence{"[reorder Cher]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -1,6 +1,6 @@
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
COLLATION_CLDR_VERSION = 32.0.1
COLLATION_CLDR_VERSION = 33
# A list of txt's to build
# Note:
#

View File

@ -11,7 +11,7 @@ cs{
"&S<š<<<Š"
"&Z<ž<<<Ž"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -14,7 +14,7 @@ cy{
"&R<rh<<<Rh<<<RH"
"&T<th<<<Th<<<TH"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ da{
"[import da-u-co-standard]"
"[caseFirst off]"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -21,7 +21,7 @@ da{
"&[before 1]ǀ<æ<<<Æ<<ä<<<Ä<ø<<<Ø<<ö<<<Ö<<ő<<<Ő<å<<<Å<<<aa<<<Aa<<<AA"
"&oe<<œ<<<Œ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,14 +9,14 @@ de{
"&OE<<ö<<<Ö"
"&UE<<ü<<<Ü"
}
Version{"32.0.1"}
Version{"33"}
}
search{
Sequence{
"[import und-u-co-search]"
"[import de-u-co-phonebk]"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -10,7 +10,7 @@ de_AT{
"&u<ü<<<Ü"
"&ss<ß<<<ẞ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -14,7 +14,7 @@ dsb{
"&S<š<<<Š<ś<<<Ś"
"&Z<ž<<<Ž<ź<<<Ź"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -17,7 +17,7 @@ ee{
"&T<ts<<<Ts<<<TS"
"&V<ʋ<<<Ʋ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ el{
"[normalization on]"
"[reorder Grek]"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ en_US_POSIX{
"&A<*'\u0020'-'/'<*0-'@'<*ABCDEFGHIJKLMNOPQRSTUVWXYZ<*'['-'`'<*abcdefghijklmnopqrstuvwxyz"
"<*'{'-'\u007F'"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -12,7 +12,7 @@ eo{
"&S<ŝ<<<Ŝ"
"&U<ŭ<<<Ŭ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,11 +8,11 @@ es{
"[import und-u-co-search]"
"&N<ñ<<<Ñ"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{"&N<ñ<<<Ñ"}
Version{"32.0.1"}
Version{"33"}
}
traditional{
Sequence{
@ -20,7 +20,7 @@ es{
"&C<ch<<<Ch<<<CH"
"&l<ll<<<Ll<<<LL"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ et{
"&[before 1]T<š<<<Š<z<<<Z<ž<<<Ž"
"&[before 1]X<õ<<<Õ<ä<<<Ä<ö<<<Ö<ü<<<Ü"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -16,7 +16,7 @@ fa{
"&ۏ<ه<<ە<<ہ<<ة<<ۃ<<ۀ<<ھ"
"&ی<<*ىےيېۑۍێ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ fa_AF{
collations{
standard{
Sequence{"[import ps]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ fi{
"[import und-u-co-search]"
"[import fi-u-co-trad]"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -20,7 +20,7 @@ fi{
"&Z\u0335<<ʒ<<<Ʒ"
"&[before 1]ǀ<å<<<Å<ä<<<Ä<<æ<<<Æ<ö<<<Ö<<ø<<<Ø"
}
Version{"32.0.1"}
Version{"33"}
}
traditional{
Sequence{
@ -31,7 +31,7 @@ fi{
"&Y<<ü<<<Ü<<ű<<<Ű"
"&[before 1]ǀ<å<<<Å<ä<<<Ä<<æ<<<Æ<ö<<<Ö<<ø<<<Ø<<ő<<<Ő<<õ<<<Õ<<œ<<<Œ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ fil{
collations{
standard{
Sequence{"&N<ñ<<<Ñ<ng<<<Ng<<<NG"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ fo{
"[import und-u-co-search]"
"[import fo-u-co-standard]"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -18,7 +18,7 @@ fo{
"&Y<<ü<<<Ü<<ű<<<Ű"
"&[before 1]ǀ<æ<<<Æ<<ä<<<Ä<<ę<<<Ę<ø<<<Ø<<ö<<<Ö<<ő<<<Ő<<œ<<<Œ<å<<<Å<<<aa<<<Aa<<<AA"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,7 +5,7 @@ fr_CA{
collations{
standard{
Sequence{"[backwards 2]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -5,11 +5,11 @@ gl{
collations{
search{
Sequence{"[import es-u-co-search]"}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{"[import es]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ gu{
"[reorder Gujr Deva Beng Guru Orya Taml Telu Knda Mlym Sinh]"
"&ૐ<ં<<ઁ<"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -12,7 +12,7 @@ ha{
"&T<ts<<<Ts<<<TS"
"&Y<ƴ<<<ʼy<<<''y<<<Ƴ<<<ʼY<<<''Y"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ haw{
"&a<e<<<E<i<<<I<o<<<O<u<<<U"
"&w<ʻ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -11,7 +11,7 @@ he{
"&״"
"<<'\u0022'"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -20,7 +20,7 @@ he{
"&[before 2]''<<׳"
"&[before 2]'\u0022'<<״"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ hi{
"[reorder Deva Beng Guru Gujr Orya Taml Telu Knda Mlym Sinh]"
"&ॐ<ं<<ँ<"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -9,7 +9,7 @@ hr{
"[import hr-u-co-standard]"
"[reorder others]"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -21,7 +21,7 @@ hr{
"&S<š<<<Š"
"&Z<ž<<<Ž"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -13,7 +13,7 @@ hsb{
"&S<š<<<Š"
"&Z<ž<<<Ž<ź<<<Ź"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -44,7 +44,7 @@ hu{
"&Zs<<<Zzs/zs"
"&ZS<<<ZZS/ZS"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ hy{
"[reorder Armn]"
"&ք<և<<<Եւ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -15,7 +15,7 @@ ig{
"&S<sh<<<Sh<<<SH"
"&U<ụ<<<Ụ"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -8,7 +8,7 @@ is{
"[import und-u-co-search]"
"[import is-u-co-standard]"
}
Version{"32.0.1"}
Version{"33"}
}
standard{
Sequence{
@ -21,7 +21,7 @@ is{
"&[before 1]z<ý<<<Ý"
"&[before 1]ǀ<æ<<<Æ<<ä<<<Ä<ö<<<Ö<<ø<<<Ø<å<<<Å"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

File diff suppressed because one or more lines are too long

View File

@ -5,7 +5,7 @@ ka{
collations{
standard{
Sequence{"[reorder Geor]"}
Version{"32.0.1"}
Version{"33"}
}
}
}

View File

@ -10,7 +10,7 @@ kk{
"&Ұ<ү<<<Ү"
"&[before 1]ь<і<<<І"
}
Version{"32.0.1"}
Version{"33"}
}
}
}

Some files were not shown because too many files have changed in this diff Show More