ICU-1115 move UnicodeSet from i18n to common; also move affiliated classes; update USet to wrap UnicodeSet

X-SVN-Rev: 8721
This commit is contained in:
Alan Liu 2002-05-29 18:36:09 +00:00
parent 678323650f
commit 237c0e67c3
34 changed files with 1204 additions and 992 deletions

View File

@ -63,7 +63,8 @@ normlzr.o unorm.o chariter.o schriter.o uchriter.o uiter.o \
uchar.o uprops.o bidi.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \
ucln_cmn.o uscript.o umemstrm.o ucmp8.o uvector.o digitlst.o \
brkiter.o brkdict.o ubrk.o dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o \
unicode.o scsu.o convert.o utrie.o uset.o
unicode.o scsu.o convert.o utrie.o uset.o \
unifilt.o unifunct.o uniset.o upropset.o usetiter.o util.o
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))

View File

@ -44,7 +44,7 @@ RSC=rc.exe
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c
# ADD CPP /nologo /G6 /MD /Za /W3 /GX /Zi /O2 /Ob2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /FD /GF /c
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
@ -71,8 +71,8 @@ LINK32=link.exe
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c
# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /FR /FD /GF /GZ /c
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c
# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /FR /FD /GF /GZ /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x409 /d "_DEBUG"
@ -98,8 +98,8 @@ LINK32=link.exe
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c
# ADD CPP /nologo /MD /Za /W3 /D"NDEBUG" /D"WIN64" /D"_WINDOWS" /D"_MBCS" /D"_USRDLL" /D"COMMON_EXPORTS" /D"U_COMMON_IMPLEMENTATION" /D"UDATA_STATIC_LIB" /FD /GF /c /O2 /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c
# ADD CPP /nologo /MD /Za /W3 /GX /Zi /O2 /D "NDEBUG" /D "WIN64" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GF /QIA64_fmaopt /Wp64 /Zm600 /c
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win64
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win64
# ADD BASE RSC /l 0x409 /d "NDEBUG"
@ -108,9 +108,9 @@ BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IA64
# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /machine:IA64 /out:"..\..\bin\icuuc21.dll" /implib:"..\..\lib\icuuc.lib" /libpath:"..\..\lib" /incremental:no
# SUBTRACT LINK32 /debug
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IX86 /machine:IA64
# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /machine:IX86 /out:"..\..\bin\icuuc21.dll" /implib:"..\..\lib\icuuc.lib" /libpath:"..\..\lib" /machine:IA64
# SUBTRACT LINK32 /debug
!ELSEIF "$(CFG)" == "common - Win64 Debug"
@ -125,8 +125,8 @@ LINK32=link.exe
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c
# ADD CPP /nologo /MDd /Za /W3 /Gm /D"_DEBUG" /D"WIN64" /D"_WINDOWS" /D"_MBCS" /D"_USRDLL" /D"COMMON_EXPORTS" /D"U_COMMON_IMPLEMENTATION" /D"UDATA_STATIC_LIB" /FR /FD /GF /GZ /c /Od /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c
# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN64" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GF /GZ /QIA64_fmaopt /Wp64 /Zm600 /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win64
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win64
# ADD BASE RSC /l 0x409 /d "_DEBUG"
@ -135,9 +135,8 @@ BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IA64 /pdbtype:sept
# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /debug /machine:IA64 /out:"..\..\bin\icuuc21d.dll" /implib:"..\..\lib\icuucd.lib" /pdbtype:sept /libpath:"..\..\lib" /incremental:no
# SUBTRACT LINK32
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IX86 /pdbtype:sept /machine:IA64
# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /incremental:no /debug /machine:IX86 /out:"..\..\bin\icuuc21d.dll" /implib:"..\..\lib\icuucd.lib" /pdbtype:sept /libpath:"..\..\lib" /machine:IA64
!ENDIF
@ -380,6 +379,18 @@ SOURCE=.\unicode.cpp
# End Source File
# Begin Source File
SOURCE=.\unifilt.cpp
# End Source File
# Begin Source File
SOURCE=.\unifunct.cpp
# End Source File
# Begin Source File
SOURCE=.\uniset.cpp
# End Source File
# Begin Source File
SOURCE=.\unistr.cpp
# End Source File
# Begin Source File
@ -392,6 +403,10 @@ SOURCE=.\uprops.c
# End Source File
# Begin Source File
SOURCE=.\upropset.cpp
# End Source File
# Begin Source File
SOURCE=.\uresbund.c
# End Source File
# Begin Source File
@ -404,7 +419,11 @@ SOURCE=.\uscript.c
# End Source File
# Begin Source File
SOURCE=.\uset.c
SOURCE=.\uset.cpp
# End Source File
# Begin Source File
SOURCE=.\usetiter.cpp
# End Source File
# Begin Source File
@ -432,6 +451,10 @@ SOURCE=.\utf_impl.c
# End Source File
# Begin Source File
SOURCE=.\util.cpp
# End Source File
# Begin Source File
SOURCE=.\utrie.c
# End Source File
# Begin Source File
@ -817,6 +840,53 @@ InputPath=.\unicode\normlzr.h
# End Source File
# Begin Source File
SOURCE=.\unicode\parsepos.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\putil.h
!IF "$(CFG)" == "common - Win32 Release"
@ -1150,6 +1220,10 @@ InputPath=.\unicode\scsu.h
# End Source File
# Begin Source File
SOURCE=.\symtable.h
# End Source File
# Begin Source File
SOURCE=.\unicode\ubidi.h
!IF "$(CFG)" == "common - Win32 Release"
@ -1872,6 +1946,194 @@ InputPath=.\unicode\unicode.h
# End Source File
# Begin Source File
SOURCE=.\unicode\unifilt.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unifunct.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unimatch.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\uniset.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unistr.h
!IF "$(CFG)" == "common - Win32 Release"
@ -1974,6 +2236,10 @@ SOURCE=.\uprops.h
# End Source File
# Begin Source File
SOURCE=.\upropset.h
# End Source File
# Begin Source File
SOURCE=.\unicode\urename.h
!IF "$(CFG)" == "common - Win32 Release"
@ -2170,7 +2436,97 @@ InputPath=.\unicode\uscript.h
# End Source File
# Begin Source File
SOURCE=.\uset.h
SOURCE=.\unicode\uset.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\uset.h
"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\uset.h
"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\uset.h
"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\uset.h
"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\usetiter.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
@ -2464,6 +2820,10 @@ InputPath=.\unicode\utf8.h
# End Source File
# Begin Source File
SOURCE=.\util.h
# End Source File
# Begin Source File
SOURCE=.\utrie.h
# End Source File
# Begin Source File

View File

@ -48,6 +48,7 @@ u_cleanup(void)
}
}
upropset_cleanup();
unorm_cleanup();
unames_cleanup();
uchar_cleanup();

View File

@ -46,4 +46,6 @@ U_CFUNC UBool udata_cleanup(void);
U_CFUNC UBool putil_cleanup(void);
U_CFUNC UBool upropset_cleanup(void);
#endif

View File

@ -36,7 +36,7 @@ U_NAMESPACE_BEGIN
* @see java.text.Format
*/
class U_I18N_API ParsePosition {
class U_COMMON_API ParsePosition {
public:
/**
* Default constructor, the index starts with 0 as default.

View File

@ -13,6 +13,15 @@
U_NAMESPACE_BEGIN
/**
* U_ETHER is used to represent character values for positions outside
* a range. For example, transliterator uses this to represent
* characters outside the range contextStart..contextLimit-1. This
* allows explicit matching by rules and UnicodeSets of text outside a
* defined range.
*/
#define U_ETHER ((UChar)0xFFFF)
/**
* <code>UnicodeFilter</code> defines a protocol for selecting a
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
@ -38,7 +47,7 @@ U_NAMESPACE_BEGIN
* @see UnicodeFilterLogic
* @stable
*/
class U_I18N_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
public:
/**

View File

@ -23,7 +23,7 @@ class TransliterationRuleData;
* that perform match and/or replace operations on Unicode strings.
* @author Alan Liu
*/
class U_I18N_API UnicodeFunctor {
class U_COMMON_API UnicodeFunctor {
public:

View File

@ -53,7 +53,7 @@ enum UMatchDegree {
* <code>UnicodeMatcher</code> defines a protocol for objects that can
* match a range of characters in a Replaceable string.
*/
class U_I18N_API UnicodeMatcher {
class U_COMMON_API UnicodeMatcher {
public:

View File

@ -216,7 +216,7 @@ class UVector;
* @author Alan Liu
* @stable
*/
class U_I18N_API UnicodeSet : public UnicodeFilter {
class U_COMMON_API UnicodeSet : public UnicodeFilter {
int32_t len; // length of list used; 0 <= len <= capacity
int32_t capacity; // capacity of list
@ -821,6 +821,51 @@ public:
*/
virtual UChar32 getRangeEnd(int32_t index) const;
/**
* Serializes this set into an array of 16-bit integers. The array
* has the following format (each line is one 16-bit integer):
*
* length = (n+2*m) | (m!=0?0x8000:0)
* bmpLength = n; present if m!=0
* bmp[0]
* bmp[1]
* ...
* bmp[n-1]
* supp-high[0]
* supp-low[0]
* supp-high[1]
* supp-low[1]
* ...
* supp-high[m-1]
* supp-low[m-1]
*
* The array starts with a header. After the header are n bmp
* code points, then m supplementary code points. Either n or m
* or both may be zero. n+2*m is always <= 0x7FFF.
*
* If there are no supplementary characters (if m==0) then the
* header is one 16-bit integer, 'length', with value n.
*
* If there are supplementary characters (if m!=0) then the header
* is two 16-bit integers. The first, 'length', has value
* (n+2*m)|0x8000. The second, 'bmpLength', has value n.
*
* After the header the code points are stored in ascending order.
* Supplementary code points are stored as most significant 16
* bits followed by least significant 16 bits.
*
* @param dest pointer to buffer of destCapacity 16-bit integers.
* May be NULL only if destCapacity is zero.
* @param destCapacity size of dest, or zero. Must not be negative.
* @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR
* if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if
* n+2*m+(m!=0?2:1) > destCapacity.
* @return the total length of the serialized format, including
* the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
* than U_BUFFER_OVERFLOW_ERROR.
*/
int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
/**
* Reallocate this object's internal structures to take up the least
* possible space, without changing this object's value.

View File

@ -0,0 +1,231 @@
/*
*******************************************************************************
*
* Copyright (C) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uset.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002mar07
* created by: Markus W. Scherer
*
* C version of UnicodeSet.
*/
#ifndef __USET_H__
#define __USET_H__
#include "unicode/utypes.h"
struct USet;
typedef struct USet USet;
enum {
USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 /**< enough for any single-code point set */
};
/**
* A serialized form of a Unicode set. Limited manipulations are
* possible directly on a serialized set.
*
* A USerializedSet is filled in by uset_getSerializedSet() or
* uset_setSerializedToOne() and is then read through the
* uset_serializedContains(), uset_countSerializedRanges() and
* uset_getSerializedRange() functions declared in this header.
*/
struct USerializedSet {
/* NOTE(review): presumably points at the serialized code point data,
* either inside the caller's array or inside staticArray -- confirm
* against the implementation. */
const uint16_t *array;
/* NOTE(review): presumably correspond to the 'bmpLength' and 'length'
* values described in the uset_serialize() format -- confirm. */
int32_t bmpLength, length;
/* Embedded storage; its capacity constant is documented as being
* enough for any single-code point set. */
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
};
typedef struct USerializedSet USerializedSet;
/**
* Creates a USet object that contains the range of characters
* start..end, inclusive.
* @param start first character of the range, inclusive
* @param end last character of the range, inclusive
* @return a newly created USet. The caller must call uset_close() on
* it when done.
*/
U_CAPI USet * U_EXPORT2
uset_open(UChar32 start, UChar32 end);
/**
* Disposes of the storage used by a USet object. This function should
* be called exactly once for objects returned by uset_open().
* @param set the object to dispose of
*/
U_CAPI void U_EXPORT2
uset_close(USet *set);
/**
* Adds the given character to the given USet. After this call,
* uset_contains(set, c) will return TRUE.
* @param set the object to which to add the character
* @param c the character to add
*/
U_CAPI void U_EXPORT2
uset_add(USet *set, UChar32 c);
/**
* Removes the given character from the given USet. After this call,
* uset_contains(set, c) will return FALSE.
* @param set the object from which to remove the character
* @param c the character to remove
*/
U_CAPI void U_EXPORT2
uset_remove(USet *set, UChar32 c);
/**
* Returns TRUE if the given USet contains no characters and no
* strings.
* @param set the set
* @return true if set is empty
*/
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet *set);
/**
* Returns TRUE if the given USet contains the given character.
* @param set the set
* @param c the character to look for
* @return true if set contains c
*/
U_CAPI UBool U_EXPORT2
uset_contains(const USet *set, UChar32 c);
/**
* Returns the number of characters and strings contained in the given
* USet.
* @param set the set
* @return a non-negative integer counting the characters and strings
* contained in set
*/
U_CAPI int32_t U_EXPORT2
uset_size(const USet* set);
/**
* Returns the number of disjoint ranges of characters contained in
* the given set. Ignores any strings contained in the set.
* @param set the set
* @return a non-negative integer counting the character ranges
* contained in set
*/
U_CAPI int32_t U_EXPORT2
uset_countRanges(const USet *set);
/**
* Returns a range of characters contained in the given set.
* @param set the set
* @param rangeIndex a non-negative integer in the range 0..
* uset_countRanges(set)-1
* @param pStart pointer to variable to receive first character
* in range, inclusive
* @param pEnd pointer to variable to receive last character in range,
* inclusive
* @return true if rangeIndex is valid, otherwise false
*/
U_CAPI UBool U_EXPORT2
uset_getRange(const USet *set, int32_t rangeIndex,
UChar32 *pStart, UChar32 *pEnd);
/**
* Serializes this set into an array of 16-bit integers. The array
* has the following format (each line is one 16-bit integer):
*
* length = (n+2*m) | (m!=0?0x8000:0)
* bmpLength = n; present if m!=0
* bmp[0]
* bmp[1]
* ...
* bmp[n-1]
* supp-high[0]
* supp-low[0]
* supp-high[1]
* supp-low[1]
* ...
* supp-high[m-1]
* supp-low[m-1]
*
* The array starts with a header. After the header are n bmp
* code points, then m supplementary code points. Either n or m
* or both may be zero. n+2*m is always <= 0x7FFF.
*
* If there are no supplementary characters (if m==0) then the
* header is one 16-bit integer, 'length', with value n.
*
* If there are supplementary characters (if m!=0) then the header
* is two 16-bit integers. The first, 'length', has value
* (n+2*m)|0x8000. The second, 'bmpLength', has value n.
*
* After the header the code points are stored in ascending order.
* Supplementary code points are stored as most significant 16
* bits followed by least significant 16 bits.
*
* @param set the set
* @param dest pointer to buffer of destCapacity 16-bit integers.
* May be NULL only if destCapacity is zero.
* @param destCapacity size of dest, or zero. Must not be negative.
* @param pErrorCode pointer to the error code. Will be set to
* U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
* U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
* @return the total length of the serialized format, including
* the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
* than U_BUFFER_OVERFLOW_ERROR.
*/
U_CAPI int32_t U_EXPORT2
uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode);
/**
* Given a serialized array, fill in the given serialized set object.
* @param fillSet pointer to result
* @param src pointer to start of array
* @param srcLength length of array
* @return true if the given array is valid, otherwise false
*/
U_CAPI UBool U_EXPORT2
uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength);
/**
* Set the USerializedSet to contain the given character (and nothing
* else).
* @param fillSet pointer to the USerializedSet to overwrite
* @param c the single character that the serialized set will contain
*/
U_CAPI void U_EXPORT2
uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c);
/**
* Returns TRUE if the given USerializedSet contains the given
* character.
* @param set the serialized set
* @param c the character to look for
* @return true if set contains c
*/
U_CAPI UBool U_EXPORT2
uset_serializedContains(const USerializedSet *set, UChar32 c);
/**
* Returns the number of disjoint ranges of characters contained in
* the given serialized set. Ignores any strings contained in the
* set.
* @param set the serialized set
* @return a non-negative integer counting the character ranges
* contained in set
*/
U_CAPI int32_t U_EXPORT2
uset_countSerializedRanges(const USerializedSet *set);
/**
* Returns a range of characters contained in the given serialized
* set.
* @param set the serialized set
* @param rangeIndex a non-negative integer in the range 0..
* uset_countSerializedRanges(set)-1
* @param pStart pointer to variable to receive first character
* in range, inclusive
* @param pEnd pointer to variable to receive last character in range,
* inclusive
* @return true if rangeIndex is valid, otherwise false
*/
U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex,
UChar32 *pStart, UChar32 *pEnd);
#endif

View File

@ -3,8 +3,8 @@
* Copyright (c) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* $Source: /xsrl/Nsvn/icu/icu/source/i18n/unicode/Attic/usetiter.h,v $
* $Revision: 1.5 $
* $Source: /xsrl/Nsvn/icu/icu/source/common/unicode/usetiter.h,v $
* $Revision: 1.1 $
**********************************************************************
*/
#ifndef USETITER_H
@ -49,7 +49,7 @@ class UnicodeString;
* @author M. Davis
* @draft
*/
class U_I18N_API UnicodeSetIterator {
class U_COMMON_API UnicodeSetIterator {
protected:

View File

@ -8,7 +8,6 @@
#include "unicode/unifilt.h"
#include "unicode/rep.h"
#include "rbt_rule.h"
U_NAMESPACE_BEGIN

View File

@ -0,0 +1,28 @@
/*
**********************************************************************
* Copyright (c) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* $Source: /xsrl/Nsvn/icu/icu/source/common/unifunct.cpp,v $
* $Date: 2002/05/29 18:33:33 $
* $Revision: 1.1 $
**********************************************************************
*/
#include "unicode/unifunct.h"
U_NAMESPACE_BEGIN
// Out-of-line definition of the static member UnicodeFunctor::fgClassID.
// NOTE(review): presumably only its address is significant (class ID
// idiom) -- confirm against unicode/unifunct.h.
const char UnicodeFunctor::fgClassID = 0;
// Returns a null pointer.
// NOTE(review): presumably subclasses that are matchers override this to
// return themselves -- confirm against unicode/unifunct.h.
UnicodeMatcher* UnicodeFunctor::toMatcher() const {
return 0;
}
// Returns a null pointer.
// NOTE(review): presumably subclasses that are replacers override this to
// return themselves -- confirm against unicode/unifunct.h.
UnicodeReplacer* UnicodeFunctor::toReplacer() const {
return 0;
}
U_NAMESPACE_END
//eof

View File

@ -14,7 +14,6 @@
#include "unicode/uscript.h"
#include "symtable.h"
#include "cmemory.h"
#include "rbt_rule.h"
#include "uhash.h"
#include "upropset.h"
#include "util.h"
@ -780,7 +779,7 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
// Strings, if any, have length != 0, so we don't worry
// about them here. If we ever allow zero-length strings
// we must check for them here.
if (contains(TransliterationRule::ETHER)) {
if (contains(U_ETHER)) {
return incremental ? U_PARTIAL_MATCH : U_MATCH;
} else {
return U_MISMATCH;
@ -1382,6 +1381,87 @@ UnicodeSet& UnicodeSet::compact() {
return *this;
}
int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
int32_t bmpLength, length, destLength;
if (U_FAILURE(ec)) {
return 0;
}
if (destCapacity<0 || (destCapacity>0 && dest==NULL)) {
ec=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* count necessary 16-bit units */
length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
// assert(length>=0);
if (length==0) {
/* empty set */
if (destCapacity>0) {
*dest=0;
} else {
ec=U_BUFFER_OVERFLOW_ERROR;
}
return 1;
}
/* now length>0 */
if (this->list[length-1]<=0xffff) {
/* all BMP */
bmpLength=length;
} else if (this->list[0]>=0x10000) {
/* all supplementary */
bmpLength=0;
length*=2;
} else {
/* some BMP, some supplementary */
for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
length=bmpLength+2*(length-bmpLength);
}
/* length: number of 16-bit array units */
if (length>0x7fff) {
/* there are only 15 bits for the length in the first serialized word */
ec=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
/*
* total serialized length:
* number of 16-bit array units (length) +
* 1 length unit (always) +
* 1 bmpLength unit (if there are supplementary values)
*/
destLength=length+((length>bmpLength)?2:1);
if (destLength<=destCapacity) {
const UChar32 *p;
int32_t i;
*dest=(uint16_t)length;
if (length>bmpLength) {
*dest|=0x8000;
*++dest=(uint16_t)bmpLength;
}
++dest;
/* write the BMP part of the array */
p=this->list;
for (i=0; i<bmpLength; ++i) {
*dest++=(uint16_t)*p++;
}
/* write the supplementary part of the array */
for (; i<length; i+=2) {
*dest++=(uint16_t)(*p>>16);
*dest++=(uint16_t)*p++;
}
} else {
ec=U_BUFFER_OVERFLOW_ERROR;
}
return destLength;
}
//----------------------------------------------------------------
// Implementation: Pattern parsing
//----------------------------------------------------------------
@ -1789,7 +1869,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
if (anchor == 2) {
rebuildPattern = TRUE;
newPat.append((UChar)SymbolTable::SYMBOL_REF);
add(TransliterationRule::ETHER);
add(U_ETHER);
}
mode = 4;
break;
@ -1833,13 +1913,13 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
return;
}
// Treat a trailing '$' as indicating ETHER. This code is only
// Treat a trailing '$' as indicating U_ETHER. This code is only
// executed if symbols == NULL; otherwise other code parses the
// anchor.
if (lastChar == (UChar)SymbolTable::SYMBOL_REF && !isLastLiteral) {
rebuildPattern = TRUE;
newPat.append(lastChar);
add(TransliterationRule::ETHER);
add(U_ETHER);
}
else if (lastChar != NONE) {

View File

@ -32,7 +32,7 @@
#include "ustr_imp.h"
#include "umutex.h"
#include "utrie.h"
#include "uset.h"
#include "unicode/uset.h"
#include "unormimp.h"
/*

View File

@ -20,8 +20,8 @@
#include "unicode/utypes.h"
#include "unicode/uiter.h"
#include "unicode/unorm.h"
#include "unicode/uset.h"
#include "utrie.h"
#include "uset.h"
#include "ustr_imp.h"
/*

View File

@ -13,7 +13,7 @@
#include "unicode/uchar.h"
#include "hash.h"
#include "mutex.h"
#include "ucln_in.h"
#include "ucln.h"
#include "charstr.h"
@ -115,10 +115,9 @@ static const UChar INCLUSIONS_PATTERN[] =
// "[^\\u3401-\\u4DB5 \\u4E01-\\u9FA5 \\uAC01-\\uD7A3 \\uD801-\\uDB7F \\uDB81-\\uDBFF \\uDC01-\\uDFFF \\uE001-\\uF8FF \\U0001044F-\\U0001CFFF \\U0001D801-\\U0001FFFF \\U00020001-\\U0002A6D6 \\U0002A6D8-\\U0002F7FF \\U0002FA1F-\\U000E0000 \\U000E0081-\\U000EFFFF \\U000F0001-\\U000FFFFD \\U00100001-\\U0010FFFD]"
/**
* Cleanup function for transliterator component; delegates to
* Transliterator::cleanupRegistry().
* Cleanup function for UnicodePropertySet
*/
U_CFUNC UBool unicodePropertySet_cleanup(void) {
U_CFUNC UBool upropset_cleanup(void) {
if (NAME_MAP != NULL) {
delete NAME_MAP; NAME_MAP = NULL;
delete CATEGORY_MAP; CATEGORY_MAP = NULL;
@ -524,8 +523,6 @@ void UnicodePropertySet::init() {
CATEGORY_MAP = new Hashtable(TRUE);
SCRIPT_CACHE = new UnicodeSet[(size_t)USCRIPT_CODE_LIMIT];
ucln_i18n_registerCleanup(); // Call this when allocating statics
// NOTE: We munge all search keys to have no whitespace
// and upper case. As such, all stored keys should have
// this format.

View File

@ -1,499 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uset.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002mar07
* created by: Markus W. Scherer
*
* Poor man's C version of UnicodeSet, with only basic functions.
* The main data structure, the array of range limits, is
* the same as in UnicodeSet, except that the HIGH value is not stored.
*
* There are functions to efficiently serialize a USet into an array of uint16_t
* and functions to use such a serialized form efficiently without
* instantiating a new USet.
*
* If we needed more of UnicodeSet's functionality, then we should
* move UnicodeSet from the i18n to the common library and
* use it directly.
* The only part of this code that would still be useful is the serialization
* and the functions that use the serialized form directly.
*/
#include "unicode/utypes.h"
#include "cmemory.h"
#include "uset.h"
#define USET_STATIC_CAPACITY 12
#define USET_GROW_DELTA 20
/*
 * A set of code points, stored as an ascending list of range boundaries:
 * items at even indexes are range starts, items at odd indexes are the
 * matching exclusive range limits. A last range that runs to the end of
 * Unicode has no stored limit (see uset_getRange()).
 */
struct USet {
UChar32 *array; /* boundary list; points at staticBuffer until addRemove() has to grow it */
int32_t length, capacity; /* number of items used in array / number of items array can hold */
UChar32 staticBuffer[USET_STATIC_CAPACITY]; /* initial storage embedded in the struct */
};
/*
 * Allocate a USet that initially contains the code points start..limit-1.
 * start is pinned to 0 and limit to 0x110000; if the pinned range is empty
 * the new set is empty.
 * Returns NULL if the allocation fails.
 */
U_CAPI USet * U_EXPORT2
uset_open(UChar32 start, UChar32 limit) {
    USet *newSet=(USet *)uprv_malloc(sizeof(USet));
    if(newSet==NULL) {
        return NULL;
    }

    /* begin as an empty set that lives in the embedded buffer */
    newSet->array=newSet->staticBuffer;
    newSet->capacity=USET_STATIC_CAPACITY;
    newSet->length=0;

    /* pin the requested range to valid code points; UChar32 may be signed */
    if(start<0) {
        start=0;
    }
    if(limit>0x110000) {
        limit=0x110000;
    }

    if(start<limit) {
        newSet->array[0]=start;
        newSet->length=1;
        if(limit!=0x110000) {
            /* a range that reaches the end of Unicode stores no limit */
            newSet->array[1]=limit;
            newSet->length=2;
        }
    }
    return newSet;
}
/*
 * Release a set created with uset_open(). NULL is accepted and ignored.
 */
U_CAPI void U_EXPORT2
uset_close(USet *set) {
    if(set==NULL) {
        return;
    }
    /* the boundary list is only heap memory once it has outgrown the embedded buffer */
    if(set->array!=set->staticBuffer) {
        uprv_free(set->array);
    }
    uprv_free(set);
}
/*
 * Return the index of the first boundary in array[0..length[ that is
 * greater than c, or length if there is none.
 * The result is odd exactly when c lies inside one of the ranges.
 */
static U_INLINE int32_t
findChar(const UChar32 *array, int32_t length, UChar32 c) {
    int32_t pos=0, scanLimit=length;

    if(length>0) {
        /* test the last boundary first so that appending is cheap */
        if(c>=array[length-1]) {
            return length;
        }
        /* the last boundary has been handled, leave it out of the scan */
        scanLimit=length-1;
    }

    while(pos<scanLimit && c>=array[pos]) {
        ++pos;
    }
    return pos;
}
/*
 * Add c to the set (doRemove==0) or remove it (doRemove==1).
 *
 * Both operations toggle the membership of c, which in the boundary list
 * means inserting the boundaries c and c+1 and cancelling them against any
 * equal boundary that is already present. That is why one code path serves
 * both adding and removing.
 *
 * Returns FALSE if set is NULL, c is not a code point, or the list cannot
 * be grown; TRUE otherwise (including when nothing had to change).
 * doRemove is XORed with a 0/1 value, so callers must pass exactly 0 or 1.
 */
static UBool
addRemove(USet *set, UChar32 c, int32_t doRemove) {
int32_t i, length, more;
if(set==NULL || (uint32_t)c>0x10ffff) {
return FALSE;
}
length=set->length;
i=findChar(set->array, length, c);
/* i is odd when c is currently contained */
if((i&1)^doRemove) {
/* nothing to do: adding a contained c, or removing a c that is not contained */
return TRUE;
}
/* how many more array items do we need? */
if(i<length && (c+1)==set->array[i]) {
/* the boundary c+1 already exists at i: move it down to c instead of inserting */
set->array[i]=c;
if(i>0) {
--i;
if(c==set->array[i]) {
/* boundaries i and i+1 are now both c and cancel out, remove the pair */
set->length=length-=2;
if(i<length) {
uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
}
}
}
return TRUE;
} else if(i>0 && c==set->array[i-1]) {
/* the boundary c already exists at i-1: move it up to c+1 instead of inserting */
if(++c<=0x10ffff) {
set->array[i-1]=c;
if(i<length && c==set->array[i]) {
/* boundaries i-1 and i are now both c+1 and cancel out, remove the pair */
--i;
set->length=length-=2;
if(i<length) {
uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
}
}
} else {
/* c was 0x10ffff: the moved boundary would be 0x110000, which is never stored, so drop it */
set->length=i-1;
}
return TRUE;
} else if(i==length && c==0x10ffff) {
/* insert one range limit c */
more=1;
} else {
/* insert two range limits c, c+1 */
more=2;
}
/* insert <more> range limits */
if(length+more>set->capacity) {
/* reallocate */
int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
UChar32 *newArray=(UChar32 *)uprv_malloc(newCapacity*4);
if(newArray==NULL) {
return FALSE;
}
set->capacity=newCapacity;
uprv_memcpy(newArray, set->array, length*4);
/* the old list is only freed if it was not the buffer embedded in the struct */
if(set->array!=set->staticBuffer) {
uprv_free(set->array);
}
set->array=newArray;
}
/* open a gap of <more> items at i */
if(i<length) {
uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
}
set->array[i]=c;
if(more==2) {
set->array[i+1]=c+1;
}
set->length+=more;
return TRUE;
}
/*
 * Add the code point c to the set.
 * Returns the result of addRemove(): FALSE for a NULL set, an invalid c
 * or an allocation failure, otherwise TRUE.
 */
U_CAPI UBool U_EXPORT2
uset_add(USet *set, UChar32 c) {
return addRemove(set, c, 0);
}
/*
 * Remove the code point c from the set.
 * The success/failure result of addRemove() is discarded here.
 */
U_CAPI void U_EXPORT2
uset_remove(USet *set, UChar32 c) {
addRemove(set, c, 1);
}
/*
 * Return TRUE if the set has no boundaries stored, or if set is NULL.
 */
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet *set) {
    if(set==NULL) {
        return TRUE;
    }
    return (UBool)(set->length<=0);
}
/*
 * Return TRUE if c is in the set.
 * A NULL set or a c outside 0..0x10ffff yields FALSE.
 */
U_CAPI UBool U_EXPORT2
uset_contains(const USet *set, UChar32 c) {
    if(set!=NULL && (uint32_t)c<=0x10ffff) {
        /* an odd number of boundaries at or below c means c is inside a range */
        int32_t pos=findChar(set->array, set->length, c);
        return (UBool)(pos&1);
    }
    return FALSE;
}
/*
 * If the set consists of exactly one code point, return it; otherwise
 * (including for a NULL set) return -1.
 */
U_CAPI int32_t U_EXPORT2
uset_containsOne(const USet *set) {
    if(set==NULL) {
        return -1;
    }

    switch(set->length) {
    case 1:
        /* a single open-ended range holds one code point only if it starts at 0x10ffff */
        if(set->array[0]==0x10ffff) {
            return (int32_t)set->array[0];
        }
        break;
    case 2:
        /* a single closed range [start, limit[ with limit==start+1 */
        if(set->array[0]==(set->array[1]-1)) {
            return (int32_t)set->array[0];
        }
        break;
    default:
        break;
    }
    return -1;
}
/*
 * Return the number of ranges in the set, 0 for a NULL set.
 * The boundary count is rounded up because a last range that reaches the
 * end of Unicode is stored with a start but no limit.
 */
U_CAPI int32_t U_EXPORT2
uset_countRanges(const USet *set) {
    return set==NULL ? 0 : (set->length+1)/2;
}
/*
 * Fetch the rangeIndex-th range of the set.
 *
 * @param set        the set to read; NULL yields FALSE
 * @param rangeIndex 0-based index, see uset_countRanges()
 * @param pStart     receives the first code point of the range
 * @param pLimit     receives the exclusive limit of the range, 0x110000 for
 *                   a last range that reaches the end of Unicode
 * @return TRUE if the range exists and both outputs were written;
 *         FALSE for a bad index or a NULL argument (outputs untouched)
 */
U_CAPI UBool U_EXPORT2
uset_getRange(const USet *set, int32_t rangeIndex,
              UChar32 *pStart, UChar32 *pLimit) {
    int32_t boundaryIndex;

    /* reject NULL output pointers too, as uset_getSerializedRange() does */
    if(set==NULL || rangeIndex<0 || pStart==NULL || pLimit==NULL) {
        return FALSE;
    }

    /* each range occupies a start/limit pair of boundaries */
    boundaryIndex=rangeIndex*2;
    if(boundaryIndex>=set->length) {
        return FALSE;
    }

    *pStart=set->array[boundaryIndex++];
    if(boundaryIndex<set->length) {
        *pLimit=set->array[boundaryIndex];
    } else {
        /* the last range has no stored limit: it extends to the end of Unicode */
        *pLimit=0x110000;
    }
    return TRUE;
}
/*
 * Serialize a USet into 16-bit units.
 * Store BMP code points as themselves with one 16-bit unit each.
 *
 * Important: the code points in the array are in ascending order,
 * therefore all BMP code points precede all supplementary code points.
 *
 * Store each supplementary code point in 2 16-bit units,
 * simply with higher-then-lower 16-bit halves.
 *
 * Precede the entire list with the length.
 * If there are supplementary code points, then set bit 15 in the length
 * and add the bmpLength between it and the array.
 *
 * In other words:
 * - all BMP:            (length=bmpLength) BMP, .., BMP
 * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
 *
 * @param set          the set to serialize, must not be NULL
 * @param dest         output buffer; may be NULL only if destCapacity==0
 * @param destCapacity number of 16-bit units available in dest, >=0
 * @param pErrorCode   in/out error code; nothing is done if it already
 *                     indicates a failure
 * @return the number of 16-bit units that the serialized form needs.
 *         If that is more than destCapacity, nothing is written and
 *         U_BUFFER_OVERFLOW_ERROR is set, so that callers can preflight.
 *         Returns 0 with U_ILLEGAL_ARGUMENT_ERROR for bad arguments and
 *         with U_INDEX_OUTOFBOUNDS_ERROR if the array part would need more
 *         than 0x7fff units.
 */
U_CAPI int32_t U_EXPORT2
uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    int32_t bmpLength, length, destLength;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(set==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* count necessary 16-bit units */
    length=set->length;
    if(length==0) {
        /* empty set: just the length unit */
        if(destCapacity>0) {
            *dest=0;
        } else {
            /* report the missing unit like the non-empty path does */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
        return 1;
    }
    /* now length>0 */

    if(set->array[length-1]<=0xffff) {
        /* all BMP */
        bmpLength=length;
    } else if(set->array[0]>=0x10000) {
        /* all supplementary */
        bmpLength=0;
        length*=2;
    } else {
        /* some BMP, some supplementary */
        for(bmpLength=0; bmpLength<length && set->array[bmpLength]<=0xffff; ++bmpLength) {}
        length=bmpLength+2*(length-bmpLength);
    }

    /* length: number of 16-bit array units */
    if(length>0x7fff) {
        /* there are only 15 bits for the length in the first serialized word */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /*
     * total serialized length:
     * number of 16-bit array units (length) +
     * 1 length unit (always) +
     * 1 bmpLength unit (if there are supplementary values)
     */
    destLength=length+1+(length>bmpLength);
    if(destLength<=destCapacity) {
        const UChar32 *p;
        int32_t i;

        *dest=(uint16_t)length;
        if(length>bmpLength) {
            *dest|=0x8000;
            *++dest=(uint16_t)bmpLength;
        }
        ++dest;

        /* write the BMP part of the array */
        p=set->array;
        for(i=0; i<bmpLength; ++i) {
            *dest++=(uint16_t)*p++;
        }

        /* write the supplementary part of the array, two units per boundary */
        for(; i<length; i+=2) {
            *dest++=(uint16_t)(*p>>16);
            *dest++=(uint16_t)*p++;
        }
    } else {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destLength;
}
/*
 * Set up fillSet so that it refers to the serialized set stored in
 * src[0..srcCapacity[. The array units are not copied: fillSet->array
 * points into src.
 *
 * Returns FALSE if fillSet is NULL. Also returns FALSE, with fillSet set to
 * zero lengths, if src is NULL/empty or shorter than its header claims.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcCapacity) {
    int32_t unitCount, headerUnits;
    UBool hasSupplementary;

    if(fillSet==NULL) {
        return FALSE;
    }

    if(src!=NULL && srcCapacity>0) {
        /* bit 15 of the first unit flags a following bmpLength unit */
        unitCount=src[0];
        hasSupplementary=(UBool)((unitCount&0x8000)!=0);
        unitCount&=0x7fff;
        headerUnits= hasSupplementary ? 2 : 1;

        if(srcCapacity>=headerUnits+unitCount) {
            fillSet->bmpLength= hasSupplementary ? src[1] : unitCount;
            fillSet->array=src+headerUnits;
            fillSet->length=unitCount;
            return TRUE;
        }
    }

    /* missing or truncated input */
    fillSet->length=fillSet->bmpLength=0;
    return FALSE;
}
/*
 * Make fillSet describe the set that contains exactly the code point c,
 * using the staticArray embedded in fillSet as storage.
 * Does nothing if fillSet is NULL or c is not a code point.
 */
U_CAPI void U_EXPORT2
uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c) {
    uint16_t *units;

    if(fillSet==NULL || (uint32_t)c>0x10ffff) {
        return;
    }

    units=fillSet->staticArray;
    fillSet->array=units;

    if(c>0xffff) {
        /* supplementary start, two units */
        fillSet->bmpLength=0;
        units[0]=(uint16_t)(c>>16);
        units[1]=(uint16_t)c;
        if(c==0x10ffff) {
            /* the limit would be 0x110000, which is implied and not stored */
            fillSet->length=2;
        } else {
            UChar32 limit=c+1;
            units[2]=(uint16_t)(limit>>16);
            units[3]=(uint16_t)limit;
            fillSet->length=4;
        }
    } else if(c==0xffff) {
        /* BMP start, but the limit 0x10000 needs two units */
        fillSet->bmpLength=1;
        fillSet->length=3;
        units[0]=0xffff;
        units[1]=1;
        units[2]=0;
    } else {
        /* start and limit both fit into one unit each */
        fillSet->bmpLength=fillSet->length=2;
        units[0]=(uint16_t)c;
        units[1]=(uint16_t)(c+1);
    }
}
/*
 * Test whether the serialized set contains c, working directly on the
 * 16-bit units. Returns FALSE for a NULL set or a c outside 0..0x10ffff.
 * In both branches c is contained when an odd number of boundaries is <=c.
 */
U_CAPI UBool U_EXPORT2
uset_serializedContains(const USerializedSet *set, UChar32 c) {
const uint16_t *array;
if(set==NULL || (uint32_t)c>0x10ffff) {
return FALSE;
}
array=set->array;
if(c<=0xffff) {
/* find c in the BMP part */
int32_t i, bmpLength=set->bmpLength;
/* i ends up as the number of one-unit boundaries that are <=c */
for(i=0; i<bmpLength && (uint16_t)c>=array[i]; ++i) {}
return (UBool)(i&1);
} else {
/* find c in the supplementary part */
int32_t i, length=set->length;
uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
/* step over every two-unit boundary (high unit first) that is <=c */
for(i=set->bmpLength;
i<length && (high>array[i] || (high==array[i] && low>=array[i+1]));
i+=2) {}
/* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
/* i is bmpLength+2*k for k passed boundaries, so the sum is 2*(bmpLength+k) and bit 1 is the parity of bmpLength+k */
return (UBool)(((i+set->bmpLength)&2)!=0);
}
}
/*
 * Return the number of ranges in the serialized set, 0 for a NULL set.
 */
U_CAPI int32_t U_EXPORT2
uset_countSerializedRanges(const USerializedSet *set) {
    int32_t bmpBoundaries, suppBoundaries;

    if(set==NULL) {
        return 0;
    }

    /* BMP boundaries take one unit each, supplementary boundaries take two */
    bmpBoundaries=set->bmpLength;
    suppBoundaries=(set->length-set->bmpLength)/2;

    /* round up: a last range without a stored limit still counts */
    return (bmpBoundaries+suppBoundaries+1)/2;
}
/*
 * Fetch the rangeIndex-th range from a serialized set.
 * On success *pStart receives the first code point and *pLimit the
 * exclusive limit of the range (0x110000 if no limit is stored).
 * Returns FALSE, leaving the outputs untouched, for a NULL argument, a
 * negative index or an index past the last range.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex,
UChar32 *pStart, UChar32 *pLimit) {
const uint16_t *array;
int32_t bmpLength, length;
if(set==NULL || rangeIndex<0 || pStart==NULL || pLimit==NULL) {
return FALSE;
}
array=set->array;
length=set->length;
bmpLength=set->bmpLength;
rangeIndex*=2; /* address start/limit pairs */
if(rangeIndex<bmpLength) {
/* the start is a one-unit BMP boundary */
*pStart=array[rangeIndex++];
if(rangeIndex<bmpLength) {
*pLimit=array[rangeIndex];
} else if(rangeIndex<length) {
/* the limit is the first two-unit boundary, stored high unit first */
*pLimit=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
} else {
/* no limit stored: the range extends to the end of Unicode */
*pLimit=0x110000;
}
return TRUE;
} else {
/* rangeIndex now counts boundaries; make it relative to the supplementary part */
rangeIndex-=bmpLength;
rangeIndex*=2; /* address pairs of pairs of units */
length-=bmpLength;
if(rangeIndex<length) {
array+=bmpLength;
*pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
rangeIndex+=2;
if(rangeIndex<length) {
*pLimit=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
} else {
/* no limit stored: the range extends to the end of Unicode */
*pLimit=0x110000;
}
return TRUE;
} else {
return FALSE;
}
}
}

View File

@ -0,0 +1,374 @@
/*
*******************************************************************************
*
* Copyright (C) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uset.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002mar07
* created by: Markus W. Scherer
*
* The serialized structure, the array of range limits, is
* the same as in UnicodeSet, except that the HIGH value is not stored.
*
* There are functions to efficiently serialize a USet into an array of uint16_t
* and functions to use such a serialized form efficiently without
* instantiating a new USet.
*/
#include "unicode/utypes.h"
#include "cmemory.h"
#include "unicode/uset.h"
#include "unicode/uniset.h"
#define USET_STATIC_CAPACITY 12
#define USET_GROW_DELTA 20
/*
 * Create a set by constructing a UnicodeSet(start, end) with new and
 * handing it out as an opaque USet pointer. Release it with uset_close().
 * NOTE(review): end is presumably inclusive here, since this change renames
 * the parameter from "limit" to "end" -- confirm against UnicodeSet.
 */
U_CAPI USet * U_EXPORT2
uset_open(UChar32 start, UChar32 end) {
return (USet*) new UnicodeSet(start, end);
}
/*
 * Destroy a set created with uset_open() by deleting the underlying
 * UnicodeSet. Passing NULL is harmless because deleting a null pointer
 * does nothing.
 */
U_CAPI void U_EXPORT2
uset_close(USet *set) {
delete (UnicodeSet*) set;
}
/*
 * Add the code point c by forwarding to UnicodeSet::add().
 * NOTE(review): set is dereferenced without a NULL check.
 */
U_CAPI void U_EXPORT2
uset_add(USet *set, UChar32 c) {
((UnicodeSet*) set)->add(c);
}
/*
 * Remove the code point c by forwarding to UnicodeSet::remove().
 * NOTE(review): set is dereferenced without a NULL check.
 */
U_CAPI void U_EXPORT2
uset_remove(USet *set, UChar32 c) {
((UnicodeSet*) set)->remove(c);
}
/*
 * Return the result of UnicodeSet::isEmpty() for this set.
 * NOTE(review): set is dereferenced without a NULL check.
 */
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet *set) {
return ((const UnicodeSet*) set)->isEmpty();
}
/*
 * Return the result of UnicodeSet::contains(c) for this set.
 * NOTE(review): set is dereferenced without a NULL check.
 */
U_CAPI UBool U_EXPORT2
uset_contains(const USet *set, UChar32 c) {
return ((const UnicodeSet*) set)->contains(c);
}
/*
 * Return the result of UnicodeSet::size() for this set.
 * NOTE(review): set is dereferenced without a NULL check.
 */
U_CAPI int32_t U_EXPORT2
uset_size(const USet* set) {
return ((const UnicodeSet*) set)->size();
}
/*
 * Return the result of UnicodeSet::getRangeCount() for this set; valid
 * indexes for uset_getRange() are below this value.
 * NOTE(review): set is dereferenced without a NULL check.
 */
U_CAPI int32_t U_EXPORT2
uset_getRangeCount(const USet *set) {
return ((const UnicodeSet*) set)->getRangeCount();
}
/*
 * Fetch the rangeIndex-th range of the set.
 *
 * @param set        the set to read
 * @param rangeIndex 0-based index, must be below uset_getRangeCount(set)
 * @param pStart     receives UnicodeSet::getRangeStart(rangeIndex)
 * @param pEnd       receives UnicodeSet::getRangeEnd(rangeIndex)
 * @return TRUE if both outputs were written; FALSE, with the outputs
 *         untouched, for an out-of-range index or a NULL argument
 */
U_CAPI UBool U_EXPORT2
uset_getRange(const USet *set, int32_t rangeIndex,
              UChar32 *pStart, UChar32 *pEnd) {
    /* reject NULL arguments, as uset_getSerializedRange() does */
    if (set == NULL || pStart == NULL || pEnd == NULL) {
        return FALSE;
    }
    const UnicodeSet* us = (const UnicodeSet*) set;
    /* the unsigned comparison also rejects negative indexes */
    if ((uint32_t) rangeIndex >= (uint32_t) us->getRangeCount()) {
        return FALSE;
    }
    *pStart = us->getRangeStart(rangeIndex);
    *pEnd = us->getRangeEnd(rangeIndex);
    return TRUE;
}
/*
 * Serialize a USet into 16-bit units.
 * Store BMP code points as themselves with one 16-bit unit each.
 *
 * Important: the code points in the array are in ascending order,
 * therefore all BMP code points precede all supplementary code points.
 *
 * Store each supplementary code point in 2 16-bit units,
 * simply with higher-then-lower 16-bit halves.
 *
 * Precede the entire list with the length.
 * If there are supplementary code points, then set bit 15 in the length
 * and add the bmpLength between it and the array.
 *
 * In other words:
 * - all BMP:            (length=bmpLength) BMP, .., BMP
 * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
 *
 * The work is done by UnicodeSet::serialize(); this wrapper only checks the
 * error code and the set pointer.
 *
 * @param set          the set to serialize, must not be NULL
 * @param dest         output buffer for the 16-bit units
 * @param destCapacity number of units available in dest
 * @param ec           in/out error code; nothing is done if it is NULL or
 *                     already indicates a failure
 * @return whatever UnicodeSet::serialize() returns; 0 if ec was NULL or a
 *         failure on entry, or if set is NULL (U_ILLEGAL_ARGUMENT_ERROR)
 */
U_CAPI int32_t U_EXPORT2
uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *ec) {
    if (ec==NULL || U_FAILURE(*ec)) {
        return 0;
    }
    /* the previous C implementation reported a NULL set instead of dereferencing it */
    if (set==NULL) {
        *ec=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    return ((const UnicodeSet*) set)->serialize(dest, destCapacity, *ec);
}
/*
 * Set up fillSet so that it refers to the serialized set stored in
 * src[0..srcLength[. The array units are not copied: fillSet->array
 * points into src.
 *
 * Returns FALSE if fillSet is NULL. Also returns FALSE, with fillSet set to
 * zero lengths, if src is NULL/empty or shorter than its header claims.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength) {
    int32_t unitCount, headerUnits;
    UBool hasSupplementary;

    if(fillSet==NULL) {
        return FALSE;
    }

    if(src!=NULL && srcLength>0) {
        /* bit 15 of the first unit flags a following bmpLength unit */
        unitCount=src[0];
        hasSupplementary=(UBool)((unitCount&0x8000)!=0);
        unitCount&=0x7fff;
        headerUnits= hasSupplementary ? 2 : 1;

        if(srcLength>=headerUnits+unitCount) {
            fillSet->bmpLength= hasSupplementary ? src[1] : unitCount;
            fillSet->array=src+headerUnits;
            fillSet->length=unitCount;
            return TRUE;
        }
    }

    /* missing or truncated input */
    fillSet->length=fillSet->bmpLength=0;
    return FALSE;
}
/*
 * Make fillSet describe the set that contains exactly the code point c,
 * using the staticArray embedded in fillSet as storage.
 * Does nothing if fillSet is NULL or c is not a code point.
 */
U_CAPI void U_EXPORT2
uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c) {
    uint16_t *units;

    if(fillSet==NULL || (uint32_t)c>0x10ffff) {
        return;
    }

    units=fillSet->staticArray;
    fillSet->array=units;

    if(c>0xffff) {
        /* supplementary start, two units */
        fillSet->bmpLength=0;
        units[0]=(uint16_t)(c>>16);
        units[1]=(uint16_t)c;
        if(c==0x10ffff) {
            /* the limit would be 0x110000, which is implied and not stored */
            fillSet->length=2;
        } else {
            UChar32 limit=c+1;
            units[2]=(uint16_t)(limit>>16);
            units[3]=(uint16_t)limit;
            fillSet->length=4;
        }
    } else if(c==0xffff) {
        /* BMP start, but the limit 0x10000 needs two units */
        fillSet->bmpLength=1;
        fillSet->length=3;
        units[0]=0xffff;
        units[1]=1;
        units[2]=0;
    } else {
        /* start and limit both fit into one unit each */
        fillSet->bmpLength=fillSet->length=2;
        units[0]=(uint16_t)c;
        units[1]=(uint16_t)(c+1);
    }
}
/*
 * Test whether the serialized set contains c, working directly on the
 * 16-bit units. Returns FALSE for a NULL set or a c outside 0..0x10ffff.
 * In both branches c is contained when an odd number of boundaries is <=c.
 */
U_CAPI UBool U_EXPORT2
uset_serializedContains(const USerializedSet *set, UChar32 c) {
const uint16_t *array;
if(set==NULL || (uint32_t)c>0x10ffff) {
return FALSE;
}
array=set->array;
if(c<=0xffff) {
/* find c in the BMP part */
int32_t i, bmpLength=set->bmpLength;
/* i ends up as the number of one-unit boundaries that are <=c */
for(i=0; i<bmpLength && (uint16_t)c>=array[i]; ++i) {}
return (UBool)(i&1);
} else {
/* find c in the supplementary part */
int32_t i, length=set->length;
uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
/* step over every two-unit boundary (high unit first) that is <=c */
for(i=set->bmpLength;
i<length && (high>array[i] || (high==array[i] && low>=array[i+1]));
i+=2) {}
/* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
/* i is bmpLength+2*k for k passed boundaries, so the sum is 2*(bmpLength+k) and bit 1 is the parity of bmpLength+k */
return (UBool)(((i+set->bmpLength)&2)!=0);
}
}
/*
 * Return the number of ranges in the serialized set, 0 for a NULL set.
 */
U_CAPI int32_t U_EXPORT2
uset_countSerializedRanges(const USerializedSet *set) {
    int32_t bmpBoundaries, suppBoundaries;

    if(set==NULL) {
        return 0;
    }

    /* BMP boundaries take one unit each, supplementary boundaries take two */
    bmpBoundaries=set->bmpLength;
    suppBoundaries=(set->length-set->bmpLength)/2;

    /* round up: a last range without a stored limit still counts */
    return (bmpBoundaries+suppBoundaries+1)/2;
}
/*
 * Fetch the rangeIndex-th range from a serialized set.
 * On success *pStart receives the first and *pEnd the last code point of
 * the range: the serialized form stores exclusive limits, so the stored
 * (or implied 0x110000) limit is decremented before returning.
 * Returns FALSE, leaving the outputs untouched, for a NULL argument, a
 * negative index or an index past the last range.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex,
UChar32 *pStart, UChar32 *pEnd) {
const uint16_t *array;
int32_t bmpLength, length;
if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) {
return FALSE;
}
array=set->array;
length=set->length;
bmpLength=set->bmpLength;
rangeIndex*=2; /* address start/limit pairs */
if(rangeIndex<bmpLength) {
/* the start is a one-unit BMP boundary */
*pStart=array[rangeIndex++];
if(rangeIndex<bmpLength) {
*pEnd=array[rangeIndex];
} else if(rangeIndex<length) {
/* the limit is the first two-unit boundary, stored high unit first */
*pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
} else {
/* no limit stored: the range extends to the end of Unicode */
*pEnd=0x110000;
}
/* turn the exclusive limit into an inclusive end */
--*pEnd;
return TRUE;
} else {
/* rangeIndex now counts boundaries; make it relative to the supplementary part */
rangeIndex-=bmpLength;
rangeIndex*=2; /* address pairs of pairs of units */
length-=bmpLength;
if(rangeIndex<length) {
array+=bmpLength;
*pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
rangeIndex+=2;
if(rangeIndex<length) {
*pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
} else {
/* no limit stored: the range extends to the end of Unicode */
*pEnd=0x110000;
}
/* turn the exclusive limit into an inclusive end */
--*pEnd;
return TRUE;
} else {
return FALSE;
}
}
}
// TODO Investigate incorporating this code into UnicodeSet to improve
// efficiency.
// ---
// static U_INLINE int32_t
// findChar(const UChar32 *array, int32_t length, UChar32 c) {
// int32_t i;
//
// /* check the last range limit first for more efficient appending */
// if(length>0) {
// if(c>=array[length-1]) {
// return length;
// }
//
// /* do not check the last range limit again in the loop below */
// --length;
// }
//
// for(i=0; i<length && c>=array[i]; ++i) {}
// return i;
// }
//
// static UBool
// addRemove(USet *set, UChar32 c, int32_t doRemove) {
// int32_t i, length, more;
//
// if(set==NULL || (uint32_t)c>0x10ffff) {
// return FALSE;
// }
//
// length=set->length;
// i=findChar(set->array, length, c);
// if((i&1)^doRemove) {
// /* c is already in the set */
// return TRUE;
// }
//
// /* how many more array items do we need? */
// if(i<length && (c+1)==set->array[i]) {
// /* c is just before the following range, extend that in-place by one */
// set->array[i]=c;
// if(i>0) {
// --i;
// if(c==set->array[i]) {
// /* the previous range collapsed, remove it */
// set->length=length-=2;
// if(i<length) {
// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
// }
// }
// }
// return TRUE;
// } else if(i>0 && c==set->array[i-1]) {
// /* c is just after the previous range, extend that in-place by one */
// if(++c<=0x10ffff) {
// set->array[i-1]=c;
// if(i<length && c==set->array[i]) {
// /* the following range collapsed, remove it */
// --i;
// set->length=length-=2;
// if(i<length) {
// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
// }
// }
// } else {
// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
// set->length=i-1;
// }
// return TRUE;
// } else if(i==length && c==0x10ffff) {
// /* insert one range limit c */
// more=1;
// } else {
// /* insert two range limits c, c+1 */
// more=2;
// }
//
// /* insert <more> range limits */
// if(length+more>set->capacity) {
// /* reallocate */
// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
// UChar32 *newArray=(UChar32 *)uprv_malloc(newCapacity*4);
// if(newArray==NULL) {
// return FALSE;
// }
// set->capacity=newCapacity;
// uprv_memcpy(newArray, set->array, length*4);
//
// if(set->array!=set->staticBuffer) {
// uprv_free(set->array);
// }
// set->array=newArray;
// }
//
// if(i<length) {
// uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
// }
// set->array[i]=c;
// if(more==2) {
// set->array[i+1]=c+1;
// }
// set->length+=more;
//
// return TRUE;
// }
//
// U_CAPI UBool U_EXPORT2
// uset_add(USet *set, UChar32 c) {
// return addRemove(set, c, 0);
// }
//
// U_CAPI void U_EXPORT2
// uset_remove(USet *set, UChar32 c) {
// addRemove(set, c, 1);
// }

View File

@ -1,94 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uset.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002mar07
* created by: Markus W. Scherer
*
* Poor man's C version of UnicodeSet, with only basic functions.
* See uset.c for more details.
*/
#ifndef __USET_H__
#define __USET_H__
#include "unicode/utypes.h"
/** Opaque set of code points; create with uset_open(), release with uset_close(). */
struct USet;
typedef struct USet USet;
enum {
USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 /**< enough for any single-code point set */
};
/**
 * View of a set in serialized form (see uset_serialize()).
 * Filled in by uset_getSerializedSet() or uset_setSerializedToOne().
 */
struct USerializedSet {
const uint16_t *array; /**< the serialized boundaries: either caller-provided data or staticArray */
int32_t bmpLength, length; /**< number of units holding BMP boundaries / total number of units in array */
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; /**< storage used by uset_setSerializedToOne() */
};
typedef struct USerializedSet USerializedSet;
/**
 * Create a set that contains the code points start..limit-1.
 * The arguments are pinned to 0..0x110000.
 *
 * @return The new set, or NULL if memory could not be allocated.
 */
U_CAPI USet * U_EXPORT2
uset_open(UChar32 start, UChar32 limit);
/**
 * Release a set and its memory. NULL is ignored.
 */
U_CAPI void U_EXPORT2
uset_close(USet *set);
/**
 * Add the code point c to the set.
 *
 * @return FALSE if set is NULL, c is not in 0..0x10ffff,
 *         or memory could not be allocated; otherwise TRUE.
 */
U_CAPI UBool U_EXPORT2
uset_add(USet *set, UChar32 c);
/**
 * Remove the code point c from the set. Failures are not reported.
 */
U_CAPI void U_EXPORT2
uset_remove(USet *set, UChar32 c);
/**
 * @return TRUE if the set contains no code points or set is NULL.
 */
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet *set);
/**
 * @return TRUE if the set contains c; FALSE otherwise,
 *         also if set is NULL or c is not in 0..0x10ffff.
 */
U_CAPI UBool U_EXPORT2
uset_contains(const USet *set, UChar32 c);
/**
 * Check if the set contains exactly one code point.
 *
 * @return The code point if the set contains exactly one, otherwise -1.
 */
U_CAPI int32_t U_EXPORT2
uset_containsOne(const USet *set);
/**
 * @return The number of ranges in the set, 0 if set is NULL.
 */
U_CAPI int32_t U_EXPORT2
uset_countRanges(const USet *set);
/**
 * Get the range with the given 0-based index.
 * *pLimit is exclusive; it is 0x110000 for a range that reaches the end of Unicode.
 *
 * @return TRUE if the range exists, FALSE if set is NULL or rangeIndex is out of bounds.
 */
U_CAPI UBool U_EXPORT2
uset_getRange(const USet *set, int32_t rangeIndex,
UChar32 *pStart, UChar32 *pLimit);
/**
 * Write the set into dest as an array of 16-bit units.
 * U_BUFFER_OVERFLOW_ERROR is set when a non-empty set needs more than
 * destCapacity units, U_ILLEGAL_ARGUMENT_ERROR for bad arguments, and
 * U_INDEX_OUTOFBOUNDS_ERROR when the set is too large for the format.
 *
 * @return The number of units that the serialized form needs, 0 on the latter two errors.
 */
U_CAPI int32_t U_EXPORT2
uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode);
/**
 * Let fillSet refer to the serialized set in src[0..srcCapacity[.
 * The data is not copied, so src must stay valid while fillSet is used.
 *
 * @return TRUE on success, FALSE if an argument is NULL or src is too short.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcCapacity);
/**
 * Set the USerializedSet to contain exactly c.
 */
U_CAPI void U_EXPORT2
uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c);
/**
 * @return TRUE if the serialized set contains c; FALSE otherwise,
 *         also if set is NULL or c is not in 0..0x10ffff.
 */
U_CAPI UBool U_EXPORT2
uset_serializedContains(const USerializedSet *set, UChar32 c);
/**
 * @return The number of ranges in the serialized set, 0 if set is NULL.
 */
U_CAPI int32_t U_EXPORT2
uset_countSerializedRanges(const USerializedSet *set);
/**
 * Get the range with the given 0-based index from a serialized set.
 * *pLimit is exclusive; it is 0x110000 for a range that reaches the end of Unicode.
 *
 * @return TRUE if the range exists, FALSE for a NULL argument or an index out of bounds.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex,
UChar32 *pStart, UChar32 *pLimit);
#endif

View File

@ -3,9 +3,9 @@
* Copyright (c) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* $Source: /xsrl/Nsvn/icu/icu/source/i18n/Attic/usetiter.cpp,v $
* $Date: 2002/04/30 22:11:47 $
* $Revision: 1.2 $
* $Source: /xsrl/Nsvn/icu/icu/source/common/usetiter.cpp,v $
* $Date: 2002/05/29 18:33:33 $
* $Revision: 1.1 $
**********************************************************************
*/
#include "unicode/usetiter.h"

View File

@ -22,7 +22,7 @@ U_NAMESPACE_BEGIN
class UnicodeMatcher;
class ICU_Utility {
class U_COMMON_API ICU_Utility {
public:
/**

View File

@ -49,12 +49,12 @@ ucal.o calendar.o gregocal.o timezone.o simpletz.o \
sortkey.o bocsu.o coleitr.o coll.o ucoleitr.o \
ucol.o ucol_bld.o ucol_cnt.o ucol_elm.o ucol_tok.o ucol_wgt.o tblcoll.o \
strmatch.o usearch.o search.o stsearch.o \
uniset.o unifltlg.o translit.o utrans.o \
unifltlg.o translit.o utrans.o \
cpdtrans.o hextouni.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \
nultrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o \
name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o \
nfrs.o nfrule.o nfsubs.o rbnf.o upropset.o util.o esctrn.o unesctrn.o \
funcrepl.o strrepl.o tridpars.o unifunct.o caniter.o usetiter.o \
name2uni.o uni2name.o unitohex.o nortrans.o quant.o transreg.o \
nfrs.o nfrule.o nfsubs.o rbnf.o esctrn.o unesctrn.o \
funcrepl.o strrepl.o tridpars.o caniter.o \
currency.o

View File

@ -6,7 +6,7 @@
*/
#include "hash.h"
#include "uset.h"
#include "unicode/uset.h"
#include "unormimp.h"
#include "caniter.h"
#include "cmemory.h"
@ -395,7 +395,7 @@ Hashtable *CanonicalIterator::getEquivalents2(const UChar *segment, int32_t segL
USerializedSet starts;
// cycle through all the characters
UChar32 cp, limit = 0;
UChar32 cp, end = 0;
int32_t i = 0, j;
for (i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
// see if any character is at the start of some decomposition
@ -404,7 +404,7 @@ Hashtable *CanonicalIterator::getEquivalents2(const UChar *segment, int32_t segL
continue;
}
// if so, see which decompositions match
for(j = 0, cp = limit; cp < limit || uset_getSerializedRange(&starts, j++, &cp, &limit); ++cp) {
for(j = 0, cp = end+1; cp <= end || uset_getSerializedRange(&starts, j++, &cp, &end); ++cp) {
//Hashtable *remainder = extract(cp, segment, segLen, i, status);
Hashtable *remainder = extract(cp, segment, segLen, i, status);
if (remainder == NULL) continue;

View File

@ -99,7 +99,7 @@ LINK32=link.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /FD /c
# ADD CPP /nologo /MD /Za /W3 /I "..\..\include" /I "..\..\source\common" /D"WIN64" /D"NDEBUG" /D"_WINDOWS" /D"_MBCS" /D"_USRDLL" /D"I18N_EXPORTS" /D"U_I18N_IMPLEMENTATION" /FR /FD /GF /c /O2 /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600
# ADD CPP /nologo /MD /Za /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\source\common" /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GF /QIA64_fmaopt /Wp64 /Zm600 /c
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win64
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win64
# ADD BASE RSC /l 0x409 /d "NDEBUG"
@ -108,8 +108,8 @@ BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IA64
# ADD LINK32 icuuc.lib /nologo /base:"0x4a900000" /dll /machine:IA64 /out:"..\..\bin\icuin21.dll" /implib:"..\..\lib\icuin.lib" /libpath:"..\..\lib" /incremental:no
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IX86 /machine:IA64
# ADD LINK32 icuuc.lib /nologo /base:"0x4a900000" /dll /machine:IX86 /out:"..\..\bin\icuin21.dll" /implib:"..\..\lib\icuin.lib" /libpath:"..\..\lib" /machine:IA64
# SUBTRACT LINK32 /debug
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
@ -126,7 +126,7 @@ LINK32=link.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /FD /GZ /c
# ADD CPP /nologo /MDd /Za /W3 /Gm /I "..\..\include" /I "..\..\source\common" /D"_WINDOWS" /D"_USRDLL" /D"I18N_EXPORTS" /D"U_I18N_IMPLEMENTATION" /D"WIN64" /D"_DEBUG" /D"_MBCS" /D"UDATA_MAP" /FR /FD /GF /GZ /c /Od /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600
# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /Zi /Od /I "..\..\include" /I "..\..\source\common" /D "_WINDOWS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /D "WIN64" /D "_DEBUG" /D "_MBCS" /D "UDATA_MAP" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GF /GZ /QIA64_fmaopt /Wp64 /Zm600 /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win64
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win64
# ADD BASE RSC /l 0x409 /d "_DEBUG"
@ -135,9 +135,8 @@ BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IA64 /pdbtype:sept
# ADD LINK32 icuucd.lib /nologo /base:"0x4a900000" /dll /debug /machine:IA64 /out:"..\..\bin\icuin21d.dll" /implib:"..\..\lib\icuind.lib" /pdbtype:sept /libpath:"..\..\lib" /incremental:no
# SUBTRACT LINK32
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IX86 /pdbtype:sept /machine:IA64
# ADD LINK32 icuucd.lib /nologo /base:"0x4a900000" /dll /incremental:no /debug /machine:IX86 /out:"..\..\bin\icuin21d.dll" /implib:"..\..\lib\icuind.lib" /pdbtype:sept /libpath:"..\..\lib" /machine:IA64
!ENDIF
@ -400,22 +399,10 @@ SOURCE=.\uni2name.cpp
# End Source File
# Begin Source File
SOURCE=.\unifilt.cpp
# End Source File
# Begin Source File
SOURCE=.\unifltlg.cpp
# End Source File
# Begin Source File
SOURCE=.\unifunct.cpp
# End Source File
# Begin Source File
SOURCE=.\uniset.cpp
# End Source File
# Begin Source File
SOURCE=.\unitohex.cpp
# End Source File
# Begin Source File
@ -424,22 +411,10 @@ SOURCE=.\unum.cpp
# End Source File
# Begin Source File
SOURCE=.\upropset.cpp
# End Source File
# Begin Source File
SOURCE=.\usearch.cpp
# End Source File
# Begin Source File
SOURCE=.\usetiter.cpp
# End Source File
# Begin Source File
SOURCE=.\util.cpp
# End Source File
# Begin Source File
SOURCE=.\utrans.cpp
# End Source File
# End Group
@ -1385,53 +1360,6 @@ InputPath=.\unicode\parseerr.h
# End Source File
# Begin Source File
SOURCE=.\unicode\parsepos.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\parsepos.h
"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\quant.h
# End Source File
# Begin Source File
@ -2225,53 +2153,6 @@ SOURCE=.\uni2name.h
# End Source File
# Begin Source File
SOURCE=.\unicode\unifilt.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\unifilt.h
"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unifltlg.h
!IF "$(CFG)" == "i18n - Win32 Release"
@ -2319,100 +2200,6 @@ InputPath=.\unicode\unifltlg.h
# End Source File
# Begin Source File
SOURCE=.\unicode\unifunct.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\unifunct.h
"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unimatch.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\unimatch.h
"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unirepl.h
!IF "$(CFG)" == "i18n - Win32 Release"
@ -2460,53 +2247,6 @@ InputPath=.\unicode\unirepl.h
# End Source File
# Begin Source File
SOURCE=.\unicode\uniset.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\uniset.h
"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\unitohex.h
!IF "$(CFG)" == "i18n - Win32 Release"
@ -2652,61 +2392,10 @@ InputPath=.\unicode\usearch.h
# End Source File
# Begin Source File
SOURCE=.\unicode\usetiter.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\usetiter.h
"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\usrchimp.h
# End Source File
# Begin Source File
SOURCE=.\util.h
# End Source File
# Begin Source File
SOURCE=.\unicode\utrans.h
!IF "$(CFG)" == "i18n - Win32 Release"

View File

@ -21,8 +21,6 @@ static const UChar FORWARD_OP[] = {32,62,32,0}; // " > "
U_NAMESPACE_BEGIN
const UChar TransliterationRule::ETHER = 0xFFFF;
/**
* Construct a new rule with the given input, output text, and other
* attributes. A cursor position may be specified for the output text.

View File

@ -49,16 +49,6 @@ class UnicodeFunctor;
*/
class TransliterationRule {
public:
/**
* The character at index i, where i < contextStart || i >= contextLimit,
* is ETHER. This allows explicit matching by rules and UnicodeSets
* of text outside the context. In traditional terms, this allows anchoring
* at the start and/or end.
*/
static const UChar ETHER;
private:
// TODO Eliminate the pattern and keyLength data members. They

View File

@ -20,7 +20,6 @@
static UBool i18n_cleanup(void)
{
transliterator_cleanup();
unicodePropertySet_cleanup();
dateFormatSymbols_cleanup();
timeZone_cleanup();
ucol_cleanup();

View File

@ -1,13 +0,0 @@
#include "unicode/unifunct.h"
const char UnicodeFunctor::fgClassID = 0;
/**
 * Base-class implementation of toMatcher(): always returns 0 (a null
 * pointer), i.e. this object does not expose a UnicodeMatcher view.
 * NOTE(review): presumably subclasses that can act as matchers override
 * this -- confirm against unicode/unifunct.h.
 */
UnicodeMatcher* UnicodeFunctor::toMatcher() const {
return 0;
}
/**
 * Base-class implementation of toReplacer(): always returns 0 (a null
 * pointer), i.e. this object does not expose a UnicodeReplacer view.
 * NOTE(review): presumably subclasses that can act as replacers override
 * this -- confirm against unicode/unifunct.h.
 */
UnicodeReplacer* UnicodeFunctor::toReplacer() const {
return 0;
}
//eof

View File

@ -18,7 +18,7 @@
#define __GENPROPS_H__
#include "unicode/utypes.h"
#include "uset.h"
#include "unicode/uset.h"
/* file definitions */
#define DATA_NAME "unorm"

View File

@ -25,7 +25,7 @@
#include "filestrm.h"
#include "unicode/udata.h"
#include "utrie.h"
#include "uset.h"
#include "unicode/uset.h"
#include "unewdata.h"
#include "unormimp.h"
#include "gennorm.h"
@ -806,7 +806,7 @@ setHangulJamoSpecials() {
norm->combiningFlags=1;
/* for each Jamo L create a set with its associated Hangul block */
norm->canonStart=uset_open(hangul, hangul+21*28);
norm->canonStart=uset_open(hangul, hangul+21*28-1);
hangul+=21*28;
}
@ -1000,13 +1000,14 @@ postParseFn(void *context, uint32_t code, Norm *norm) {
c=norm->nfd[0];
otherNorm=createNorm(c);
if(otherNorm->canonStart==NULL) {
otherNorm->canonStart=uset_open(code, code+1);
otherNorm->canonStart=uset_open(code, code);
if(otherNorm->canonStart==NULL) {
fprintf(stderr, "gennorm error: out of memory in uset_open()\n");
exit(U_MEMORY_ALLOCATION_ERROR);
}
} else {
if(!uset_add(otherNorm->canonStart, code)) {
uset_add(otherNorm->canonStart, code);
if(!uset_contains(otherNorm->canonStart, code)) {
fprintf(stderr, "gennorm error: uset_add(setOf(U+%4lx), U+%4x)\n", c, code);
exit(U_INTERNAL_PROGRAM_ERROR);
}
@ -1201,15 +1202,29 @@ makeFCD() {
}
}
/**
 * Single-element probe for a USet.
 *
 * When uset_size() reports exactly one element, the start of range 0
 * (as filled in by uset_getRange()) is returned; any result reported by
 * uset_getRange() itself is not inspected.
 * For every other size, including an empty set, the result is -1.
 *
 * @param set the set to examine
 * @return the set's only character, or -1 if the size is not 1
 */
static int32_t
usetContainsOne(const USet* set) {
    UChar32 first, last;

    if (uset_size(set) != 1) {
        return -1;
    }

    /* exactly one element: range 0 starts at that element */
    uset_getRange(set, 0, &first, &last);
    return first;
}
static void
makeCanonSetFn(void *context, uint32_t code, Norm *norm) {
if(!uset_isEmpty(norm->canonStart)) {
if(norm->canonStart!=NULL && !uset_isEmpty(norm->canonStart)) {
uint16_t *table;
int32_t c, tableLength;
UErrorCode errorCode=U_ZERO_ERROR;
/* does the set contain exactly one code point? */
c=uset_containsOne(norm->canonStart);
c=usetContainsOne(norm->canonStart);
/* add an entry to the BMP or supplementary search table */
if(code<=0xffff) {