// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  nptrans.h
 *   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */
|
|
|
|
|
|
|
|
#ifndef NPTRANS_H
|
|
|
|
#define NPTRANS_H
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2003-05-06 01:22:23 +00:00
|
|
|
|
|
|
|
#if !UCONFIG_NO_IDNA
|
2003-05-29 01:15:29 +00:00
|
|
|
#if !UCONFIG_NO_TRANSLITERATION
|
2003-05-06 01:22:23 +00:00
|
|
|
|
2003-02-28 21:37:55 +00:00
|
|
|
#include "unicode/uniset.h"
|
|
|
|
#include "unicode/ures.h"
|
|
|
|
#include "unicode/translit.h"
|
|
|
|
|
2006-09-04 16:28:24 +00:00
|
|
|
#include "intltest.h"
|
2003-02-28 21:37:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
/** Code point value 0x0020 (U+0020 SPACE). */
#define ASCII_SPACE 0x0020
|
|
|
|
|
|
|
|
class NamePrepTransform {
|
|
|
|
|
|
|
|
private :
|
|
|
|
Transliterator *mapping;
|
|
|
|
UnicodeSet unassigned;
|
|
|
|
UnicodeSet prohibited;
|
|
|
|
UnicodeSet labelSeparatorSet;
|
|
|
|
UResourceBundle *bundle;
|
|
|
|
NamePrepTransform(UParseError& parseError, UErrorCode& status);
|
|
|
|
|
|
|
|
|
|
|
|
public :
|
|
|
|
|
|
|
|
static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
|
|
|
|
|
2003-04-24 16:16:13 +00:00
|
|
|
virtual ~NamePrepTransform();
|
2003-02-28 21:37:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
inline UBool isProhibited(UChar32 ch);
|
2011-05-16 20:46:27 +00:00
|
|
|
|
2003-02-28 21:37:55 +00:00
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
|
|
|
*/
|
|
|
|
inline UClassID getDynamicClassID() const { return getStaticClassID(); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for this class.
|
|
|
|
*/
|
|
|
|
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Map every character in input stream with mapping character
|
|
|
|
* in the mapping table and populate the output stream.
|
|
|
|
* For any individual character the mapping table may specify
|
|
|
|
* that that a character be mapped to nothing, mapped to one
|
|
|
|
* other character or to a string of other characters.
|
|
|
|
*
|
|
|
|
* @param src Pointer to UChar buffer containing a single label
|
|
|
|
* @param srcLength Number of characters in the source label
|
|
|
|
* @param dest Pointer to the destination buffer to receive the output
|
|
|
|
* @param destCapacity The capacity of destination array
|
|
|
|
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
|
|
|
|
* If TRUE unassigned values are treated as normal Unicode code point.
|
|
|
|
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
|
|
|
|
* @param status ICU error code in/out parameter.
|
|
|
|
* Must fulfill U_SUCCESS before the function call.
|
|
|
|
* @return The number of UChars in the destination buffer
|
|
|
|
*/
|
|
|
|
int32_t map(const UChar* src, int32_t srcLength,
|
|
|
|
UChar* dest, int32_t destCapacity,
|
|
|
|
UBool allowUnassigned,
|
|
|
|
UParseError* parseError,
|
|
|
|
UErrorCode& status );
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Prepare the input stream with for use. This operation maps, normalizes(NFKC),
|
|
|
|
* checks for prohited and BiDi characters in the order defined by RFC 3454
|
|
|
|
*
|
|
|
|
* @param src Pointer to UChar buffer containing a single label
|
|
|
|
* @param srcLength Number of characters in the source label
|
|
|
|
* @param dest Pointer to the destination buffer to receive the output
|
|
|
|
* @param destCapacity The capacity of destination array
|
|
|
|
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
|
|
|
|
* If TRUE unassigned values are treated as normal Unicode code point.
|
|
|
|
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
|
|
|
|
* @param status ICU error code in/out parameter.
|
|
|
|
* Must fulfill U_SUCCESS before the function call.
|
|
|
|
* @return The number of UChars in the destination buffer
|
|
|
|
*/
|
|
|
|
int32_t process(const UChar* src, int32_t srcLength,
|
|
|
|
UChar* dest, int32_t destCapacity,
|
|
|
|
UBool allowUnassigned,
|
|
|
|
UParseError* parseError,
|
|
|
|
UErrorCode& status );
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Ascertain if the given code point is a label separator as specified by IDNA
|
|
|
|
*
|
|
|
|
* @return TRUE is the code point is a label separator
|
|
|
|
*/
|
|
|
|
UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
|
|
|
|
|
|
|
|
inline UBool isLDHChar(UChar32 ch);
|
2011-05-16 20:46:27 +00:00
|
|
|
|
|
|
|
private:
|
2003-02-28 21:37:55 +00:00
|
|
|
/**
|
|
|
|
* The address of this static class variable serves as this class's ID
|
|
|
|
* for ICU "poor man's RTTI".
|
|
|
|
*/
|
|
|
|
static const char fgClassID;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Tests whether a code point is an LDH character, i.e. a member of
 * [\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]:
 * the hyphen-minus, an ASCII digit, or an ASCII upper/lower case letter.
 *
 * The result is produced by casting a boolean expression to UBool rather
 * than through the TRUE/FALSE macros, so the function does not depend on
 * those macros being defined.
 *
 * @param ch  Code point to classify.
 * @return A true-valued UBool when ch is in the set above, a false-valued
 *         UBool otherwise.
 */
inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
    // high runner case: every range below ends at or before 0x007A,
    // so anything larger can be rejected immediately
    if(ch > 0x007A){
        return static_cast<UBool>(false);
    }
    //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
    const bool isHyphen    = (ch == 0x002D);
    const bool isDigit     = (0x0030 <= ch && ch <= 0x0039);
    const bool isUpperCase = (0x0041 <= ch && ch <= 0x005A);
    const bool isLowerCase = (0x0061 <= ch && ch <= 0x007A);
    return static_cast<UBool>(isHyphen || isDigit || isUpperCase || isLowerCase);
}
|
2003-05-06 01:22:23 +00:00
|
|
|
|
2003-05-29 01:15:29 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
|
|
|
#else
|
|
|
|
/**
 * Empty placeholder definition of NamePrepTransform, used in the #else
 * branch of `#if !UCONFIG_NO_IDNA` (i.e. when UCONFIG_NO_IDNA is set).
 */
class NamePrepTransform {
};
|
2003-05-06 01:22:23 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_IDNA */
|
|
|
|
|
2003-02-28 21:37:55 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */
|