scuffed-code/icu4c/source/common/strprep.h
2003-05-06 01:37:52 +00:00

366 lines
12 KiB
C++

/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: strprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#ifndef STRPREP_H
#define STRPREP_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "unicode/uobject.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
U_NAMESPACE_BEGIN
/**\file
*
* This API implements RF 3454 StringPrep standard.
*
* The steps for preparing strings are:
*
* 1) Map -- For each character in the input, check if it has a mapping
* and, if so, replace it with its mapping.
* <ul>
* <li>Delete certain codepoints from the input because their
* presence or absence in the protocol identifies should not
* make two strings different</li>
* <li>Case Mapings
* <br>If Normalization is turned off
* <br> Get mappings from case map tables
* <br>else
* <br> Get mappings from case map tables for normalization
* <br> Use u_getFC_NFKC_Closure for obtaining extra mappings
* </li>
* </ul>
* 2) Normalize -- Possibly normalize the result of step 1 using Unicode
* normalization NFKC.
*
* 3) Prohibit -- Check for any characters that are not allowed in the
* output. If any are found, return an error.
*
* 4) Check bidi -- Possibly check for right-to-left characters, and if
* any are found, make sure that the whole string satisfies the
* requirements for bidirectional strings. If the string does not
* satisfy the requirements for bidirectional strings, return an
* error.
*
* Some StringPrep profiles:
* IDN: "Nameprep" http://www.ietf.org/rfc/rfc3491.txt
* XMPP Node Identifiers: "Nodeprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
* XMPP Resource Identifiers: "Resourceprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
* ANONYMOUS SASL tokens: "plain" http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
* iSCSI http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-03.txt
*/
class StringPrep : public UObject{
protected:
UVersionInfo unicodeVersion; /** The Character repertoire version of this profile */
UBool bidiCheck; /** Option to turn BiDi checking on */
UBool doNFKC; /** Option to turn NFKC on */
/**
* Protected default constructor sub classes
*/
StringPrep(){};
public:
/**
* Destructor
*/
virtual inline ~StringPrep(){};
/**
* Map every character in input stream with mapping character
* in the mapping table and populate the output stream.
* For any individual character the mapping table may specify
* that that a character be mapped to nothing, mapped to one
* other character or to a string of other characters.
*
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
* If TRUE unassigned values are treated as normal Unicode code point.
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*
*/
virtual int32_t map(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UBool allowUnassigned,
UParseError* parseError,
UErrorCode& status );
/**
* Normalize the input stream using Normalization Form KC (NFKC)
*
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*
*
*/
virtual int32_t normalize( const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UErrorCode& status );
/**
* Prepare the input stream with for use. This operation maps, normalizes(NFKC),
* checks for prohited and BiDi characters in the order defined by RFC 3454
*
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
* If TRUE unassigned values are treated as normal Unicode code point.
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*
*
*/
virtual int32_t process(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UBool allowUnassigned,
UParseError* parseError,
UErrorCode& status );
/**
* Create a profile from prebuilt default Nameprep profile conforming to
* nameprep internet draft (http://www.ietf.org/html.charters/idn-charter.html).
* This is a built-in/unmodifiable profile.
*
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return Pointer to StringPrep object that is created. Should be deleted by
* by caller
*
*
*/
static StringPrep* createNameprepInstance(UErrorCode& status);
/**
* Create a profile from prebuilt default StringPrep profile conforming to
* RFC 3454 (ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt).
* User defined profiles can be created by getting the default profile and
* adding mappings, removing mappings, turning options ON/OFF and prohibiting
* characters from the output.
*
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return Pointer to StringPrep object that is created. Should be deleted by
* the caller.
*
*
*/
static StringPrep* createDefaultInstance(UErrorCode& status);
/**
* Ascertain if the given code point is a Letter/Digit/Hyphen in the ASCII range
*
* @return TRUE is the code point is a Letter/Digit/Hyphen
*
*
*/
static inline UBool isLDHChar(UChar32 ch);
/**
* Ascertain if the given code point is a label separator as specified by IDNA
*
* @return TRUE is the code point is a label separator
*
*
*/
virtual UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
/**
* Get the BiDi option of this profile
*
*
*/
inline UBool getCheckBiDi();
/**
* Get the normalization (NFKC) option of this profile
*
* @return The normalization option
*
*
*/
inline UBool getNormalization();
/**
* Get the Unicode version which this profile
* conforms to
*
*
*/
inline void getUnicodeVersion(UVersionInfo& info);
private:
// Boiler plate
/**
* Copy constructor.
*
*/
StringPrep(const StringPrep&);
/**
* Assignment operator.
*
*/
StringPrep& operator=(const StringPrep&);
/**
* Return true if another object is semantically equal to this one.
*
* @param other the object to be compared with.
* @return true if another object is semantically equal to this one.
*
*/
UBool operator==(const StringPrep& other) const {return FALSE;};
/**
* Return true if another object is semantically unequal to this one.
*
* @param other the object to be compared with.
* @return true if another object is semantically unequal to this one.
*
*/
UBool operator!=(const StringPrep& other) const { return !operator==(other); }
public:
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*
*
*/
static inline UClassID getStaticClassID();
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
*
*/
virtual inline UClassID getDynamicClassID() const;
protected:
/**
* Sub classes that slightly modify the default profile
* implement this method to remove characters to
* the prohibited list. The default implementation does not
* check if the data is loaded or not. The caller is responsible
* for checking for data.
*
*/
virtual UBool isNotProhibited(UChar32 ch);
/**
* Sub classes that slightly modify the default profile
* implement this method to remove characters to
* the unassigned list. The default implementation does not
* check if the data is loaded or not. The caller is responsible
* for checking for data.
*/
virtual UBool isUnassigned(UChar32 ch);
/**
* Ascertains if uidna.icu data file is loaded.
* If data is not loaded, loads the data file.
*
*
*/
static UBool isDataLoaded(UErrorCode& status);
private:
/**
* The address of this static class variable serves as this class's ID
* for ICU "poor man's RTTI".
*/
static const char fgClassID;
};
inline UBool StringPrep::getCheckBiDi(){
return bidiCheck;
}
inline UBool StringPrep::getNormalization(){
return doNFKC;
}
inline void StringPrep::getUnicodeVersion(UVersionInfo& info){
for(int32_t i=0; i< (int32_t)(sizeof(info)/sizeof(info[0])); i++){
info[i] = unicodeVersion[i];
}
}
inline UClassID StringPrep::getStaticClassID() {
return (UClassID)&fgClassID;
}
inline UClassID StringPrep::getDynamicClassID() const {
return getStaticClassID();
}
inline UBool StringPrep::isLDHChar(UChar32 ch){
// high runner case
if(ch>0x007A){
return FALSE;
}
//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
if( (ch==0x002D) ||
(0x0030 <= ch && ch <= 0x0039) ||
(0x0041 <= ch && ch <= 0x005A) ||
(0x0061 <= ch && ch <= 0x007A)
){
return TRUE;
}
return FALSE;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_IDNA */
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/