ICU-750 implement u_strcmpCodePointOrder()
X-SVN-Rev: 3469
This commit is contained in:
parent
43827ea5e2
commit
cf1860d22d
@ -95,7 +95,7 @@ U_CAPI UChar * U_EXPORT2
|
||||
u_strchr32(const UChar *s, UChar32 c);
|
||||
|
||||
/**
|
||||
* Compare two ustrings for bitwise equality.
|
||||
* Compare two Unicode strings for bitwise equality (code unit order).
|
||||
*
|
||||
* @param s1 A string to compare.
|
||||
* @param s2 A string to compare.
|
||||
@ -108,6 +108,25 @@ U_CAPI int32_t U_EXPORT2
|
||||
u_strcmp(const UChar *s1,
|
||||
const UChar *s2);
|
||||
|
||||
/**
|
||||
* Compare two Unicode strings in code point order.
|
||||
* This is different in UTF-16 from u_strcmp() if supplementary characters are present:
|
||||
* In UTF-16, supplementary characters (with code points U+10000 and above) are
|
||||
* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
|
||||
* which means that they compare as less than some other BMP characters like U+feff.
|
||||
* This function compares Unicode strings in code point order.
|
||||
* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
|
||||
*
|
||||
* @param s1 A string to compare.
|
||||
* @param s2 A string to compare.
|
||||
* @return a negative/zero/positive integer corresponding to whether
|
||||
* the first string is less than/equal to/greater than the second one
|
||||
* in code point order
|
||||
* @draft
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
|
||||
|
||||
/**
|
||||
* Compare two ustrings for bitwise equality.
|
||||
* Compares at most <TT>n</TT> characters.
|
||||
|
@ -150,6 +150,34 @@ u_strcmp(const UChar *s1,
|
||||
}
|
||||
}
|
||||
|
||||
/* String compare in code point order - u_strcmp() compares in code unit order. */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
|
||||
static const UChar utf16Fixup[32]={
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0x2000, 0xf800, 0xf800, 0xf800, 0xf800
|
||||
};
|
||||
UChar c1, c2;
|
||||
int32_t diff;
|
||||
|
||||
/* rotate each code unit's value so that surrogates get the highest values */
|
||||
for(;;) {
|
||||
c1=*s1;
|
||||
c1+=utf16Fixup[c1>>11]; /* additional "fix-up" line */
|
||||
c2=*s2;
|
||||
c2+=utf16Fixup[c2>>11]; /* additional "fix-up" line */
|
||||
|
||||
/* now c1 and c2 are in UTF-32-compatible order */
|
||||
diff=(int32_t)c1-(int32_t)c2;
|
||||
if(diff!=0 || c1==0 /* redundant: || c2==0 */) {
|
||||
return diff;
|
||||
}
|
||||
++s1;
|
||||
++s2;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t
|
||||
u_strncmp(const UChar *s1,
|
||||
const UChar *s2,
|
||||
|
Loading…
Reference in New Issue
Block a user