ICU-750 implement u_strcmpCodePointOrder()

X-SVN-Rev: 3469
This commit is contained in:
Markus Scherer 2001-01-24 02:36:41 +00:00
parent 43827ea5e2
commit cf1860d22d
2 changed files with 48 additions and 1 deletions

View File

@ -95,7 +95,7 @@ U_CAPI UChar * U_EXPORT2
u_strchr32(const UChar *s, UChar32 c);
/**
* Compare two ustrings for bitwise equality.
* Compare two Unicode strings for bitwise equality (code unit order).
*
* @param s1 A string to compare.
* @param s2 A string to compare.
@ -108,6 +108,25 @@ U_CAPI int32_t U_EXPORT2
u_strcmp(const UChar *s1,
const UChar *s2);
/**
* Compare two Unicode strings in code point order.
* This is different in UTF-16 from u_strcmp() if supplementary characters are present:
* In UTF-16, supplementary characters (with code points U+10000 and above) are
* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
* which means that, in code unit order, they compare as less than some BMP
* characters like U+feff even though their code points are greater.
* This function compares Unicode strings in code point order instead.
* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
* then the result is not defined.
*
* @param s1 A NUL-terminated string to compare.
* @param s2 A NUL-terminated string to compare.
* @return a negative/zero/positive integer corresponding to whether
* the first string is less than/equal to/greater than the second one
* in code point order
* @draft
*/
U_CAPI int32_t U_EXPORT2
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
/**
* Compare two ustrings for bitwise equality.
* Compares at most <TT>n</TT> characters.

View File

@ -150,6 +150,34 @@ u_strcmp(const UChar *s1,
}
}
/*
 * Compare two NUL-terminated UTF-16 strings in code point order.
 * (u_strcmp() compares in plain code unit order instead.)
 *
 * Each code unit is remapped before comparison so that surrogate code units
 * sort above every other code unit; the remapped values then order the same
 * way the corresponding code points would.
 *
 * Returns the difference between the first pair of remapped code units that
 * differ, or 0 if both strings end without a difference.
 */
U_CAPI int32_t U_EXPORT2
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
    /*
     * Offset to add to a code unit, indexed by the unit's top 5 bits (unit>>11).
     *   index 27     (0xd800..0xdfff, surrogates): +0x2000 -> 0xf800..0xffff
     *   index 28..31 (0xe000..0xffff):             +0xf800 -> 0xd800..0xf7ff
     * The second mapping relies on the sum wrapping modulo 0x10000,
     * i.e. it assumes UChar is a 16-bit unsigned type.
     */
    static const UChar rotation[32]={
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0x2000,
        0xf800, 0xf800, 0xf800, 0xf800
    };
    UChar unit1, unit2;

    for(;; ++s1, ++s2) {
        unit1=*s1;
        unit2=*s2;

        /* move surrogates to the top of the 16-bit range */
        unit1+=rotation[unit1>>11];
        unit2+=rotation[unit2>>11];

        if(unit1!=unit2) {
            /* first difference decides the result */
            return (int32_t)unit1-(int32_t)unit2;
        }
        if(unit1==0) {
            /* both strings ended at the same position (unit2 is 0 as well) */
            return 0;
        }
    }
}
int32_t
u_strncmp(const UChar *s1,
const UChar *s2,