ICU-9014 and ICU-9015: scx defaults to {sc}, and that default is returned efficiently

X-SVN-Rev: 31327
This commit is contained in:
Markus Scherer 2012-02-03 23:36:05 +00:00
parent 30e3a9e8b2
commit 8d2ddad36c
3 changed files with 53 additions and 9 deletions

View File

@ -1,6 +1,6 @@
/*
********************************************************************************
* Copyright (C) 1996-2011, International Business Machines
* Copyright (C) 1996-2012, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@ -569,6 +569,10 @@ uscript_hasScript(UChar32 c, UScriptCode sc) {
if(sc==script) {
return TRUE;
}
if(sc>0x7fff) {
/* Guard against bogus input that would make us go past the Script_Extensions terminator. */
return FALSE;
}
while(sc>*scx) {
++scx;
}
@ -592,14 +596,19 @@ uscript_getScriptExtensions(UChar32 c,
}
scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
return 0;
if(capacity==0) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
} else {
scripts[0]=(UScriptCode)scriptX;
}
return 1;
}
length=0;
scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
scx=scriptExtensions+scx[1];
}
length=0;
do {
sx=*scx++;
if(length<capacity) {

View File

@ -477,7 +477,14 @@ uscript_hasScript(UChar32 c, UScriptCode sc);
/**
* Writes code point c's Script_Extensions as a list of UScriptCode values
* to the output scripts array.
* to the output scripts array and returns the number of script codes.
* - If c does have Script_Extensions, then the Script property value
* (normally Common or Inherited) is not included.
* - If c does not have Script_Extensions, then the one Script code is written to the output array.
* - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
* In other words, if the return value is 1,
* then the output array contains exactly c's single Script code.
* If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
*
* Some characters are commonly used in multiple scripts.
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
@ -495,9 +502,9 @@ uscript_hasScript(UChar32 c, UScriptCode sc);
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return number of script codes in c's Script_Extensions,
* @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
* @draft ICU 4.6
* @draft ICU 49
*/
U_DRAFT int32_t U_EXPORT2
uscript_getScriptExtensions(UChar32 c,

View File

@ -434,6 +434,10 @@ void TestHasScript() {
) {
log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
}
if(uscript_hasScript(0x0640, 0xaffe)) {
/* An unguarded implementation might go into an infinite loop. */
log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
}
}
void TestGetScriptExtensions() {
@ -472,14 +476,36 @@ void TestGetScriptExtensions() {
log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d != 3 - %s\n",
(int)length, u_errorName(errorCode));
}
/* U+063F has only a Script code, no Script_Extensions. */
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
(int)length, u_errorName(errorCode));
}
/* invalid code points */
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(-1, scripts, LENGTHOF(scripts), &errorCode);
if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
(int)length, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(0x110000, scripts, LENGTHOF(scripts), &errorCode);
if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
(int)length, u_errorName(errorCode));
}
/* normal usage */
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
if(U_FAILURE(errorCode) || length!=0) {
log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 0 - %s\n",
length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
(int)length, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
if(U_FAILURE(errorCode) || length!=3 ||
scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_SYRIAC || scripts[2]!=USCRIPT_MANDAIC
@ -487,11 +513,13 @@ void TestGetScriptExtensions() {
log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
(int)length, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(0xfdf2, scripts, LENGTHOF(scripts), &errorCode);
if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
(int)length, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
length=uscript_getScriptExtensions(0xff65, scripts, LENGTHOF(scripts), &errorCode);
if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",