2000-06-30 00:29:46 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2000, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: ushape.c
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2000jun29
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2000-08-12 01:01:03 +00:00
|
|
|
#include "unicode/uchar.h"
|
|
|
|
#include "unicode/ustring.h"
|
|
|
|
#include "cmemory.h"
|
2000-06-30 00:29:46 +00:00
|
|
|
#include "unicode/ushape.h"
|
|
|
|
|
2000-08-12 01:01:03 +00:00
|
|
|
#if UTF_SIZE<16
|
|
|
|
/*
|
|
|
|
* This implementation assumes that the internal encoding is UTF-16
|
|
|
|
* or UTF-32, not UTF-8.
|
|
|
|
* The main assumption is that the Arabic characters and their
|
|
|
|
* presentation forms each fit into a single UChar.
|
|
|
|
* With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
|
|
|
|
* characters.
|
|
|
|
*/
|
|
|
|
# error This implementation assumes UTF-16 or UTF-32 (check UTF_SIZE)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function shapes European digits to Arabic-Indic digits
|
|
|
|
* in-place, writing over the input characters.
|
|
|
|
* Since we know that we are only looking for BMP code points,
|
|
|
|
* we can safely just work with code units (again, at least UTF-16).
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
|
|
|
|
UChar digitBase,
|
|
|
|
UBool isLogical, UBool lastStrongWasAL) {
|
|
|
|
int32_t i;
|
|
|
|
UChar c;
|
|
|
|
|
|
|
|
digitBase-=0x30;
|
|
|
|
|
|
|
|
/* the iteration direction depends on the type of input */
|
|
|
|
if(isLogical) {
|
|
|
|
for(i=0; i<length; ++i) {
|
|
|
|
c=s[i];
|
|
|
|
switch(u_charDirection(c)) {
|
|
|
|
case U_LEFT_TO_RIGHT: /* L */
|
|
|
|
case U_RIGHT_TO_LEFT: /* R */
|
|
|
|
lastStrongWasAL=FALSE;
|
|
|
|
break;
|
|
|
|
case U_RIGHT_TO_LEFT_ARABIC: /* AL */
|
|
|
|
lastStrongWasAL=TRUE;
|
|
|
|
break;
|
|
|
|
case U_EUROPEAN_NUMBER: /* EN */
|
|
|
|
if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
|
2000-08-21 21:40:35 +00:00
|
|
|
s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
|
2000-08-12 01:01:03 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
default :
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for(i=length; i>0; /* pre-decrement in the body */) {
|
|
|
|
c=s[--i];
|
|
|
|
switch(u_charDirection(c)) {
|
|
|
|
case U_LEFT_TO_RIGHT: /* L */
|
|
|
|
case U_RIGHT_TO_LEFT: /* R */
|
|
|
|
lastStrongWasAL=FALSE;
|
|
|
|
break;
|
|
|
|
case U_RIGHT_TO_LEFT_ARABIC: /* AL */
|
|
|
|
lastStrongWasAL=TRUE;
|
|
|
|
break;
|
|
|
|
case U_EUROPEAN_NUMBER: /* EN */
|
|
|
|
if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
|
2000-08-21 21:40:35 +00:00
|
|
|
s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
|
2000-08-12 01:01:03 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
default :
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-06-30 00:29:46 +00:00
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
|
|
u_shapeArabic(const UChar *source, int32_t sourceLength,
|
|
|
|
UChar *dest, int32_t destSize,
|
|
|
|
uint32_t options,
|
|
|
|
UErrorCode *pErrorCode) {
|
2000-08-12 01:01:03 +00:00
|
|
|
/* usual error checking */
|
2000-06-30 00:29:46 +00:00
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2000-08-14 23:05:10 +00:00
|
|
|
/* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
|
|
|
|
if( source==NULL || sourceLength<-1 ||
|
|
|
|
dest==NULL && destSize!=0 || destSize<0 ||
|
2000-08-12 01:01:03 +00:00
|
|
|
options>=U_SHAPE_DIGIT_TYPE_RESERVED ||
|
|
|
|
(options&U_SHAPE_LENGTH_MASK)==U_SHAPE_LENGTH_RESERVED ||
|
|
|
|
(options&U_SHAPE_LETTERS_MASK)==U_SHAPE_LETTERS_RESERVED ||
|
|
|
|
(options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED
|
|
|
|
) {
|
|
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* determine the source length */
|
|
|
|
if(sourceLength==-1) {
|
|
|
|
sourceLength=u_strlen(source);
|
|
|
|
}
|
|
|
|
if(sourceLength==0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check that source and destination do not overlap */
|
2000-08-14 23:05:10 +00:00
|
|
|
if( dest!=NULL &&
|
|
|
|
(source<=dest && dest<source+sourceLength ||
|
|
|
|
dest<=source && source<dest+destSize)
|
2000-08-12 01:01:03 +00:00
|
|
|
) {
|
|
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
|
|
|
|
/* currently, only number shaping is supported */
|
|
|
|
*pErrorCode=U_UNSUPPORTED_ERROR;
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* No letter shaping:
|
|
|
|
* just make sure the destination is large enough and copy the string.
|
|
|
|
*/
|
|
|
|
if(destSize<sourceLength) {
|
2000-08-14 23:05:10 +00:00
|
|
|
/* this catches preflighting, too */
|
2000-08-12 01:01:03 +00:00
|
|
|
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
|
|
|
return sourceLength;
|
|
|
|
}
|
|
|
|
uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
|
|
|
|
destSize=sourceLength;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Perform number shaping.
|
|
|
|
* With UTF-16 or UTF-32, the length of the string is constant.
|
|
|
|
* The easiest way to do this is to operate on the destination and
|
|
|
|
* "shape" the digits in-place.
|
|
|
|
*/
|
|
|
|
if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
|
|
|
|
UChar digitBase;
|
|
|
|
int32_t i;
|
|
|
|
|
|
|
|
/* select the requested digit group */
|
|
|
|
switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
|
|
|
|
case U_SHAPE_DIGIT_TYPE_AN:
|
|
|
|
digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
|
|
|
|
break;
|
|
|
|
case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
|
|
|
|
digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* will never occur because of validity checks above */
|
2000-08-21 21:40:35 +00:00
|
|
|
digitBase=0;
|
2000-08-12 01:01:03 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* perform the requested operation */
|
|
|
|
switch(options&U_SHAPE_DIGITS_MASK) {
|
|
|
|
case U_SHAPE_DIGITS_EN2AN:
|
|
|
|
/* add (digitBase-'0') to each European (ASCII) digit code point */
|
|
|
|
digitBase-=0x30;
|
|
|
|
for(i=0; i<destSize; ++i) {
|
|
|
|
if(((uint32_t)dest[i]-0x30)<10) {
|
|
|
|
dest[i]+=digitBase;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case U_SHAPE_DIGITS_AN2EN:
|
|
|
|
/* subtract (digitBase-'0') from each Arabic digit code point */
|
|
|
|
for(i=0; i<destSize; ++i) {
|
|
|
|
if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
|
|
|
|
dest[i]-=digitBase-0x30;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
|
|
|
|
_shapeToArabicDigitsWithContext(dest, destSize,
|
|
|
|
digitBase,
|
|
|
|
(UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
|
|
|
|
FALSE);
|
|
|
|
break;
|
|
|
|
case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
|
|
|
|
_shapeToArabicDigitsWithContext(dest, destSize,
|
|
|
|
digitBase,
|
|
|
|
(UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
|
|
|
|
TRUE);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* will never occur because of validity checks above */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return destSize;
|
2000-06-30 00:29:46 +00:00
|
|
|
}
|