1870215131
X-SVN-Rev: 40527
704 lines
15 KiB
C++
704 lines
15 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File line.cpp
*
* Modification History:
*
*   Date        Name        Description
*   03/18/2003  weiv        Creation.
*******************************************************************************
*/
|
|
|
|
#include "line.h"
|
|
#include <stdio.h>
|
|
|
|
// Shared set of characters that force quoting in toBundleString().
// Starts out NULL and is allocated there on first use.
UnicodeSet * Line::needsQuoting = NULL;
|
|
|
|
void
|
|
Line::init()
|
|
{
|
|
len = 0;
|
|
expLen = 0;
|
|
strength = UCOL_OFF;
|
|
strengthFromEmpty = UCOL_OFF;
|
|
cumulativeStrength = UCOL_OFF;
|
|
expStrength = UCOL_OFF;
|
|
previous = NULL;
|
|
next = NULL;
|
|
left = NULL;
|
|
right = NULL;
|
|
isContraction = FALSE;
|
|
isExpansion = FALSE;
|
|
isRemoved = FALSE;
|
|
isReset = FALSE;
|
|
expIndex = 0;
|
|
firstCC = 0;
|
|
lastCC = 0;
|
|
sortKey = NULL;
|
|
}
|
|
|
|
/**
 * Default constructor: zero-filled name and expansion buffers and
 * default values for every other member.
 */
Line::Line()
{
    memset(expansionString, 0, 25*sizeof(UChar));
    memset(name, 0, 25*sizeof(UChar));
    init();
}
|
|
|
|
/**
 * Constructs a line from the first len UTF-16 code units of name.
 *
 * The stored name is always NUL-terminated, because other members
 * (operator=, operator==, setToConcat) treat it as a C-style string.
 * Input longer than the 25-unit buffer allows is truncated to 24 units;
 * a NULL pointer or a non-positive length yields an empty line.
 *
 * @param name source code units (need not be NUL-terminated)
 * @param len  number of code units to take from name
 */
Line::Line(const UChar* name, int32_t len)
{
    const int32_t kCapacity = 25;   // size of the name buffer, terminator included

    init();
    memset(this->name, 0, kCapacity*sizeof(UChar));
    memset(expansionString, 0, kCapacity*sizeof(UChar));

    if(name == NULL || len <= 0) {
        // nothing to copy; init() already left len, firstCC and lastCC at 0
        return;
    }
    if(len > kCapacity-1) {
        len = kCapacity-1;
    }

    this->len = len;
    u_memcpy(this->name, name, len);
    this->name[len] = 0;

    UChar32 c;
    U16_GET(name, 0, 0, len, c);
    firstCC = u_getCombiningClass(c);
    U16_GET(name, 0, len-1, len, c);
    lastCC = u_getCombiningClass(c);
}
|
|
|
|
/**
 * Constructs a line whose name is the single code unit passed in.
 * Both combining classes are taken from that code unit.
 */
Line::Line(const UChar name)
{
    init();
    memset(expansionString, 0, 25*sizeof(UChar));

    this->name[0] = name;
    this->name[1] = 0;
    len = 1;

    lastCC = firstCC = u_getCombiningClass(name);
}
|
|
|
|
/**
 * Constructs a line whose name is the contents of string.
 *
 * Both text buffers are zero-filled first. init() does not touch them,
 * and the expansion buffer is later read as a NUL-terminated string by
 * operator= and equals().
 */
Line::Line(const UnicodeString &string)
{
    init();
    memset(name, 0, 25*sizeof(UChar));
    memset(expansionString, 0, 25*sizeof(UChar));
    setTo(string);
}
|
|
|
|
/**
 * Constructs a line from its textual form, parsed by initFromString().
 *
 * initFromString() does not assign every member (sortKey, for example),
 * so the object is put into the default state and both text buffers are
 * zero-filled before parsing.
 *
 * @param buff    textual record to parse
 * @param buffLen passed through to initFromString()
 * @param status  passed through to initFromString()
 */
Line::Line(const char *buff, int32_t buffLen, UErrorCode &status) :
previous(NULL),
next(NULL),
left(NULL),
right(NULL)
{
    init();
    memset(name, 0, 25*sizeof(UChar));
    memset(expansionString, 0, 25*sizeof(UChar));
    initFromString(buff, buffLen, status);
}
|
|
|
|
/**
 * Copy constructor. The links start out NULL and the rest of the state
 * is taken over through the assignment operator.
 */
Line::Line(const Line &other)
  : previous(NULL), next(NULL), left(NULL), right(NULL)
{
    operator=(other);
}
|
|
|
|
/**
 * Assignment operator.
 *
 * Copies the text, strengths, flags, combining classes, sortKey pointer
 * and the left/right links. The previous/next links of this object are
 * left as they are. Self-assignment is a no-op, which also avoids
 * running u_strcpy() with identical source and destination buffers.
 */
Line &
Line::operator=(const Line &other) {
    if(this == &other) {
        return *this;
    }

    len = other.len;
    expLen = other.expLen;
    strength = other.strength;
    strengthFromEmpty = other.strengthFromEmpty;
    cumulativeStrength = other.cumulativeStrength;
    expStrength = other.expStrength;
    isContraction = other.isContraction;
    isExpansion = other.isExpansion;
    isRemoved = other.isRemoved;
    isReset = other.isReset;
    expIndex = other.expIndex;
    firstCC = other.firstCC;
    lastCC = other.lastCC;

    // both buffers are copied as NUL-terminated strings
    u_strcpy(name, other.name);
    u_strcpy(expansionString, other.expansionString);

    // the sort key pointer is copied as is, not the bytes it points to
    sortKey = other.sortKey;
    left = other.left;
    right = other.right;
    return *this;
}
|
|
|
|
/**
 * Two lines compare equal when they are the same object or when their
 * name lengths and names match. Nothing else is compared.
 */
UBool
Line::operator==(const Line &other) const {
    if(this != &other) {
        if(len != other.len || u_strcmp(name, other.name) != 0) {
            return FALSE;
        }
    }
    return TRUE;
}
|
|
|
|
/**
 * Stricter comparison than operator==: besides the name and its length,
 * the strength, the expansion length and the expansion text must match.
 */
UBool
Line::equals(const Line &other) const {
    if(this == &other) {
        return TRUE;
    }

    // name part
    if(len != other.len || u_strcmp(name, other.name) != 0) {
        return FALSE;
    }

    // strength and expansion part
    if(strength != other.strength || expLen != other.expLen) {
        return FALSE;
    }
    return (u_strcmp(expansionString, other.expansionString) == 0) ? TRUE : FALSE;
}
|
|
|
|
/** Negation of operator==. */
UBool
Line::operator!=(const Line &other) const {
    return (*this == other) ? FALSE : TRUE;
}
|
|
|
|
|
|
/** Destructor; performs no cleanup of its own. */
Line::~Line()
{
}
|
|
|
|
void
|
|
Line::copyArray(Line *dest, const Line *src, int32_t size) {
|
|
int32_t i = 0;
|
|
for(i = 0; i < size; i++) {
|
|
dest[i] = src[i];
|
|
}
|
|
}
|
|
|
|
void
|
|
Line::setName(const UChar* name, int32_t len) {
|
|
this->len = len;
|
|
u_memcpy(this->name, name, len);
|
|
UChar32 c;
|
|
U16_GET(name, 0, 0, len, c);
|
|
firstCC = u_getCombiningClass(c);
|
|
U16_GET(name, 0, len-1, len, c);
|
|
lastCC = u_getCombiningClass(c);
|
|
}
|
|
|
|
void
|
|
Line::setToConcat(const Line *first, const Line *second) {
|
|
u_strcpy(name, first->name);
|
|
u_strcat(name, second->name);
|
|
len = first->len + second->len;
|
|
firstCC = first->firstCC;
|
|
lastCC = second->lastCC;
|
|
}
|
|
|
|
/**
 * Builds a description of a UTF-16 string: first the hex value of each
 * code point (4 digits for BMP, 6 digits otherwise, each followed by a
 * space), then the extended character name of each code point in the
 * form "{NAME} ".
 *
 * Each name lookup uses a fresh error code, so one failed lookup does
 * not turn every following lookup into a no-op. A failed lookup yields
 * an empty name "{} " instead of stale buffer contents.
 *
 * @param string code units to describe
 * @param len    number of code units in string
 * @return the description text
 */
UnicodeString
Line::stringToName(UChar *string, int32_t len) {
    UnicodeString result;
    char buffer[256];
    int32_t i = 0;
    UChar32 c;

    // pass 1: hex values
    while(i < len) {
        U16_NEXT(string, i, len, c);
        if(c < 0x10000) {
            snprintf(buffer, sizeof(buffer), "%04X ", (unsigned int)c);
        } else {
            snprintf(buffer, sizeof(buffer), "%06X ", (unsigned int)c);
        }
        result.append(buffer);
    }

    // pass 2: character names
    i = 0;
    while(i < len) {
        U16_NEXT(string, i, len, c);
        UErrorCode status = U_ZERO_ERROR;
        u_charName(c, U_EXTENDED_CHAR_NAME, buffer, (int32_t)sizeof(buffer), &status);
        if(U_FAILURE(status)) {
            buffer[0] = 0;
        }
        // guard against a name that exactly fills the buffer without a terminator
        buffer[sizeof(buffer)-1] = 0;
        result.append("{");
        result.append(buffer);
        result.append("} ");
    }
    return result;
}
|
|
|
|
/**
 * Formats this line as one quoted rule string followed by a "//" comment
 * that holds the code points and character names (see stringToName()).
 *
 * The name is written in NFC. If normalization fails or does not fit the
 * local buffer, the un-normalized name is used, so the code never reads
 * more code units than the buffer holds. The error code is reset between
 * creating the quoting set and normalizing, so a failure in the first
 * does not silently skip the second.
 *
 * @return the formatted line, terminated by a newline
 */
UnicodeString
Line::toBundleString()
{
    UnicodeString result;
    UErrorCode status = U_ZERO_ERROR;

    // created once, on first use
    if(!needsQuoting) {
        needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", status);
    }

    UChar NFC[50];
    status = U_ZERO_ERROR;
    int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
    if(U_FAILURE(status) || NFCLen < 0 || NFCLen >= 50) {
        // fall back to the raw name
        NFCLen = (len > 0 && len < 50) ? len : 0;
        u_memcpy(NFC, name, NFCLen);
    }
    NFC[NFCLen] = 0;

    result.append("\"");
    if(isReset) {
        result.append("&");
    } else {
        result.append(strengthToString(strength, FALSE, FALSE));
    }

    UBool quote = needsQuoting != NULL &&
        (needsQuoting->containsSome(name) || needsQuoting->containsSome(NFC));
    if(quote) {
        result.append("'");
    }
    if(NFC[0] == 0x22) {
        // a leading double quote is written as an escape; nothing else is appended in that case
        result.append("\\u0022");
    } else {
        result.append(NFC, NFCLen);
    }
    // NOTE(review): no closing apostrophe when the text itself starts with one - confirm this is intended
    if(quote && NFC[0] != 0x0027) {
        result.append("'");
    }

    if(expLen && !isReset) {
        quote = needsQuoting != NULL && needsQuoting->containsSome(expansionString);
        result.append(" / ");
        if(quote) {
            result.append("'");
        }
        result.append(expansionString);
        if(quote) {
            result.append("'");
        }
    }
    result.append("\" //");

    // trailing comment: code points and names
    result.append(stringToName(NFC, NFCLen));
    if(expLen && !isReset) {
        result.append(" / ");
        result.append(stringToName(expansionString, expLen));
    }
    result.append("\n");
    return result;
}
|
|
|
|
UnicodeString
|
|
Line::toHTMLString()
|
|
{
|
|
UnicodeString result;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
UChar NFC[50];
|
|
int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
|
|
result.append("<span title=\"");
|
|
result.append(stringToName(NFC, NFCLen));
|
|
if(expLen && !isReset) {
|
|
result.append(" / ");
|
|
result.append(stringToName(expansionString, expLen));
|
|
}
|
|
result.append("\">");
|
|
if(isReset) {
|
|
result.append("&");
|
|
} else {
|
|
result.append(strengthToString(strength, FALSE, TRUE));
|
|
}
|
|
result.append(NFC, NFCLen);
|
|
if(expLen && !isReset) {
|
|
result.append(" / ");
|
|
result.append(expansionString);
|
|
}
|
|
result.append("</span><br>\n");
|
|
return result;
|
|
}
|
|
|
|
/**
 * Returns the line as text.
 *
 * Plain form: the name, followed by "/" and the expansion if there is
 * one. Pretty form: the NFC form of the name (with the same optional
 * expansion), then " # " and the code points and character names of the
 * name and of the expansion.
 *
 * In the pretty form, if normalization fails or does not fit the local
 * buffer, the un-normalized name is used, so the code never reads more
 * code units than the buffer holds.
 *
 * @param pretty selects the annotated form
 * @return the formatted text
 */
UnicodeString
Line::toString(UBool pretty) {
    UnicodeString result;
    if(!pretty) {
        result.setTo(name);
        if(expLen) {
            result.append("/");
            result.append(expansionString);
        }
        return result;
    }

    UErrorCode status = U_ZERO_ERROR;
    UChar NFC[50];
    int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
    if(U_FAILURE(status) || NFCLen < 0 || NFCLen >= 50) {
        // fall back to the raw name
        NFCLen = (len > 0 && len < 50) ? len : 0;
        u_memcpy(NFC, name, NFCLen);
    }
    NFC[NFCLen] = 0;

    result.setTo(NFC, NFCLen);
    if(expLen) {
        result.append("/");
        result.append(expansionString);
    }

    // annotation: code points and names
    result.append(" # ");
    result.append(stringToName(NFC, NFCLen));
    if(expLen) {
        result.append("/ ");
        result.append(stringToName(expansionString, expLen));
    }
    return result;
}
|
|
|
|
|
|
void
|
|
Line::setTo(const UnicodeString &string) {
|
|
int32_t len = string.length();
|
|
u_strncpy(name, string.getBuffer(), len);
|
|
name[len] = 0;
|
|
this->len = len;
|
|
UChar32 c;
|
|
U16_GET(name, 0, 0, len, c);
|
|
firstCC = u_getCombiningClass(c);
|
|
U16_GET(name, 0, len-1, len, c);
|
|
lastCC = u_getCombiningClass(c);
|
|
}
|
|
|
|
void
|
|
Line::setTo(const UChar32 n) {
|
|
UBool isError = FALSE;
|
|
len = 0; // we are setting the line to char, not appending
|
|
U16_APPEND(name, len, 25, n, isError);
|
|
name[len] = 0;
|
|
firstCC = u_getCombiningClass(n);
|
|
lastCC = firstCC;
|
|
}
|
|
|
|
|
|
/**
 * Appends indentation for the given strength to result and returns a
 * copy of it. The indent level is strength+1, except that
 * UCOL_IDENTICAL uses level 5; each level is indentSize appends of " ".
 * Strengths above UCOL_IDENTICAL add nothing.
 */
UnicodeString
Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result)
{
    if(strength <= UCOL_IDENTICAL) {
        const int numIndents = (strength == UCOL_IDENTICAL) ? 5 : strength+1;
        for(int remaining = numIndents*indentSize; remaining > 0; --remaining) {
            result.append(" ");
        }
    }
    return result;
}
|
|
|
|
/**
 * Returns the relation text for a collation strength.
 *
 * In HTML mode the angle brackets are written as the entities &lt; and
 * &gt;, so the result can be embedded in markup; the pretty flag is
 * ignored there. In plain mode, pretty adds one extra leading " " for
 * the identical through primary relations.
 *
 * @param strength relation strength
 * @param pretty   plain mode only: add the extra leading space
 * @param html     produce HTML-safe text
 * @return the relation text; " ?! " for strengths not listed below
 */
UnicodeString
Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) {
    const char *plain = NULL;       // text for plain output
    const char *escaped = NULL;     // same text with HTML entities
    UBool padWhenPretty = TRUE;     // only real relations get the pretty padding

    switch(strength) {
    case UCOL_IDENTICAL:
        plain = " = ";
        escaped = " = ";
        break;
    case UCOL_QUATERNARY:
        plain = " <<<< ";
        escaped = " &lt;&lt;&lt;&lt; ";
        break;
    case UCOL_TERTIARY:
        plain = " <<< ";
        escaped = " &lt;&lt;&lt; ";
        break;
    case UCOL_SECONDARY:
        plain = " << ";
        escaped = " &lt;&lt; ";
        break;
    case UCOL_PRIMARY:
        plain = " < ";
        escaped = " &lt; ";
        break;
    case UCOL_OFF:
        plain = " >? ";
        escaped = " &gt;? ";
        padWhenPretty = FALSE;
        break;
    default:
        plain = " ?! ";
        escaped = " ?! ";
        padWhenPretty = FALSE;
        break;
    }

    UnicodeString result;
    if(html) {
        result.append(escaped);
    } else {
        if(pretty && padWhenPretty) {
            result.append(" ");
        }
        result.append(plain);
    }
    return result;
}
|
|
|
|
/**
 * Walks the next links starting after this line and returns the first
 * line whose strength is UCOL_IDENTICAL, or NULL if the chain ends
 * before one is found.
 */
Line *
Line::nextInteresting() {
    Line *candidate;
    for(candidate = next; candidate != NULL; candidate = candidate->next) {
        if(candidate->strength == UCOL_IDENTICAL) {
            break;
        }
    }
    return candidate;
}
|
|
|
|
void
|
|
Line::append(const UChar* n, int32_t length)
|
|
{
|
|
u_strncat(name, n, length);
|
|
name[len+length] = 0;
|
|
len += length;
|
|
UChar32 end;
|
|
U16_GET(n, 0, length-1, length, end);
|
|
lastCC = u_getCombiningClass(end);
|
|
}
|
|
|
|
void
|
|
Line::append(const UChar n)
|
|
{
|
|
name[len] = n;
|
|
name[len+1] = 0;
|
|
len++;
|
|
lastCC = u_getCombiningClass(n);
|
|
}
|
|
|
|
void
|
|
Line::append(const Line &l)
|
|
{
|
|
append(l.name, l.len);
|
|
lastCC = l.lastCC;
|
|
}
|
|
|
|
void
|
|
Line::clear()
|
|
{
|
|
name[0] = 0;
|
|
len = 0;
|
|
}
|
|
|
|
/**
 * Serializes this line as text into buff and returns the number of
 * characters written, not counting the terminating NUL. The capacity and
 * error code arguments are ignored, so buff has to be large enough for
 * the whole record.
 *
 * Layout: the name code units as 4-digit hex separated by spaces, "/",
 * the expansion code units in the same form (only if expLen is not 0),
 * "; ", the strength as a 2-digit integer and a space, then
 * strengthFromEmpty, cumulativeStrength and expStrength as 2-digit
 * integers, the isRemoved and isReset flags and a space, and finally
 * expIndex as 8 hex digits. The other flags and the combining classes
 * are not written.
 */
int32_t
Line::write(char *buff, int32_t, UErrorCode &)
{
    char *out = buff;   // write position; advanced by the fixed width of each field
    int32_t idx;

    // name: the first unit is always written, whatever len is
    sprintf(out, "%04X", name[0]);
    out += 4;
    for(idx = 1; idx < len; ++idx) {
        sprintf(out, " %04X", name[idx]);
        out += 5;
    }
    sprintf(out, "/");
    out += 1;

    // expansion, if any
    if(expLen) {
        sprintf(out, "%04X", expansionString[0]);
        out += 4;
        for(idx = 1; idx < expLen; ++idx) {
            sprintf(out, " %04X", expansionString[idx]);
            out += 5;
        }
    }
    sprintf(out, "; ");
    out += 2;

    // strengths
    sprintf(out, "%02i ", strength);
    out += 3;
    sprintf(out, "%02i", strengthFromEmpty);
    out += 2;
    sprintf(out, "%02i", cumulativeStrength);
    out += 2;
    sprintf(out, "%02i", expStrength);
    out += 2;

    // flags: only isRemoved and isReset are stored
    sprintf(out, "%1i%1i ", isRemoved, isReset);
    out += 3;

    // expansion index
    sprintf(out, "%08X", expIndex);
    out += 8;

    *out = 0;
    return (int32_t)(out - buff);
}
|
|
|
|
void
|
|
Line::initFromString(const char *buff, int32_t, UErrorCode &)
|
|
{
|
|
int32_t bufIndex = 0;
|
|
int32_t i = 0;
|
|
|
|
sscanf(buff+bufIndex, "%04X", &name[i]);
|
|
i++;
|
|
bufIndex += 4;
|
|
while(buff[bufIndex] != '/') {
|
|
sscanf(buff+bufIndex, " %04X", &name[i]);
|
|
i++;
|
|
bufIndex += 5;
|
|
}
|
|
len = i;
|
|
name[len] = 0;
|
|
bufIndex++;
|
|
|
|
if(i > 1) {
|
|
isContraction = TRUE;
|
|
} else {
|
|
isContraction = FALSE;
|
|
}
|
|
|
|
if(buff[bufIndex] == ';') {
|
|
isExpansion = FALSE;
|
|
bufIndex += 2;
|
|
expansionString[0] = 0;
|
|
expLen = 0;
|
|
} else {
|
|
i = 0;
|
|
sscanf(buff+bufIndex, "%04X", &expansionString[i]);
|
|
i++;
|
|
bufIndex += 4;
|
|
while(buff[bufIndex] != ';') {
|
|
sscanf(buff+bufIndex, " %04X", &expansionString[i]);
|
|
i++;
|
|
bufIndex += 5;
|
|
}
|
|
expLen = i;
|
|
expansionString[expLen] = 0;
|
|
bufIndex += 2;
|
|
}
|
|
sscanf(buff+bufIndex, "%02i ", &strength);
|
|
bufIndex += 3;
|
|
sscanf(buff+bufIndex, "%02i", &strengthFromEmpty);
|
|
bufIndex += 2;
|
|
sscanf(buff+bufIndex, "%02i", &cumulativeStrength);
|
|
bufIndex += 2;
|
|
sscanf(buff+bufIndex, "%02i", &expStrength);
|
|
bufIndex += 2;
|
|
|
|
sscanf(buff+bufIndex, "%1i%1i ", &isRemoved, &isReset);
|
|
bufIndex += 3;
|
|
|
|
sscanf(buff+bufIndex, "%08X", &expIndex);
|
|
bufIndex += 8;
|
|
|
|
// calculate first and last CC
|
|
UChar32 c;
|
|
U16_GET(name, 0, 0, len, c);
|
|
firstCC = u_getCombiningClass(c);
|
|
U16_GET(name, 0, len-1, len, c);
|
|
lastCC = u_getCombiningClass(c);
|
|
}
|
|
|
|
void
|
|
Line::swapCase(UChar *string, int32_t &sLen)
|
|
{
|
|
UChar32 c = 0;
|
|
int32_t i = 0, j = 0;
|
|
UChar buff[256];
|
|
UBool isError = FALSE;
|
|
while(i < sLen) {
|
|
U16_NEXT(string, i, sLen, c);
|
|
if(u_isUUppercase(c)) {
|
|
c = u_tolower(c);
|
|
} else if(u_isULowercase(c)) {
|
|
c = u_toupper(c);
|
|
}
|
|
U16_APPEND(buff, j, 256, c, isError);
|
|
}
|
|
buff[j] = 0;
|
|
u_strcpy(string, buff);
|
|
sLen = j;
|
|
}
|
|
|
|
|
|
void
|
|
Line::swapCase()
|
|
{
|
|
swapCase(name, len);
|
|
swapCase(expansionString, expLen);
|
|
}
|
|
|
|
/**
 * Returns the sort key bytes as text, each byte as two hex digits
 * followed by a space. Reading stops at the first zero byte, or once
 * the 256-character local buffer is nearly full. A NULL sortKey gives
 * an empty string.
 */
UnicodeString
Line::dumpSortkey()
{
    char buffer[256];
    buffer[0] = 0;

    if(sortKey) {
        int32_t used = 0;   // characters written so far, three per byte
        for(const uint8_t *key = sortKey; *key != 0; ++key) {
            sprintf(buffer+used, "%02X ", *key);
            used += 3;
            if(used > 252) {
                break;
            }
        }
    }
    return UnicodeString(buffer);
}
|
|
|