// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2003, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * * File line.cpp * * Modification History: * * Date Name Description * 03/18/2003 weiv Creation. ******************************************************************************* */ #include "line.h" #include UnicodeSet * Line::needsQuoting = NULL; void Line::init() { len = 0; expLen = 0; strength = UCOL_OFF; strengthFromEmpty = UCOL_OFF; cumulativeStrength = UCOL_OFF; expStrength = UCOL_OFF; previous = NULL; next = NULL; left = NULL; right = NULL; isContraction = FALSE; isExpansion = FALSE; isRemoved = FALSE; isReset = FALSE; expIndex = 0; firstCC = 0; lastCC = 0; sortKey = NULL; } Line::Line() { init(); memset(name, 0, 25*sizeof(UChar)); memset(expansionString, 0, 25*sizeof(UChar)); } Line::Line(const UChar* name, int32_t len) { init(); this->len = len; u_memcpy(this->name, name, len); memset(expansionString, 0, 25*sizeof(UChar)); UChar32 c; U16_GET(name, 0, 0, len, c); firstCC = u_getCombiningClass(c); U16_GET(name, 0, len-1, len, c); lastCC = u_getCombiningClass(c); } Line::Line(const UChar name) { init(); len = 1; this->name[0] = name; this->name[1] = 0; memset(expansionString, 0, 25*sizeof(UChar)); firstCC = u_getCombiningClass(name); lastCC = firstCC; } Line::Line(const UnicodeString &string) { init(); setTo(string); } Line::Line(const char *buff, int32_t buffLen, UErrorCode &status) : previous(NULL), next(NULL), left(NULL), right(NULL) { initFromString(buff, buffLen, status); } Line::Line(const Line &other) : previous(NULL), next(NULL), left(NULL), right(NULL) { *this = other; } Line & Line::operator=(const Line &other) { len = other.len; expLen = other.expLen; strength = other.strength; strengthFromEmpty = other.strengthFromEmpty; cumulativeStrength = other.cumulativeStrength; expStrength = other.expStrength; isContraction = other.isContraction; isExpansion = other.isExpansion; isRemoved = other.isRemoved; isReset = other.isReset; expIndex = other.expIndex; firstCC = other.firstCC; lastCC = other.lastCC; u_strcpy(name, other.name); u_strcpy(expansionString, other.expansionString); sortKey = other.sortKey; left = other.left; right = other.right; return *this; } UBool Line::operator==(const Line &other) const { if(this == &other) { return TRUE; } if(len != other.len) { return FALSE; } if(u_strcmp(name, other.name) != 0) { return FALSE; } return TRUE; } UBool Line::equals(const Line &other) const { if(this == &other) { return TRUE; } if(len != other.len) { return FALSE; } if(u_strcmp(name, other.name) != 0) { return FALSE; } if(strength != other.strength) { return FALSE; } if(expLen != other.expLen) { return FALSE; } if(u_strcmp(expansionString, other.expansionString)) { return FALSE; } return TRUE; } UBool Line::operator!=(const Line &other) const { return !(*this == other); } Line::~Line() { } void Line::copyArray(Line *dest, const Line *src, int32_t size) { int32_t i = 0; for(i = 0; i < size; i++) { dest[i] = src[i]; } } void Line::setName(const UChar* name, int32_t len) { this->len = len; u_memcpy(this->name, name, len); UChar32 c; U16_GET(name, 0, 0, len, c); firstCC = u_getCombiningClass(c); U16_GET(name, 0, len-1, len, c); lastCC = u_getCombiningClass(c); } void Line::setToConcat(const Line *first, const Line *second) { u_strcpy(name, first->name); u_strcat(name, second->name); len = first->len + second->len; firstCC = first->firstCC; lastCC = second->lastCC; } UnicodeString Line::stringToName(UChar *string, int32_t len) { UErrorCode status = U_ZERO_ERROR; UnicodeString result; char buffer[256]; int32_t i = 0; UChar32 c; while(i < len) { U16_NEXT(string, i, len, c); if(c < 0x10000) { sprintf(buffer, "%04X ", c); } else { sprintf(buffer, "%06X ", c); } result.append(buffer); } i = 0; while(i < len) { U16_NEXT(string, i, len, c); u_charName(c, U_EXTENDED_CHAR_NAME, buffer, 256, &status); result.append("{"); result.append(buffer); result.append("} "); } /* for(i = 0; i < len; i++) { sprintf(buffer, "%04X ", string[i]); result.append(buffer); } for(i = 0; i < len; i++) { u_charName(string[i], U_EXTENDED_CHAR_NAME, buffer, 256, &status); result.append("{"); result.append(buffer); result.append("} "); } */ return result; } UnicodeString Line::toBundleString() { UnicodeString result; UErrorCode status = U_ZERO_ERROR; if(!needsQuoting) { needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", status); } UChar NFC[50]; int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status); result.append("\""); if(isReset) { result.append("&"); } else { result.append(strengthToString(strength, FALSE, FALSE)); } UBool quote = needsQuoting->containsSome(name) || needsQuoting->containsSome(NFC); if(quote) { result.append("'"); } if(NFC[0] == 0x22) { result.append("\\u0022"); } else { result.append(NFC, NFCLen); } if(quote && NFC[0] != 0x0027) { result.append("'"); } if(expLen && !isReset) { quote = needsQuoting->containsSome(expansionString); result.append(" / "); if(quote) { result.append("'"); } result.append(expansionString); if(quote) { result.append("'"); } } result.append("\" //"); result.append(stringToName(NFC, NFCLen)); if(expLen && !isReset) { result.append(" / "); result.append(stringToName(expansionString, expLen)); } result.append("\n"); return result; } UnicodeString Line::toHTMLString() { UnicodeString result; UErrorCode status = U_ZERO_ERROR; UChar NFC[50]; int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status); result.append(""); if(isReset) { result.append("&"); } else { result.append(strengthToString(strength, FALSE, TRUE)); } result.append(NFC, NFCLen); if(expLen && !isReset) { result.append(" / "); result.append(expansionString); } result.append("
\n"); return result; } UnicodeString Line::toString(UBool pretty) { UnicodeString result; if(!pretty) { result.setTo(name); if(expLen) { result.append("/"); result.append(expansionString); } } else { UErrorCode status = U_ZERO_ERROR; UChar NFC[50]; int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status); result.setTo(NFC, NFCLen); if(expLen) { result.append("/"); result.append(expansionString); } /* if(NFCLen != len || u_strncmp(name, NFC, len) != 0) { result.append("(NFC: "); result.append(NFC, NFCLen); result.append(stringToName(NFC, NFCLen)); result.append(")"); } */ result.append(" # "); result.append(stringToName(NFC, NFCLen)); if(expLen) { result.append("/ "); result.append(stringToName(expansionString, expLen)); } } return result; } void Line::setTo(const UnicodeString &string) { int32_t len = string.length(); u_strncpy(name, string.getBuffer(), len); name[len] = 0; this->len = len; UChar32 c; U16_GET(name, 0, 0, len, c); firstCC = u_getCombiningClass(c); U16_GET(name, 0, len-1, len, c); lastCC = u_getCombiningClass(c); } void Line::setTo(const UChar32 n) { UBool isError = FALSE; len = 0; // we are setting the line to char, not appending U16_APPEND(name, len, 25, n, isError); name[len] = 0; firstCC = u_getCombiningClass(n); lastCC = firstCC; } UnicodeString Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result) { int i; int numIndents = strength+1; if(strength > UCOL_IDENTICAL) { return result; } else if(strength == UCOL_IDENTICAL) { numIndents = 5; } for(i = 0; i < numIndents*indentSize; i++) { result.append(" "); } return result; } UnicodeString Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) { UnicodeString result; if(html) { switch(strength) { case UCOL_IDENTICAL: result.append("          = "); break; case UCOL_QUATERNARY: result.append("        <<<< "); break; case UCOL_TERTIARY: result.append("      <<< "); break; case UCOL_SECONDARY: result.append("    << "); break; case UCOL_PRIMARY: result.append("  < "); break; case UCOL_OFF: result.append(" >? "); break; default: result.append(" ?! "); break; } } else { switch(strength) { case UCOL_IDENTICAL: if(pretty) { result.append(" "); } result.append(" = "); break; case UCOL_QUATERNARY: if(pretty) { result.append(" "); } result.append(" <<<< "); break; case UCOL_TERTIARY: //u_fprintf(file, "<3"); if(pretty) { result.append(" "); } result.append(" <<< "); break; case UCOL_SECONDARY: //u_fprintf(file, "<2"); if(pretty) { result.append(" "); } result.append(" << "); break; case UCOL_PRIMARY: //u_fprintf(file, "<1"); if(pretty) { result.append(" "); } result.append(" < "); break; case UCOL_OFF: result.append(" >? "); break; default: result.append(" ?! "); break; } } return result; } Line * Line::nextInteresting() { Line *result = this->next; while(result && result->strength != UCOL_IDENTICAL) { result = result->next; } return result; } void Line::append(const UChar* n, int32_t length) { u_strncat(name, n, length); name[len+length] = 0; len += length; UChar32 end; U16_GET(n, 0, length-1, length, end); lastCC = u_getCombiningClass(end); } void Line::append(const UChar n) { name[len] = n; name[len+1] = 0; len++; lastCC = u_getCombiningClass(n); } void Line::append(const Line &l) { append(l.name, l.len); lastCC = l.lastCC; } void Line::clear() { name[0] = 0; len = 0; } int32_t Line::write(char *buff, int32_t, UErrorCode &) { /* UChar name[25]; int32_t len; UChar expansionString[25]; int32_t expLen; UColAttributeValue strength; UColAttributeValue strengthFromEmpty; UColAttributeValue cumulativeStrength; UColAttributeValue expStrength; Line *previous; Line *next; UBool isContraction; UBool isExpansion; UBool isRemoved; UBool isReset; int32_t expIndex; uint8_t firstCC; uint8_t lastCC; */ int32_t resLen = 0; int32_t i = 0; sprintf(buff+resLen, "%04X", name[0]); resLen += 4; for(i = 1; i < len; i++) { sprintf(buff+resLen, " %04X", name[i]); resLen += 5; } sprintf(buff+resLen, "/"); resLen += 1; i = 0; if(expLen) { sprintf(buff+resLen, "%04X", expansionString[0]); resLen += 4; for(i = 1; i < expLen; i++) { sprintf(buff+resLen, " %04X", expansionString[i]); resLen += 5; } } sprintf(buff+resLen, "; "); resLen += 2; sprintf(buff+resLen, "%02i ", strength); resLen += 3; sprintf(buff+resLen, "%02i", strengthFromEmpty); resLen += 2; sprintf(buff+resLen, "%02i", cumulativeStrength); resLen += 2; sprintf(buff+resLen, "%02i", expStrength); resLen += 2; // Various flags. The only interesting ones are isReset and isRemoved. We will not output removed lines //sprintf(buff+resLen, "%1i%1i%1i%1i ", isContraction, isExpansion, isRemoved, isReset); //resLen += 5; sprintf(buff+resLen, "%1i%1i ", isRemoved, isReset); resLen += 3; // first and last CC // can be calculated on reading //sprintf(buff+resLen, "%03i %03i ", firstCC, lastCC); //resLen += 8; sprintf(buff+resLen, "%08X", expIndex); resLen += 8; buff[resLen] = 0; return resLen; } void Line::initFromString(const char *buff, int32_t, UErrorCode &) { int32_t bufIndex = 0; int32_t i = 0; sscanf(buff+bufIndex, "%04X", &name[i]); i++; bufIndex += 4; while(buff[bufIndex] != '/') { sscanf(buff+bufIndex, " %04X", &name[i]); i++; bufIndex += 5; } len = i; name[len] = 0; bufIndex++; if(i > 1) { isContraction = TRUE; } else { isContraction = FALSE; } if(buff[bufIndex] == ';') { isExpansion = FALSE; bufIndex += 2; expansionString[0] = 0; expLen = 0; } else { i = 0; sscanf(buff+bufIndex, "%04X", &expansionString[i]); i++; bufIndex += 4; while(buff[bufIndex] != ';') { sscanf(buff+bufIndex, " %04X", &expansionString[i]); i++; bufIndex += 5; } expLen = i; expansionString[expLen] = 0; bufIndex += 2; } sscanf(buff+bufIndex, "%02i ", &strength); bufIndex += 3; sscanf(buff+bufIndex, "%02i", &strengthFromEmpty); bufIndex += 2; sscanf(buff+bufIndex, "%02i", &cumulativeStrength); bufIndex += 2; sscanf(buff+bufIndex, "%02i", &expStrength); bufIndex += 2; sscanf(buff+bufIndex, "%1i%1i ", &isRemoved, &isReset); bufIndex += 3; sscanf(buff+bufIndex, "%08X", &expIndex); bufIndex += 8; // calculate first and last CC UChar32 c; U16_GET(name, 0, 0, len, c); firstCC = u_getCombiningClass(c); U16_GET(name, 0, len-1, len, c); lastCC = u_getCombiningClass(c); } void Line::swapCase(UChar *string, int32_t &sLen) { UChar32 c = 0; int32_t i = 0, j = 0; UChar buff[256]; UBool isError = FALSE; while(i < sLen) { U16_NEXT(string, i, sLen, c); if(u_isUUppercase(c)) { c = u_tolower(c); } else if(u_isULowercase(c)) { c = u_toupper(c); } U16_APPEND(buff, j, 256, c, isError); } buff[j] = 0; u_strcpy(string, buff); sLen = j; } void Line::swapCase() { swapCase(name, len); swapCase(expansionString, expLen); } UnicodeString Line::dumpSortkey() { char buffer[256]; char *buff = buffer; *buff = 0; uint8_t *key = sortKey; if(sortKey) { while(*key) { sprintf(buff, "%02X ", *key); key++; buff += 3; if(buff - buffer > 252) { break; } } } return UnicodeString(buffer); }