// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2013-2016, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: uscript_props.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2013feb16 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/uscript.h" #include "unicode/utf16.h" #include "ustr_imp.h" #include "cmemory.h" namespace { // Script metadata (script properties). // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt // 0 = NOT_ENCODED, no sample character, default false script properties. // Bits 20.. 0: sample character // Bits 23..21: usage const int32_t UNKNOWN = 1 << 21; const int32_t EXCLUSION = 2 << 21; const int32_t LIMITED_USE = 3 << 21; // st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 const int32_t RECOMMENDED = 5 << 21; // Bits 31..24: Single-bit flags const int32_t RTL = 1 << 24; const int32_t LB_LETTERS = 1 << 25; const int32_t CASED = 1 << 26; const int32_t SCRIPT_PROPS[] = { // Begin copy-paste output from // tools/trunk/unicode/py/parsescriptmetadata.py 0x0040 | RECOMMENDED, // Zyyy 0x0308 | RECOMMENDED, // Zinh 0x0628 | RECOMMENDED | RTL, // Arab 0x0531 | RECOMMENDED | CASED, // Armn 0x0995 | RECOMMENDED, // Beng 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo 0x13C4 | LIMITED_USE | CASED, // Cher 0x03E2 | EXCLUSION | CASED, // Copt 0x042F | RECOMMENDED | CASED, // Cyrl 0x10414 | EXCLUSION | CASED, // Dsrt 0x0905 | RECOMMENDED, // Deva 0x12A0 | RECOMMENDED, // Ethi 0x10D3 | RECOMMENDED, // Geor 0x10330 | EXCLUSION, // Goth 0x03A9 | RECOMMENDED | CASED, // Grek 0x0A95 | RECOMMENDED, // Gujr 0x0A15 | RECOMMENDED, // Guru 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 0xAC00 | RECOMMENDED, // Hang 0x05D0 | RECOMMENDED | RTL, // Hebr 0x304B | RECOMMENDED | LB_LETTERS, // Hira 0x0C95 | RECOMMENDED, // Knda 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 0x004C | RECOMMENDED | CASED, // Latn 0x0D15 | RECOMMENDED, // Mlym 0x1826 | LIMITED_USE, // Mong 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 0x168F | EXCLUSION, // Ogam 0x10300 | EXCLUSION, // Ital 0x0B15 | RECOMMENDED, // Orya 0x16A0 | EXCLUSION, // Runr 0x0D85 | RECOMMENDED, // Sinh 0x0710 | LIMITED_USE | RTL, // Syrc 0x0B95 | RECOMMENDED, // Taml 0x0C15 | RECOMMENDED, // Telu 0x078C | RECOMMENDED | RTL, // Thaa 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 0x0F40 | RECOMMENDED, // Tibt 0x14C0 | LIMITED_USE, // Cans 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii 0x1703 | EXCLUSION, // Tglg 0x1723 | EXCLUSION, // Hano 0x1743 | EXCLUSION, // Buhd 0x1763 | EXCLUSION, // Tagb 0x280E | UNKNOWN, // Brai 0x10800 | EXCLUSION | RTL, // Cprt 0x1900 | LIMITED_USE, // Limb 0x10000 | EXCLUSION, // Linb 0x10480 | EXCLUSION, // Osma 0x10450 | EXCLUSION, // Shaw 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 0x10380 | EXCLUSION, // Ugar 0, 0x1A00 | EXCLUSION, // Bugi 0x2C00 | EXCLUSION | CASED, // Glag 0x10A00 | EXCLUSION | RTL, // Khar 0xA800 | LIMITED_USE, // Sylo 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 0x2D30 | LIMITED_USE, // Tfng 0x103A0 | EXCLUSION, // Xpeo 0x1B05 | LIMITED_USE, // Bali 0x1BC0 | LIMITED_USE, // Batk 0, 0x11005 | EXCLUSION, // Brah 0xAA00 | LIMITED_USE, // Cham 0, 0, 0, 0, 0x13153 | EXCLUSION, // Egyp 0, 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 0x16B1C | EXCLUSION, // Hmng 0x10CA1 | EXCLUSION | RTL | CASED, // Hung 0, 0xA984 | LIMITED_USE, // Java 0xA90A | LIMITED_USE, // Kali 0, 0, 0x1C00 | LIMITED_USE, // Lepc 0x10647 | EXCLUSION, // Lina 0x0840 | LIMITED_USE | RTL, // Mand 0, 0x10980 | EXCLUSION | RTL, // Mero 0x07CA | LIMITED_USE | RTL, // Nkoo 0x10C00 | EXCLUSION | RTL, // Orkh 0x1036B | EXCLUSION, // Perm 0xA840 | EXCLUSION, // Phag 0x10900 | EXCLUSION | RTL, // Phnx 0x16F00 | LIMITED_USE, // Plrd 0, 0, 0, 0, 0, 0, 0xA549 | LIMITED_USE, // Vaii 0, 0x12000 | EXCLUSION, // Xsux 0, 0xFDD0 | UNKNOWN, // Zzzz 0x102A0 | EXCLUSION, // Cari 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 0x10280 | EXCLUSION, // Lyci 0x10920 | EXCLUSION | RTL, // Lydi 0x1C5A | LIMITED_USE, // Olck 0xA930 | EXCLUSION, // Rjng 0xA882 | LIMITED_USE, // Saur 0x1D850 | EXCLUSION, // Sgnw 0x1B83 | LIMITED_USE, // Sund 0, 0xABC0 | LIMITED_USE, // Mtei 0x10840 | EXCLUSION | RTL, // Armi 0x10B00 | EXCLUSION | RTL, // Avst 0x11103 | LIMITED_USE, // Cakm 0xAC00 | RECOMMENDED, // Kore 0x11083 | EXCLUSION, // Kthi 0x10AD8 | EXCLUSION | RTL, // Mani 0x10B60 | EXCLUSION | RTL, // Phli 0x10B8F | EXCLUSION | RTL, // Phlp 0, 0x10B40 | EXCLUSION | RTL, // Prti 0x0800 | EXCLUSION | RTL, // Samr 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 0, 0, 0xA6A0 | LIMITED_USE, // Bamu 0xA4D0 | LIMITED_USE, // Lisu 0, 0x10A60 | EXCLUSION | RTL, // Sarb 0x16AE6 | EXCLUSION, // Bass 0x1BC20 | EXCLUSION, // Dupl 0x10500 | EXCLUSION, // Elba 0x11315 | EXCLUSION, // Gran 0, 0, 0x1E802 | EXCLUSION | RTL, // Mend 0x109A0 | EXCLUSION | RTL, // Merc 0x10A95 | EXCLUSION | RTL, // Narb 0x10896 | EXCLUSION | RTL, // Nbat 0x10873 | EXCLUSION | RTL, // Palm 0x112BE | EXCLUSION, // Sind 0x118B4 | EXCLUSION | CASED, // Wara 0, 0, 0x16A4F | EXCLUSION, // Mroo 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu 0x11183 | EXCLUSION, // Shrd 0x110D0 | EXCLUSION, // Sora 0x11680 | EXCLUSION, // Takr 0x18229 | EXCLUSION | LB_LETTERS, // Tang 0, 0x14400 | EXCLUSION, // Hluw 0x11208 | EXCLUSION, // Khoj 0x11484 | EXCLUSION, // Tirh 0x10537 | EXCLUSION, // Aghb 0x11152 | EXCLUSION, // Mahj 0x11717 | EXCLUSION | LB_LETTERS, // Ahom 0x108F4 | EXCLUSION | RTL, // Hatr 0x1160E | EXCLUSION, // Modi 0x1128F | EXCLUSION, // Mult 0x11AC0 | EXCLUSION, // Pauc 0x1158E | EXCLUSION, // Sidd 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm 0x11C0E | EXCLUSION, // Bhks 0x11C72 | EXCLUSION, // Marc 0x11412 | LIMITED_USE, // Newa 0x104B5 | LIMITED_USE | CASED, // Osge 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb 0x1112 | RECOMMENDED, // Jamo 0, 0x11D10 | EXCLUSION, // Gonm 0x11A5C | EXCLUSION, // Soyo 0x11A0B | EXCLUSION, // Zanb 0x1180B | EXCLUSION, // Dogr 0x11D71 | LIMITED_USE, // Gong 0x11EE5 | EXCLUSION, // Maka 0x16E40 | EXCLUSION | CASED, // Medf 0x10D12 | LIMITED_USE | RTL, // Rohg 0x10F42 | EXCLUSION | RTL, // Sogd 0x10F19 | EXCLUSION | RTL, // Sogo // End copy-paste from parsescriptmetadata.py }; int32_t getScriptProps(UScriptCode script) { if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { return SCRIPT_PROPS[script]; } else { return 0; } } } // namespace U_CAPI int32_t U_EXPORT2 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return 0; } if(capacity < 0 || (capacity > 0 && dest == NULL)) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return 0; } int32_t sampleChar = getScriptProps(script) & 0x1fffff; int32_t length; if(sampleChar == 0) { length = 0; } else { length = U16_LENGTH(sampleChar); if(length <= capacity) { int32_t i = 0; U16_APPEND_UNSAFE(dest, i, sampleChar); } } return u_terminateUChars(dest, capacity, length, pErrorCode); } U_COMMON_API icu::UnicodeString U_EXPORT2 uscript_getSampleUnicodeString(UScriptCode script) { icu::UnicodeString sample; int32_t sampleChar = getScriptProps(script) & 0x1fffff; if(sampleChar != 0) { sample.append(sampleChar); } return sample; } U_CAPI UScriptUsage U_EXPORT2 uscript_getUsage(UScriptCode script) { return (UScriptUsage)((getScriptProps(script) >> 21) & 7); } U_CAPI UBool U_EXPORT2 uscript_isRightToLeft(UScriptCode script) { return (getScriptProps(script) & RTL) != 0; } U_CAPI UBool U_EXPORT2 uscript_breaksBetweenLetters(UScriptCode script) { return (getScriptProps(script) & LB_LETTERS) != 0; } U_CAPI UBool U_EXPORT2 uscript_isCased(UScriptCode script) { return (getScriptProps(script) & CASED) != 0; }