/* ********************************************************************** * Copyright (c) 2000, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 01/17/2000 aliu Ported from Java. ********************************************************************** */ #include "unicode/jamohang.h" #include "unicode/rep.h" #include "unicode/unifilt.h" #include "unicode/unicode.h" /** * ID for this transliterator. */ const char* JamoHangulTransliterator::_ID = "Jamo-Hangul"; /** * Constructs a transliterator. */ JamoHangulTransliterator::JamoHangulTransliterator(UnicodeFilter* adoptedFilter) : Transliterator(_ID, adoptedFilter) { setMaximumContextLength(3); } /** * Copy constructor. */ JamoHangulTransliterator::JamoHangulTransliterator(const JamoHangulTransliterator& o) : Transliterator(o) { } /** * Assignment operator. */ JamoHangulTransliterator& JamoHangulTransliterator::operator=( const JamoHangulTransliterator& o) { Transliterator::operator=(o); return *this; } /** * Transliterator API. */ Transliterator* JamoHangulTransliterator::clone(void) const { return new JamoHangulTransliterator(*this); } /** * Implements {@link Transliterator#handleTransliterate}. */ void JamoHangulTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, UBool isIncremental) const { /** * Performs transliteration changing Jamo to Hangul */ int32_t cursor = offsets.start; int32_t limit = offsets.limit; if (cursor >= limit) return; int32_t count; UChar last = filteredCharAt(text, cursor++); UnicodeString str("a", 1); while (cursor <= limit) { UChar next = 0xFFFE; // go over end of string, just in case if (cursor < limit) next = filteredCharAt(text, cursor); UChar replacement = composeHangul(last, next, count); if (replacement != last) { str[0] = replacement; text.handleReplaceBetween(cursor-1, cursor-1 + count, str); limit = limit - count + 1; // fix up limit 2 => -1, 1 => 0 last = replacement; if (next == 0xFFFE) break; // don't change cursor, so we revisit char } else { ++cursor; last = next; } } ++limit; offsets.contextLimit += limit - offsets.limit; offsets.limit = limit; offsets.start = cursor; } // These constants are from the Unicode book's algorithm. #define SBase (0xAC00) #define LBase (0x1100) #define VBase (0x1161) #define TBase (0x11A7) #define LCount (19) #define VCount (21) #define TCount (28) #define NCount (VCount * TCount) // 588 #define SCount (LCount * NCount) // 11172 #define LLimit (0x1200) /** * Return composed character (if it is a modern jamo) * last otherwise. * If there is a replacement, returns count[0] = 2 if ch was used, 1 otherwise */ UChar JamoHangulTransliterator::composeHangul(UChar last, UChar ch, int32_t& count) { count = 2; // default is replace 2 chars // check to see if two current characters are L and V int32_t LIndex = last - LBase; if (0 <= LIndex && LIndex < LCount) { int32_t VIndex = ch - VBase; if (0 <= VIndex && VIndex < VCount) { // make syllable of form LV return (UChar)(SBase + (LIndex * VCount + VIndex) * TCount); } else { // it is isolated, so fix! count = 1; // not using ch return (UChar)(SBase + (LIndex * VCount) * TCount); } } // if neither case was true, see if we have an isolated Jamo we need to fix if (LBase <= last && last < LLimit) { // need to fix: it is either medial or final! int32_t VIndex = last - VBase; if (0 <= VIndex && VIndex < VCount) { LIndex = 0x110B - LBase; // use empty consonant // make syllable of form LV count = 1; // not using ch return (UChar)(SBase + (LIndex * VCount + VIndex) * TCount); } // ok, see if final. Use null consonant + a + final int32_t TIndex = last - TBase; if (0 <= TIndex && TIndex <= TCount) { // need to fix! count = 1; // not using ch return (UChar)(0xC544 + TIndex); } } // check to see if two current characters are LV and T int32_t SIndex = last - SBase; if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) { int32_t TIndex = ch - TBase; if (0 <= TIndex && TIndex <= TCount) { // make syllable of form LVT return (UChar)(last + TIndex); } } return last; }