// scuffed-code/icu4c/source/i18n/jamohang.cpp (154 lines, 4.8 KiB, C++)

/*
**********************************************************************
* Copyright (c) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/17/2000 aliu Ported from Java.
**********************************************************************
*/
#include "unicode/jamohang.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"
#include "unicode/unicode.h"
/**
 * Identifier string of this transliterator.  The constructor below passes
 * it to the Transliterator base class.
 */
const char *JamoHangulTransliterator::_ID = "Jamo-Hangul";
/**
 * Creates a Jamo-to-Hangul transliterator.
 *
 * @param adoptedFilter filter handed, together with _ID, to the
 *                      Transliterator base-class constructor.
 */
JamoHangulTransliterator::JamoHangulTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
{
    // The maximum context length is fixed at 3 for this transliterator.
    this->setMaximumContextLength(3);
}
/**
 * Copy constructor.  All copying is delegated to the Transliterator base
 * class; this class adds nothing of its own here.
 *
 * @param o the transliterator to copy from
 */
JamoHangulTransliterator::JamoHangulTransliterator(const JamoHangulTransliterator& o)
    : Transliterator(o)
{
}
/**
 * Assignment operator.  Forwards to the Transliterator base-class
 * assignment and yields a reference to this object.
 *
 * @param o the transliterator to assign from
 * @return *this
 */
JamoHangulTransliterator&
JamoHangulTransliterator::operator=(const JamoHangulTransliterator& o)
{
    // Explicitly qualified so that exactly the base-class operator runs.
    this->Transliterator::operator=(o);
    return *this;
}
/**
 * Transliterator API.
 *
 * @return a newly allocated copy of this object, created with the copy
 *         constructor and returned through a Transliterator pointer.
 */
Transliterator* JamoHangulTransliterator::clone(void) const
{
    Transliterator* duplicate = new JamoHangulTransliterator(*this);
    return duplicate;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Performs transliteration changing Jamo to Hangul.  Walks the range
 * [offsets.start, offsets.limit) of text, handing each character and its
 * successor to composeHangul and replacing them in place whenever a
 * different character comes back.
 *
 * On return offsets.limit and offsets.contextLimit have been shifted by the
 * net change in text length, and offsets.start equals the new limit.  If the
 * incoming range is empty, offsets is left untouched.
 *
 * @param text          the text to modify in place
 * @param offsets       the range to process; updated as described above
 * @param isIncremental not consulted by this implementation
 */
void JamoHangulTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                   UBool isIncremental) const {
    int32_t cursor = offsets.start;
    int32_t limit = offsets.limit;
    if (cursor >= limit) return;

    int32_t count; // set by every composeHangul call
    UChar last = filteredCharAt(text, cursor++);
    UnicodeString str("a", 1); // one-character buffer; slot 0 is overwritten before each use
    while (cursor <= limit) {
        // One extra pass runs with cursor == limit so that the final
        // character can still be fixed up on its own.  In that pass there is
        // no successor, so 0xFFFF is passed as a stand-in.
        const UBool atEnd = (UBool)(cursor >= limit);
        const UChar next = atEnd ? (UChar)0xFFFF : filteredCharAt(text, cursor);
        const UChar replacement = composeHangul(last, next, count);
        if (replacement != last) {
            str[0] = replacement;
            text.handleReplaceBetween(cursor-1, cursor-1 + count, str);
            limit = limit - count + 1; // fix up limit 2 => -1, 1 => 0
            last = replacement;
            // Decide on the position, not on the sentinel value: a literal
            // U+FFFF inside the text must not end processing early.
            if (atEnd) break;
            // don't change cursor, so we revisit char
        } else {
            ++cursor;
            last = next;
        }
    }

    // limit already tracks the exact end of the rewritten range, so it is
    // reported as-is (it must not be bumped past the text).  The loop leaves
    // cursor at limit or limit + 1; the whole range has been consumed, so
    // start is reported as limit to keep it inside the range.
    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;
    offsets.start = limit;
}
// These constants are from the Unicode book's algorithm.
// They are used by composeHangul() below for the jamo <-> syllable arithmetic.
#define SBase (0xAC00) // first code point of the precomposed syllable range
#define LBase (0x1100) // first leading consonant (L)
#define VBase (0x1161) // first vowel (V)
#define TBase (0x11A7) // one before the first trailing consonant (T); index 0 means "no T"
#define LCount (19) // number of leading consonants handled
#define VCount (21) // number of vowels handled
#define TCount (28) // number of trailing slots, including the "no T" slot 0
#define NCount (VCount * TCount) // 588 syllables per leading consonant
#define SCount (LCount * NCount) // 11172 syllables in total
#define LLimit (0x1200) // exclusive upper bound used for the isolated-jamo check
/**
 * Composes a Hangul syllable from the character pair (last, ch) where
 * possible, and fixes up isolated modern jamo so that they become complete
 * syllables.
 *
 * Cases, tried in this order:
 *  - last is a leading consonant (L): returns the LV syllable if ch is a
 *    vowel (count = 2); otherwise the syllable of L with the first vowel
 *    (count = 1).
 *  - last is an isolated vowel (V): returns the syllable built with the
 *    empty consonant U+110B (count = 1).
 *  - last is an isolated trailing consonant (T): returns U+C544 plus that
 *    trailing consonant (count = 1).
 *  - last is an LV syllable and ch is a trailing consonant: returns the
 *    LVT syllable (count = 2).
 *
 * @param last  the current character
 * @param ch    the character following last
 * @param count receives the number of characters the returned syllable
 *              replaces: 2 if ch was used, 1 otherwise.  Only meaningful
 *              when the return value differs from last.
 * @return the composed character, or last if nothing applies
 */
UChar JamoHangulTransliterator::composeHangul(UChar last, UChar ch, int32_t& count) {
    count = 2; // default is replace 2 chars

    // check to see if two current characters are L and V
    int32_t LIndex = last - LBase;
    if (0 <= LIndex && LIndex < LCount) {
        int32_t VIndex = ch - VBase;
        if (0 <= VIndex && VIndex < VCount) {
            // make syllable of form LV
            return static_cast<UChar>(SBase + (LIndex * VCount + VIndex) * TCount);
        } else {
            // it is isolated, so fix!
            count = 1; // not using ch
            return static_cast<UChar>(SBase + (LIndex * VCount) * TCount);
        }
    }

    // if neither case was true, see if we have an isolated Jamo we need to fix
    if (LBase <= last && last < LLimit) {
        // need to fix: it is either medial or final!
        int32_t VIndex = last - VBase;
        if (0 <= VIndex && VIndex < VCount) {
            LIndex = 0x110B - LBase; // use empty consonant
            // make syllable of form LV
            count = 1; // not using ch
            return static_cast<UChar>(SBase + (LIndex * VCount + VIndex) * TCount);
        }
        // ok, see if final. Use null consonant + a + final
        // Valid trailing indexes are 1 .. TCount-1: index 0 is TBase itself
        // (not a trailing consonant) and index TCount would spill over into
        // the next LV syllable.
        int32_t TIndex = last - TBase;
        if (0 < TIndex && TIndex < TCount) {
            count = 1; // not using ch
            // 0xC544 == SBase + (0x110B - LBase) * NCount: empty consonant + first vowel
            return static_cast<UChar>(0xC544 + TIndex);
        }
    }

    // check to see if two current characters are LV and T
    int32_t SIndex = last - SBase;
    if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) {
        int32_t TIndex = ch - TBase;
        // Same 1 .. TCount-1 range as above.
        if (0 < TIndex && TIndex < TCount) {
            // make syllable of form LVT
            return static_cast<UChar>(last + TIndex);
        }
    }
    return last;
}