ICU-199 add Jamo-Hangul and Hangul-Jamo

X-SVN-Rev: 618
This commit is contained in:
Alan Liu 2000-01-18 01:55:52 +00:00
parent 9058e22250
commit 44562e956f
6 changed files with 529 additions and 0 deletions

View File

@ -0,0 +1,135 @@
/*
**********************************************************************
* Copyright (c) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/17/2000 aliu Ported from Java.
**********************************************************************
*/
#include "unicode/hangjamo.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"
#include "unicode/unicode.h"
// These constants are from the Unicode book's algorithm.
// They drive decomposeHangul() below:
//   SBase  - code unit of the first syllable handled (syllable index 0)
//   LBase  - base added to the leading-jamo index
//   VBase  - base added to the vowel-jamo index
//   TBase  - base added to the trailing-jamo index; a trailing index of 0
//            (i.e. the value TBase itself) means "no trailing jamo"
//   LCount, VCount, TCount - number of leading / vowel / trailing indices
//   NCount - number of syllables per leading jamo (VCount * TCount)
//   SCount - total number of syllables handled (LCount * NCount)
#define SBase (0xAC00)
#define LBase (0x1100)
#define VBase (0x1161)
#define TBase (0x11A7)
#define LCount (19)
#define VCount (21)
#define TCount (28)
#define NCount (VCount * TCount) // 588
#define SCount (LCount * NCount) // 11172
/**
* ID for this transliterator. The constructor passes it to the
* Transliterator base class.
*/
const char* HangulJamoTransliterator::_ID = "Hangul-Jamo";
/**
* Constructs a Hangul-to-Jamo transliterator.
* @param adoptedFilter filter forwarded, together with _ID, to the
* Transliterator base class. The name suggests ownership is transferred
* to this object -- TODO confirm against Transliterator.
*/
HangulJamoTransliterator::HangulJamoTransliterator(UnicodeFilter* adoptedFilter) :
Transliterator(_ID, adoptedFilter) {
}
/**
* Copy constructor. This class adds no data members of its own, so all
* copying is delegated to the Transliterator base class.
* @param o the transliterator to copy
*/
HangulJamoTransliterator::HangulJamoTransliterator(const HangulJamoTransliterator& o) :
Transliterator(o) {
}
/**
* Assignment operator. Delegates entirely to Transliterator::operator=.
* @param o the transliterator to assign from
* @return a reference to this object
*/
HangulJamoTransliterator& HangulJamoTransliterator::operator=(
const HangulJamoTransliterator& o) {
Transliterator::operator=(o);
return *this;
}
/**
* Transliterator API.
* @return a copy of this object, allocated with <code>new</code> through
* the copy constructor. Presumably the caller is responsible for deleting
* it -- TODO confirm against the Transliterator::clone contract.
*/
Transliterator* HangulJamoTransliterator::clone(void) const {
return new HangulJamoTransliterator(*this);
}
/**
 * Transliterates a segment of a string.  <code>Transliterator</code> API.
 * The range is packed into a three-element offsets array and the work is
 * delegated to handleKeyboardTransliterate().
 * @param text the string to be transliterated
 * @param start the beginning index, inclusive; <code>0 <= start
 * <= limit</code>.
 * @param limit the ending index, exclusive; <code>start <= limit
 * <= text.length()</code>.
 * @return the new limit index
 */
int32_t HangulJamoTransliterator::transliterate(Replaceable& text,
                                                int32_t start, int32_t limit) const {
    // Same element order as the original brace initializer: { start, limit, start }.
    int32_t range[3];
    range[0] = start;
    range[1] = limit;
    range[2] = start;

    handleKeyboardTransliterate(text, range);
    return range[LIMIT];
}
/**
 * Implements {@link Transliterator#handleKeyboardTransliterate}.
 * Walks the text from offsets[CURSOR] up to offsets[LIMIT] and replaces
 * each (unfiltered) Hangul syllable with its jamo sequence.  On return
 * offsets[LIMIT] and offsets[CURSOR] reflect the lengthened text.
 * @param text the text to modify in place
 * @param offsets in/out index array; only the CURSOR and LIMIT entries
 * are read and written here
 */
void HangulJamoTransliterator::handleKeyboardTransliterate(Replaceable& text,
                                                           int32_t offsets[3]) const {
    int32_t pos = offsets[CURSOR];
    int32_t end = offsets[LIMIT];
    UnicodeString jamo;

    while (pos < end) {
        if (!decomposeHangul(filteredCharAt(text, pos), jamo)) {
            // Not a syllable (or filtered out): leave it alone.
            ++pos;
            continue;
        }
        const int32_t jamoLength = jamo.length();
        text.handleReplaceBetween(pos, pos + 1, jamo);
        pos += jamoLength;      // step past the inserted jamo
        end += jamoLength - 1;  // one code unit was replaced by jamoLength
    }

    offsets[LIMIT] = end;
    offsets[CURSOR] = pos;
}
/**
 * Decomposes a Hangul syllable into its conjoining jamo.
 * @param s the code unit to examine
 * @param result receives the leading and vowel jamo, plus the trailing
 * jamo when the syllable has one; left untouched when s is not a syllable
 * @return TRUE if s lies in [SBase, SBase + SCount) and was decomposed,
 * FALSE otherwise
 */
bool_t HangulJamoTransliterator::decomposeHangul(UChar s, UnicodeString& result) {
    const int32_t syllableIndex = s - SBase;
    if (syllableIndex < 0 || syllableIndex >= SCount) {
        return FALSE;
    }

    const int32_t leading  = LBase + syllableIndex / NCount;
    const int32_t vowel    = VBase + (syllableIndex % NCount) / TCount;
    const int32_t trailing = TBase + syllableIndex % TCount;

    result.truncate(0);
    result.append((UChar)leading);
    result.append((UChar)vowel);
    // A trailing value equal to TBase means the syllable has no final consonant.
    if (trailing != TBase) {
        result.append((UChar)trailing);
    }
    return TRUE;
}
/**
 * Returns the character at index i of text as seen through this
 * transliterator's filter.
 * @param text the text to read from
 * @param i index of the character to fetch
 * @return the character itself when there is no filter or the filter
 * accepts it; otherwise 0xFFFF
 */
UChar HangulJamoTransliterator::filteredCharAt(Replaceable& text, int32_t i) const {
    const UnicodeFilter* activeFilter = getFilter();
    const UChar ch = text.charAt(i);
    if (activeFilter == 0 || activeFilter->isIn(ch)) {
        return ch;
    }
    return (UChar)0xFFFF;
}
/**
* Return the length of the longest context required by this transliterator.
* This is <em>preceding</em> context.
* @return maximum number of preceding context characters this
* transliterator needs to examine; always 0 for this class
*/
int32_t HangulJamoTransliterator::getMaximumContextLength(void) const {
return 0;
}

View File

@ -165,10 +165,18 @@ SOURCE=.\gregocal.cpp
# End Source File
# Begin Source File
SOURCE=.\hangjamo.cpp
# End Source File
# Begin Source File
SOURCE=.\hextouni.cpp
# End Source File
# Begin Source File
SOURCE=.\jamohang.cpp
# End Source File
# Begin Source File
SOURCE=.\mergecol.cpp
# End Source File
# Begin Source File
@ -718,6 +726,25 @@ InputPath=.\unicode\gregocal.h
# End Source File
# Begin Source File
SOURCE=.\unicode\hangjamo.h
!IF "$(CFG)" == "i18n - Win32 Release"
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\hangjamo.h
"..\..\include\unicode\hangjamo.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\hangjamo.h ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\hextouni.h
!IF "$(CFG)" == "i18n - Win32 Release"
@ -745,6 +772,25 @@ InputPath=.\unicode\hextouni.h
# End Source File
# Begin Source File
SOURCE=.\unicode\jamohang.h
!IF "$(CFG)" == "i18n - Win32 Release"
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\jamohang.h
"..\..\include\unicode\jamohang.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\jamohang.h ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\mergecol.h
# End Source File
# Begin Source File

View File

@ -0,0 +1,158 @@
/*
**********************************************************************
* Copyright (c) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/17/2000 aliu Ported from Java.
**********************************************************************
*/
#include "unicode/jamohang.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"
#include "unicode/unicode.h"
/**
* ID for this transliterator. The constructor passes it to the
* Transliterator base class.
*/
const char* JamoHangulTransliterator::_ID = "Jamo-Hangul";
/**
* Constructs a Jamo-to-Hangul transliterator.
* @param adoptedFilter filter forwarded, together with _ID, to the
* Transliterator base class. The name suggests ownership is transferred
* to this object -- TODO confirm against Transliterator.
*/
JamoHangulTransliterator::JamoHangulTransliterator(UnicodeFilter* adoptedFilter) :
Transliterator(_ID, adoptedFilter) {
}
/**
* Copy constructor. This class adds no data members of its own, so all
* copying is delegated to the Transliterator base class.
* @param o the transliterator to copy
*/
JamoHangulTransliterator::JamoHangulTransliterator(const JamoHangulTransliterator& o) :
Transliterator(o) {
}
/**
* Assignment operator. Delegates entirely to Transliterator::operator=.
* @param o the transliterator to assign from
* @return a reference to this object
*/
JamoHangulTransliterator& JamoHangulTransliterator::operator=(
const JamoHangulTransliterator& o) {
Transliterator::operator=(o);
return *this;
}
/**
* Transliterator API.
* @return a copy of this object, allocated with <code>new</code> through
* the copy constructor. Presumably the caller is responsible for deleting
* it -- TODO confirm against the Transliterator::clone contract.
*/
Transliterator* JamoHangulTransliterator::clone(void) const {
return new JamoHangulTransliterator(*this);
}
/**
 * Transliterates a segment of a string.  <code>Transliterator</code> API.
 * The range is packed into a three-element offsets array and the work is
 * delegated to handleKeyboardTransliterate().
 * @param text the string to be transliterated
 * @param start the beginning index, inclusive; <code>0 <= start
 * <= limit</code>.
 * @param limit the ending index, exclusive; <code>start <= limit
 * <= text.length()</code>.
 * @return the LIMIT entry of the offsets array after
 * handleKeyboardTransliterate() has run
 */
int32_t JamoHangulTransliterator::transliterate(Replaceable& text,
                                                int32_t start, int32_t limit) const {
    // Same element order as the original brace initializer: { start, limit, start }.
    int32_t range[3];
    range[0] = start;
    range[1] = limit;
    range[2] = start;

    handleKeyboardTransliterate(text, range);
    return range[LIMIT];
}
/**
 * Implements {@link Transliterator#handleKeyboardTransliterate}.
 * Performs transliteration changing Jamo to Hangul: scans the range
 * [offsets[CURSOR], offsets[LIMIT]) and repeatedly merges the previous
 * character with the current one whenever composeHangul() can combine
 * them (L+V -> LV, LV+T -> LVT).
 *
 * On return offsets[LIMIT] is the exclusive end of the (possibly
 * shortened) range and offsets[CURSOR] is the position scanning reached.
 * When the incoming range is empty the offsets are left untouched.
 *
 * @param text the text to modify in place
 * @param offsets in/out index array; only the CURSOR and LIMIT entries
 * are read and written here
 */
void JamoHangulTransliterator::handleKeyboardTransliterate(Replaceable& text,
                                                           int32_t offsets[3]) const {
    int32_t cursor = offsets[CURSOR];
    int32_t limit = offsets[LIMIT];
    if (cursor >= limit) {
        return;
    }

    // 'last' is always the character immediately before 'cursor'.
    UChar last = filteredCharAt(text, cursor++);
    // One-character scratch string; its single code unit is overwritten
    // with each composed syllable before being handed to the text.
    UnicodeString str = UNICODE_STRING("a", 1);
    while (cursor < limit) {
        UChar c = filteredCharAt(text, cursor);
        UChar replacement = composeHangul(last, c);
        if (replacement != 0) {
            str[0] = replacement;
            text.handleReplaceBetween(cursor-1, cursor+1, str);
            last = replacement;
            // Leave cursor where it is: it now indexes the character after
            // the new syllable, which may still combine with it (LV + T).
            --limit; // two code units became one
        } else {
            // No composition: the current character becomes the new
            // predecessor.  Without this, 'last' would stay stuck on the
            // first character and no later L+V pair could ever compose.
            last = c;
            ++cursor;
        }
    }

    // 'limit' has been kept in step with every replacement, so it is
    // already the exclusive end of the processed range.
    offsets[LIMIT] = limit;
    offsets[CURSOR] = cursor;
}
// These constants are from the Unicode book's algorithm.
// There's no need to make them class members, since they
// are only used here (by composeHangul() below):
//   SBase  - code unit of the first syllable (syllable index 0)
//   LBase  - base subtracted to obtain the leading-jamo index
//   VBase  - base subtracted to obtain the vowel-jamo index
//   TBase  - base subtracted to obtain the trailing-jamo index
//   LCount, VCount, TCount - number of leading / vowel / trailing indices
//   NCount - number of syllables per leading jamo (VCount * TCount)
//   SCount - total number of syllables (LCount * NCount)
#define SBase (0xAC00)
#define LBase (0x1100)
#define VBase (0x1161)
#define TBase (0x11A7)
#define LCount (19)
#define VCount (21)
#define TCount (28)
#define NCount (VCount * TCount) // 588
#define SCount (LCount * NCount) // 11172
/**
 * Return composed character (if it composes), 0 otherwise.
 *
 * Two combinations are recognized:
 *   - leading jamo L followed by vowel jamo V  -> syllable LV
 *   - syllable LV (no trailing jamo) followed by trailing jamo T
 *     -> syllable LVT
 *
 * @param last the preceding character (a leading jamo or an LV syllable)
 * @param ch the current character (a vowel or trailing jamo)
 * @return the composed syllable, or 0x0000 if the pair does not compose
 */
UChar JamoHangulTransliterator::composeHangul(UChar last, UChar ch) {
    // check to see if two current characters are L and V
    int32_t LIndex = last - LBase;
    if (0 <= LIndex && LIndex < LCount) {
        int32_t VIndex = ch - VBase;
        if (0 <= VIndex && VIndex < VCount) {
            // make syllable of form LV
            return (UChar)(SBase + (LIndex * VCount + VIndex) * TCount);
        }
    }

    // check to see if two current characters are LV and T
    int32_t SIndex = last - SBase;
    if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) {
        int32_t TIndex = ch - TBase;
        // Valid trailing indices are 1 .. TCount-1.  Index 0 is TBase
        // itself, which means "no trailing jamo": accepting it would return
        // 'last' unchanged and make the caller drop that character.
        // Index TCount would spill over into the next LV syllable.
        if (0 < TIndex && TIndex < TCount) {
            // make syllable of form LVT
            return (UChar)(last + TIndex);
        }
    }

    // if neither case was true, skip
    return 0x0000;
}
/**
 * Returns the character at index i of text as seen through this
 * transliterator's filter.
 * @param text the text to read from
 * @param i index of the character to fetch
 * @return the character itself when there is no filter or the filter
 * accepts it; otherwise 0xFFFF
 */
UChar JamoHangulTransliterator::filteredCharAt(Replaceable& text, int32_t i) const {
    const UnicodeFilter* activeFilter = getFilter();
    const UChar ch = text.charAt(i);
    if (activeFilter == 0 || activeFilter->isIn(ch)) {
        return ch;
    }
    return (UChar)0xFFFF;
}
/**
* Return the length of the longest context required by this transliterator.
* This is <em>preceding</em> context.
* @return maximum number of preceding context characters this
* transliterator needs to examine; always 3 for this class
* NOTE(review): the reason for the value 3 is not evident from this
* file -- confirm before changing it.
*/
int32_t JamoHangulTransliterator::getMaximumContextLength(void) const {
return 3;
}

View File

@ -24,6 +24,8 @@
#include "unicode/nultrans.h"
#include "unicode/putil.h"
#include "unicode/cpdtrans.h"
#include "unicode/jamohang.h"
#include "unicode/hangjamo.h"
/**
* Dictionary of known transliterators. Keys are <code>String</code>
@ -893,6 +895,8 @@ void Transliterator::initializeCache(void) {
status = U_ZERO_ERROR; // Reset status for following calls
_registerInstance(new HexToUnicodeTransliterator(), status);
_registerInstance(new UnicodeToHexTransliterator(), status);
_registerInstance(new JamoHangulTransliterator(), status);
_registerInstance(new HangulJamoTransliterator(), status);
_registerInstance(new NullTransliterator(), status);
cacheInitialized = TRUE;

View File

@ -0,0 +1,91 @@
/*
**********************************************************************
* Copyright (c) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/17/2000 aliu Ported from Java.
**********************************************************************
*/
#ifndef HANGJAMO_H
#define HANGJAMO_H
#include "unicode/translit.h"
/**
* A transliterator that converts Hangul to Jamo.
* Each Hangul syllable in the processed range is replaced by its
* leading, vowel and (if present) trailing jamo.
*
* @author Mark Davis
* @version $RCSfile: hangjamo.h,v $ $Revision: 1.1 $ $Date: 2000/01/18 01:55:52 $
*/
class U_I18N_API HangulJamoTransliterator : public Transliterator {
/**
* ID for this transliterator.
*/
static const char* _ID;
public:
/**
* Constructs a transliterator.
* @param adoptedFilter optional filter passed to the Transliterator
* base class; defaults to 0 (no filter)
*/
HangulJamoTransliterator(UnicodeFilter* adoptedFilter = 0);
/**
* Destructor.
*/
virtual ~HangulJamoTransliterator();
/**
* Copy constructor.
*/
HangulJamoTransliterator(const HangulJamoTransliterator&);
/**
* Assignment operator.
*/
HangulJamoTransliterator& operator=(const HangulJamoTransliterator&);
/**
* Transliterator API.
* @return a newly allocated copy of this object
*/
Transliterator* clone(void) const;
/**
* Transliterates a segment of a string. <code>Transliterator</code> API.
* @param text the string to be transliterated
* @param start the beginning index, inclusive; <code>0 <= start
* <= limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit
* <= text.length()</code>.
* @return the new limit index
*/
virtual int32_t transliterate(Replaceable &text,
int32_t start, int32_t limit) const;
/**
* Implements {@link Transliterator#handleKeyboardTransliterate}.
* @param text the text to modify in place
* @param offsets in/out array of three indices into text
*/
virtual void handleKeyboardTransliterate(Replaceable& text,
int32_t offsets[3]) const;
/**
* Return the length of the longest context required by this transliterator.
* This is <em>preceding</em> context.
* @return maximum number of preceding context characters this
* transliterator needs to examine
*/
virtual int32_t getMaximumContextLength(void) const;
private:
/**
* Decomposes a Hangul syllable into jamo.
* @param s the character to decompose
* @param result receives the jamo sequence when s is a syllable
* @return TRUE if s was a Hangul syllable, FALSE otherwise
*/
static bool_t decomposeHangul(UChar s, UnicodeString& result);
/**
* Returns the character at index i of text, or 0xFFFF if this
* transliterator has a filter that rejects it.
*/
UChar filteredCharAt(Replaceable& text, int32_t i) const;
};
// Destructor: empty body, defined inline in the header.
inline HangulJamoTransliterator::~HangulJamoTransliterator() {}
#endif

View File

@ -0,0 +1,95 @@
/*
**********************************************************************
* Copyright (c) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/17/2000 aliu Ported from Java.
**********************************************************************
*/
#ifndef JAMOHANG_H
#define JAMOHANG_H
#include "unicode/translit.h"
/**
* A transliterator that converts Jamo to Hangul.
* Adjacent conjoining jamo in the processed range are combined into
* Hangul syllables where possible.
*
* @author Mark Davis
* @version $RCSfile: jamohang.h,v $ $Revision: 1.1 $ $Date: 2000/01/18 01:55:52 $
*/
class U_I18N_API JamoHangulTransliterator : public Transliterator {
/**
* ID for this transliterator.
*/
static const char* _ID;
public:
/**
* Constructs a transliterator.
* @param adoptedFilter optional filter passed to the Transliterator
* base class; defaults to 0 (no filter)
*/
JamoHangulTransliterator(UnicodeFilter* adoptedFilter = 0);
/**
* Destructor.
*/
virtual ~JamoHangulTransliterator();
/**
* Copy constructor.
*/
JamoHangulTransliterator(const JamoHangulTransliterator&);
/**
* Assignment operator.
*/
JamoHangulTransliterator& operator=(const JamoHangulTransliterator&);
/**
* Transliterator API.
* @return a newly allocated copy of this object
*/
Transliterator* clone(void) const;
/**
* Transliterates a segment of a string. <code>Transliterator</code> API.
* @param text the string to be transliterated
* @param start the beginning index, inclusive; <code>0 <= start
* <= limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit
* <= text.length()</code>.
* @return the new limit index
*/
virtual int32_t transliterate(Replaceable &text,
int32_t start, int32_t limit) const;
/**
* Implements {@link Transliterator#handleKeyboardTransliterate}.
* @param text the text to modify in place
* @param offsets in/out array of three indices into text
*/
virtual void handleKeyboardTransliterate(Replaceable& text,
int32_t offsets[3]) const;
/**
* Return the length of the longest context required by this transliterator.
* This is <em>preceding</em> context.
* @return maximum number of preceding context characters this
* transliterator needs to examine
*/
virtual int32_t getMaximumContextLength(void) const;
private:
/**
* Return composed character (if it composes)
* 0 otherwise
* @param last the preceding character
* @param ch the current character
*/
static UChar composeHangul(UChar last, UChar ch);
/**
* Returns the character at index i of text, or 0xFFFF if this
* transliterator has a filter that rejects it.
*/
UChar filteredCharAt(Replaceable& text, int32_t i) const;
};
// Destructor: empty body, defined inline in the header.
inline JamoHangulTransliterator::~JamoHangulTransliterator() {}
#endif