ICU-199 add Jamo-Hangul and Hangul-Jamo
X-SVN-Rev: 618
This commit is contained in:
parent
9058e22250
commit
44562e956f
135
icu4c/source/i18n/hangjamo.cpp
Normal file
135
icu4c/source/i18n/hangjamo.cpp
Normal file
@ -0,0 +1,135 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 01/17/2000 aliu Ported from Java.
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "unicode/hangjamo.h"
|
||||
#include "unicode/rep.h"
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/unicode.h"
|
||||
|
||||
// These constants are from the Unicode book's algorithm.
// decomposeHangul() below uses them to split one syllable code unit
// into an L code unit, a V code unit and an optional T code unit.
|
||||
|
||||
#define SBase (0xAC00) // first code unit decomposeHangul() accepts
|
||||
#define LBase (0x1100) // base the L index (SIndex / NCount) is added to
|
||||
#define VBase (0x1161) // base the V index is added to
|
||||
#define TBase (0x11A7) // base the T index is added to; T == TBase means "no T"
|
||||
#define LCount (19) // number of L indices (only used to derive SCount)
|
||||
#define VCount (21) // number of V indices (only used to derive NCount)
|
||||
#define TCount (28) // number of T indices, including the "no T" index 0
|
||||
#define NCount (VCount * TCount) // 588
|
||||
#define SCount (LCount * NCount) // 11172
|
||||
|
||||
/**
|
||||
* ID for this transliterator; the constructor passes it to the
* Transliterator base class.
|
||||
*/
|
||||
const char* HangulJamoTransliterator::_ID = "Hangul-Jamo";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
HangulJamoTransliterator::HangulJamoTransliterator(UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
HangulJamoTransliterator::HangulJamoTransliterator(const HangulJamoTransliterator& o) :
|
||||
Transliterator(o) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
HangulJamoTransliterator& HangulJamoTransliterator::operator=(
|
||||
const HangulJamoTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterator API.
|
||||
*/
|
||||
Transliterator* HangulJamoTransliterator::clone(void) const {
|
||||
return new HangulJamoTransliterator(*this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
||||
* @param text the string to be transliterated
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= text.length()</code>.
|
||||
* @return the new limit index
|
||||
*/
|
||||
int32_t HangulJamoTransliterator::transliterate(Replaceable& text,
|
||||
int32_t start, int32_t limit) const {
|
||||
int32_t offsets[3] = { start, limit, start };
|
||||
handleKeyboardTransliterate(text, offsets);
|
||||
return offsets[LIMIT];
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
||||
*/
|
||||
void HangulJamoTransliterator::handleKeyboardTransliterate(Replaceable& text,
|
||||
int32_t offsets[3]) const {
|
||||
int32_t cursor = offsets[CURSOR];
|
||||
int32_t limit = offsets[LIMIT];
|
||||
|
||||
UnicodeString replacement;
|
||||
while (cursor < limit) {
|
||||
UChar c = filteredCharAt(text, cursor);
|
||||
if (decomposeHangul(c, replacement)) {
|
||||
text.handleReplaceBetween(cursor, cursor+1, replacement);
|
||||
cursor += replacement.length(); // skip over replacement
|
||||
limit += replacement.length() - 1; // fix up limit
|
||||
} else {
|
||||
++cursor;
|
||||
}
|
||||
}
|
||||
|
||||
offsets[LIMIT] = limit;
|
||||
offsets[CURSOR] = cursor;
|
||||
}
|
||||
|
||||
/**
 * Splits one code unit into its L, V and (optional) T parts.
 * @param s the code unit to decompose
 * @param result cleared and filled with two or three code units when
 *               s lies in [SBase, SBase + SCount); untouched otherwise
 * @return TRUE if s was decomposed, FALSE if it is outside that range
 */
bool_t HangulJamoTransliterator::decomposeHangul(UChar s, UnicodeString& result)
{
    const int32_t syllable = s - SBase;
    if (syllable < 0 || syllable >= SCount) {
        return FALSE;
    }

    const int32_t lead  = LBase + syllable / NCount;
    const int32_t vowel = VBase + (syllable % NCount) / TCount;
    const int32_t trail = TBase + syllable % TCount;

    result.truncate(0);
    result.append((UChar)lead);
    result.append((UChar)vowel);
    // A trail value equal to TBase stands for "no T part".
    if (trail != TBase) {
        result.append((UChar)trail);
    }
    return TRUE;
}
|
||||
|
||||
/**
 * Reads the code unit at index i, honouring the filter from getFilter().
 * @param text the text to read from
 * @param i index of the code unit
 * @return text.charAt(i) when there is no filter or the filter's isIn()
 *         accepts it; 0xFFFF otherwise
 */
UChar HangulJamoTransliterator::filteredCharAt(Replaceable& text, int32_t i) const
{
    const UnicodeFilter* filter = getFilter();
    if (filter == 0) {
        return text.charAt(i);
    }
    UChar c = text.charAt(i);
    return filter->isIn(c) ? c : (UChar)0xFFFF;
}
|
||||
|
||||
/**
|
||||
* Return the length of the longest context required by this transliterator.
|
||||
* This is <em>preceding</em> context.
|
||||
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
||||
* @return maximum number of preceding context characters this
|
||||
* transliterator needs to examine
|
||||
*/
|
||||
int32_t HangulJamoTransliterator::getMaximumContextLength(void) const {
|
||||
return 0;
|
||||
}
|
@ -165,10 +165,18 @@ SOURCE=.\gregocal.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\hangjamo.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\hextouni.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\jamohang.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\mergecol.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -718,6 +726,25 @@ InputPath=.\unicode\gregocal.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\hangjamo.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\hangjamo.h
|
||||
|
||||
"..\..\include\unicode\hangjamo.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\hangjamo.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\hextouni.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
@ -745,6 +772,25 @@ InputPath=.\unicode\hextouni.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\jamohang.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\jamohang.h
|
||||
|
||||
"..\..\include\unicode\jamohang.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\jamohang.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\mergecol.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
158
icu4c/source/i18n/jamohang.cpp
Normal file
158
icu4c/source/i18n/jamohang.cpp
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 01/17/2000 aliu Ported from Java.
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "unicode/jamohang.h"
|
||||
#include "unicode/rep.h"
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/unicode.h"
|
||||
|
||||
/**
|
||||
* ID for this transliterator; the constructor passes it to the
* Transliterator base class.
|
||||
*/
|
||||
const char* JamoHangulTransliterator::_ID = "Jamo-Hangul";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
JamoHangulTransliterator::JamoHangulTransliterator(UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
JamoHangulTransliterator::JamoHangulTransliterator(const JamoHangulTransliterator& o) :
|
||||
Transliterator(o) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
JamoHangulTransliterator& JamoHangulTransliterator::operator=(
|
||||
const JamoHangulTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterator API.
|
||||
*/
|
||||
Transliterator* JamoHangulTransliterator::clone(void) const {
|
||||
return new JamoHangulTransliterator(*this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
||||
* @param text the string to be transliterated
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= text.length()</code>.
|
||||
* @return the new limit index
|
||||
*/
|
||||
int32_t JamoHangulTransliterator::transliterate(Replaceable& text,
|
||||
int32_t start, int32_t limit) const {
|
||||
int32_t offsets[3] = { start, limit, start };
|
||||
handleKeyboardTransliterate(text, offsets);
|
||||
return offsets[LIMIT];
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
||||
*/
|
||||
void JamoHangulTransliterator::handleKeyboardTransliterate(Replaceable& text,
|
||||
int32_t offsets[3]) const {
|
||||
/**
|
||||
* Performs transliteration changing Jamo to Hangul
|
||||
*/
|
||||
int32_t cursor = offsets[CURSOR];
|
||||
int32_t limit = offsets[LIMIT];
|
||||
if (cursor >= limit) {
|
||||
return;
|
||||
}
|
||||
|
||||
// get last character
|
||||
UChar last = filteredCharAt(text, cursor++);
|
||||
UnicodeString str = UNICODE_STRING("a", 1);
|
||||
while (cursor < limit) {
|
||||
UChar c = filteredCharAt(text, cursor);
|
||||
UChar replacement = composeHangul(last, c);
|
||||
if (replacement != 0) {
|
||||
str[0] = replacement;
|
||||
text.handleReplaceBetween(cursor-1, cursor+1, str);
|
||||
last = replacement;
|
||||
// leave cursor where it is
|
||||
--limit; // fix up limit
|
||||
} else {
|
||||
++cursor;
|
||||
}
|
||||
}
|
||||
|
||||
offsets[LIMIT] = limit + 1;
|
||||
offsets[CURSOR] = cursor;
|
||||
}
|
||||
|
||||
// These constants are from the Unicode book's algorithm.
|
||||
// There's no need to make them class members, since they
|
||||
// are only used here.
// composeHangul() below uses them to range-check L, V and T code units
// and to compute the composed syllable.
|
||||
|
||||
#define SBase (0xAC00) // code unit of the syllable with L, V and T index 0
|
||||
#define LBase (0x1100) // subtracted from a code unit to get its L index
|
||||
#define VBase (0x1161) // subtracted from a code unit to get its V index
|
||||
#define TBase (0x11A7) // subtracted from a code unit to get its T index
|
||||
#define LCount (19) // L indices run from 0 to LCount - 1
|
||||
#define VCount (21) // V indices run from 0 to VCount - 1
|
||||
#define TCount (28) // syllables per LV pair; LV syllables are TCount apart
|
||||
#define NCount (VCount * TCount) // 588
|
||||
#define SCount (LCount * NCount) // 11172
|
||||
|
||||
/**
|
||||
* Return composed character (if it composes)
|
||||
* 0 otherwise
|
||||
*/
|
||||
UChar JamoHangulTransliterator::composeHangul(UChar last, UChar ch) {
|
||||
// check to see if two current characters are L and V
|
||||
int32_t LIndex = last - LBase;
|
||||
if (0 <= LIndex && LIndex < LCount) {
|
||||
int32_t VIndex = ch - VBase;
|
||||
if (0 <= VIndex && VIndex < VCount) {
|
||||
// make syllable of form LV
|
||||
return (UChar)(SBase + (LIndex * VCount + VIndex) * TCount);
|
||||
}
|
||||
}
|
||||
// check to see if two current characters are LV and T
|
||||
int32_t SIndex = last - SBase;
|
||||
if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) {
|
||||
int32_t TIndex = ch - TBase;
|
||||
if (0 <= TIndex && TIndex <= TCount) {
|
||||
// make syllable of form LVT
|
||||
return (UChar)(last + TIndex);
|
||||
}
|
||||
}
|
||||
// if neither case was true, skip
|
||||
return 0x0000;
|
||||
}
|
||||
|
||||
/**
 * Reads the code unit at index i, honouring the filter from getFilter().
 * @param text the text to read from
 * @param i index of the code unit
 * @return text.charAt(i) when there is no filter or the filter's isIn()
 *         accepts it; 0xFFFF otherwise
 */
UChar JamoHangulTransliterator::filteredCharAt(Replaceable& text, int32_t i) const
{
    const UnicodeFilter* filter = getFilter();
    if (filter == 0) {
        return text.charAt(i);
    }
    UChar c = text.charAt(i);
    return filter->isIn(c) ? c : (UChar)0xFFFF;
}
|
||||
|
||||
/**
|
||||
* Return the length of the longest context required by this transliterator.
|
||||
* This is <em>preceding</em> context.
|
||||
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
||||
* @return maximum number of preceding context characters this
|
||||
* transliterator needs to examine
|
||||
*/
|
||||
int32_t JamoHangulTransliterator::getMaximumContextLength(void) const {
|
||||
return 3;
|
||||
}
|
@ -24,6 +24,8 @@
|
||||
#include "unicode/nultrans.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/cpdtrans.h"
|
||||
#include "unicode/jamohang.h"
|
||||
#include "unicode/hangjamo.h"
|
||||
|
||||
/**
|
||||
* Dictionary of known transliterators. Keys are <code>String</code>
|
||||
@ -893,6 +895,8 @@ void Transliterator::initializeCache(void) {
|
||||
status = U_ZERO_ERROR; // Reset status for following calls
|
||||
_registerInstance(new HexToUnicodeTransliterator(), status);
|
||||
_registerInstance(new UnicodeToHexTransliterator(), status);
|
||||
_registerInstance(new JamoHangulTransliterator(), status);
|
||||
_registerInstance(new HangulJamoTransliterator(), status);
|
||||
_registerInstance(new NullTransliterator(), status);
|
||||
|
||||
cacheInitialized = TRUE;
|
||||
|
91
icu4c/source/i18n/unicode/hangjamo.h
Normal file
91
icu4c/source/i18n/unicode/hangjamo.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 01/17/2000 aliu Ported from Java.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef HANGJAMO_H
|
||||
#define HANGJAMO_H
|
||||
|
||||
#include "unicode/translit.h"
|
||||
|
||||
/**
|
||||
* A transliterator that converts Hangul to Jamo.
|
||||
*
|
||||
* @author Mark Davis
|
||||
* @version $RCSfile: hangjamo.h,v $ $Revision: 1.1 $ $Date: 2000/01/18 01:55:52 $
|
||||
*/
|
||||
class U_I18N_API HangulJamoTransliterator : public Transliterator {
|
||||
|
||||
/**
|
||||
* ID for this transliterator.
|
||||
*/
|
||||
static const char* _ID;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
HangulJamoTransliterator(UnicodeFilter* adoptedFilter = 0);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~HangulJamoTransliterator();
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
HangulJamoTransliterator(const HangulJamoTransliterator&);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
HangulJamoTransliterator& operator=(const HangulJamoTransliterator&);
|
||||
|
||||
/**
|
||||
* Transliterator API.
|
||||
*/
|
||||
Transliterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
||||
* @param text the string to be transliterated
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= text.length()</code>.
|
||||
* @return the new limit index
|
||||
*/
|
||||
virtual int32_t transliterate(Replaceable &text,
|
||||
int32_t start, int32_t limit) const;
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
||||
*/
|
||||
virtual void handleKeyboardTransliterate(Replaceable& text,
|
||||
int32_t offsets[3]) const;
|
||||
|
||||
/**
|
||||
* Return the length of the longest context required by this transliterator.
|
||||
* This is <em>preceding</em> context.
|
||||
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
||||
* @return maximum number of preceding context characters this
|
||||
* transliterator needs to examine
|
||||
*/
|
||||
virtual int32_t getMaximumContextLength(void) const;
|
||||
|
||||
private:
|
||||
|
||||
static bool_t decomposeHangul(UChar s, UnicodeString& result);
|
||||
|
||||
UChar filteredCharAt(Replaceable& text, int32_t i) const;
|
||||
};
|
||||
|
||||
inline HangulJamoTransliterator::~HangulJamoTransliterator() {}
|
||||
|
||||
#endif
|
95
icu4c/source/i18n/unicode/jamohang.h
Normal file
95
icu4c/source/i18n/unicode/jamohang.h
Normal file
@ -0,0 +1,95 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 01/17/2000 aliu Ported from Java.
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef JAMOHANG_H
|
||||
#define JAMOHANG_H
|
||||
|
||||
#include "unicode/translit.h"
|
||||
|
||||
/**
|
||||
* A transliterator that converts Jamo to Hangul.
|
||||
*
|
||||
* @author Mark Davis
|
||||
* @version $RCSfile: jamohang.h,v $ $Revision: 1.1 $ $Date: 2000/01/18 01:55:52 $
|
||||
*/
|
||||
class U_I18N_API JamoHangulTransliterator : public Transliterator {
|
||||
|
||||
/**
|
||||
* ID for this transliterator.
|
||||
*/
|
||||
static const char* _ID;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
JamoHangulTransliterator(UnicodeFilter* adoptedFilter = 0);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~JamoHangulTransliterator();
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
JamoHangulTransliterator(const JamoHangulTransliterator&);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*/
|
||||
JamoHangulTransliterator& operator=(const JamoHangulTransliterator&);
|
||||
|
||||
/**
|
||||
* Transliterator API.
|
||||
*/
|
||||
Transliterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Transliterates a segment of a string. <code>Transliterator</code> API.
|
||||
* @param text the string to be transliterated
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= text.length()</code>.
|
||||
* @return the new limit index
|
||||
*/
|
||||
virtual int32_t transliterate(Replaceable &text,
|
||||
int32_t start, int32_t limit) const;
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleKeyboardTransliterate}.
|
||||
*/
|
||||
virtual void handleKeyboardTransliterate(Replaceable& text,
|
||||
int32_t offsets[3]) const;
|
||||
|
||||
/**
|
||||
* Return the length of the longest context required by this transliterator.
|
||||
* This is <em>preceding</em> context.
|
||||
* @param direction either <code>FORWARD</code> or <code>REVERSE</code>
|
||||
* @return maximum number of preceding context characters this
|
||||
* transliterator needs to examine
|
||||
*/
|
||||
virtual int32_t getMaximumContextLength(void) const;
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* Return composed character (if it composes)
|
||||
* 0 otherwise
|
||||
*/
|
||||
static UChar composeHangul(UChar last, UChar ch);
|
||||
|
||||
UChar filteredCharAt(Replaceable& text, int32_t i) const;
|
||||
};
|
||||
|
||||
inline JamoHangulTransliterator::~JamoHangulTransliterator() {}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user