86 lines
3.2 KiB
C
86 lines
3.2 KiB
C
|
/*
|
||
|
*******************************************************************************
|
||
|
* Copyright (C) 2001, International Business Machines
|
||
|
* Corporation and others. All Rights Reserved.
|
||
|
*******************************************************************************
|
||
|
* file name: genjp.h
|
||
|
* encoding: US-ASCII
|
||
|
*
|
||
|
* Modification history
|
||
|
* Date Name Comments
|
||
|
* 10/13/2001 weiv created
|
||
|
*
|
||
|
* The GenJP class is useful for generating various data related to the Japanese language.
|
||
|
* Right now, it uses ICU to generate rules for JIS X 4061 compliant collation.
|
||
|
* Also, it is useful for getting compatibility versions of the characters.
|
||
|
*/
|
||
|
|
||
|
#ifndef ICU_GENJP
|
||
|
#define ICU_GENJP
|
||
|
|
||
|
#include <stdio.h>
|
||
|
|
||
|
#include "unicode/utypes.h"
|
||
|
#include "unicode/uchar.h"
|
||
|
#include "unicode/unorm.h"
|
||
|
#include "ucmpe32.h"
|
||
|
#include "cmemory.h"
|
||
|
|
||
|
static const uint32_t _bufferSize = 256;
|
||
|
static const UChar _vowels[] = { 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0 }; // katakana AIUEO
|
||
|
static const UChar _prolongedSoundMark = 0x30FC;
|
||
|
static const UChar _voicedMark = 0x3099;
|
||
|
static const UChar _hiraganaIterationMark = 0x309D;
|
||
|
static const UChar _hiraganaVoicedIterationMark = 0x309E;
|
||
|
static const UChar _katakanaIterationMark = 0x30FD;
|
||
|
static const UChar _katakanaVoicedIterationMark = 0x30FE;
|
||
|
|
||
|
static const UChar _hiraganaStart = 0x3041;
|
||
|
static const UChar _hiraganaEnd = 0x3094;
|
||
|
static const UChar _katakanaStart = 0x30A1;
|
||
|
static const UChar _katakanaEnd = 0x30FA;
|
||
|
|
||
|
static const char *_tertiaryLess = "\"<<<";
|
||
|
static const char *_equal = " \"=";
|
||
|
|
||
|
|
||
|
|
||
|
class GenJP {
|
||
|
public:
|
||
|
GenJP();
|
||
|
~GenJP();
|
||
|
UChar getHalf(UChar u); // Gets the compatibility version of an UChar.
|
||
|
// The structure holds halfwidth and fullwidth compatibility characters.
|
||
|
UBool isSemivoiced(UChar ch, UErrorCode &status); // Is a code point semivoiced
|
||
|
UBool isVoiced(UChar ch, UErrorCode &status); // Is a code point voiced
|
||
|
|
||
|
void writeHeader(UErrorCode &status);
|
||
|
void processLengthMark(UErrorCode &status); // This will do small vowels and generate rules for the length mark
|
||
|
void processIterationMark(UErrorCode &status); // This will generate the rules for the iteration mark
|
||
|
void processCompatibility(UErrorCode &status); // This will generate the rules for making compatibility chars
|
||
|
// equal with their normal counter part (only halfwidth and fullwidth).
|
||
|
void equalKatakanaToHiragana(UErrorCode &status); // This will generate the rules &K=K=hK=H
|
||
|
void printOutKanji(UErrorCode &status); // Just prints out Kanji ordering...
|
||
|
void writeFooter(UErrorCode &status);
|
||
|
|
||
|
private:
|
||
|
const char *getRelation();
|
||
|
UChar getHiragana(UChar katakana);
|
||
|
const char *getName(const UChar ch, UErrorCode &status);
|
||
|
char *printUnicodeStuff(UChar *zTStuff, char *resBuf);
|
||
|
void processIterationMark(UChar katakana, UErrorCode &status);
|
||
|
void processVoicedIterationMark(UChar katakana, UErrorCode &status);
|
||
|
void processVoicedKana(UChar katakana, UErrorCode &status);
|
||
|
void processSemivoicedKana(UChar katakana, UErrorCode &status);
|
||
|
CompactEIntArray *kanaToHalf;
|
||
|
FILE *out;
|
||
|
char *nameBuff;
|
||
|
UBool wasReset;
|
||
|
};
|
||
|
|
||
|
// Converts a katakana code point to the hiragana code point at the same
// position in its block by subtracting the distance between the two block
// starts (_katakanaStart - _hiraganaStart). No range check is performed;
// the caller is responsible for passing a katakana that has a hiragana
// counterpart.
inline UChar GenJP::getHiragana(UChar katakana) {
    const int kanaBlockDistance = _katakanaStart - _hiraganaStart;
    return (UChar)(katakana - kanaBlockDistance);
}
|
||
|
|
||
|
#endif
|