96 lines
2.9 KiB
C
96 lines
2.9 KiB
C
|
/*
|
||
|
*******************************************************************************
|
||
|
* Copyright (C) 2012-2014, International Business Machines
|
||
|
* Corporation and others. All Rights Reserved.
|
||
|
*******************************************************************************
|
||
|
* collationbasedatabuilder.h
|
||
|
*
|
||
|
* created on: 2012aug11
|
||
|
* created by: Markus W. Scherer
|
||
|
*/
|
||
|
|
||
|
#ifndef __COLLATIONBASEDATABUILDER_H__
|
||
|
#define __COLLATIONBASEDATABUILDER_H__
|
||
|
|
||
|
#include "unicode/utypes.h"
|
||
|
|
||
|
#if !UCONFIG_NO_COLLATION
|
||
|
|
||
|
#include "unicode/uniset.h"
|
||
|
#include "unicode/unistr.h"
|
||
|
#include "collation.h"
|
||
|
#include "collationdata.h"
|
||
|
#include "collationdatabuilder.h"
|
||
|
#include "normalizer2impl.h"
|
||
|
#include "utrie2.h"
|
||
|
#include "uvectr32.h"
|
||
|
#include "uvectr64.h"
|
||
|
#include "uvector.h"
|
||
|
|
||
|
U_NAMESPACE_BEGIN
|
||
|
|
||
|
/**
|
||
|
* Low-level base CollationData builder.
|
||
|
*/
|
||
|
class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder {
|
||
|
public:
|
||
|
CollationBaseDataBuilder(UErrorCode &errorCode);
|
||
|
|
||
|
virtual ~CollationBaseDataBuilder();
|
||
|
|
||
|
void init(UErrorCode &errorCode);
|
||
|
|
||
|
/**
|
||
|
* Sets the Han ranges as ranges of offset CE32s.
|
||
|
* Note: Unihan extension A sorts after the other BMP ranges.
|
||
|
* See http://www.unicode.org/reports/tr10/#Implicit_Weights
|
||
|
*
|
||
|
* @param ranges array of ranges of [:Unified_Ideograph:] in collation order,
|
||
|
* as (start, end) code point pairs
|
||
|
* @param length number of code points (not pairs)
|
||
|
* @param errorCode in/out error code
|
||
|
*/
|
||
|
void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode);
|
||
|
|
||
|
void setNumericPrimary(uint32_t np) { numericPrimary = np; }
|
||
|
|
||
|
virtual UBool isCompressibleLeadByte(uint32_t b) const;
|
||
|
|
||
|
void setCompressibleLeadByte(uint32_t b);
|
||
|
|
||
|
static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
|
||
|
static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
|
||
|
|
||
|
virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
|
||
|
|
||
|
void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
|
||
|
void addRootElement(int64_t ce, UErrorCode &errorCode);
|
||
|
|
||
|
void addReorderingGroup(uint32_t firstByte, uint32_t lastByte,
|
||
|
const UnicodeString &groupScripts,
|
||
|
UErrorCode &errorCode);
|
||
|
|
||
|
virtual void build(CollationData &data, UErrorCode &errorCode);
|
||
|
|
||
|
void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode);
|
||
|
|
||
|
private:
|
||
|
int32_t writeRootElementsRange(
|
||
|
uint32_t prevPrimary, uint32_t p, int32_t i,
|
||
|
UVector32 &table, UErrorCode &errorCode);
|
||
|
|
||
|
// Flags for which primary-weight lead bytes are compressible.
|
||
|
UBool compressibleBytes[256];
|
||
|
uint32_t numericPrimary;
|
||
|
uint32_t firstHanPrimary;
|
||
|
uint32_t lastHanPrimary;
|
||
|
int32_t hanStep;
|
||
|
UVector64 rootElements;
|
||
|
UnicodeString scripts;
|
||
|
};
|
||
|
|
||
|
U_NAMESPACE_END
|
||
|
|
||
|
#endif // !UCONFIG_NO_COLLATION
|
||
|
#endif // __COLLATIONBASEDATABUILDER_H__
|