2016-06-15 18:58:17 +00:00
|
|
|
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
2009-03-09 23:40:15 +00:00
|
|
|
/*
|
|
|
|
******************************************************************************
|
|
|
|
*
|
2016-05-31 21:45:07 +00:00
|
|
|
* Copyright (C) 2008-2016, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
2009-03-09 23:40:15 +00:00
|
|
|
*
|
|
|
|
******************************************************************************
|
2009-09-16 01:08:54 +00:00
|
|
|
* file name: uspoof_conf.h
|
2009-03-09 23:40:15 +00:00
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2009Jan05
|
|
|
|
* created by: Andy Heninger
|
|
|
|
*
|
|
|
|
* Internal classes for compiling confusable data into its binary (runtime) form.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __USPOOF_BUILDCONF_H__
|
|
|
|
#define __USPOOF_BUILDCONF_H__
|
|
|
|
|
2009-05-05 02:03:27 +00:00
|
|
|
#if !UCONFIG_NO_NORMALIZATION
|
|
|
|
|
2009-05-04 05:45:27 +00:00
|
|
|
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
|
|
|
|
2016-02-24 00:49:51 +00:00
|
|
|
#include "unicode/uregex.h"
|
|
|
|
#include "uhash.h"
|
2009-03-09 23:40:15 +00:00
|
|
|
#include "uspoof_impl.h"
|
|
|
|
|
2009-10-01 22:52:39 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2009-03-09 23:40:15 +00:00
|
|
|
// SPUString
|
|
|
|
// Holds a string that is the result of one of the mappings defined
|
|
|
|
// by the confusable mapping data (confusables.txt from Unicode.org)
|
|
|
|
// Instances of SPUString exist during the compilation process only.
|
|
|
|
|
|
|
|
struct SPUString : public UMemory {
|
|
|
|
UnicodeString *fStr; // The actual string.
|
|
|
|
int32_t fStrTableIndex; // Index into the final runtime data for this string.
|
|
|
|
// (or, for length 1, the single string char itself,
|
|
|
|
// there being no string table entry for it.)
|
|
|
|
SPUString(UnicodeString *s);
|
|
|
|
~SPUString();
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// String Pool A utility class for holding the strings that are the result of
|
|
|
|
// the spoof mappings. These strings will ultimately end up in the
|
|
|
|
// run-time String Table.
|
|
|
|
// This is sort of like a sorted set of strings, except that ICU's anemic
|
|
|
|
// built-in collections don't support those, so it is implemented with a
|
|
|
|
// combination of a uhash and a UVector.
|
|
|
|
|
|
|
|
|
|
|
|
class SPUStringPool : public UMemory {
|
|
|
|
public:
|
|
|
|
SPUStringPool(UErrorCode &status);
|
|
|
|
~SPUStringPool();
|
|
|
|
|
|
|
|
// Add a string. Return the string from the table.
|
|
|
|
// If the input parameter string is already in the table, delete the
|
|
|
|
// input parameter and return the existing string.
|
|
|
|
SPUString *addString(UnicodeString *src, UErrorCode &status);
|
|
|
|
|
|
|
|
|
|
|
|
// Get the n-th string in the collection.
|
|
|
|
SPUString *getByIndex(int32_t i);
|
|
|
|
|
|
|
|
// Sort the contents; affects the ordering of getByIndex().
|
|
|
|
void sort(UErrorCode &status);
|
|
|
|
|
|
|
|
int32_t size();
|
|
|
|
|
|
|
|
private:
|
|
|
|
UVector *fVec; // Elements are SPUString *
|
|
|
|
UHashtable *fHash; // Key: UnicodeString Value: SPUString
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// class ConfusabledataBuilder
|
|
|
|
// An instance of this class exists while the confusable data is being built from source.
|
|
|
|
// It encapsulates the intermediate data structures that are used for building.
|
|
|
|
// It exports one static function, to do a confusable data build.
|
|
|
|
|
|
|
|
class ConfusabledataBuilder : public UMemory {
|
|
|
|
private:
|
|
|
|
SpoofImpl *fSpoofImpl;
|
|
|
|
UChar *fInput;
|
|
|
|
UHashtable *fSLTable;
|
|
|
|
UHashtable *fSATable;
|
|
|
|
UHashtable *fMLTable;
|
|
|
|
UHashtable *fMATable;
|
|
|
|
UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
|
|
|
|
|
|
|
|
// The binary data is first assembled into the following four collections, then
|
|
|
|
// copied to its final raw-memory destination.
|
|
|
|
UVector *fKeyVec;
|
|
|
|
UVector *fValueVec;
|
|
|
|
UnicodeString *fStringTable;
|
|
|
|
UVector *fStringLengthsTable;
|
|
|
|
|
|
|
|
SPUStringPool *stringPool;
|
|
|
|
URegularExpression *fParseLine;
|
|
|
|
URegularExpression *fParseHexNum;
|
|
|
|
int32_t fLineNum;
|
|
|
|
|
|
|
|
ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
|
|
|
|
~ConfusabledataBuilder();
|
|
|
|
void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
|
|
|
|
|
|
|
|
// Add an entry to the key and value tables being built
|
|
|
|
// input: data from SLTable, MATable, etc.
|
|
|
|
// outut: entry added to fKeyVec and fValueVec
|
|
|
|
void addKeyEntry(UChar32 keyChar, // The key character
|
|
|
|
UHashtable *table, // The table, one of SATable, MATable, etc.
|
|
|
|
int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
|
|
|
|
UErrorCode &status);
|
|
|
|
|
|
|
|
// From an index into fKeyVec & fValueVec
|
|
|
|
// get a UnicodeString with the corresponding mapping.
|
2011-01-25 23:38:42 +00:00
|
|
|
UnicodeString getMapping(int32_t index);
|
2009-03-09 23:40:15 +00:00
|
|
|
|
|
|
|
// Populate the final binary output data array with the compiled data.
|
|
|
|
void outputData(UErrorCode &status);
|
|
|
|
|
|
|
|
public:
|
|
|
|
static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
|
|
|
|
int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
|
|
|
|
};
|
2009-10-01 22:52:39 +00:00
|
|
|
U_NAMESPACE_END
|
2009-03-09 23:40:15 +00:00
|
|
|
|
2009-05-05 02:03:27 +00:00
|
|
|
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
|
2009-05-04 05:45:27 +00:00
|
|
|
#endif // !UCONFIG_NO_NORMALIZATION
|
|
|
|
#endif // __USPOOF_BUILDCONF_H__
|