/* ******************************************************************************* * * Copyright (C) 2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* */ // file name: genregexcasing.cpp // // Program to generate the casing data for use by ICU regular expressions. // The data declarations output when running this program are to be copied // into the file i18n/regexcmp.h // // See the function RegexCompile::findCaseInsensitiveStarters() for more explanation. #include "unicode/uniset.h" #include "unicode/usetiter.h" #include "iostream" #include #include #include #include using namespace std; std::string sstring(const UnicodeString &us) { string retString; us.toUTF8String(retString); return retString; } int main() { std::map> cmap; for (UChar32 cp = 0; cp<=0x10ffff; cp++) { UnicodeSet s(cp, cp); s.closeOver(USET_CASE_INSENSITIVE); UnicodeSetIterator setIter(s); while (setIter.next()) { if (!setIter.isString()) { continue; } const UnicodeString &str = setIter.getString(); cout << "Got a string for \"" << sstring(UnicodeString(cp)) << "\" [\\u" << hex << cp << "]\n"; cout << " \"" << sstring(str) << "\" ["; for (int32_t j=0; j data; for (const auto &mapPair: cmap) { Item dataForCP; dataForCP.fCP = mapPair.first; dataForCP.fStrIndex = outString.length(); for (UChar32 valCP: mapPair.second) { outString.append(valCP); dataForCP.fCount++; } data.push_back(dataForCP); } std::cout << " static const UChar32 RECaseFixCodePoints[] = {" ; int items=0; for (const Item &d: data) { if (items++ % 10 == 0) { std::cout << "\n "; } std::cout << "0x" << d.fCP << ", "; } std::cout << "0x110000};\n\n"; std::cout << " static const int16_t RECaseFixStringOffsets[] = {"; items = 0; for (const Item &d: data) { if (items++ % 10 == 0) { std::cout << "\n "; } std::cout << "0x" << d.fStrIndex << ", "; } std::cout << "0};\n\n"; std::cout << " static const int16_t RECaseFixCounts[] = {"; items = 0; for (const Item &d: data) { if (items++ % 10 == 0) { std::cout << "\n "; } std::cout << "0x" << d.fCount << ", "; } std::cout << "0};\n\n"; std::cout << " static const UChar RECaseFixData[] = {"; for (int i=0; i