scuffed-code/icu4c/source/i18n/region.cpp

684 lines
23 KiB
C++

/*
*******************************************************************************
* Copyright (C) 2013, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
*
* File REGION.CPP
*
* Modification History:*
* Date Name Description
* 01/15/13 Emmons Original Port from ICU4J
********************************************************************************
*/
/**
* \file
* \brief C++ API: Region classes (territory containment)
*/
#include "unicode/region.h"
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/ures.h"
#include "unicode/decimfmt.h"
#include "ucln_in.h"
#include "cstring.h"
#include "uhash.h"
#include "umutex.h"
#include "uresimp.h"
#include "region_impl.h"
#if !UCONFIG_NO_FORMATTING
U_CDECL_BEGIN
static void U_CALLCONV
deleteRegion(void *obj) {
delete (icu::Region *)obj;
}
/**
* Cleanup callback func
*/
static UBool U_CALLCONV region_cleanup(void)
{
icu::Region::cleanupRegionData();
return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
static UMutex gRegionDataLock = U_MUTEX_INITIALIZER;
static UBool regionDataIsLoaded = false;
static UVector* availableRegions[URGN_LIMIT];
static UHashtable *regionAliases;
static UHashtable *regionIDMap;
static UHashtable *numericCodeMap;
static const UChar UNKNOWN_REGION_ID [] = { 0x5A, 0x5A, 0 }; /* "ZZ" */
static const UChar OUTLYING_OCEANIA_REGION_ID [] = { 0x51, 0x4F, 0 }; /* "QO" */
static const UChar WORLD_ID [] = { 0x30, 0x30, 0x31, 0 }; /* "001" */
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegionNameEnumeration)
/*
* Initializes the region data from the ICU resource bundles. The region data
* contains the basic relationships such as which regions are known, what the numeric
* codes are, any known aliases, and the territory containment data.
*
* If the region data has already loaded, then this method simply returns without doing
* anything meaningful.
*/
void Region::loadRegionData() {
if (regionDataIsLoaded) {
return;
}
umtx_lock(&gRegionDataLock);
if (regionDataIsLoaded) { // In case another thread gets to it before we do...
umtx_unlock(&gRegionDataLock);
return;
}
UErrorCode status = U_ZERO_ERROR;
UResourceBundle* regionCodes = NULL;
UResourceBundle* territoryAlias = NULL;
UResourceBundle* codeMappings = NULL;
UResourceBundle* worldContainment = NULL;
UResourceBundle* territoryContainment = NULL;
UResourceBundle* groupingContainment = NULL;
DecimalFormat *df = new DecimalFormat(status);
if (U_FAILURE(status)) {
umtx_unlock(&gRegionDataLock);
return;
}
df->setParseIntegerOnly(TRUE);
regionIDMap = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status);
uhash_setValueDeleter(regionIDMap, deleteRegion);
numericCodeMap = uhash_open(uhash_hashLong,uhash_compareLong,NULL,&status);
regionAliases = uhash_open(uhash_hashUnicodeString,uhash_compareUnicodeString,NULL,&status);
uhash_setKeyDeleter(regionAliases,uprv_deleteUObject);
UResourceBundle *rb = ures_openDirect(NULL,"metadata",&status);
regionCodes = ures_getByKey(rb,"regionCodes",NULL,&status);
territoryAlias = ures_getByKey(rb,"territoryAlias",NULL,&status);
UResourceBundle *rb2 = ures_openDirect(NULL,"supplementalData",&status);
codeMappings = ures_getByKey(rb2,"codeMappings",NULL,&status);
territoryContainment = ures_getByKey(rb2,"territoryContainment",NULL,&status);
worldContainment = ures_getByKey(territoryContainment,"001",NULL,&status);
groupingContainment = ures_getByKey(territoryContainment,"grouping",NULL,&status);
UVector *continents = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
while ( ures_hasNext(worldContainment) ) {
UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment,NULL,&status));
continents->addElement(continentName,status);
}
UVector *groupings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
while ( ures_hasNext(groupingContainment) ) {
UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment,NULL,&status));
groupings->addElement(groupingName,status);
}
while ( ures_hasNext(regionCodes) ) {
UnicodeString regionID = ures_getNextUnicodeString(regionCodes,NULL,&status);
Region *r = new Region();
r->idStr = regionID;
r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV);
r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known.
uhash_put(regionIDMap,(void *)&(r->idStr),(void *)r,&status);
Formattable result;
UErrorCode ps = U_ZERO_ERROR;
df->parse(r->idStr,result,ps);
if ( U_SUCCESS(ps) ) {
r->code = result.getLong(); // Convert string to number
uhash_iput(numericCodeMap,r->code,(void *)r,&status);
r->type = URGN_SUBCONTINENT;
} else {
r->code = -1;
}
}
// Process the territory aliases
while ( ures_hasNext(territoryAlias) ) {
UResourceBundle *res = ures_getNextResource(territoryAlias,NULL,&status);
const char *aliasFrom = ures_getKey(res);
UnicodeString* aliasFromStr = new UnicodeString(aliasFrom, -1, US_INV);
UnicodeString aliasTo = ures_getUnicodeString(res,&status);
ures_close(res);
Region *aliasToRegion = (Region *) uhash_get(regionIDMap,&aliasTo);
Region *aliasFromRegion = (Region *)uhash_get(regionIDMap,aliasFromStr);
if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region
uhash_put(regionAliases,(void *)aliasFromStr, (void *)aliasToRegion,&status);
} else {
if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
aliasFromRegion = new Region();
aliasFromRegion->idStr.setTo(*aliasFromStr);
aliasFromRegion->idStr.extract(0,aliasFromRegion->idStr.length(),aliasFromRegion->id,sizeof(aliasFromRegion->id),US_INV);
uhash_put(regionIDMap,(void *)&(aliasFromRegion->idStr),(void *)aliasFromRegion,&status);
Formattable result;
UErrorCode ps = U_ZERO_ERROR;
df->parse(aliasFromRegion->idStr,result,ps);
if ( U_SUCCESS(ps) ) {
aliasFromRegion->code = result.getLong(); // Convert string to number
uhash_iput(numericCodeMap,aliasFromRegion->code,(void *)aliasFromRegion,&status);
} else {
aliasFromRegion->code = -1;
}
aliasFromRegion->type = URGN_DEPRECATED;
} else {
aliasFromRegion->type = URGN_DEPRECATED;
}
delete aliasFromStr;
aliasFromRegion->preferredValues = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
UnicodeString currentRegion;
currentRegion.remove();
for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) {
if ( aliasTo.charAt(i) != 0x0020 ) {
currentRegion.append(aliasTo.charAt(i));
}
if ( aliasTo.charAt(i) == 0x0020 || i+1 == aliasTo.length() ) {
Region *target = (Region *)uhash_get(regionIDMap,(void *)&currentRegion);
if (target) {
UnicodeString *preferredValue = new UnicodeString(target->idStr);
aliasFromRegion->preferredValues->addElement((void *)preferredValue,status);
}
currentRegion.remove();
}
}
}
}
// Process the code mappings - This will allow us to assign numeric codes to most of the territories.
while ( ures_hasNext(codeMappings) ) {
UResourceBundle *mapping = ures_getNextResource(codeMappings,NULL,&status);
if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) {
UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status);
UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status);
UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status);
Region *r = (Region *)uhash_get(regionIDMap,(void *)&codeMappingID);
if ( r ) {
Formattable result;
UErrorCode ps = U_ZERO_ERROR;
df->parse(codeMappingNumber,result,ps);
if ( U_SUCCESS(ps) ) {
r->code = result.getLong(); // Convert string to number
uhash_iput(numericCodeMap,r->code,(void *)r,&status);
}
UnicodeString *code3 = new UnicodeString(codeMapping3Letter);
uhash_put(regionAliases,(void *)code3, (void *)r,&status);
}
}
ures_close(mapping);
}
// Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
Region *r;
UnicodeString WORLD_ID_STRING(WORLD_ID);
r = (Region *) uhash_get(regionIDMap,(void *)&WORLD_ID_STRING);
if ( r ) {
r->type = URGN_WORLD;
}
UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID);
r = (Region *) uhash_get(regionIDMap,(void *)&UNKNOWN_REGION_ID_STRING);
if ( r ) {
r->type = URGN_UNKNOWN;
}
for ( int32_t i = 0 ; i < continents->size() ; i++ ) {
r = (Region *) uhash_get(regionIDMap,(void *)continents->elementAt(i));
if ( r ) {
r->type = URGN_CONTINENT;
}
}
delete continents;
for ( int32_t i = 0 ; i < groupings->size() ; i++ ) {
r = (Region *) uhash_get(regionIDMap,(void *)groupings->elementAt(i));
if ( r ) {
r->type = URGN_GROUPING;
}
}
delete groupings;
// Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
// even though it looks like a territory code. Need to handle it here.
UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID);
r = (Region *) uhash_get(regionIDMap,(void *)&OUTLYING_OCEANIA_REGION_ID_STRING);
if ( r ) {
r->type = URGN_SUBCONTINENT;
}
// Load territory containment info from the supplemental data.
while ( ures_hasNext(territoryContainment) ) {
UResourceBundle *mapping = ures_getNextResource(territoryContainment,NULL,&status);
const char *parent = ures_getKey(mapping);
UnicodeString parentStr = UnicodeString(parent, -1 , US_INV);
Region *parentRegion = (Region *) uhash_get(regionIDMap,(void *)&parentStr);
for ( int j = 0 ; j < ures_getSize(mapping); j++ ) {
UnicodeString child = ures_getUnicodeStringByIndex(mapping,j,&status);
Region *childRegion = (Region *) uhash_get(regionIDMap,(void *)&child);
if ( parentRegion != NULL && childRegion != NULL ) {
// Add the child region to the set of regions contained by the parent
if (parentRegion->containedRegions == NULL) {
parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
}
UnicodeString *childStr = new UnicodeString(status);
childStr->fastCopyFrom(childRegion->idStr);
parentRegion->containedRegions->addElement((void *)childStr,status);
// Set the parent region to be the containing region of the child.
// Regions of type GROUPING can't be set as the parent, since another region
// such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
if ( parentRegion->type != URGN_GROUPING) {
childRegion->containingRegion = parentRegion;
}
}
}
ures_close(mapping);
}
// Create the availableRegions lists
int32_t pos = -1;
while ( const UHashElement* element = uhash_nextElement(regionIDMap,&pos)) {
Region *ar = (Region *)element->value.pointer;
if ( availableRegions[ar->type] == NULL ) {
availableRegions[ar->type] = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
}
UnicodeString *arString = new UnicodeString(ar->idStr);
availableRegions[ar->type]->addElement((void *)arString,status);
}
ures_close(territoryContainment);
ures_close(worldContainment);
ures_close(groupingContainment);
ures_close(codeMappings);
ures_close(rb2);
ures_close(territoryAlias);
ures_close(regionCodes);
ures_close(rb);
delete df;
ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup);
regionDataIsLoaded = true;
umtx_unlock(&gRegionDataLock);
}
void Region::cleanupRegionData() {
for (int32_t i = 0 ; i < URGN_LIMIT ; i++ ) {
if ( availableRegions[i] ) {
delete availableRegions[i];
}
}
if (regionAliases) {
uhash_close(regionAliases);
}
if (numericCodeMap) {
uhash_close(numericCodeMap);
}
if (regionIDMap) {
uhash_close(regionIDMap);
}
}
Region::Region ()
: code(-1),
type(URGN_UNKNOWN),
containingRegion(NULL),
containedRegions(NULL),
preferredValues(NULL) {
id[0] = 0;
}
Region::~Region () {
if (containedRegions) {
delete containedRegions;
}
if (preferredValues) {
delete preferredValues;
}
}
/**
* Returns true if the two regions are equal.
*/
UBool
Region::operator==(const Region &that) const {
return (idStr == that.idStr);
}
/**
* Returns true if the two regions are NOT equal; that is, if operator ==() returns false.
*/
UBool
Region::operator!=(const Region &that) const {
return (idStr != that.idStr);
}
/**
* Returns a pointer to a Region using the given region code. The region code can be either 2-letter ISO code,
* 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the LDML specification.
* The identifier will be canonicalized internally using the supplemental metadata as defined in the CLDR.
* If the region code is NULL or not recognized, the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR )
*/
const Region* U_EXPORT2
Region::getInstance(const char *region_code, UErrorCode &status) {
if ( !region_code ) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
loadRegionData();
if (regionIDMap == NULL) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
UnicodeString regionCodeString = UnicodeString(region_code, -1, US_INV);
Region *r = (Region *)uhash_get(regionIDMap,(void *)&regionCodeString);
if ( !r ) {
r = (Region *)uhash_get(regionAliases,(void *)&regionCodeString);
}
if ( !r ) { // Unknown region code
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
if ( r->type == URGN_DEPRECATED && r->preferredValues->size() == 1) {
StringEnumeration *pv = r->getPreferredValues();
pv->reset(status);
const UnicodeString *ustr = pv->snext(status);
r = (Region *)uhash_get(regionIDMap,(void *)ustr);
delete pv;
}
return r;
}
/**
* Returns a pointer to a Region using the given numeric region code. If the numeric region code is not recognized,
* the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR ).
*/
const Region* U_EXPORT2
Region::getInstance (int32_t code, UErrorCode &status) {
loadRegionData();
if (numericCodeMap == NULL) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
Region *r = (Region *)uhash_iget(numericCodeMap,code);
if ( !r ) { // Just in case there's an alias that's numeric, try to find it.
UErrorCode fs = U_ZERO_ERROR;
UnicodeString pat = UNICODE_STRING_SIMPLE("00#");
DecimalFormat *df = new DecimalFormat(pat,fs);
UnicodeString id;
id.remove();
df->format(code,id);
delete df;
r = (Region *)uhash_get(regionAliases,&id);
}
if ( !r ) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
if ( r->type == URGN_DEPRECATED && r->preferredValues->size() == 1) {
StringEnumeration *pv = r->getPreferredValues();
pv->reset(status);
const UnicodeString *ustr = pv->snext(status);
r = (Region *)uhash_get(regionIDMap,(void *)ustr);
delete pv;
}
return r;
}
/**
* Returns an enumeration over the IDs of all known regions that match the given type.
*/
StringEnumeration* U_EXPORT2
Region::getAvailable(URegionType type) {
loadRegionData();
UErrorCode status = U_ZERO_ERROR;
return new RegionNameEnumeration(availableRegions[type],status);
return NULL;
}
/**
* Returns a pointer to the region that contains this region. Returns NULL if this region is code "001" (World)
* or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) returns the
* region "039" (Southern Europe).
*/
const Region*
Region::getContainingRegion() const {
loadRegionData();
return containingRegion;
}
/**
* Return a pointer to the region that geographically contains this region and matches the given type,
* moving multiple steps up the containment chain if necessary. Returns NULL if no containing region can be found
* that matches the given type. Note: The URegionTypes = "URGN_GROUPING", "URGN_DEPRECATED", or "URGN_UNKNOWN"
* are not appropriate for use in this API. NULL will be returned in this case. For example, calling this method
* with region "IT" (Italy) for type "URGN_CONTINENT" returns the region "150" ( Europe ).
*/
const Region*
Region::getContainingRegion(URegionType type) const {
loadRegionData();
if ( containingRegion == NULL ) {
return NULL;
}
if ( containingRegion->type == type ) {
return containingRegion;
} else {
return containingRegion->getContainingRegion(type);
}
}
/**
* Return an enumeration over the IDs of all the regions that are immediate children of this region in the
* region hierarchy. These returned regions could be either macro regions, territories, or a mixture of the two,
* depending on the containment data as defined in CLDR. This API may return NULL if this region doesn't have
* any sub-regions. For example, calling this method with region "150" (Europe) returns an enumeration containing
* the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) - "154" (Northern Europe)
* and "155" (Western Europe).
*/
StringEnumeration*
Region::getContainedRegions() const {
loadRegionData();
UErrorCode status = U_ZERO_ERROR;
return new RegionNameEnumeration(containedRegions,status);
}
/**
* Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region
* hierarchy and match the given type. This API may return an empty enumeration if this region doesn't have any
* sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type
* "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
*/
StringEnumeration*
Region::getContainedRegions( URegionType type ) const {
loadRegionData();
UErrorCode status = U_ZERO_ERROR;
UVector *result = new UVector(NULL, uhash_compareChars, status);
StringEnumeration *cr = getContainedRegions();
for ( int32_t i = 0 ; i < cr->count(status) ; i++ ) {
const char *id = cr->next(NULL,status);
const Region *r = Region::getInstance(id,status);
if ( r->getType() == type ) {
result->addElement((void *)&r->idStr,status);
} else {
StringEnumeration *children = r->getContainedRegions(type);
for ( int32_t j = 0 ; j < children->count(status) ; j++ ) {
const char *id2 = children->next(NULL,status);
const Region *r2 = Region::getInstance(id2,status);
result->addElement((void *)&r2->idStr,status);
}
delete children;
}
}
delete cr;
StringEnumeration* resultEnumeration = new RegionNameEnumeration(result,status);
delete result;
return resultEnumeration;
}
/**
* Returns true if this region contains the supplied other region anywhere in the region hierarchy.
*/
UBool
Region::contains(const Region &other) const {
loadRegionData();
if (!containedRegions) {
return FALSE;
}
if (containedRegions->contains((void *)&other.idStr)) {
return TRUE;
} else {
for ( int32_t i = 0 ; i < containedRegions->size() ; i++ ) {
UnicodeString *crStr = (UnicodeString *)containedRegions->elementAt(i);
Region *cr = (Region *) uhash_get(regionIDMap,(void *)crStr);
if ( cr && cr->contains(other) ) {
return TRUE;
}
}
}
return FALSE;
}
/**
* For deprecated regions, return an enumeration over the IDs of the regions that are the preferred replacement
* regions for this region. Returns NULL for a non-deprecated region. For example, calling this method with region
* "SU" (Soviet Union) would return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
*/
StringEnumeration*
Region::getPreferredValues() const {
loadRegionData();
UErrorCode status = U_ZERO_ERROR;
if ( type == URGN_DEPRECATED ) {
return new RegionNameEnumeration(preferredValues,status);
} else {
return NULL;
}
}
/**
* Return this region's canonical region code.
*/
const char*
Region::getRegionCode() const {
return id;
}
int32_t
Region::getNumericCode() const {
return code;
}
/**
* Returns the region type of this region.
*/
URegionType
Region::getType() const {
return type;
}
RegionNameEnumeration::RegionNameEnumeration(UVector *fNameList, UErrorCode& status) {
pos=0;
if (fNameList) {
fRegionNames = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, fNameList->size(),status);
for ( int32_t i = 0 ; i < fNameList->size() ; i++ ) {
UnicodeString* this_region_name = (UnicodeString *)fNameList->elementAt(i);
UnicodeString* new_region_name = new UnicodeString(*this_region_name);
fRegionNames->addElement((void *)new_region_name,status);
}
}
else {
fRegionNames = NULL;
}
}
const UnicodeString*
RegionNameEnumeration::snext(UErrorCode& /*status*/) {
return (const UnicodeString *)fRegionNames->elementAt(pos++);
}
void
RegionNameEnumeration::reset(UErrorCode& /*status*/) {
pos=0;
}
int32_t
RegionNameEnumeration::count(UErrorCode& /*status*/) const {
return (fRegionNames==NULL) ? 0 : fRegionNames->size();
}
RegionNameEnumeration::~RegionNameEnumeration() {
delete fRegionNames;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
//eof