/* ****************************************************************************** * * Copyright (C) 1999-2005, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: udata.c * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 1999oct25 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #include "unicode/putil.h" #include "umutex.h" #include "cmemory.h" #include "cstring.h" #include "unicode/udata.h" #include "unicode/uversion.h" #include "uhash.h" #include "ucln_cmn.h" #include "putilimp.h" #include "udatamem.h" #include "umapfile.h" #include "ucmndata.h" /*********************************************************************** * * Notes on the organization of the ICU data implementation * * All of the public API is defined in udata.h * * The implementation is split into several files... * * - udata.c (this file) contains higher level code that knows about * the search paths for locating data, caching opened data, etc. * * - umapfile.c contains the low level platform-specific code for actually loading * (memory mapping, file reading, whatever) data into memory. * * - ucmndata.c deals with the tables of contents of ICU data items within * an ICU common format data file. The implementation includes * an abstract interface and support for multiple TOC formats. * All knowledge of any specific TOC format is encapsulated here. * * - udatamem.c has code for managing UDataMemory structs. These are little * descriptor objects for blocks of memory holding ICU data of * various types. */ /* configuration ---------------------------------------------------------- */ /* If you are excruciatingly bored turn this on .. 
*/ /* #define UDATA_DEBUG 1 */ #if defined(UDATA_DEBUG) # include #endif /*********************************************************************** * * static (Global) data * ************************************************************************/ static UDataMemory *gCommonICUData = NULL; /* Pointer to the common ICU data. */ /* May be updated once, if we started with */ /* a stub or subset library. */ static UDataMemory *gStubICUData = NULL; /* If gCommonICUData does get updated, remember */ /* the original one so that it can be cleaned */ /* up when ICU is shut down. */ static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */ static UBool U_CALLCONV udata_cleanup(void) { if (gCommonDataCache) { /* Delete the cache of user data mappings. */ uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. */ gCommonDataCache = NULL; /* Cleanup is not thread safe. */ } if (gCommonICUData != NULL) { udata_close(gCommonICUData); /* Clean up common ICU Data */ gCommonICUData = NULL; } if (gStubICUData != NULL) { udata_close(gStubICUData); /* Clean up the stub ICU Data */ gStubICUData = NULL; } return TRUE; /* Everything was cleaned up */ } /* * setCommonICUData. Set a UDataMemory to be the global ICU Data */ static void setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */ UDataMemory *oldData, /* Old ICUData ptr. Overwrite of this value is ok, */ /* of any others is not. */ UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */ /* changed by another thread before we got to it. */ UErrorCode *pErr) { UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr); if (U_FAILURE(*pErr)) { return; } /* For the assignment, other threads must cleanly see either the old */ /* or the new, not some partially initialized new. The old can not be */ /* deleted - someone may still have a pointer to it lying around in */ /* their locals. 
*/ UDatamemory_assign(newCommonData, pData); umtx_lock(NULL); if (gCommonICUData==oldData) { gStubICUData = gCommonICUData; /* remember the old Common Data, so it can be cleaned up. */ gCommonICUData = newCommonData; ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup); } else { if (warn==TRUE) { *pErr = U_USING_DEFAULT_WARNING; } uprv_free(newCommonData); } umtx_unlock(NULL); return; } static const char * findBasename(const char *path) { const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); if(basename==NULL) { return path; } else { return basename+1; } } #ifdef UDATA_DEBUG static const char * packageNameFromPath(const char *path) { if((path == NULL) || (*path == 0)) { return U_ICUDATA_NAME; } path = findBasename(path); if((path == NULL) || (*path == 0)) { return U_ICUDATA_NAME; } return path; } #endif /*----------------------------------------------------------------------* * * * Cache for common data * * Functions for looking up or adding entries to a cache of * * data that has been previously opened. Avoids a potentially * * expensive operation of re-opening the data for subsequent * * uses. * * * * Data remains cached for the duration of the process. * * * *----------------------------------------------------------------------*/ typedef struct DataCacheElement { char *name; UDataMemory *item; } DataCacheElement; /* * Deleter function for DataCacheElements. * udata cleanup function closes the hash table; hash table in turn calls back to * here for each entry. */ static void U_EXPORT2 U_CALLCONV DataCacheElement_deleter(void *pDCEl) { DataCacheElement *p = (DataCacheElement *)pDCEl; udata_close(p->item); /* unmaps storage */ uprv_free(p->name); /* delete the hash key string. */ uprv_free(pDCEl); /* delete 'this' */ } /* udata_getCacheHashTable() * Get the hash table used to store the data cache entries. * Lazy create it if it doesn't yet exist. 
*/ static UHashtable *udata_getHashTable() { UErrorCode err = U_ZERO_ERROR; UBool cacheIsInitialized; UHashtable *tHT = NULL; umtx_lock(NULL); cacheIsInitialized = (gCommonDataCache != NULL); umtx_unlock(NULL); if (cacheIsInitialized) { return gCommonDataCache; } tHT = uhash_open(uhash_hashChars, uhash_compareChars, &err); uhash_setValueDeleter(tHT, DataCacheElement_deleter); umtx_lock(NULL); if (gCommonDataCache == NULL) { gCommonDataCache = tHT; tHT = NULL; ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup); } umtx_unlock(NULL); if (tHT != NULL) { uhash_close(tHT); } if (U_FAILURE(err)) { return NULL; /* TODO: handle this error better. */ } return gCommonDataCache; } static UDataMemory *udata_findCachedData(const char *path) { UHashtable *htable; UDataMemory *retVal = NULL; DataCacheElement *el; const char *baseName; baseName = findBasename(path); /* Cache remembers only the base name, not the full path. */ htable = udata_getHashTable(); umtx_lock(NULL); el = (DataCacheElement *)uhash_get(htable, baseName); umtx_unlock(NULL); if (el != NULL) { retVal = el->item; } #ifdef UDATA_DEBUG fprintf(stderr, "Cache: [%s] -> %p\n", baseName, retVal); #endif return retVal; } static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) { DataCacheElement *newElement; const char *baseName; int32_t nameLen; UHashtable *htable; UDataMemory *oldValue = NULL; UErrorCode subErr = U_ZERO_ERROR; if (U_FAILURE(*pErr)) { return NULL; } /* Create a new DataCacheElement - the thingy we store in the hash table - * and copy the supplied path and UDataMemoryItems into it. 
*/ newElement = uprv_malloc(sizeof(DataCacheElement)); if (newElement == NULL) { *pErr = U_MEMORY_ALLOCATION_ERROR; return NULL; } newElement->item = UDataMemory_createNewInstance(pErr); if (U_FAILURE(*pErr)) { return NULL; } UDatamemory_assign(newElement->item, item); baseName = findBasename(path); nameLen = (int32_t)uprv_strlen(baseName); newElement->name = uprv_malloc(nameLen+1); if (newElement->name == NULL) { *pErr = U_MEMORY_ALLOCATION_ERROR; return NULL; } uprv_strcpy(newElement->name, baseName); /* Stick the new DataCacheElement into the hash table. */ htable = udata_getHashTable(); umtx_lock(NULL); oldValue = uhash_get(htable, path); if (oldValue != NULL) { subErr = U_USING_DEFAULT_WARNING; } else { uhash_put( htable, newElement->name, /* Key */ newElement, /* Value */ &subErr); } umtx_unlock(NULL); #ifdef UDATA_DEBUG fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name, newElement->item, u_errorName(subErr), newElement->item->vFuncs); #endif if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) { *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */ uprv_free(newElement->name); uprv_free(newElement->item); uprv_free(newElement); return oldValue; } return newElement->item; } /*------------------------------------------------------------------------------- * * TinyString - a small set of really simple string functions, for * the purpose of consolidating buffer overflow code in one place * * Use wherever you would otherwise declare a fixed sized char[xx] buffer. * Do non-growing ops by accessing fields of struct directly * Grow using the append function to automatically extend buffer * as needed. 
* *-------------------------------------------------------------------------------*/ typedef struct TinyString { char *s; int32_t length; char fStaticBuf[100]; int32_t fCapacity; } TinyString; static void TinyString_init(TinyString *This) { This->s = This->fStaticBuf; *This->s = 0; This->length = 0; This->fCapacity = sizeof(This->fStaticBuf)-1; } static void TinyString_append(TinyString *This, const char *what) { int32_t newLen; newLen = This->length + (int32_t)uprv_strlen(what); if (newLen >= This->fCapacity) { int32_t newCapacity = newLen * 2; char *newBuf = (char *)uprv_malloc(newCapacity+1); if (newBuf != NULL) { uprv_strcpy(newBuf, This->s); if (This->s != This->fStaticBuf) { uprv_free(This->s); } This->s = newBuf; This->fCapacity = newCapacity; } } if (newLen < This->fCapacity) { uprv_strcat(This->s+This->length, what); This->length = newLen; } } static void TinyString_appendn(TinyString *This, const char *what, int32_t n) { int32_t newLen; newLen = This->length + n; if (newLen >= This->fCapacity) { int32_t newCapacity = newLen * 2; char *newBuf = (char *)uprv_malloc(newCapacity+1); if (newBuf != NULL) { uprv_strcpy(newBuf, This->s); if (This->s != This->fStaticBuf) { uprv_free(This->s); } This->s = newBuf; This->fCapacity = newCapacity; } } if (newLen < This->fCapacity) { uprv_strncat(This->s+This->length, what, n); This->length = newLen; } } static void TinyString_dt(TinyString *This) { if (This->s != This->fStaticBuf) { uprv_free(This->s); } TinyString_init(This); } /*----------------------------------------------------------------------*============== * * * Path management. Could be shared with other tools/etc if need be * * later on. 
*
 *                                                                      *
 *----------------------------------------------------------------------*/

#define U_DATA_PATHITER_BUFSIZ  128        /* Size of local buffer for paths         */
                                           /*   Overflow causes malloc of larger buf */

/* State for walking the candidate file paths for one data item. */
typedef struct 
{
    const char *path;                              /* working path (u_icudata_Dir) */
    const char *nextPath;                          /* path following this one; NULL when exhausted */
    const char *basename;                          /* item's basename (icudt22e_mt.res)*/
    const char *suffix;                            /* item suffix (can be null) */

    uint32_t    basenameLen;                       /* length of basename */

    char       *itemPath;                          /* path passed in with item name */
    char        itemPathBuf[U_DATA_PATHITER_BUFSIZ]; /* inline storage for itemPath */

    char       *pathBuffer;                        /* output path for this it'ion */
    char        pathBufferA[U_DATA_PATHITER_BUFSIZ]; /* inline storage for pathBuffer */

    char       *packageStub;                       /* example:  "/icudt28b". Will ignore that leaf in set paths. */
    char        packageStubBuf[U_DATA_PATHITER_BUFSIZ]; /* inline storage for packageStub */
    uint32_t    packageStubLen;                    /* length of packageStub, including the leading separator */

    UBool       checkLastFour;                     /* if TRUE then allow paths such as '/foo/myapp.dat'
                                                    * to match, checks last 4 chars of suffix with
                                                    * last 4 of path, then previous chars. */

}  UDataPathIterator;

/**
 * Initialize (or re-initialize) a user-supplied UDataPathIterator.
 * Note: the inline buffers are used when everything fits in
 *       U_DATA_PATHITER_BUFSIZ bytes; longer strings are placed in malloc'ed
 *       storage.  Call udata_pathiter_dt() when done with the iterator.
 *
 * @param iter  The iterator to be initialized. Its current state does not matter. 
 * @param path  The full pathname to be iterated over.  If NULL, defaults to u_getDataDirectory()
 * @param pkg   Package which is being searched for, ex "icudt28l".  Will ignore leaf directories such as /icudt28l 
 * @param item  Item to be searched for.  Can include full path, such as /a/b/foo.dat 
 * @param suffix  Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
 *               Ex:   'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.   
 *                     '/blarg/stuff.dat' would also be found.
 * @param doCheckLastFour  Stored as the iterator's checkLastFour flag: if TRUE, a path segment
 *               that itself names the item plus a four-character suffix is returned unchanged.
*/ static void udata_pathiter_init(UDataPathIterator *iter, const char *path, const char *pkg, const char *item, const char *suffix, UBool doCheckLastFour) { #ifdef UDATA_DEBUG fprintf(stderr, "SUFFIX1=%s PATH=%s\n", suffix, path); #endif /** Path **/ if(path == NULL) { iter->path = u_getDataDirectory(); } else { iter->path = path; } /** Package **/ if(pkg == NULL) { iter->packageStubLen = 0; iter->packageStub=iter->packageStubBuf; iter->packageStub[0] = 0; } else { if(uprv_strlen(pkg) + 2 > U_DATA_PATHITER_BUFSIZ) { iter->packageStub = uprv_malloc(uprv_strlen(pkg)+2); } else { iter->packageStub = iter->packageStubBuf; } iter->packageStub[0] = U_FILE_SEP_CHAR; uprv_strcpy(iter->packageStub+1, pkg); iter->packageStubLen = (int32_t)uprv_strlen(iter->packageStub); #ifdef UDATA_DEBUG fprintf(stderr, "STUB=%s [%d]\n", iter->packageStub, iter->packageStubLen); #endif } /** Item **/ iter->basename = findBasename(item); iter->basenameLen = (int32_t)uprv_strlen(iter->basename); if(iter->basename == NULL) { iter->nextPath = NULL; return; } /** Item path **/ iter->itemPath = iter->itemPathBuf; if(iter->basename == item) { iter->itemPath[0] = 0; iter->nextPath = iter->path; } else { int32_t itemPathLen = (int32_t)(iter->basename-item); if (itemPathLen >= U_DATA_PATHITER_BUFSIZ) { char *t = (char *)uprv_malloc(itemPathLen+1); if (t != NULL) { iter->itemPath = t; } else { /* Malloc failed. Ignore the itemPath. 
*/ itemPathLen = 0; } } uprv_strncpy(iter->itemPath, item, itemPathLen); iter->itemPath[itemPathLen]=0; iter->nextPath = iter->itemPath; } #ifdef UDATA_DEBUG fprintf(stderr, "SUFFIX=%s [%p]\n", suffix, suffix); #endif /** Suffix **/ if(suffix != NULL) { iter->suffix = suffix; } else { iter->suffix = ""; } iter->checkLastFour = doCheckLastFour; /* pathBuffer will hold the output path strings returned by the this iterator * Get an upper bound of possible string size, and make sure that the buffer * is big enough (sum of length of each piece, 2 extra delimiters, + trailing NULL) */ { int32_t maxPathLen = (int32_t)(uprv_strlen(iter->path) + uprv_strlen(item) + uprv_strlen(iter->suffix) + iter->packageStubLen + 3); iter->pathBuffer = iter->pathBufferA; if (maxPathLen >= U_DATA_PATHITER_BUFSIZ) { iter->pathBuffer = (char *)uprv_malloc(maxPathLen); if (iter->pathBuffer == NULL) { iter->pathBuffer = iter->pathBufferA; iter->path = ""; } } } #ifdef UDATA_DEBUG fprintf(stderr, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n", iter, item, iter->path, iter->basename, iter->suffix, iter->itemPath, iter->nextPath, iter->checkLastFour?"TRUE":"false"); #endif } /** * Get the next path on the list. * * @param iter The Iter to be used * @param len If set, pointer to the length of the returned path, for convenience. * @return Pointer to the next path segment, or NULL if there are no more. */ static const char *udata_pathiter_next(UDataPathIterator *iter) { const char *path = NULL; uint32_t pathLen = 0; const char *pathBasename; do { if( iter->nextPath == NULL ) { return NULL; } path = iter->nextPath; if(iter->nextPath == iter->itemPath) { /* we were processing item's path. */ iter->nextPath = iter->path; /* start with regular path next tm. 
*/ pathLen = (int32_t)uprv_strlen(path); } else { /* fix up next for next time */ iter->nextPath = uprv_strchr(path, U_PATH_SEP_CHAR); if(iter->nextPath == NULL) { /* segment: entire path */ pathLen = (int32_t)uprv_strlen(path); } else { /* segment: until next segment */ pathLen = (int32_t)(iter->nextPath - path); if(*iter->nextPath) { /* skip divider */ iter->nextPath ++; } } } if(pathLen == 0) { continue; } #ifdef UDATA_DEBUG fprintf(stderr, "rest of path (IDD) = %s\n", path); fprintf(stderr, " "); { uint32_t qqq; for(qqq=0;qqqpathBuffer, path, pathLen); iter->pathBuffer[pathLen] = 0; /* check for .dat files */ pathBasename = findBasename(iter->pathBuffer); if(iter->checkLastFour == TRUE && (pathLen>=4) && uprv_strncmp(iter->pathBuffer +(pathLen-4),iter->suffix,4)==0 && /* suffix matches */ uprv_strncmp(findBasename(iter->pathBuffer),iter->basename,iter->basenameLen)==0 && /* base matches */ uprv_strlen(pathBasename)==(iter->basenameLen+4)) { /* base+suffix = full len */ #ifdef UDATA_DEBUG fprintf(stderr, "Have %s file on the path: %s\n", iter->suffix, iter->pathBuffer); #endif /* do nothing */ } else { /* regular dir path */ if(iter->pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) { if((pathLen>=4) && uprv_strncmp(iter->pathBuffer+(pathLen-4), ".dat", 4) == 0) { #ifdef UDATA_DEBUG fprintf(stderr, "skipping non-directory .dat file %s\n", iter->pathBuffer); #endif continue; } /* Check if it is a directory with the same name as our package */ if(iter->packageStubLen && (pathLen > iter->packageStubLen) && !uprv_strcmp(iter->pathBuffer + pathLen - iter->packageStubLen, iter->packageStub)) { #ifdef UDATA_DEBUG fprintf(stderr, "Found stub %s ( will add package %s of len %d)\n", iter->packageStub, iter->basename, iter->basenameLen); #endif pathLen -= iter->packageStubLen; } iter->pathBuffer[pathLen++] = U_FILE_SEP_CHAR; } uprv_strncpy(iter->pathBuffer + pathLen, /* + basename */ iter->packageStub+1, iter->packageStubLen-1); pathLen += iter->packageStubLen-1; if(*iter->suffix) 
/* tack on suffix */ { uprv_strcpy(iter->pathBuffer + pathLen, iter->suffix); pathLen += (int32_t)uprv_strlen(iter->suffix); } } #ifdef UDATA_DEBUG fprintf(stderr, " --> %s\n", iter->pathBuffer); #endif return iter->pathBuffer; } while(iter->path); /* fell way off the end */ return NULL; } /* * Path Iterator Destructor. Clean up any allocated storage */ static void udata_pathiter_dt(UDataPathIterator *iter) { if (iter->itemPath != iter->itemPathBuf) { uprv_free(iter->itemPath); iter->itemPath = NULL; } if (iter->pathBuffer != iter->pathBufferA) { uprv_free(iter->pathBuffer); iter->pathBuffer = NULL; } } /* ==================================================================================*/ /*----------------------------------------------------------------------* * * * Add a static reference to the common data library * * Unless overridden by an explicit udata_setCommonData, this will be * * our common data. * * * *----------------------------------------------------------------------*/ extern const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT; /*----------------------------------------------------------------------* * * * openCommonData Attempt to open a common format (.dat) file * * Map it into memory (if it's not there already) * * and return a UDataMemory object for it. * * * * If the requested data is already open and cached * * just return the cached UDataMem object. * * * *----------------------------------------------------------------------*/ static UDataMemory * openCommonData(const char *path, /* Path from OpenCHoice? */ UBool isICUData, /* ICU Data true if path == NULL */ UErrorCode *pErrorCode) { UDataMemory tData; UDataPathIterator iter; const char *pathBuffer; const char *inBasename; if (U_FAILURE(*pErrorCode)) { return NULL; } UDataMemory_init(&tData); /* ??????? 
TODO revisit this */ if (isICUData) { /* "mini-cache" for common ICU data */ if(gCommonICUData != NULL) { return gCommonICUData; } tData.pHeader = &U_ICUDATA_ENTRY_POINT; udata_checkCommonData(&tData, pErrorCode); setCommonICUData(&tData, NULL, FALSE, pErrorCode); return gCommonICUData; } /* request is NOT for ICU Data. */ /* Find the base name portion of the supplied path. */ /* inBasename will be left pointing somewhere within the original path string. */ inBasename = findBasename(path); #ifdef UDATA_DEBUG fprintf(stderr, "inBasename = %s\n", inBasename); #endif if(*inBasename==0) { /* no basename. This will happen if the original path was a directory name, */ /* like "a/b/c/". (Fallback to separate files will still work.) */ #ifdef UDATA_DEBUG fprintf(stderr, "ocd: no basename in %s, bailing.\n", path); #endif *pErrorCode=U_FILE_ACCESS_ERROR; return NULL; } /* Is the requested common data file already open and cached? */ /* Note that the cache is keyed by the base name only. The rest of the path, */ /* if any, is not considered. */ { UDataMemory *dataToReturn = udata_findCachedData(inBasename); if (dataToReturn != NULL) { return dataToReturn; } } /* Requested item is not in the cache. 
* Hunt it down, trying all the path locations */ udata_pathiter_init(&iter, u_getDataDirectory(), inBasename, path, ".dat", TRUE); while((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = udata_pathiter_next(&iter)) != NULL) { #ifdef UDATA_DEBUG fprintf(stderr, "ocd: trying path %s - ", pathBuffer); #endif uprv_mapFile(&tData, pathBuffer); #ifdef UDATA_DEBUG fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded"); #endif } udata_pathiter_dt(&iter); /* Note: this call may invalidate "pathBuffer" */ #if defined(OS390_STUBDATA) && defined(OS390BATCH) if (!UDataMemory_isLoaded(&tData)) { char ourPathBuffer[1024]; /* One more chance, for extendCommonData() */ uprv_strncpy(ourPathBuffer, path, 1019); ourPathBuffer[1019]=0; uprv_strcat(ourPathBuffer, ".dat"); uprv_mapFile(&tData, ourPathBuffer); } #endif if (!UDataMemory_isLoaded(&tData)) { /* no common data */ *pErrorCode=U_FILE_ACCESS_ERROR; return NULL; } /* we have mapped a file, check its header */ udata_checkCommonData(&tData, pErrorCode); /* Cache the UDataMemory struct for this .dat file, * so we won't need to hunt it down and map it again next time * something is needed from it. */ return udata_cacheDataItem(inBasename, &tData, pErrorCode); } #ifdef OS390 # define MAX_STUB_ENTRIES 8 #else # define MAX_STUB_ENTRIES 0 #endif /*----------------------------------------------------------------------* * * * extendICUData If the full set of ICU data was not loaded at * * program startup, load it now. This function will * * be called when the lookup of an ICU data item in * * the common ICU data fails. * * * * The parameter is the UDataMemory in which the * * search for a requested item failed. 
* * * * return true if new data is loaded, false otherwise.* * * *----------------------------------------------------------------------*/ static UBool extendICUData(UDataMemory *failedData, UErrorCode *pErr) { /* If the data library that we are running with turns out to be the * stub library (or, on the 390, the subset library), we will try to * load a .dat file instead. The stub library has no entries in its * TOC, which is how we identify it here. */ UDataMemory *pData; UDataMemory copyPData; if (failedData->vFuncs->NumEntries(failedData) > MAX_STUB_ENTRIES) { /* Not the stub. We can't extend. */ return FALSE; } /* See if we can explicitly open a .dat file for the ICUData. */ pData = openCommonData( U_ICUDATA_NAME, /* "icudt20l" , for example. */ FALSE, /* Pretend we're not opening ICUData */ pErr); /* How about if there is no pData, eh... */ UDataMemory_init(©PData); if(pData != NULL) { UDatamemory_assign(©PData, pData); copyPData.map = 0; /* The mapping for this data is owned by the hash table */ copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */ /* CommonICUData is also unmapped when ICU is shut down.*/ /* To avoid unmapping the data twice, zero out the map */ /* fields in the UDataMemory that we're assigning */ /* to CommonICUData. */ setCommonICUData(©PData, /* The new common data. */ failedData, /* Old ICUData ptr. Overwrite of this value is ok, */ FALSE, /* No warnings if write didn't happen */ pErr); /* setCommonICUData honors errors; NOP if error set */ } return gCommonICUData != failedData; /* Return true if ICUData pointer was updated. */ /* (Could potentialy have been done by another thread racing */ /* us through here, but that's fine, we still return true */ /* so that current thread will also examine extended data. 
*/ } /*----------------------------------------------------------------------* * * * udata_setCommonData * * * *----------------------------------------------------------------------*/ U_CAPI void U_EXPORT2 udata_setCommonData(const void *data, UErrorCode *pErrorCode) { UDataMemory dataMemory; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return; } if(data==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; } /* do we already have common ICU data set? */ if(gCommonICUData != NULL) { *pErrorCode=U_USING_DEFAULT_WARNING; return; } /* set the data pointer and test for validity */ UDataMemory_init(&dataMemory); UDataMemory_setData(&dataMemory, data); udata_checkCommonData(&dataMemory, pErrorCode); if (U_FAILURE(*pErrorCode)) {return;} /* we have good data */ /* Set it up as the ICU Common Data. */ setCommonICUData(&dataMemory, NULL, TRUE, pErrorCode); } /*--------------------------------------------------------------------------- * * udata_setAppData * *---------------------------------------------------------------------------- */ U_CAPI void U_EXPORT2 udata_setAppData(const char *path, const void *data, UErrorCode *err) { UDataMemory udm; if(err==NULL || U_FAILURE(*err)) { return; } if(data==NULL) { *err=U_ILLEGAL_ARGUMENT_ERROR; return; } UDataMemory_init(&udm); udm.pHeader = data; udata_checkCommonData(&udm, err); udata_cacheDataItem(path, &udm, err); } /*----------------------------------------------------------------------------* * * * checkDataItem Given a freshly located/loaded data item, either * * an entry in a common file or a separately loaded file, * * sanity check its header, and see if the data is * * acceptable to the app. * * If the data is good, create and return a UDataMemory * * object that can be returned to the application. * * Return NULL on any sort of failure. * * * *----------------------------------------------------------------------------*/ static UDataMemory * checkDataItem ( const DataHeader *pHeader, /* The data item to be checked. 
*/ UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */ void *context, /* pass-thru param for above. */ const char *type, /* pass-thru param for above. */ const char *name, /* pass-thru param for above. */ UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */ /* but openChoice should continue with */ /* trying to get data from fallback path. */ UErrorCode *fatalErr /* Bad error, caller should return immediately */ ) { UDataMemory *rDataMem = NULL; /* the new UDataMemory, to be returned. */ if (U_FAILURE(*fatalErr)) { return NULL; } if(pHeader->dataHeader.magic1==0xda && pHeader->dataHeader.magic2==0x27 && (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info)) ) { rDataMem=UDataMemory_createNewInstance(fatalErr); if (U_FAILURE(*fatalErr)) { return NULL; } rDataMem->pHeader = pHeader; } else { /* the data is not acceptable, look further */ /* If we eventually find something good, this errorcode will be */ /* cleared out. */ *nonFatalErr=U_INVALID_FORMAT_ERROR; } return rDataMem; } /* * A note on the ownership of Mapped Memory * * For common format files, ownership resides with the UDataMemory object * that lives in the cache of opened common data. These UDataMemorys are private * to the udata implementation, and are never seen directly by users. * * The UDataMemory objects returned to users will have the address of some desired * data within the mapped region, but they wont have the mapping info itself, and thus * won't cause anything to be removed from memory when they are closed. * * For individual data files, the UDataMemory returned to the user holds the * information necessary to unmap the data on close. If the user independently * opens the same data file twice, two completely independent mappings will be made. * (There is no cache of opened data items from individual files, only a cache of * opened Common Data files, that is, files containing a collection of data items.) 
* * For common data passed in from the user via udata_setAppData() or * udata_setCommonData(), ownership remains with the user. * * UDataMemory objects themselves, as opposed to the memory they describe, * can be anywhere - heap, stack/local or global. * They have a flag to indicate when they're heap allocated and thus * must be deleted when closed. */ /*----------------------------------------------------------------------------* * * * main data loading functions * * * *----------------------------------------------------------------------------*/ static UDataMemory * doOpenChoice(const char *path, const char *type, const char *name, UDataMemoryIsAcceptable *isAcceptable, void *context, UErrorCode *pErrorCode) { UDataMemory *retVal = NULL; const char *pathBuffer; TinyString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */ TinyString tocEntryPath; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */ TinyString oldIndFileName; /* ex: icudt28b_ar.res */ TinyString oldStylePath; TinyString oldStylePathBasename; TinyString pkgName; TinyString treeName; #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) TinyString altSepPath; #endif const char *dataPath; const char *tocEntrySuffix; int32_t tocEntrySuffixIndex; const char *tocEntryPathSuffix; UDataMemory dataMemory; UDataMemory *pCommonData; UDataMemory *pEntryData; const DataHeader *pHeader; const char *inBasename; UErrorCode errorCode=U_ZERO_ERROR; const char *treeChar; UBool isICUData = FALSE; if(path == NULL || !strcmp(path, U_ICUDATA_ALIAS) || !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) || !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) { isICUData = TRUE; } #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* remap from alternate path char to the main one */ TinyString_init(&altSepPath); if(path) { char *p; if((p=uprv_strchr(path,U_FILE_ALT_SEP_CHAR))) { 
TinyString_append(&altSepPath, path); while((p=uprv_strchr(altSepPath.s,U_FILE_ALT_SEP_CHAR))) { *p = U_FILE_SEP_CHAR; } #if defined (UDATA_DEBUG) fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s); #endif path = altSepPath.s; } } #endif TinyString_init(&oldIndFileName); TinyString_init(&tocEntryName); TinyString_init(&tocEntryPath); TinyString_init(&oldStylePath); TinyString_init(&oldStylePathBasename); TinyString_init(&pkgName); TinyString_init(&treeName); if(path==NULL) { TinyString_append(&pkgName, U_ICUDATA_NAME); } else { const char *pkg; const char *first; pkg = uprv_strrchr(path, U_FILE_SEP_CHAR); first = uprv_strchr(path, U_FILE_SEP_CHAR); if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */ /* see if this is an /absolute/path/to/package path */ if(pkg) { TinyString_append(&pkgName, pkg+1); } else { TinyString_append(&pkgName, path); } } else { treeChar = uprv_strchr(path, U_TREE_SEPARATOR); if(treeChar) { TinyString_append(&treeName, treeChar+1); /* following '-' */ if(isICUData) { TinyString_append(&pkgName, U_ICUDATA_NAME); } else { TinyString_appendn(&pkgName, path, (int32_t)(treeChar-path)); if (first == NULL) { /* This user data has no path, but there is a tree name. Look up the correct path from the data cache later. */ path = pkgName.s; } } } else { if(isICUData) { TinyString_append(&pkgName, U_ICUDATA_NAME); } else { TinyString_append(&pkgName, path); } } } } #ifdef UDATA_DEBUG fprintf(stderr, " P=%s T=%s\n", pkgName.s, treeName.s); #endif /* Make up a full name by appending the type to the supplied * name, assuming that a type was supplied. 
*/ /* prepend the package */ TinyString_append(&tocEntryName, pkgName.s); TinyString_append(&tocEntryPath, pkgName.s); TinyString_append(&oldIndFileName, pkgName.s); tocEntrySuffixIndex = tocEntryName.length; if(treeName.s[0]) { TinyString_append(&tocEntryName, U_TREE_ENTRY_SEP_STRING); TinyString_append(&tocEntryName, treeName.s); TinyString_append(&tocEntryPath, U_FILE_SEP_STRING); TinyString_append(&tocEntryPath, treeName.s); } TinyString_append(&oldIndFileName, "_"); TinyString_append(&tocEntryName, U_TREE_ENTRY_SEP_STRING); TinyString_append(&tocEntryPath, U_FILE_SEP_STRING); TinyString_append(&oldIndFileName, name); TinyString_append(&tocEntryName, name); TinyString_append(&tocEntryPath, name); if(type!=NULL && *type!=0) { TinyString_append(&tocEntryName, "."); TinyString_append(&tocEntryName, type); TinyString_append(&tocEntryPath, "."); TinyString_append(&tocEntryPath, type); TinyString_append(&oldIndFileName, "."); TinyString_append(&oldIndFileName, type); } tocEntrySuffix = tocEntryName.s+tocEntrySuffixIndex; /* suffix starts here */ tocEntryPathSuffix = tocEntryPath.s+tocEntrySuffixIndex; /* suffix starts here */ #ifdef UDATA_DEBUG fprintf(stderr, " tocEntryName = %s\n", tocEntryName.s); fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.s); fprintf(stderr, " oldIndFileName = %s\n", oldIndFileName.s); #endif /* the data was not found in the common data, look further, */ /* try to get an individual data file */ if(path == NULL) { path = COMMON_DATA_NAME; inBasename = COMMON_DATA_NAME; } else { if(isICUData) { inBasename=COMMON_DATA_NAME; } else { inBasename=findBasename(path); } } /************************ Begin loop looking for ind. 
files ***************/ #ifdef UDATA_DEBUG fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", inBasename, packageNameFromPath(path)); #endif /* Deal with a null basename */ if( (*inBasename==0) && (uprv_strlen(path) > 3) ) { /* the purpose of this exercise is to turn /tmp/foo/bar/ into path= /tmp/foo/bar/bar and basename= bar (i.e. /tmp/foo/bar/bar.dat or /tmp/foo/bar/bar_en_US.res ) */ char *rightSlash; TinyString_append(&oldStylePath, path); /* chop off trailing slash */ oldStylePath.length--; oldStylePath.s[oldStylePath.length] = 0; rightSlash = (char*)uprv_strrchr(oldStylePath.s, U_FILE_SEP_CHAR); if(rightSlash != NULL) { rightSlash++; TinyString_append(&oldStylePathBasename, rightSlash); inBasename = oldStylePathBasename.s; TinyString_append(&oldStylePath, U_FILE_SEP_STRING); TinyString_append(&oldStylePath, inBasename); /* one more time, for the base name */ path = oldStylePath.s; } else { *pErrorCode = U_FILE_ACCESS_ERROR; /* hopelessly bad case */ retVal = NULL; goto commonReturn; } } /* End of dealing with a null basename */ dataPath = u_getDataDirectory(); /* Check to make sure that there is a dataPath to iterate over */ if ((dataPath && *dataPath) || !isICUData) { UDataPathIterator iter; /* #1a look in ind. files: package\nam.typ ========================= */ /* init path iterator for individual files */ udata_pathiter_init(&iter, dataPath, pkgName.s, path, tocEntryPathSuffix, FALSE); while((pathBuffer = udata_pathiter_next(&iter))) { #ifdef UDATA_DEBUG fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); #endif if( uprv_mapFile(&dataMemory, pathBuffer) || (inBasename!=pathBuffer && uprv_mapFile(&dataMemory, inBasename))) { pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, &errorCode, pErrorCode); if (pEntryData != NULL) { /* Data is good. * Hand off ownership of the backing memory to the user's UDataMemory. * and return it. 
*/ pEntryData->mapAddr = dataMemory.mapAddr; pEntryData->map = dataMemory.map; #ifdef UDATA_DEBUG fprintf(stderr, "** Mapped file: %s\n", pathBuffer); #endif udata_pathiter_dt(&iter); retVal = pEntryData; goto commonReturn; } /* the data is not acceptable, or some error occured. Either way, unmap the memory */ udata_close(&dataMemory); /* If we had a nasty error, bail out completely. */ if (U_FAILURE(*pErrorCode)) { udata_pathiter_dt(&iter); retVal = NULL; goto commonReturn; } /* Otherwise remember that we found data but didn't like it for some reason */ errorCode=U_INVALID_FORMAT_ERROR; } #ifdef UDATA_DEBUG fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded"); #endif } udata_pathiter_dt(&iter); /* #1b look in ind. files - with old naming (package_nam.typ not package\nam.typ) ==================== */ /* init path iterator for individual files */ udata_pathiter_init(&iter, dataPath, "", path, oldIndFileName.s, FALSE); while((pathBuffer = udata_pathiter_next(&iter))) { #ifdef UDATA_DEBUG fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); #endif if( uprv_mapFile(&dataMemory, pathBuffer) || (inBasename!=pathBuffer && uprv_mapFile(&dataMemory, inBasename))) { pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, &errorCode, pErrorCode); if (pEntryData != NULL) { /* Data is good. * Hand off ownership of the backing memory to the user's UDataMemory. * and return it. */ pEntryData->mapAddr = dataMemory.mapAddr; pEntryData->map = dataMemory.map; #ifdef UDATA_DEBUG fprintf(stderr, "** Mapped file: %s\n", pathBuffer); #endif udata_pathiter_dt(&iter); retVal = pEntryData; goto commonReturn; } /* the data is not acceptable, or some error occured. Either way, unmap the memory */ udata_close(&dataMemory); /* If we had a nasty error, bail out completely. 
*/ if (U_FAILURE(*pErrorCode)) { udata_pathiter_dt(&iter); retVal = NULL; goto commonReturn; } /* Otherwise remember that we found data but didn't like it for some reason */ errorCode=U_INVALID_FORMAT_ERROR; } #ifdef UDATA_DEBUG fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded"); #endif } udata_pathiter_dt(&iter); } /* #2 */ /* try to get common data. The loop is for platforms such as the 390 that do * not initially load the full set of ICU data. If the lookup of an ICU data item * fails, the full (but slower to load) set is loaded, the and the loop repeats, * trying the lookup again. Once the full set of ICU data is loaded, the loop wont * repeat because the full set will be checked the first time through. * * The loop also handles the fallback to a .dat file if the application linked * to the stub data library rather than a real library. */ for (;;) { pCommonData=openCommonData(path, isICUData, &errorCode); /** search for pkg **/ if(U_SUCCESS(errorCode)) { int32_t length; /* look up the data piece in the common data */ pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName.s, &length, &errorCode); #ifdef UDATA_DEBUG fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName.s, pHeader, u_errorName(errorCode)); #endif if((pHeader == NULL) && !U_FAILURE(errorCode)) { pHeader=pCommonData->vFuncs->Lookup(pCommonData, oldIndFileName.s, /* oldIndFileName is preceded by a slash */ &length, &errorCode); #ifdef UDATA_DEBUG fprintf(stderr, "[OLD name] %s: pHeader=%p - %s\n", oldIndFileName.s, pHeader, u_errorName(errorCode)); #endif } if(pHeader!=NULL) { pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, &errorCode, pErrorCode); #ifdef UDATA_DEBUG fprintf(stderr, "pEntryData=%p\n", pEntryData); #endif if (U_FAILURE(*pErrorCode)) { retVal = NULL; goto commonReturn; } if (pEntryData != NULL) { pEntryData->length = length; retVal = pEntryData; goto commonReturn; } } } /* Data wasn't found. 
If we were looking for an ICUData item and there is * more data available, load it and try again, * otherwise break out of this loop. */ if (!(isICUData && pCommonData && extendICUData(pCommonData, &errorCode))) { break; } } /* data not found */ if(U_SUCCESS(*pErrorCode)) { if(U_SUCCESS(errorCode)) { /* file not found */ *pErrorCode=U_FILE_ACCESS_ERROR; } else { /* entry point not found or rejected */ *pErrorCode=errorCode; } } commonReturn: TinyString_dt(&tocEntryName); TinyString_dt(&tocEntryPath); TinyString_dt(&oldIndFileName); TinyString_dt(&oldStylePath); TinyString_dt(&oldStylePathBasename); TinyString_dt(&pkgName); TinyString_dt(&treeName); #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) TinyString_dt(&altSepPath); #endif return retVal; } /* API ---------------------------------------------------------------------- */ U_CAPI UDataMemory * U_EXPORT2 udata_open(const char *path, const char *type, const char *name, UErrorCode *pErrorCode) { #ifdef UDATA_DEBUG fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type); fflush(stderr); #endif if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return NULL; } else if(name==NULL || *name==0) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } else { return doOpenChoice(path, type, name, NULL, NULL, pErrorCode); } } U_CAPI UDataMemory * U_EXPORT2 udata_openChoice(const char *path, const char *type, const char *name, UDataMemoryIsAcceptable *isAcceptable, void *context, UErrorCode *pErrorCode) { #ifdef UDATA_DEBUG fprintf(stderr, "udata_openChoice(): Opening: %s : %s . 
%s\n", (path?path:"NULL"), name, type); #endif if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return NULL; } else if(name==NULL || *name==0 || isAcceptable==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } else { return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode); } } U_CAPI void U_EXPORT2 udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) { if(pInfo!=NULL) { if(pData!=NULL && pData->pHeader!=NULL) { const UDataInfo *info=&pData->pHeader->info; uint16_t dataInfoSize=udata_getInfoSize(info); if(pInfo->size>dataInfoSize) { pInfo->size=dataInfoSize; } uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2); if(info->isBigEndian!=U_IS_BIG_ENDIAN) { /* opposite endianness */ uint16_t x=info->reservedWord; pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8)); } } else { pInfo->size=0; } } }