b89c229c6e
X-SVN-Rev: 8993
793 lines
26 KiB
C++
793 lines
26 KiB
C++
/*
|
|
**********************************************************************
|
|
* Copyright (C) 1999-2001, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
* Date Name Description
|
|
* 11/24/99 aliu Creation.
|
|
* 09/26/00 aliu Support for equivalency groups added.
|
|
* 01/31/01 aliu Support for ISO 3166 country codes added.
|
|
**********************************************************************
|
|
*/
|
|
|
|
/* This program reads a text file full of parsed time zone data and
|
|
* outputs a binary file, tz.dat, which then goes on to become part of
|
|
* the memory-mapped (or dll) ICU data file.
|
|
*
|
|
* The data file read by this program is generated by a perl script,
|
|
* tz.pl. The input to tz.pl is standard unix time zone data from
|
|
* ftp://elsie.nci.nih.gov.
|
|
*
|
|
* As a matter of policy, the perl script tz.pl wants to do as much of
|
|
* the parsing, data processing, and error checking as possible, and
|
|
* this program wants to just do the binary translation step.
|
|
*
|
|
* See tz.pl for the file format that is READ by this program.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/putil.h"
|
|
#include "cmemory.h"
|
|
#include "cstring.h"
|
|
#include "filestrm.h"
|
|
#include "unewdata.h"
|
|
#include "uoptions.h"
|
|
#include "tzdat.h"
|
|
|
|
/* Conventional names of the text input (written by tz.pl) and of the
 * binary output.  NOTE(review): neither macro appears to be referenced
 * by the code in this file; the input path comes from the command line
 * and the output name from TZ_DATA_NAME/TZ_DATA_TYPE. */
#define INPUT_FILE "tz.txt"
#define OUTPUT_FILE "tz.dat"
|
|
|
|
/* UDataInfo cf. udata.h */
|
|
static UDataInfo dataInfo = {
|
|
sizeof(UDataInfo),
|
|
0,
|
|
|
|
U_IS_BIG_ENDIAN,
|
|
U_CHARSET_FAMILY,
|
|
sizeof(UChar),
|
|
0,
|
|
|
|
{TZ_SIG_0, TZ_SIG_1, TZ_SIG_2, TZ_SIG_3},
|
|
{TZ_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */
|
|
{0, 0, 0, 0} /* dataVersion - will be filled in with year.suffix */
|
|
};
|
|
|
|
|
|
class gentz {
|
|
// These must match SimpleTimeZone!!!
|
|
enum { WALL_TIME = 0,
|
|
STANDARD_TIME,
|
|
UTC_TIME
|
|
};
|
|
|
|
// The largest number of zones we accept as sensible. Anything
|
|
// larger is considered an error. Adjust as needed.
|
|
enum { MAX_ZONES = 1000 };
|
|
|
|
// The maximum sensible GMT offset, in seconds
|
|
static const int32_t MAX_GMT_OFFSET;
|
|
|
|
static const char COMMENT;
|
|
static const char CR;
|
|
static const char LF;
|
|
static const char MINUS;
|
|
static const char SPACE;
|
|
static const char TAB;
|
|
static const char ZERO;
|
|
static const char STANDARD_MARK;
|
|
static const char DST_MARK;
|
|
static const char SEP;
|
|
static const char NUL;
|
|
|
|
static const char* END_KEYWORD;
|
|
|
|
enum { BUFLEN = 1024 };
|
|
char buffer[BUFLEN];
|
|
int32_t lineNumber;
|
|
|
|
// Binary data that we construct from tz.txt and write out as tz.dat
|
|
TZHeader header;
|
|
TZEquivalencyGroup* equivTable;
|
|
OffsetIndex* offsetIndex;
|
|
CountryIndex* countryIndex;
|
|
uint32_t* nameToEquiv;
|
|
char* nameTable;
|
|
|
|
uint32_t equivTableSize; // Total bytes in equivalency group table
|
|
uint32_t offsetIndexSize; // Total bytes in offset index table
|
|
uint32_t countryIndexSize; // Total bytes in country index table
|
|
uint32_t nameToEquivSize; // Total bytes in nameToEquiv
|
|
uint32_t nameTableSize; // Total bytes in name table
|
|
|
|
uint32_t maxPerOffset; // Maximum number of zones per offset
|
|
uint32_t maxPerEquiv; // Maximum number of zones per equivalency group
|
|
uint32_t equivCount; // Number of equivalency groups
|
|
|
|
UBool useCopyright;
|
|
UBool verbose;
|
|
|
|
|
|
public:
|
|
int MMain(int argc, char *argv[]);
|
|
private:
|
|
int32_t writeTzDatFile(const char *destdir);
|
|
void parseTzTextFile(FileStream* in);
|
|
|
|
// High level parsing
|
|
void parseHeader(FileStream* in);
|
|
|
|
TZEquivalencyGroup* parseEquivTable(FileStream* in);
|
|
|
|
void fixupNameToEquiv();
|
|
|
|
void parseDSTRule(char*& p, TZRule& rule);
|
|
|
|
OffsetIndex* parseOffsetIndexTable(FileStream* in);
|
|
|
|
CountryIndex* parseCountryIndexTable(FileStream* in);
|
|
|
|
char* parseNameTable(FileStream* in);
|
|
|
|
// Low level parsing and reading
|
|
void readEndMarker(FileStream* in);
|
|
int32_t readIntegerLine(FileStream* in, int32_t min, int32_t max);
|
|
int32_t _parseInteger(char*& p);
|
|
int32_t parseInteger(char*& p, char nextExpectedChar, int32_t, int32_t);
|
|
int32_t readLine(FileStream* in);
|
|
|
|
// Error handling
|
|
void die(const char* msg);
|
|
};
|
|
|
|
int main(int argc, char *argv[]) {
|
|
gentz x;
|
|
|
|
U_MAIN_INIT_ARGS(argc, argv);
|
|
|
|
return x.MMain(argc, argv);
|
|
}
|
|
|
|
// Largest |GMT offset| accepted from the input: 24 hours, in seconds.
const int32_t gentz::MAX_GMT_OFFSET = (int32_t)24*60*60; // seconds

// Characters trimmed or skipped by readLine()
const char gentz::COMMENT       = '#';
const char gentz::CR            = '\r';
const char gentz::LF            = '\n';
const char gentz::SPACE         = ' ';
const char gentz::TAB           = '\t';

// Characters used while parsing integers and fields
const char gentz::MINUS         = '-';
const char gentz::ZERO          = '0';
const char gentz::SEP           = ',';
const char gentz::NUL           = '\0';

// Type markers that start each equivalency table line
const char gentz::STANDARD_MARK = 's';
const char gentz::DST_MARK      = 'd';

// Line that closes every section of the input file
const char* gentz::END_KEYWORD  = "end";
|
|
|
|
// Command line options.  gentz::MMain() addresses these entries by
// position, so the order below must not change.
static UOption options[]={
    UOPTION_HELP_H,             // [0]
    UOPTION_HELP_QUESTION_MARK, // [1]
    UOPTION_COPYRIGHT,          // [2]
    UOPTION_DESTDIR,            // [3]
    UOPTION_VERBOSE             // [4]
};
|
|
|
|
int gentz::MMain(int argc, char* argv[]) {
|
|
/* preset then read command line options */
|
|
options[3].value=u_getDataDirectory();
|
|
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
|
|
|
/* error handling, printing usage message */
|
|
if(argc<0) {
|
|
fprintf(stderr,
|
|
"error in command line argument \"%s\"\n",
|
|
argv[-argc]);
|
|
} else if(argc<2) {
|
|
argc=-1;
|
|
}
|
|
if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
|
|
fprintf(stderr,
|
|
"usage: %s [-options] timezone-file\n"
|
|
"\tread the timezone file produced by tz.pl and create " TZ_DATA_NAME "." TZ_DATA_TYPE "\n"
|
|
"options:\n"
|
|
"\t-h or -? or --help this usage text\n"
|
|
"\t-v or --verbose turn on verbose output\n"
|
|
"\t-c or --copyright include a copyright notice\n"
|
|
"\t-d or --destdir destination directory, followed by the path\n",
|
|
argv[0]);
|
|
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
|
}
|
|
|
|
/* get the options values */
|
|
useCopyright=options[2].doesOccur;
|
|
verbose = options[4].doesOccur;
|
|
|
|
|
|
////////////////////////////////////////////////////////////
|
|
// Read the input file
|
|
////////////////////////////////////////////////////////////
|
|
*buffer = NUL;
|
|
lineNumber = 0;
|
|
if (verbose) {
|
|
fprintf(stdout, "Input file: %s\n", argv[1]);
|
|
}
|
|
FileStream* in = T_FileStream_open(argv[1], "r");
|
|
if (in == 0) {
|
|
die("Cannot open input file");
|
|
}
|
|
parseTzTextFile(in);
|
|
T_FileStream_close(in);
|
|
*buffer = NUL;
|
|
|
|
////////////////////////////////////////////////////////////
|
|
// Write the output file
|
|
////////////////////////////////////////////////////////////
|
|
int32_t wlen = writeTzDatFile(options[3].value);
|
|
if (verbose) {
|
|
fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
|
|
TZ_DATA_NAME, TZ_DATA_TYPE, (long)wlen);
|
|
}
|
|
|
|
return 0; // success
|
|
}
|
|
|
|
int32_t gentz::writeTzDatFile(const char *destdir) {
|
|
UNewDataMemory *pdata;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
// Careful: The order in which the tables are written must match the offsets.
|
|
// Our order is:
|
|
// - equiv table
|
|
// - offset index
|
|
// - country index
|
|
// - name index (name to equiv map)
|
|
// - name table (must be last!)
|
|
header.equivTableDelta = sizeof(header);
|
|
header.offsetIndexDelta = header.equivTableDelta + equivTableSize;
|
|
header.countryIndexDelta = header.offsetIndexDelta + offsetIndexSize;
|
|
header.nameIndexDelta = header.countryIndexDelta + countryIndexSize;
|
|
// Must be last:
|
|
header.nameTableDelta = header.nameIndexDelta + nameToEquivSize;
|
|
|
|
/* // Don't need to check for negative values on unsigned numbers.
|
|
if (header.equivTableDelta < 0 ||
|
|
header.offsetIndexDelta < 0 ||
|
|
header.countryIndexDelta < 0 ||
|
|
header.nameIndexDelta < 0 ||
|
|
header.nameTableDelta < 0) {
|
|
die("Table too big -- negative delta");
|
|
}
|
|
*/
|
|
|
|
// Convert equivalency table indices to offsets. This can only
|
|
// be done after the header offsets have been set up.
|
|
fixupNameToEquiv();
|
|
|
|
// Fill in dataInfo with year.suffix
|
|
*(uint16_t*)&(dataInfo.dataVersion[0]) = header.versionYear;
|
|
*(uint16_t*)&(dataInfo.dataVersion[2]) = header.versionSuffix;
|
|
|
|
pdata = udata_create(destdir, TZ_DATA_TYPE, TZ_DATA_NAME, &dataInfo,
|
|
useCopyright ? U_COPYRIGHT_STRING : 0, &status);
|
|
if (U_FAILURE(status)) {
|
|
die("Unable to create data memory");
|
|
}
|
|
|
|
udata_writeBlock(pdata, &header, sizeof(header));
|
|
udata_writeBlock(pdata, equivTable, equivTableSize);
|
|
udata_writeBlock(pdata, offsetIndex, offsetIndexSize);
|
|
udata_writeBlock(pdata, countryIndex, countryIndexSize);
|
|
udata_writeBlock(pdata, nameToEquiv, nameToEquivSize);
|
|
udata_writeBlock(pdata, nameTable, nameTableSize);
|
|
|
|
uint32_t dataLength = udata_finish(pdata, &status);
|
|
if (U_FAILURE(status)) {
|
|
die("Error writing output file");
|
|
}
|
|
|
|
if (dataLength != (sizeof(header) + equivTableSize +
|
|
offsetIndexSize + countryIndexSize +
|
|
nameTableSize + nameToEquivSize
|
|
)) {
|
|
die("Written file doesn't match expected size");
|
|
}
|
|
return dataLength;
|
|
}
|
|
|
|
/**
 * Parse the whole input file.  The sections are consumed in the order
 * in which they are read here: header, name table, equivalency
 * groups, GMT offset index, ISO 3166 country index.
 */
void gentz::parseTzTextFile(FileStream* in) {
    // Counts and sizes that the later sections are checked against
    parseHeader(in);

    // Names; parseNameTable also allocates and fills nameToEquiv
    // as a side effect.
    nameTable = parseNameTable(in);

    // Equivalency groups
    equivTable = parseEquivTable(in);

    // GMT offset index
    offsetIndex = parseOffsetIndexTable(in);

    // ISO 3166 country index
    countryIndex = parseCountryIndexTable(in);
}
|
|
|
|
/**
|
|
* Convert equivalency table indices to offsets. The equivalency
|
|
* table offset (in the header) must be set already.
|
|
*/
|
|
void gentz::fixupNameToEquiv() {
|
|
uint32_t i;
|
|
|
|
// First make a list that maps indices to offsets
|
|
uint32_t *offsets = (uint32_t*) uprv_malloc(sizeof(uint32_t) * equivCount);
|
|
/* test for NULL */
|
|
if(offsets == NULL) {
|
|
die("Out of memory");
|
|
}
|
|
offsets[0] = header.equivTableDelta;
|
|
if (offsets[0] % 4 != 0) {
|
|
die("Header size is not 4-aligned");
|
|
}
|
|
TZEquivalencyGroup *eg = equivTable;
|
|
for (i=1; i<equivCount; ++i) {
|
|
offsets[i] = offsets[i-1] + eg->nextEntryDelta;
|
|
if (offsets[i] % 4 != 0) {
|
|
die("Equivalency group table is not 4-aligned");
|
|
}
|
|
eg = (TZEquivalencyGroup*) (eg->nextEntryDelta + (int8_t*)eg);
|
|
}
|
|
|
|
// Now remap index values to offsets
|
|
for (i=0; i<header.count; ++i) {
|
|
uint32_t x = nameToEquiv[i];
|
|
if (x >= equivCount) {
|
|
die("Equiv index out of range");
|
|
}
|
|
nameToEquiv[i] = offsets[x];
|
|
}
|
|
|
|
uprv_free(offsets);
|
|
}
|
|
|
|
/**
 * Parse the equivalency group section and build the packed table.
 *
 * Each input line starts with 's,' (standard zone) or 'd,' (zone with
 * DST rules), followed by the zone data, the number of zones in the
 * group and the zone numbers.  Entries are laid out back to back, each
 * padded to a multiple of 4 bytes; nextEntryDelta links an entry to
 * its successor and is 0 in the last one.  Sets equivTableSize.
 *
 * @return the malloc'ed table
 */
TZEquivalencyGroup* gentz::parseEquivTable(FileStream* in) {
    const uint32_t groupCount = readIntegerLine(in, 1, MAX_ZONES);
    if (groupCount != equivCount) {
        die("Equivalency table count mismatch");
    }

    // The final size is not known yet, but maxPerEquiv yields an upper
    // limit for a single entry.
    //
    // The gmtOffset field within each struct must be 4-aligned for
    // some architectures.  Hence every entry starts 4-aligned, holds
    // gmtOffset at a 4-aligned position, and is padded to 4n bytes by
    // appending two bytes after the last zone number when necessary.
    uint32_t perGroupMax = sizeof(TZEquivalencyGroup) +
                           (maxPerEquiv-1) * sizeof(uint16_t);
    if ((perGroupMax % 4) != 0) {
        perGroupMax += 2;   // Pad this out
    }
    if ((perGroupMax % 4) != 0) {
        die("Bug in 4-align code for equiv table");
    }
    const uint32_t maxPossibleSize = perGroupMax * groupCount;

    int8_t *table = (int8_t*) uprv_malloc(sizeof(int8_t) * maxPossibleSize);
    if (table == 0) {
        die("Out of memory");
    }

    // One entry per input line; 'cursor' is where the next entry starts.
    int8_t* cursor = table;
    for (uint32_t g = 0; g < groupCount; ++g) {
        TZEquivalencyGroup* eg = (TZEquivalencyGroup*)cursor;

        readLine(in);

        // Each line starts with 's,' or 'd,' to specify the zone type
        const char flavor = buffer[0];
        if (buffer[1] != SEP) {
            die("Syntax error in equiv table");
        }
        char* p = buffer + 2;

        // Will point at the count field that precedes the zone list
        // of this group.
        uint16_t* pList = 0;

        if (flavor == STANDARD_MARK) {
            eg->isDST = 0;
            eg->u.s.zone.gmtOffset = 1000 * // Convert s -> ms
                parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
            pList = &(eg->u.s.count);
        } else if (flavor == DST_MARK) {
            eg->isDST = 1;
            eg->u.d.zone.gmtOffset = 1000 * // Convert s -> ms
                parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
            parseDSTRule(p, eg->u.d.zone.onsetRule);
            parseDSTRule(p, eg->u.d.zone.ceaseRule);
            eg->u.d.zone.dstSavings = (uint16_t) parseInteger(p, SEP, 0, 12*60);
            pList = &(eg->u.d.count);
        } else {
            die("Invalid equiv table type marker (not s or d)");
        }

        // Zone count, then the zone numbers; the last number ends the line.
        const uint16_t egCount = (uint16_t) parseInteger(p, SEP, 1, maxPerEquiv);
        *pList = egCount;
        ++pList;
        for (uint16_t j = 0; j < egCount; ++j) {
            const char terminator = (j == (egCount-1)) ? NUL : SEP;
            *pList = (uint16_t) parseInteger(p, terminator, 0, header.count-1);
            ++pList;
        }

        // pList now points just past this entry.  Time to 4-align it.
        uint16_t structSize = (uint16_t) (((int8_t*)pList) - ((int8_t*)eg));
        if ((structSize % 4) != 0) {
            // assert(structSize % 4 == 2);
            *pList++ = 0xFFFF; // Pad with invalid zone index
            structSize += 2;
        }

        // Link to the next entry; the last entry carries 0.
        if (g == (groupCount-1)) {
            eg->nextEntryDelta = (uint16_t) 0;
        } else {
            eg->nextEntryDelta = structSize;
        }

        eg->reserved = 0; // ignored

        cursor += structSize;
    }
    equivTableSize = cursor - table;
    readEndMarker(in);
    if (verbose) {
        fprintf(stdout, " Read %lu equivalency table entries, in-memory size %ld bytes\n",
                (unsigned long)equivCount, (long)equivTableSize);
    }
    return (TZEquivalencyGroup*)table;
}
|
|
|
|
/**
 * Parse the GMT offset index section and build the packed table.
 *
 * Each input line holds a GMT offset in seconds, the default zone for
 * that offset, the number of zones, and the zone numbers; the default
 * zone must appear in the list.  Entries are laid out back to back and
 * linked by nextEntryDelta (0 in the last entry).  Sets
 * offsetIndexSize.
 *
 * @return the malloc'ed table
 */
OffsetIndex* gentz::parseOffsetIndexTable(FileStream* in) {
    uint32_t n = readIntegerLine(in, 1, MAX_ZONES);

    // We don't know how big the whole thing will be yet, but we can use
    // the maxPerOffset number to compute an upper limit.
    //
    // The gmtOffset field within each OffsetIndex struct must be
    // 4-aligned for some architectures.  To ensure this, every entry
    // starts 4-aligned, gmtOffset sits at a 4-aligned position within
    // it, and the entry is padded out to 4n bytes by adding two bytes
    // after the last zoneNumber if count is _even_.
    //
    // Note that we don't change the count itself, but rather adjust
    // the nextEntryDelta and add 2 bytes of padding if necessary.
    //
    // The limit below reserves room for that padding element as well:
    // an entry holding maxPerOffset zones with an even count writes
    // maxPerOffset+1 zoneNumber slots, one more than
    // sizeof(OffsetIndex) + (maxPerOffset-1) slots is guaranteed to
    // provide.
    uint32_t maxPossibleSize = n * (sizeof(OffsetIndex) +
                                    (maxPerOffset-1) * sizeof(uint16_t) +
                                    sizeof(uint16_t) /* alignment pad */);

    int8_t *result = (int8_t*) uprv_malloc(sizeof(int8_t) * maxPossibleSize);
    if (result == 0) {
        die("Out of memory");
    }

    // Read each line and construct the corresponding entry
    OffsetIndex* index = (OffsetIndex*)result;
    for (uint32_t i=0; i<n; ++i) {
        uint16_t alignedCount;
        readLine(in);
        char* p = buffer;
        index->gmtOffset = 1000 * // Convert s -> ms
            parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
        index->defaultZone = (uint16_t)parseInteger(p, SEP, 0, header.count-1);
        index->count = (uint16_t)parseInteger(p, SEP, 1, maxPerOffset);
        uint16_t* zoneNumberArray = &(index->zoneNumber);
        UBool sawOffset = FALSE; // Sanity check - make sure offset is in zone list
        for (uint16_t j=0; j<index->count; ++j) {
            // The last zone number is followed by the end of the line.
            zoneNumberArray[j] = (uint16_t)
                parseInteger(p, (j==(index->count-1))?NUL:SEP,
                             0, header.count-1);
            if (zoneNumberArray[j] == index->defaultZone) {
                sawOffset = TRUE;
            }
        }
        if (!sawOffset) {
            die("Error: bad offset index entry; default not in zone list");
        }
        alignedCount = index->count;
        if((alignedCount%2)==0) /* force count to be ODD - see above */
        {
            // Use invalid zoneNumber for 2 bytes of padding
            zoneNumberArray[alignedCount++] = (uint16_t)0xFFFF;
        }
        int8_t* nextIndex = (int8_t*)&(zoneNumberArray[alignedCount]);

        // Link to the next entry; the last entry carries 0.
        index->nextEntryDelta = (uint16_t) ((i==(n-1)) ? 0 : (nextIndex - (int8_t*)index));
        index = (OffsetIndex*)nextIndex;
    }
    offsetIndexSize = (int8_t*)index - (int8_t*)result;
    // Kept as a consistency check on the size computation above.
    if (offsetIndexSize > maxPossibleSize) {
        die("Yikes! Interal error while constructing offset index table");
    }
    readEndMarker(in);
    if (verbose) {
        fprintf(stdout, " Read %lu offset index table entries, in-memory size %ld bytes\n",
                (unsigned long)n, (long)offsetIndexSize);
    }
    return (OffsetIndex*)result;
}
|
|
|
|
/**
 * Parse the ISO 3166 country index section and build the packed table.
 *
 * Each input line holds the integer country code, the number of zones
 * in that country, and the zone numbers.  Entries are laid out back to
 * back and linked by nextEntryDelta (0 in the last entry).  Sets
 * countryIndexSize, which includes any trailing padding.
 *
 * @return the malloc'ed table
 */
CountryIndex* gentz::parseCountryIndexTable(FileStream* in) {
    uint32_t n = readIntegerLine(in, 1, MAX_ZONES);

    // We know how big the whole thing will be: Each zone occupies an
    // int, and each country adds 3 ints (one for the intcode, one for
    // next entry offset, one for the zone count).  Each int is 16
    // bits.
    //
    // Everything is 16-bits, so we don't 4-align the entries.
    // However, we do pad at the end of the table to make the whole
    // thing of size 4n, if necessary.
    uint32_t expectedSize = n*(sizeof(CountryIndex)-sizeof(uint16_t)) +
        header.count * sizeof(uint16_t);
    uint32_t pad = (4 - (expectedSize % 4)) % 4; // This will be 0 or 2
    int8_t *result = (int8_t*) uprv_malloc(sizeof(int8_t) * (expectedSize + pad));
    if (result == 0) {
        die("Out of memory");
    }

    // Read each line and construct the corresponding entry.
    // Along the way, make sure we don't write past 'limit'.
    CountryIndex* index = (CountryIndex*)result;
    int8_t* limit = ((int8_t*)result) + expectedSize; // Don't include pad
    uint32_t i;
    for (i=0; i<n && (int8_t*)(&index->zoneNumber) < limit; ++i) {
        readLine(in);
        char* p = buffer;
        index->intcode = (uint16_t)parseInteger(p, SEP, 0, 25*32+25 /*ZZ*/);
        index->count = (uint16_t)parseInteger(p, SEP, 0, header.count-1);
        uint16_t* zoneNumberArray = &(index->zoneNumber);
        if ((int8_t*)(&index->zoneNumber + index->count - 1) >= limit) {
            // Oops -- out of space; leaves i != n, which is reported below
            break;
        }
        for (uint16_t j=0; j<index->count; ++j) {
            // The last zone number is followed by the end of the line.
            zoneNumberArray[j] = (uint16_t)
                parseInteger(p, (j==(index->count-1))?NUL:SEP,
                             0, header.count-1);
        }
        int8_t* nextIndex = (int8_t*)&(zoneNumberArray[index->count]);
        index->nextEntryDelta = (uint16_t) ((i==(n-1)) ? 0 : (nextIndex - (int8_t*)index));
        index = (CountryIndex*)nextIndex;
    }
    readEndMarker(in);

    // Make sure the bytes actually used match the expected value.  The
    // comparison is made BEFORE the padding is added: expectedSize does
    // not include the pad, so adding it first would reject every table
    // that needs padding and would then count the pad twice.
    countryIndexSize = (int8_t*)index - (int8_t*)result;
    if (i != n || countryIndexSize != expectedSize) {
        die("Yikes! Internal error while constructing country index table");
    }
    // Now pad the total size out to 4n.
    if (pad != 0) {
        countryIndexSize += pad;
        *(uint16_t*)index = 0; // Clear pad bits
    }
    if (verbose) {
        fprintf(stdout, " Read %lu country index table entries, in-memory size %ld bytes\n", (unsigned long)n, (long)countryIndexSize);
    }
    return (CountryIndex*)result;
}
|
|
|
|
/**
 * Parse the header section: one integer per line, closed by the end
 * marker.  Fills in the header's version and zone count, plus the
 * counts and sizes (equivCount, maxPerOffset, maxPerEquiv,
 * nameTableSize) that the later sections are validated against.
 * Dies on a format version mismatch or an out-of-range value.
 */
void gentz::parseHeader(FileStream* in) {

    int32_t version = readIntegerLine(in, 0, 0xFFFF);
    if (version != TZ_FORMAT_VERSION) {
        die("Version mismatch between gentz and input file");
    }

    // Data version, e.g., "1999j": the year and the suffix are given
    // on separate lines, each as an integer.
    header.versionYear = (uint16_t) readIntegerLine(in, 1990, 0xFFFF);
    header.versionSuffix = (uint16_t) readIntegerLine(in, 0, 0xFFFF);

    header.count = readIntegerLine(in, 1, MAX_ZONES);
    equivCount = readIntegerLine(in, 1, header.count);
    maxPerOffset = readIntegerLine(in, 1, header.count);
    // A single group can hold many more zones than there are groups,
    // so the sensible ceiling is the number of zones, not the number
    // of groups.
    maxPerEquiv = readIntegerLine(in, 1, header.count);

    // Size of name table in bytes
    // (0x00FFFFFF is an arbitrary upper limit; adjust as needed.)
    nameTableSize = readIntegerLine(in, 1, 0x00FFFFFF);

    readEndMarker(in);

    if (verbose) {
        // %lu matches the unsigned long argument.
        fprintf(stdout, " Read header, data version %u(%u), in-memory size %lu bytes\n",
                header.versionYear, header.versionSuffix,
                (unsigned long)sizeof(header));
    }
}
|
|
|
|
void gentz::parseDSTRule(char*& p, TZRule& rule) {
|
|
rule.month = (uint8_t) parseInteger(p, SEP, 0, 11);
|
|
rule.dowim = (int8_t) parseInteger(p, SEP, -31, 31);
|
|
rule.dow = (int8_t) parseInteger(p, SEP, -7, 7);
|
|
rule.time = (uint16_t) parseInteger(p, SEP, 0, 24*60);
|
|
rule.mode = *p++;
|
|
if (*p++ != SEP) {
|
|
die("Separator missing");
|
|
}
|
|
switch ((char)rule.mode) {
|
|
case 'w':
|
|
rule.mode = WALL_TIME;
|
|
break;
|
|
case 's':
|
|
rule.mode = STANDARD_TIME;
|
|
break;
|
|
case 'u':
|
|
rule.mode = UTC_TIME;
|
|
break;
|
|
default:
|
|
die("Invalid rule time mode");
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse the name table.
|
|
* Each entry of the name table looks like this:
|
|
* |36,Africa/Djibouti
|
|
* The integer is an equivalency table index. We build up a name
|
|
* table, that just contains the names, and we return it. We also
|
|
* build up the name index, which indexes names to equivalency table
|
|
* entries. This is stored in the member variable nameToEquiv.
|
|
*/
|
|
char* gentz::parseNameTable(FileStream* in) {
|
|
int32_t n = readIntegerLine(in, 1, MAX_ZONES);
|
|
if (n != (int32_t)header.count) {
|
|
die("Zone count doesn't match name table count");
|
|
}
|
|
char* names = (char*) uprv_malloc(sizeof(char) * nameTableSize);
|
|
nameToEquiv = (uint32_t*) uprv_malloc(sizeof(uint32_t) * n);
|
|
if (names == 0 || nameToEquiv == 0) {
|
|
die("Out of memory");
|
|
}
|
|
nameToEquivSize = n * sizeof(nameToEquiv[0]);
|
|
char* p = names;
|
|
char* limit = names + nameTableSize;
|
|
for (int32_t i=0; i<n; ++i) {
|
|
readLine(in);
|
|
char* q = buffer;
|
|
// We store an index here for now -- later, in fixNameToEquiv,
|
|
// we convert it to an offset.
|
|
nameToEquiv[i] = (uint32_t) parseInteger(q, SEP, 0, equivCount-1);
|
|
int32_t len = uprv_strlen(q);
|
|
if ((p + len) <= limit) {
|
|
uprv_memcpy(p, q, len);
|
|
p += len;
|
|
*p++ = NUL;
|
|
} else {
|
|
die("Name table longer than declared size");
|
|
}
|
|
}
|
|
if (p != limit) {
|
|
die("Name table shorter than declared size");
|
|
}
|
|
readEndMarker(in);
|
|
if (verbose) {
|
|
fprintf(stdout, " Read %ld names, in-memory size %ld bytes\n",
|
|
(long)n, (long)nameTableSize);
|
|
}
|
|
return names;
|
|
}
|
|
|
|
/**
|
|
* Read the end marker (terminates each list).
|
|
*/
|
|
void gentz::readEndMarker(FileStream* in) {
|
|
readLine(in);
|
|
if (uprv_strcmp(buffer, END_KEYWORD) != 0) {
|
|
die("Keyword 'end' missing");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Read a line from the FileStream and parse it as an
|
|
* integer. There should be nothing else on the line.
|
|
*/
|
|
int32_t gentz::readIntegerLine(FileStream* in, int32_t min, int32_t max) {
|
|
readLine(in);
|
|
char* p = buffer;
|
|
return parseInteger(p, NUL, min, max);
|
|
}
|
|
|
|
/**
|
|
* Parse an integer from the given character buffer.
|
|
* Advance p past the last parsed character. Return
|
|
* the result. The integer must be of the form
|
|
* /-?\d+/.
|
|
*/
|
|
int32_t gentz::_parseInteger(char*& p) {
|
|
int32_t n = 0;
|
|
int32_t digitCount = 0;
|
|
int32_t digit;
|
|
UBool negative = FALSE;
|
|
if (*p == MINUS) {
|
|
++p;
|
|
negative = TRUE;
|
|
}
|
|
for (;;) {
|
|
digit = *p - ZERO;
|
|
if (digit < 0 || digit > 9) {
|
|
break;
|
|
}
|
|
n = 10*n + digit;
|
|
p++;
|
|
digitCount++;
|
|
}
|
|
if (digitCount < 1) {
|
|
die("Unable to parse integer");
|
|
}
|
|
if (negative) {
|
|
n = -n;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
int32_t gentz::parseInteger(char*& p, char nextExpectedChar,
|
|
int32_t min, int32_t max) {
|
|
int32_t n = _parseInteger(p);
|
|
if (*p++ != nextExpectedChar) {
|
|
die("Character following integer unexpected");
|
|
}
|
|
if (n < min || n > max) {
|
|
die("Integer field out of range");
|
|
}
|
|
return n;
|
|
}
|
|
|
|
void gentz::die(const char* msg) {
|
|
fprintf(stderr, "ERROR, %s\n", msg);
|
|
if (*buffer) {
|
|
fprintf(stderr, "Input file line %ld: \"%s\"\n", (long)lineNumber, buffer);
|
|
}
|
|
exit(1);
|
|
}
|
|
|
|
/**
|
|
* Read a line. Trim trailing comment and whitespace. Ignore (skip)
|
|
* blank lines, or comment-only lines. Return the number of characters
|
|
* on the line remaining. On EOF, die.
|
|
*/
|
|
int32_t gentz::readLine(FileStream* in) {
|
|
++lineNumber;
|
|
char* result = T_FileStream_readLine(in, buffer, BUFLEN);
|
|
if (result == 0) {
|
|
*buffer = 0;
|
|
die("Unexpected end of file");
|
|
}
|
|
// Trim off trailing comment
|
|
char* p = uprv_strchr(buffer, COMMENT);
|
|
if (p != 0) {
|
|
*p = NUL;
|
|
}
|
|
// Delete trailing whitespace
|
|
p = buffer + uprv_strlen(buffer);
|
|
while (p > buffer && (p[-1] == CR || p[-1] == LF ||
|
|
p[-1] == SPACE || p[-1] == TAB)) {
|
|
p--;
|
|
}
|
|
*p = NUL;
|
|
// If line is empty after trimming comments & whitespace,
|
|
// then read the next line.
|
|
return (*buffer == NUL) ? readLine(in) : uprv_strlen(buffer);
|
|
}
|