/* ********************************************************************** * Copyright (C) 1999, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 11/24/99 aliu Creation. ********************************************************************** */ /* This program reads a text file full of parsed time zone data and * outputs a binary file, tz.dat, which then goes on to become part of * the memory-mapped (or dll) ICU data file. * * The data file read by this program is generated by a perl script, * tz.pl. The input to tz.pl is standard unix time zone data from * ftp://elsie.nci.nih.gov. * * As a matter of policy, the perl script tz.pl wants to do as much of * the parsing, data processing, and error checking as possible, and * this program wants to just do the binary translation step. * * See tz.pl for the file format that is READ by this program. */ #include #include #include "unicode/utypes.h" #include "cmemory.h" #include "cstring.h" #include "filestrm.h" #include "unicode/udata.h" #include "unewdata.h" #include "uoptions.h" #include "tzdat.h" #define INPUT_FILE "tz.txt" #define OUTPUT_FILE "tz.dat" /* UDataInfo cf. udata.h */ static UDataInfo dataInfo = { sizeof(UDataInfo), 0, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, sizeof(UChar), 0, 0x7a, 0x6f, 0x6e, 0x65, /* see TZ_SIG. Changed to literals, thanks to HP compiler */ TZ_FORMAT_VERSION, 0, 0, 0, /* formatVersion */ 0, 0, 0, 0 /* dataVersion - will be filled in with year.suffix */ }; class gentz { // These must match SimpleTimeZone!!! enum { WALL_TIME = 0, STANDARD_TIME, UTC_TIME }; // The largest number of zones we accept as sensible. Anything // larger is considered an error. Adjust as needed. enum { MAX_ZONES = 1000 }; // The largest maxNameLength we accept as sensible. Adjust as needed. enum { MAX_MAX_NAME_LENGTH = 100 }; // The maximum sensible GMT offset, in seconds static const int32_t MAX_GMT_OFFSET; static const char COMMENT; static const char CR; static const char LF; static const char MINUS; static const char SPACE; static const char TAB; static const char ZERO; static const char SEP; static const char NUL; static const char* END_KEYWORD; enum { BUFLEN = 1024 }; char buffer[BUFLEN]; int32_t lineNumber; TZHeader header; StandardZone* stdZones; DSTZone* dstZones; char* nameTable; int32_t* indexByName; OffsetIndex* indexByOffset; int32_t maxPerOffset; // Maximum number of zones per offset int32_t stdZoneSize; int32_t dstZoneSize; int32_t offsetIndexSize; // Total bytes in offset index table int32_t nameTableSize; // Total bytes in name table UBool useCopyright; public: int main(int argc, const char *argv[]); private: int32_t writeTzDatFile(const char *destdir); void parseTzTextFile(FileStream* in); // High level parsing void parseHeader(FileStream* in); StandardZone* parseStandardZones(FileStream* in); void parse1StandardZone(FileStream* in, StandardZone& zone); DSTZone* parseDSTZones(FileStream* in); void parse1DSTZone(FileStream* in, DSTZone& zone); void parseDSTRule(char*& p, TZRule& rule); int32_t* parseIndexTable(FileStream* in); OffsetIndex* parseOffsetIndexTable(FileStream* in); char* parseNameTable(FileStream* in); // Low level parsing and reading void readEndMarker(FileStream* in); int32_t readIntegerLine(FileStream* in, int32_t min, int32_t max); int32_t _parseInteger(char*& p); int32_t parseInteger(char*& p, char nextExpectedChar, int32_t, int32_t); int32_t readLine(FileStream* in); // Error handling void die(const char* msg); }; int main(int argc, const char *argv[]) { gentz x; return x.main(argc, argv); } const int32_t gentz::MAX_GMT_OFFSET = (int32_t)24*60*60; // seconds const char gentz::COMMENT = '#'; const char gentz::CR = '\r'; const char gentz::LF = '\n'; const char gentz::MINUS = '-'; const char gentz::SPACE = ' '; const char gentz::TAB = '\t'; const char gentz::ZERO = '0'; const char gentz::SEP = ','; const char gentz::NUL = '\0'; const char* gentz::END_KEYWORD = "end"; static UOption options[]={ UOPTION_HELP_H, UOPTION_HELP_QUESTION_MARK, UOPTION_COPYRIGHT, UOPTION_DESTDIR }; int gentz::main(int argc, const char *argv[]) { /* preset then read command line options */ options[3].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } else if(argc<2) { argc=-1; } if(argc<0 || options[0].doesOccur || options[1].doesOccur) { fprintf(stderr, "usage: %s [-options] timezone-file\n" "\tread the timezone file produced by tz.pl and create " TZ_DATA_NAME "." TZ_DATA_TYPE "\n" "\toptions:\n" "\t\t-h or -? or --help this usage text\n" "\t\t-c or --copyright include a copyright notice\n" "\t\t-d or --destdir destination directory, followed by the path\n", argv[0]); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } /* get the options values */ useCopyright=options[2].doesOccur; //////////////////////////////////////////////////////////// // Read the input file //////////////////////////////////////////////////////////// *buffer = NUL; lineNumber = 0; fprintf(stdout, "Input file: %s\n", argv[1]); FileStream* in = T_FileStream_open(argv[1], "r"); if (in == 0) { die("Cannot open input file"); } parseTzTextFile(in); T_FileStream_close(in); *buffer = NUL; //////////////////////////////////////////////////////////// // Write the output file //////////////////////////////////////////////////////////// int32_t wlen = writeTzDatFile(options[3].value); fprintf(stdout, "Output file: %s.%s, %ld bytes\n", TZ_DATA_NAME, TZ_DATA_TYPE, wlen); return 0; // success } int32_t gentz::writeTzDatFile(const char *destdir) { UNewDataMemory *pdata; UErrorCode status = U_ZERO_ERROR; // Fill in dataInfo with year.suffix *(uint16_t*)&(dataInfo.dataVersion[0]) = header.versionYear; *(uint16_t*)&(dataInfo.dataVersion[2]) = header.versionSuffix; pdata = udata_create(destdir, TZ_DATA_TYPE, TZ_DATA_NAME, &dataInfo, useCopyright ? U_COPYRIGHT_STRING : 0, &status); if (U_FAILURE(status)) { die("Unable to create data memory"); } // Careful: This order cannot be changed (without changing // the offset fixup code). udata_writeBlock(pdata, &header, sizeof(header)); udata_writeBlock(pdata, stdZones, stdZoneSize); udata_writeBlock(pdata, dstZones, dstZoneSize); udata_writeBlock(pdata, indexByName, header.count * sizeof(indexByName[0])); udata_writeBlock(pdata, indexByOffset, offsetIndexSize); udata_writeBlock(pdata, nameTable, nameTableSize); uint32_t dataLength = udata_finish(pdata, &status); if (U_FAILURE(status)) { die("Error writing output file"); } if (dataLength != (sizeof(header) + stdZoneSize + dstZoneSize + nameTableSize + header.count * sizeof(indexByName[0]) + offsetIndexSize )) { die("Written file doesn't match expected size"); } return dataLength; } void gentz::parseTzTextFile(FileStream* in) { parseHeader(in); stdZones = parseStandardZones(in); dstZones = parseDSTZones(in); if (header.count != (header.standardCount + header.dstCount)) { die("Zone counts don't add up"); } nameTable = parseNameTable(in); // Fixup the header offsets header.standardDelta = sizeof(header); header.dstDelta = header.standardDelta + stdZoneSize; header.nameIndexDelta = header.dstDelta + dstZoneSize; // Read in index tables after header is mostly fixed up indexByName = parseIndexTable(in); indexByOffset = parseOffsetIndexTable(in); header.offsetIndexDelta = header.nameIndexDelta + header.count * sizeof(indexByName[0]); header.nameTableDelta = header.offsetIndexDelta + offsetIndexSize; if (header.standardDelta < 0 || header.dstDelta < 0 || header.nameTableDelta < 0) { die("Negative offset in header after fixup"); } } /** * Index tables are lists of specifiers of the form /[sd]\d+/, where * the first character determines if it is a standard or DST zone, * and the following number is in the range 0..n-1, where n is the * count of that type of zone. * * Header must already be read in and the offsets must be fixed up. * Standard and DST zones must be read in. */ int32_t* gentz::parseIndexTable(FileStream* in) { uint32_t n = readIntegerLine(in, 1, MAX_ZONES); if (n != header.count) { die("Count mismatch in index table"); } int32_t* result = new int32_t[n]; for (uint32_t i=0; i= header.standardCount) { die("Standard index entry out of range"); } result[i] = header.standardDelta + ((char*)&stdZones[index] - (char*)&stdZones[0]); break; case 'd': if (index >= header.dstCount) { die("DST index entry out of range"); } result[i] = header.dstDelta + ((char*)&dstZones[index] - (char*)&dstZones[0]); break; default: die("Malformed index entry"); break; } } readEndMarker(in); fprintf(stdout, " Read %lu name index table entries, in-memory size %ld bytes\n", n, n * sizeof(int32_t)); return result; } OffsetIndex* gentz::parseOffsetIndexTable(FileStream* in) { uint32_t n = readIntegerLine(in, 1, MAX_ZONES); // We don't know how big the whole thing will be yet, but we can use // the maxPerOffset number to compute an upper limit. // // The gmtOffset field within each OffsetIndex struct must be // 4-aligned for some architectures. To ensure this, we do two // things: 1. The entire struct is 4-aligned. 2. The gmtOffset is // placed at a 4-aligned position within the struct. 3. The size // of the whole structure is padded out to 4n bytes. We achieve // this last condition by adding two bytes of padding after the // last zoneNumber, if count is _even_. That is, the struct size // is 10+2count+padding, where padding is (count%2==0 ? 2:0). // // Note that we don't change the count itself, but rather adjust // the nextEntryDelta and add 2 bytes of padding if necessary. // // Don't try to compute the exact size in advance // (unless we want to avoid the use of sizeof(), which may // introduce padding that we won't actually employ). int32_t maxPossibleSize = n * (sizeof(OffsetIndex) + (maxPerOffset-1) * sizeof(uint16_t)); int8_t *result = new int8_t[maxPossibleSize]; if (result == 0) { die("Out of memory"); } // Read each line and construct the corresponding entry OffsetIndex* index = (OffsetIndex*)result; for (uint32_t i=0; igmtOffset = 1000 * // Convert s -> ms parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET); index->defaultZone = (uint16_t)parseInteger(p, SEP, 0, header.count-1); index->count = (uint16_t)parseInteger(p, SEP, 1, maxPerOffset); uint16_t* zoneNumberArray = &(index->zoneNumber); UBool sawOffset = FALSE; // Sanity check - make sure offset is in zone list for (uint16_t j=0; jcount; ++j) { zoneNumberArray[j] = (uint16_t) parseInteger(p, (j==(index->count-1))?NUL:SEP, 0, header.count-1); if (zoneNumberArray[j] == index->defaultZone) { sawOffset = TRUE; } } if (!sawOffset) { die("Error: bad offset index entry; default not in zone list"); } alignedCount = index->count; if((alignedCount%2)==0) /* force count to be ODD - see above */ { // Use invalid zoneNumber for 2 bytes of padding zoneNumberArray[alignedCount++] = (uint16_t)0xFFFF; } int8_t* nextIndex = (int8_t*)&(zoneNumberArray[alignedCount]); index->nextEntryDelta = (i==(n-1)) ? 0 : (nextIndex - (int8_t*)index); index = (OffsetIndex*)nextIndex; } offsetIndexSize = (int8_t*)index - (int8_t*)result; if (offsetIndexSize > maxPossibleSize) { die("Yikes! Interal error while constructing offset index table"); } readEndMarker(in); fprintf(stdout, " Read %lu offset index table entries, in-memory size %ld bytes\n", n, offsetIndexSize); return (OffsetIndex*)result; } void gentz::parseHeader(FileStream* in) { int32_t ignored; // Version string, e.g., "1999j" -> (1999<<16) | 10 header.versionYear = (uint16_t) readIntegerLine(in, 1990, 0xFFFF); header.versionSuffix = (uint16_t) readIntegerLine(in, 0, 0xFFFF); header.count = readIntegerLine(in, 1, MAX_ZONES); maxPerOffset = readIntegerLine(in, 1, MAX_ZONES); /*header.maxNameLength*/ ignored = readIntegerLine(in, 1, MAX_MAX_NAME_LENGTH); // Size of name table in bytes // (0x00FFFFFF is an arbitrary upper limit; adjust as needed.) nameTableSize = readIntegerLine(in, 1, 0x00FFFFFF); fprintf(stdout, " Read header, data version %u(%u), in-memory size %ld bytes\n", header.versionYear, header.versionSuffix, sizeof(header)); } StandardZone* gentz::parseStandardZones(FileStream* in) { header.standardCount = readIntegerLine(in, 1, MAX_ZONES); StandardZone* zones = new StandardZone[header.standardCount]; if (zones == 0) { die("Out of memory"); } for (uint32_t i=0; i ms parseInteger(p, NUL, -MAX_GMT_OFFSET, MAX_GMT_OFFSET); } DSTZone* gentz::parseDSTZones(FileStream* in) { header.dstCount = readIntegerLine(in, 1, MAX_ZONES); DSTZone* zones = new DSTZone[header.dstCount]; if (zones == 0) { die("Out of memory"); } for (uint32_t i=0; i ms parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET); parseDSTRule(p, zone.onsetRule); parseDSTRule(p, zone.ceaseRule); zone.dstSavings = (uint16_t) parseInteger(p, NUL, 0, 12*60); } void gentz::parseDSTRule(char*& p, TZRule& rule) { rule.month = (uint8_t) parseInteger(p, SEP, 0, 11); rule.dowim = (int8_t) parseInteger(p, SEP, -31, 31); rule.dow = (int8_t) parseInteger(p, SEP, -7, 7); rule.time = (uint16_t) parseInteger(p, SEP, 0, 24*60); rule.mode = *p++; if (*p++ != SEP) { die("Separator missing"); } switch ((char)rule.mode) { case 'w': rule.mode = WALL_TIME; break; case 's': rule.mode = STANDARD_TIME; break; case 'u': rule.mode = UTC_TIME; break; default: die("Invalid rule time mode"); break; } } char* gentz::parseNameTable(FileStream* in) { int32_t n = readIntegerLine(in, 1, MAX_ZONES); if (n != (int32_t)header.count) { die("Zone count doesn't match name table count"); } char* names = new char[nameTableSize]; if (names == 0) { die("Out of memory"); } char* p = names; char* limit = names + nameTableSize; for (int32_t i=0; i 9) { break; } n = 10*n + digit; p++; digitCount++; } if (digitCount < 1) { die("Unable to parse integer"); } if (negative) { n = -n; } return n; } int32_t gentz::parseInteger(char*& p, char nextExpectedChar, int32_t min, int32_t max) { int32_t n = _parseInteger(p); if (*p++ != nextExpectedChar) { die("Character following integer unexpected"); } if (n < min || n > max) { die("Integer field out of range"); } return n; } void gentz::die(const char* msg) { fprintf(stderr, "ERROR, %s\n", msg); if (*buffer) { fprintf(stderr, "Input file line %ld: \"%s\"\n", lineNumber, buffer); } exit(1); } int32_t gentz::readLine(FileStream* in) { ++lineNumber; T_FileStream_readLine(in, buffer, BUFLEN); // Trim off trailing comment char* p = uprv_strchr(buffer, COMMENT); if (p != 0) { // Back up past any space or tab characters before // the comment character. while (p > buffer && (p[-1] == SPACE || p[-1] == TAB)) { p--; } *p = NUL; } // Delete any trailing ^J and/or ^M characters p = buffer + uprv_strlen(buffer); while (p > buffer && (p[-1] == CR || p[-1] == LF)) { p--; } *p = NUL; return uprv_strlen(buffer); }