/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/24/99 aliu Creation.
**********************************************************************
*/

/* This program reads a text file full of parsed time zone data and
 * outputs a binary file, tz.dat, which then goes on to become part of
 * the memory-mapped (or dll) ICU data file.
 *
 * The data file read by this program is generated by a perl script,
 * tz.pl. The input to tz.pl is standard unix time zone data from
 * ftp://elsie.nci.nih.gov.
 *
 * As a matter of policy, the perl script tz.pl wants to do as much of
 * the parsing, data processing, and error checking as possible, and
 * this program wants to just do the binary translation step.
 *
 * See tz.pl for the file format that is READ by this program.
 */

/* NOTE(review): this copy of the file appears to have lost every span of
 * text that sat between a '<' and a later '>' (e.g. the header name of the
 * first #include below, and two large stretches of function bodies further
 * down). The damaged places are marked with NOTE(review) comments; restore
 * them from the original revision before building. */

#include /* NOTE(review): header name missing here; fprintf/exit are used below */
#include "utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "udata.h"
#include "unewdata.h"
#include "tzdat.h"

#define INPUT_FILE "tz.txt"
#define OUTPUT_FILE "tz.dat"

#define DATA_NAME "tz"
#define DATA_TYPE "dat"

#define DATA_COPYRIGHT \
    "Copyright (C) 1999, International Business Machines " \
    "Corporation and others. All Rights Reserved."

/* UDataInfo cf. udata.h */
/* Descriptor for the generated data: format tag "zone", format version
 * 1.0.0.0, data version 1.9.9.9. The meaning of the remaining fields is
 * defined by UDataInfo in udata.h (not visible here). */
static const UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    'z', 'o', 'n', 'e', /* dataFormat */
    1, 0, 0, 0, /* formatVersion */
    1, 9, 9, 9 /* dataVersion */
};

// Driver object for the tool. All parsing state lives in its members;
// the global main() creates one instance and calls gentz::main().
class gentz {
    // These must match SimpleTimeZone!!!
    enum { WALL_TIME = 0, STANDARD_TIME, UTC_TIME };

    // The largest number of zones we accept as sensible. Anything
    // larger is considered an error. Adjust as needed.
    enum { MAX_ZONES = 1000 };

    // The maximum sensible GMT offset, in seconds
    static const int32_t MAX_GMT_OFFSET;

    // Character constants used by the line reader and integer parser.
    // Their values are defined after the class.
    static const char COMMENT;
    static const char CR;
    static const char LF;
    static const char MINUS;
    static const char SPACE;
    static const char TAB;
    static const char ZERO;
    static const char SEP;
    static const char NUL;

    static const char* END_KEYWORD;

    // Line buffer: readLine() fills it and die() echoes it as the
    // "current input line".
    enum { BUFLEN = 1024 };
    char buffer[BUFLEN];

    TZHeader header;
    StandardZone* stdZones;
    DSTZone* dstZones;
    char* nameTable;
    int32_t zoneCount; // Total number of zones
    int32_t stdZoneSize;
    int32_t dstZoneSize;
    int32_t nameTableSize; // Total bytes in name table
    bool_t useCopyright; // Set to TRUE at the start of gentz::main(); see the -c option in usage()

public:
    // Entry point proper; receives the process arguments unchanged.
    int main(int argc, char *argv[]);
private:
    int32_t writeTzDatFile(FileStream* out);
    void parseTzTextFile(FileStream* in);

    // High level parsing
    void parseHeader(FileStream* in);
    StandardZone* parseStandardZones(FileStream* in);
    void parse1StandardZone(FileStream* in, StandardZone& zone);
    DSTZone* parseDSTZones(FileStream* in);
    void parse1DSTZone(FileStream* in, DSTZone& zone);
    void parseDSTRule(char*& p, TZRule& rule);
    char* parseNameTable(FileStream* in);

    // Low level parsing and reading
    int32_t readIntegerLine(FileStream* in, int32_t min, int32_t max);
    int32_t _parseInteger(char*& p);
    int32_t parseInteger(char*& p, char nextExpectedChar, int32_t, int32_t);
    int32_t readLine(FileStream* in);

    // Error handling
    void die(const char* msg);
    void usage(const char* argv0);
};

// Process entry point: builds a gentz on the stack and hands the
// arguments to gentz::main(), whose return value becomes the exit status.
int main(int argc, char *argv[]) {
    gentz x;
    return x.main(argc, argv);
}

// 24 hours expressed in seconds.
const int32_t gentz::MAX_GMT_OFFSET = (int32_t)24*60*60;

const char gentz::COMMENT = '#'; // readLine() discards everything from this character on
const char gentz::CR = ((char)13);
const char gentz::LF = ((char)10);
const char gentz::MINUS = '-';
const char gentz::SPACE = ' ';
const char gentz::TAB = ((char)9);
const char gentz::ZERO = '0';
const char gentz::SEP = ',';
const char gentz::NUL = ((char)0);
const char* gentz::END_KEYWORD = "end";

// Prints the command-line synopsis to stderr and terminates the process
// with exit status 1. Does not return.
//   argv0  program name substituted into the "Usage:" line
void gentz::usage(const char* argv0) {
    fprintf(stderr,
            "Usage: %s [-c[+|-]] infile outfile\n"
            " -c[+|-] [do|do not] include copyright (default=+)\n"
            " infile text file produced by tz.pl\n"
            " outfile binary file suitable for memory mapping\n",
            argv0);
    exit(1);
}

int gentz::main(int argc, char *argv[]) {
    // Parse arguments
    useCopyright = TRUE;
    const char* infile = 0;
    const char* outfile = 0;
    for (int i=1; i /* NOTE(review): source text is missing from this point.
                      * The rest of gentz::main() and the definitions that
                      * followed it are gone; the class also declares
                      * writeTzDatFile() and parseTzTextFile(), whose
                      * definitions are not visible. What follows looks like
                      * the tail of a comment and then the end of
                      * parseHeader() (it fills header.* and ends a function
                      * body) -- confirm against the original file. */ (1999<<16) | 10
    header.versionYear = (uint16_t) readIntegerLine(in, 0, 0xFFFF);
    header.versionSuffix = (uint16_t) readIntegerLine(in, 0, 0xFFFF);

    // Zone count
    zoneCount = readIntegerLine(in, 0, MAX_ZONES);

    // Size of name table in bytes
    // (0x00FFFFFF is an arbitrary upper limit; adjust as needed.)
    nameTableSize = readIntegerLine(in, 1, 0x00FFFFFF);
}

// Reads the standard-zone count (1..MAX_ZONES) into header.standardCount
// and allocates that many StandardZone entries, calling die() if the
// allocation yields a null pointer.
StandardZone* gentz::parseStandardZones(FileStream* in) {
    header.standardCount = readIntegerLine(in, 1, MAX_ZONES);
    StandardZone* zones = new StandardZone[header.standardCount];
    if (zones == 0) {
        die("Out of memory");
    }
    for (uint32_t i=0; i /* NOTE(review): source text is missing from this point.
                           * The rest of parseStandardZones() and the
                           * definitions that followed it are gone; the class
                           * also declares parse1StandardZone(),
                           * parseDSTZones(), parse1DSTZone(), parseDSTRule(),
                           * parseNameTable() and readIntegerLine(), none of
                           * which is visible. What follows looks like the end
                           * of the digit loop of _parseInteger() -- confirm
                           * against the original file. */ 9) {
            break;
        }
        n = 10*n + digit;
        p++;
        digitCount++;
    }
    if (digitCount < 1) {
        die("Unable to parse integer");
    }
    if (negative) {
        n = -n;
    }
    return n;
}

// Parses one integer field and validates what follows it.
//   p                 in/out cursor into the text; _parseInteger() advances
//                     it over the number, and it is then moved one further
//                     character past the delimiter
//   nextExpectedChar  character that must immediately follow the integer
//   min, max          inclusive bounds for the parsed value
// Returns the parsed value. Calls die() (which exits) if the delimiter
// does not match or the value is outside [min, max].
int32_t gentz::parseInteger(char*& p, char nextExpectedChar,
                            int32_t min, int32_t max) {
    int32_t n = _parseInteger(p);
    // The post-increment consumes the delimiter even when the comparison
    // fails; that is harmless because die() does not return.
    if (*p++ != nextExpectedChar) {
        die("Character following integer unexpected");
    }
    if (n < min || n > max) {
        die("Integer field out of range");
    }
    return n;
}

// Reports a fatal error on stderr and terminates with exit status 1.
// When the line buffer is non-empty it is echoed as the current input
// line so the offending text can be located.
//   msg  description printed after "ERROR, "
// NOTE(review): buffer is read here, but the class declares no constructor
// that clears it -- confirm gentz::main() initializes it before any path
// that can reach die().
void gentz::die(const char* msg) {
    fprintf(stderr, "ERROR, %s\n", msg);
    if (*buffer) {
        fprintf(stderr, "Current input line: %s\n", buffer);
    }
    exit(1);
}

// Reads the next line from `in` into buffer (at most BUFLEN bytes are
// offered to T_FileStream_readLine) and normalizes it in place:
//   1. everything from the first COMMENT ('#') on is removed, together
//      with any spaces or tabs directly before it;
//   2. trailing CR and LF characters are removed.
// Spaces/tabs at the end of a line that has no comment are left alone.
// Returns the length of the text remaining in buffer.
// NOTE(review): the result of T_FileStream_readLine() is ignored, so what
// buffer holds at end-of-file or on a read error depends on that function
// (not visible here) -- confirm callers cannot loop or re-parse stale text.
int32_t gentz::readLine(FileStream* in) {
    T_FileStream_readLine(in, buffer, BUFLEN);
    // Trim off trailing comment
    char* p = icu_strchr(buffer, COMMENT);
    if (p != 0) {
        // Back up past any space or tab characters before
        // the comment character.
        while (p > buffer && (p[-1] == SPACE || p[-1] == TAB)) {
            p--;
        }
        *p = NUL;
    }
    // Delete any trailing ^J and/or ^M characters
    p = buffer + icu_strlen(buffer);
    while (p > buffer && (p[-1] == CR || p[-1] == LF)) {
        p--;
    }
    *p = NUL;
    return icu_strlen(buffer);
}