aa7e2fc7b8
X-SVN-Rev: 300
598 lines
18 KiB
C++
598 lines
18 KiB
C++
/*
|
|
**********************************************************************
|
|
* Copyright (C) 1999, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
* Date Name Description
|
|
* 11/24/99 aliu Creation.
|
|
**********************************************************************
|
|
*/
|
|
|
|
/* This program reads a text file full of parsed time zone data and
 * outputs a binary file, tz.dat, which then goes on to become part of
 * the memory-mapped (or dll) ICU data file.
 *
 * The data file read by this program is generated by a Perl script,
 * tz.pl. The input to tz.pl is standard Unix time zone data from
 * ftp://elsie.nci.nih.gov.
 *
 * As a matter of policy, the Perl script tz.pl wants to do as much of
 * the parsing, data processing, and error checking as possible, and
 * this program wants to just do the binary translation step.
 *
 * See tz.pl for the file format that is READ by this program.
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "utypes.h"
|
|
#include "cmemory.h"
|
|
#include "cstring.h"
|
|
#include "filestrm.h"
|
|
#include "udata.h"
|
|
#include "unewdata.h"
|
|
#include "tzdat.h"
|
|
|
|
// Conventional names of this tool's text input and binary output.
// NOTE(review): neither macro is referenced in the visible source (the
// input path comes from the command line) -- confirm before relying on them.
#define INPUT_FILE "tz.txt"
#define OUTPUT_FILE "tz.dat"

// Copyright notice passed to udata_create() when copyright output is enabled.
#define DATA_COPYRIGHT \
    "Copyright (C) 1999, International Business Machines " \
    "Corporation and others. All Rights Reserved."
|
|
|
|
/* UDataInfo cf. udata.h */
|
|
static const UDataInfo dataInfo = {
|
|
sizeof(UDataInfo),
|
|
0,
|
|
|
|
U_IS_BIG_ENDIAN,
|
|
U_CHARSET_FAMILY,
|
|
sizeof(UChar),
|
|
0,
|
|
|
|
TZ_SIG[0], TZ_SIG[1], TZ_SIG[2], TZ_SIG[3], /* dataFormat */
|
|
TZ_FORMAT_VERSION, 0, 0, 0, /* formatVersion */
|
|
0, 0, 0, 0 /* dataVersion - will be filled in with year.suffix */
|
|
};
|
|
|
|
|
|
class gentz {
|
|
// These must match SimpleTimeZone!!!
|
|
enum { WALL_TIME = 0,
|
|
STANDARD_TIME,
|
|
UTC_TIME
|
|
};
|
|
|
|
// The largest number of zones we accept as sensible. Anything
|
|
// larger is considered an error. Adjust as needed.
|
|
enum { MAX_ZONES = 1000 };
|
|
|
|
// The largest maxNameLength we accept as sensible. Adjust as needed.
|
|
enum { MAX_MAX_NAME_LENGTH = 100 };
|
|
|
|
// The maximum sensible GMT offset, in seconds
|
|
static const int32_t MAX_GMT_OFFSET;
|
|
|
|
static const char COMMENT;
|
|
static const char CR;
|
|
static const char LF;
|
|
static const char MINUS;
|
|
static const char SPACE;
|
|
static const char TAB;
|
|
static const char ZERO;
|
|
static const char SEP;
|
|
static const char NUL;
|
|
|
|
static const char* END_KEYWORD;
|
|
|
|
enum { BUFLEN = 1024 };
|
|
char buffer[BUFLEN];
|
|
int32_t lineNumber;
|
|
|
|
TZHeader header;
|
|
StandardZone* stdZones;
|
|
DSTZone* dstZones;
|
|
char* nameTable;
|
|
int32_t* indexByName;
|
|
OffsetIndex* indexByOffset;
|
|
|
|
int32_t maxPerOffset; // Maximum number of zones per offset
|
|
int32_t stdZoneSize;
|
|
int32_t dstZoneSize;
|
|
int32_t offsetIndexSize; // Total bytes in offset index table
|
|
int32_t nameTableSize; // Total bytes in name table
|
|
|
|
bool_t useCopyright;
|
|
|
|
public:
|
|
int main(int argc, char *argv[]);
|
|
private:
|
|
int32_t writeTzDatFile();
|
|
void parseTzTextFile(FileStream* in);
|
|
|
|
// High level parsing
|
|
void parseHeader(FileStream* in);
|
|
|
|
StandardZone* parseStandardZones(FileStream* in);
|
|
void parse1StandardZone(FileStream* in, StandardZone& zone);
|
|
|
|
DSTZone* parseDSTZones(FileStream* in);
|
|
void parse1DSTZone(FileStream* in, DSTZone& zone);
|
|
void parseDSTRule(char*& p, TZRule& rule);
|
|
|
|
int32_t* parseIndexTable(FileStream* in);
|
|
OffsetIndex* parseOffsetIndexTable(FileStream* in);
|
|
|
|
char* parseNameTable(FileStream* in);
|
|
|
|
// Low level parsing and reading
|
|
void readEndMarker(FileStream* in);
|
|
int32_t readIntegerLine(FileStream* in, int32_t min, int32_t max);
|
|
int32_t _parseInteger(char*& p);
|
|
int32_t parseInteger(char*& p, char nextExpectedChar, int32_t, int32_t);
|
|
int32_t readLine(FileStream* in);
|
|
|
|
// Error handling
|
|
void die(const char* msg);
|
|
void usage(const char* argv0);
|
|
};
|
|
|
|
int main(int argc, char *argv[]) {
|
|
gentz x;
|
|
return x.main(argc, argv);
|
|
}
|
|
|
|
const int32_t gentz::MAX_GMT_OFFSET = (int32_t)24*60*60; // seconds
|
|
const char gentz::COMMENT = '#';
|
|
const char gentz::CR = ((char)13);
|
|
const char gentz::LF = ((char)10);
|
|
const char gentz::MINUS = '-';
|
|
const char gentz::SPACE = ' ';
|
|
const char gentz::TAB = ((char)9);
|
|
const char gentz::ZERO = '0';
|
|
const char gentz::SEP = ',';
|
|
const char gentz::NUL = ((char)0);
|
|
const char* gentz::END_KEYWORD = "end";
|
|
|
|
/**
 * Print the command-line synopsis to stderr and terminate the
 * process with exit status 1.  Does not return.
 *
 * @param argv0 program name substituted into the synopsis
 */
void gentz::usage(const char* argv0) {
    static const char* const synopsis =
        "Usage: %s [-c[+|-]] infile\n"
        " -c[+|-] [do|do not] include copyright (default=+)\n"
        " infile text file produced by tz.pl\n";
    fprintf(stderr, synopsis, argv0);
    exit(1);
}
|
|
|
|
int gentz::main(int argc, char *argv[]) {
|
|
////////////////////////////////////////////////////////////
|
|
// Parse arguments
|
|
////////////////////////////////////////////////////////////
|
|
useCopyright = TRUE;
|
|
const char* infile = 0;
|
|
for (int i=1; i<argc; ++i) {
|
|
const char* arg = argv[i];
|
|
if (arg[0] == '-') {
|
|
if (arg[1] != 'c') {
|
|
usage(argv[0]);
|
|
}
|
|
switch (arg[2]) {
|
|
case '+':
|
|
useCopyright = TRUE;
|
|
break;
|
|
case '-':
|
|
useCopyright = FALSE;
|
|
break;
|
|
default:
|
|
usage(argv[0]);
|
|
}
|
|
} else if (infile == 0) {
|
|
infile = arg;
|
|
} else {
|
|
usage(argv[0]);
|
|
}
|
|
}
|
|
if (infile == 0) {
|
|
usage(argv[0]);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
// Read the input file
|
|
////////////////////////////////////////////////////////////
|
|
*buffer = NUL;
|
|
lineNumber = 0;
|
|
fprintf(stdout, "Input file: %s\n", infile);
|
|
FileStream* in = T_FileStream_open(infile, "r");
|
|
if (in == 0) {
|
|
die("Cannot open input file");
|
|
}
|
|
parseTzTextFile(in);
|
|
T_FileStream_close(in);
|
|
*buffer = NUL;
|
|
|
|
////////////////////////////////////////////////////////////
|
|
// Write the output file
|
|
////////////////////////////////////////////////////////////
|
|
int32_t wlen = writeTzDatFile();
|
|
fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
|
|
TZ_DATA_NAME, TZ_DATA_TYPE, wlen);
|
|
|
|
return 0; // success
|
|
}
|
|
|
|
/**
 * Write the header and all parsed tables to the output data file
 * through the udata_* writer.
 *
 * Calls die() if the data memory cannot be created, if finishing the
 * file reports an error, or if the reported length differs from the
 * sum of the block sizes written.
 *
 * @return the data length reported by udata_finish()
 */
int32_t gentz::writeTzDatFile() {
    UNewDataMemory *pdata;
    UErrorCode status = U_ZERO_ERROR;

    // Fill in the data version with year.suffix.  dataInfo itself is a
    // const object, so the version is stored into a private copy rather
    // than written through a cast (which would be undefined behavior).
    // The bytes are copied so the stored layout is the same as a direct
    // uint16_t store at dataVersion[0] and dataVersion[2].
    UDataInfo info = dataInfo;
    const uint16_t versionYear = header.versionYear;
    const uint16_t versionSuffix = header.versionSuffix;
    icu_memcpy(&info.dataVersion[0], &versionYear, sizeof(versionYear));
    icu_memcpy(&info.dataVersion[2], &versionSuffix, sizeof(versionSuffix));

    pdata = udata_create(TZ_DATA_TYPE, TZ_DATA_NAME, &info,
                         useCopyright ? DATA_COPYRIGHT : 0, &status);
    if (U_FAILURE(status)) {
        die("Unable to create data memory");
    }

    // Careful: This order cannot be changed (without changing
    // the offset fixup code).
    udata_writeBlock(pdata, &header, sizeof(header));
    udata_writeBlock(pdata, stdZones, stdZoneSize);
    udata_writeBlock(pdata, dstZones, dstZoneSize);
    udata_writeBlock(pdata, indexByName, header.count * sizeof(indexByName[0]));
    udata_writeBlock(pdata, indexByOffset, offsetIndexSize);
    udata_writeBlock(pdata, nameTable, nameTableSize);

    uint32_t dataLength = udata_finish(pdata, &status);
    if (U_FAILURE(status)) {
        die("Error writing output file");
    }

    // Cross-check the reported length against what was handed over.
    if (dataLength != (sizeof(header) + stdZoneSize +
                       dstZoneSize + nameTableSize +
                       header.count * sizeof(indexByName[0]) +
                       offsetIndexSize
                       )) {
        die("Written file doesn't match expected size");
    }
    return (int32_t)dataLength;
}
|
|
|
|
/**
 * Parse the complete input file, section by section, and compute the
 * header offsets ("deltas") of each table from the table sizes.
 *
 * Sections are consumed in file order: header, standard zones, DST
 * zones, name table, name index table, offset index table.  The first
 * three deltas are set before the index tables are parsed because
 * parseIndexTable() reads them.
 *
 * @param in open input stream positioned at the start of the file
 */
void gentz::parseTzTextFile(FileStream* in) {
    parseHeader(in);

    stdZones = parseStandardZones(in);
    dstZones = parseDSTZones(in);

    // The two zone lists together must account for every zone.
    const bool_t countsAgree =
        (header.count == (header.standardCount + header.dstCount));
    if (!countsAgree) {
        die("Zone counts don't add up");
    }

    nameTable = parseNameTable(in);

    // Offsets of the zone arrays and of the name index, which follow
    // the header in that order.
    header.standardDelta = sizeof(header);
    header.dstDelta = header.standardDelta + stdZoneSize;
    header.nameIndexDelta = header.dstDelta + dstZoneSize;

    // The index tables can be read now that those offsets are known.
    indexByName = parseIndexTable(in);
    indexByOffset = parseOffsetIndexTable(in);

    // Offsets of the remaining two tables.
    header.offsetIndexDelta = header.nameIndexDelta +
        header.count * sizeof(indexByName[0]);
    header.nameTableDelta = header.offsetIndexDelta + offsetIndexSize;

    // Sanity check on the computed offsets.
    const bool_t allNonNegative =
        !(header.standardDelta < 0) &&
        !(header.dstDelta < 0) &&
        !(header.nameTableDelta < 0);
    if (!allNonNegative) {
        die("Negative offset in header after fixup");
    }
}
|
|
|
|
/**
|
|
* Index tables are lists of specifiers of the form /[sd]\d+/, where
|
|
* the first character determines if it is a standard or DST zone,
|
|
* and the following number is in the range 0..n-1, where n is the
|
|
* count of that type of zone.
|
|
*
|
|
* Header must already be read in and the offsets must be fixed up.
|
|
* Standard and DST zones must be read in.
|
|
*/
|
|
int32_t* gentz::parseIndexTable(FileStream* in) {
|
|
uint32_t n = readIntegerLine(in, 1, MAX_ZONES);
|
|
if (n != header.count) {
|
|
die("Count mismatch in index table");
|
|
}
|
|
int32_t* result = new int32_t[n];
|
|
for (uint32_t i=0; i<n; ++i) {
|
|
readLine(in);
|
|
char* p = buffer+1;
|
|
uint32_t index = parseInteger(p, NUL, 0, header.count);
|
|
switch (buffer[0]) {
|
|
case 's':
|
|
if (index >= header.standardCount) {
|
|
die("Standard index entry out of range");
|
|
}
|
|
result[i] = header.standardDelta +
|
|
((char*)&stdZones[index] - (char*)&stdZones[0]);
|
|
break;
|
|
case 'd':
|
|
if (index >= header.dstCount) {
|
|
die("DST index entry out of range");
|
|
}
|
|
result[i] = header.dstDelta +
|
|
((char*)&dstZones[index] - (char*)&dstZones[0]);
|
|
break;
|
|
default:
|
|
die("Malformed index entry");
|
|
break;
|
|
}
|
|
}
|
|
readEndMarker(in);
|
|
fprintf(stdout, " Read %lu name index table entries, in-memory size %ld bytes\n",
|
|
n, n * sizeof(int32_t));
|
|
return result;
|
|
}
|
|
|
|
/**
 * Parse the offset index table into one packed byte buffer.
 *
 * Each input line has the form "gmtOffset,count,zone,...,zone" and
 * becomes one variable-length OffsetIndex entry: the fixed fields
 * followed by count zone numbers, with the next entry starting
 * directly after the last zone number.  nextEntryDelta holds the
 * distance in bytes to the following entry, or 0 for the last one.
 * The total size is stored in offsetIndexSize.
 *
 * Calls die() on allocation failure or any malformed field.
 *
 * @param in input stream positioned at the table's count line
 * @return pointer to the first entry of the newly allocated buffer
 */
OffsetIndex* gentz::parseOffsetIndexTable(FileStream* in) {
    uint32_t n = readIntegerLine(in, 1, MAX_ZONES);

    // We don't know how big the whole thing will be yet, but we can use
    // the maxPerOffset number to compute an upper limit.
    //
    // Structs will not be 4-aligned because we'll be writing them out
    // ourselves.  Don't try to compute the exact size in advance
    // (unless we want to avoid the use of sizeof(), which may
    // introduce padding that we won't actually employ).
    int32_t maxPossibleSize = n * (sizeof(OffsetIndex) +
                                   (maxPerOffset-1) * sizeof(uint16_t));

    int8_t *result = new int8_t[maxPossibleSize];
    if (result == 0) {
        die("Out of memory");
    }

    // Read each line and construct the corresponding entry
    OffsetIndex* index = (OffsetIndex*)result;
    for (uint32_t i=0; i<n; ++i) {
        readLine(in);
        char* p = buffer;
        index->gmtOffset = 1000 * // Convert s -> ms
            parseInteger(p, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
        index->count = (uint16_t)parseInteger(p, SEP, 1, maxPerOffset);
        uint16_t* zoneNumberArray = &(index->zoneNumber);
        for (uint16_t j=0; j<index->count; ++j) {
            // The last zone number ends the line; the others are
            // followed by a separator.
            zoneNumberArray[j] = (uint16_t)
                parseInteger(p, (j==(index->count-1))?NUL:SEP,
                             0, header.count-1);
        }
        // The next entry begins right after this entry's zone numbers.
        int8_t* nextIndex = (int8_t*)&(zoneNumberArray[index->count]);
        index->nextEntryDelta = (i==(n-1)) ? 0 : (nextIndex - (int8_t*)index);
        index = (OffsetIndex*)nextIndex;
    }
    offsetIndexSize = (int8_t*)index - (int8_t*)result;
    if (offsetIndexSize > maxPossibleSize) {
        die("Yikes! Interal error while constructing offset index table");
    }
    readEndMarker(in);
    // Arguments are cast to the exact types %lu and %ld require.
    fprintf(stdout, " Read %lu offset index table entries, in-memory size %ld bytes\n",
            (unsigned long)n, (long)offsetIndexSize);
    return (OffsetIndex*)result;
}
|
|
|
|
/**
 * Parse the header section: six integer lines giving the version
 * year, version suffix, total zone count, maximum zones per offset,
 * maximum name length, and name table size in bytes.
 *
 * The maximum name length is range-checked but not stored.  Every
 * value is validated by readIntegerLine(), which calls die() on
 * malformed or out-of-range input.
 *
 * @param in input stream positioned at the start of the file
 */
void gentz::parseHeader(FileStream* in) {
    // Version, e.g. "1999j" is given as the year 1999 and the suffix 10
    header.versionYear = (uint16_t) readIntegerLine(in, 1990, 0xFFFF);
    header.versionSuffix = (uint16_t) readIntegerLine(in, 0, 0xFFFF);

    header.count = readIntegerLine(in, 1, MAX_ZONES);
    maxPerOffset = readIntegerLine(in, 1, MAX_ZONES);

    // maxNameLength: the line is consumed and validated, the value is
    // deliberately not kept.
    (void) readIntegerLine(in, 1, MAX_MAX_NAME_LENGTH);

    // Size of name table in bytes
    // (0x00FFFFFF is an arbitrary upper limit; adjust as needed.)
    nameTableSize = readIntegerLine(in, 1, 0x00FFFFFF);

    // Arguments are cast to the exact types %u and %ld require.
    fprintf(stdout, " Read header, data version %u(%u), in-memory size %ld bytes\n",
            (unsigned)header.versionYear, (unsigned)header.versionSuffix,
            (long)sizeof(header));
}
|
|
|
|
/**
 * Parse the list of standard (non-DST) zones.
 *
 * Reads the zone count into header.standardCount, then one line per
 * zone, then the end marker.  The byte size of the array is stored
 * in stdZoneSize.  Calls die() on allocation failure or bad input.
 *
 * @param in input stream positioned at the list's count line
 * @return newly allocated array of header.standardCount zones
 */
StandardZone* gentz::parseStandardZones(FileStream* in) {
    header.standardCount = readIntegerLine(in, 1, MAX_ZONES);
    StandardZone* zones = new StandardZone[header.standardCount];
    if (zones == 0) {
        die("Out of memory");
    }
    for (uint32_t i=0; i<header.standardCount; i++) {
        parse1StandardZone(in, zones[i]);
    }
    readEndMarker(in);

    // Measure the array that was just built.  The member stdZones is
    // not assigned until the caller stores this function's return
    // value, so it must not be used here.
    stdZoneSize = (int32_t)
        ((char*)&zones[header.standardCount] - (char*)&zones[0]);

    // Arguments are cast to the exact types %lu and %ld require.
    fprintf(stdout, " Read %lu standard zones, in-memory size %ld bytes\n",
            (unsigned long)header.standardCount, (long)stdZoneSize);
    return zones;
}
|
|
|
|
/**
 * Parse one standard zone line of the form "nameDelta,gmtOffset".
 *
 * The name delta is range-checked against the name table size and
 * then discarded.  The GMT offset is given in seconds and stored in
 * milliseconds.
 *
 * @param in   input stream positioned at the zone's line
 * @param zone receives the parsed GMT offset
 */
void gentz::parse1StandardZone(FileStream* in, StandardZone& zone) {
    readLine(in);
    char* cursor = buffer;

    // First field: validated only, not stored.
    (void) parseInteger(cursor, SEP, 0, nameTableSize);

    // Second and last field: offset in seconds.
    const int32_t offsetSeconds =
        parseInteger(cursor, NUL, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
    zone.gmtOffset = 1000 * offsetSeconds; // Convert s -> ms
}
|
|
|
|
/**
 * Parse the list of DST zones.
 *
 * Reads the zone count into header.dstCount, then one line per zone,
 * then the end marker.  The byte size of the array is stored in
 * dstZoneSize.  Calls die() on allocation failure or bad input.
 *
 * @param in input stream positioned at the list's count line
 * @return newly allocated array of header.dstCount zones
 */
DSTZone* gentz::parseDSTZones(FileStream* in) {
    header.dstCount = readIntegerLine(in, 1, MAX_ZONES);
    DSTZone* zones = new DSTZone[header.dstCount];
    if (zones == 0) {
        die("Out of memory");
    }
    for (uint32_t i=0; i<header.dstCount; i++) {
        parse1DSTZone(in, zones[i]);
    }
    readEndMarker(in);

    // Measure the array that was just built.  The member dstZones is
    // not assigned until the caller stores this function's return
    // value, so it must not be used here.
    dstZoneSize = (int32_t)
        ((char*)&zones[header.dstCount] - (char*)&zones[0]);

    // Arguments are cast to the exact types %lu and %ld require.
    fprintf(stdout, " Read %lu DST zones, in-memory size %ld bytes\n",
            (unsigned long)header.dstCount, (long)dstZoneSize);
    return zones;
}
|
|
|
|
/**
 * Parse one DST zone line: name delta, GMT offset, onset rule, cease
 * rule, and DST savings, separated by SEP.
 *
 * The name delta is range-checked against the name table size and
 * then discarded.  The GMT offset is given in seconds and stored in
 * milliseconds.  The savings value is accepted in the range 0..12*60.
 *
 * @param in   input stream positioned at the zone's line
 * @param zone receives the parsed fields
 */
void gentz::parse1DSTZone(FileStream* in, DSTZone& zone) {
    readLine(in);
    char* cursor = buffer;

    // Name delta: validated only, not stored.
    (void) parseInteger(cursor, SEP, 0, nameTableSize);

    const int32_t offsetSeconds =
        parseInteger(cursor, SEP, -MAX_GMT_OFFSET, MAX_GMT_OFFSET);
    zone.gmtOffset = 1000 * offsetSeconds; // Convert s -> ms

    // The two rules appear back to back, onset first.
    parseDSTRule(cursor, zone.onsetRule);
    parseDSTRule(cursor, zone.ceaseRule);

    // Last field on the line.
    const int32_t savings = parseInteger(cursor, NUL, 0, 12*60);
    zone.dstSavings = (uint16_t) savings;
}
|
|
|
|
void gentz::parseDSTRule(char*& p, TZRule& rule) {
|
|
rule.month = (uint8_t) parseInteger(p, SEP, 0, 11);
|
|
rule.dowim = (int8_t) parseInteger(p, SEP, -31, 31);
|
|
rule.dow = (int8_t) parseInteger(p, SEP, -7, 7);
|
|
rule.time = (uint16_t) parseInteger(p, SEP, 0, 24*60);
|
|
rule.mode = *p++;
|
|
if (*p++ != SEP) {
|
|
die("Separator missing");
|
|
}
|
|
switch (rule.mode) {
|
|
case 'w':
|
|
rule.mode = WALL_TIME;
|
|
break;
|
|
case 's':
|
|
rule.mode = STANDARD_TIME;
|
|
break;
|
|
case 'u':
|
|
rule.mode = UTC_TIME;
|
|
break;
|
|
default:
|
|
die("Invalid rule time mode");
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
 * Parse the name table: a count line, one zone name per line, and
 * the end marker.  The names are packed back to back, each followed
 * by a NUL, into a buffer of exactly nameTableSize bytes.
 *
 * Calls die() if the count differs from header.count, on allocation
 * failure, or if the packed names do not fill the declared size
 * exactly.
 *
 * @param in input stream positioned at the table's count line
 * @return newly allocated buffer of nameTableSize bytes
 */
char* gentz::parseNameTable(FileStream* in) {
    int32_t n = readIntegerLine(in, 1, MAX_ZONES);
    if (n != (int32_t)header.count) {
        die("Zone count doesn't match name table count");
    }
    char* names = new char[nameTableSize];
    if (names == 0) {
        die("Out of memory");
    }
    char* p = names;
    char* limit = names + nameTableSize;
    for (int32_t i=0; i<n; ++i) {
        int32_t len = readLine(in);
        // Each name occupies len bytes plus its terminating NUL, so
        // len + 1 bytes must still be free.  Comparing against the
        // remaining space also avoids forming a pointer past the
        // end of the buffer.
        if (len >= (limit - p)) {
            die("Name table longer than declared size");
        }
        icu_memcpy(p, buffer, len);
        p += len;
        *p++ = NUL;
    }
    if (p != limit) {
        die("Name table shorter than declared size");
    }
    readEndMarker(in);
    // Arguments are cast to the exact type %ld requires.
    fprintf(stdout, " Read %ld names, in-memory size %ld bytes\n",
            (long)n, (long)nameTableSize);
    return names;
}
|
|
|
|
/**
|
|
* Read the end marker (terminates each list).
|
|
*/
|
|
void gentz::readEndMarker(FileStream* in) {
|
|
readLine(in);
|
|
if (icu_strcmp(buffer, END_KEYWORD) != 0) {
|
|
die("Keyword 'end' missing");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Read a line from the FileStream and parse it as an
|
|
* integer. There should be nothing else on the line.
|
|
*/
|
|
int32_t gentz::readIntegerLine(FileStream* in, int32_t min, int32_t max) {
|
|
readLine(in);
|
|
char* p = buffer;
|
|
return parseInteger(p, NUL, min, max);
|
|
}
|
|
|
|
/**
|
|
* Parse an integer from the given character buffer.
|
|
* Advance p past the last parsed character. Return
|
|
* the result. The integer must be of the form
|
|
* /-?\d+/.
|
|
*/
|
|
int32_t gentz::_parseInteger(char*& p) {
|
|
int32_t n = 0;
|
|
int32_t digitCount = 0;
|
|
int32_t digit;
|
|
bool_t negative = FALSE;
|
|
if (*p == MINUS) {
|
|
++p;
|
|
negative = TRUE;
|
|
}
|
|
for (;;) {
|
|
digit = *p - ZERO;
|
|
if (digit < 0 || digit > 9) {
|
|
break;
|
|
}
|
|
n = 10*n + digit;
|
|
p++;
|
|
digitCount++;
|
|
}
|
|
if (digitCount < 1) {
|
|
die("Unable to parse integer");
|
|
}
|
|
if (negative) {
|
|
n = -n;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/**
 * Parse an integer at p, then require a specific character directly
 * after it and a value within the given bounds.
 *
 * @param p                cursor into the current line; advanced past
 *                         the integer and the character following it
 * @param nextExpectedChar character that must follow the integer
 * @param min              smallest acceptable value
 * @param max              largest acceptable value
 * @return the parsed value; die() is called if the following
 *         character or the range check fails
 */
int32_t gentz::parseInteger(char*& p, char nextExpectedChar,
                            int32_t min, int32_t max) {
    const int32_t value = _parseInteger(p);

    // Step over the follower whether or not it is the expected one.
    const char follower = *p;
    ++p;
    if (follower != nextExpectedChar) {
        die("Character following integer unexpected");
    }

    const bool_t inRange = (value >= min && value <= max);
    if (!inRange) {
        die("Integer field out of range");
    }
    return value;
}
|
|
|
|
void gentz::die(const char* msg) {
|
|
fprintf(stderr, "ERROR, %s\n", msg);
|
|
if (*buffer) {
|
|
fprintf(stderr, "Input file line %ld: \"%s\"\n", lineNumber, buffer);
|
|
}
|
|
exit(1);
|
|
}
|
|
|
|
/**
 * Read the next input line into buffer and strip it: a trailing
 * comment (from the COMMENT character on, together with any spaces
 * or tabs directly before it) is removed, and so are trailing CR and
 * LF characters.  Increments lineNumber.
 *
 * @param in input stream
 * @return length of the stripped line left in buffer
 */
int32_t gentz::readLine(FileStream* in) {
    ++lineNumber;

    // Start from an empty buffer.  Should the read store nothing
    // (presumably what happens at end of file -- the contract of
    // T_FileStream_readLine is not visible here; confirm), callers
    // then see an empty line instead of parsing the previous line
    // a second time.
    *buffer = NUL;
    T_FileStream_readLine(in, buffer, BUFLEN);

    // Trim off trailing comment
    char* p = icu_strchr(buffer, COMMENT);
    if (p != 0) {
        // Back up past any space or tab characters before
        // the comment character.
        while (p > buffer && (p[-1] == SPACE || p[-1] == TAB)) {
            p--;
        }
        *p = NUL;
    }

    // Delete any trailing ^J and/or ^M characters
    p = buffer + icu_strlen(buffer);
    while (p > buffer && (p[-1] == CR || p[-1] == LF)) {
        p--;
    }
    *p = NUL;

    // p now sits on the terminator, so its distance from the start
    // is the string length.
    return (int32_t)(p - buffer);
}
|