82f6fab817
X-SVN-Rev: 12112
269 lines
10 KiB
C
269 lines
10 KiB
C
/*
|
|
**********************************************************************
|
|
* Copyright (C) 1999-2001, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
* Date Name Description
|
|
* 11/24/99 aliu Creation.
|
|
* 12/13/1999 srl Padded OffsetIndex to 4 byte values
|
|
* 02/01/01 aliu Added country index
|
|
**********************************************************************
|
|
*/
|
|
|
|
#ifndef TZDAT_H
|
|
#define TZDAT_H
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
/* This file defines the format of the memory-mapped data file
|
|
* containing system time zone data for icu. See also gentz
|
|
* and tz.pl.
|
|
*
|
|
* The format is designed specifically to allow certain operations:
|
|
*
|
|
* 1. Performing a fast binary search by name, and locating the
|
|
* corresponding zone data. This is the most important operation.
|
|
* It corresponds to the TimeZone::createTimeZone() method.
|
|
*
|
|
* 2. Performing a fast iteration over zones having a specific GMT
|
|
* offset. For this operation, the zone data need not be
|
|
* retrieved, just the IDs. This corresponds to the
|
|
* TimeZone::createAvailableIDs(int32_t) method.
|
|
*
|
|
* 3. Iterating over all zone IDs. This corresponds to the
|
|
* TimeZone::createAvailableIDs() method.
|
|
*
|
|
* The createAvailableIDs() methods return arrays of pointers to
|
|
* existing static UnicodeString IDs that it owns. Thus
|
|
* createAvailableIDs() needs a way to reference one of these IDs when
|
|
* iterating. Note that these IDs are _not_ stored in the
|
|
* memory-mapped data file, so we cannot store offsets. To solve this
|
|
* problem, we define a canonical index number for each zone. This
|
|
* index number runs from 0..n-1, where n is the total number of
|
|
* zones. The name table is stored in index number order, and we
|
|
* provide a table that is sorted by GMT offset with keys being GMT
|
|
* offset values and values being canonical index numbers.
|
|
*
|
|
* (Later, we might change createAvailableIDs() to return char*
|
|
* strings rather than UnicodeString pointers. In that case, this
|
|
* data structure could be modified to index into the name table
|
|
* directly.)
|
|
*
|
|
* Any field with a name ending in "delta" is an offset value
|
|
* from the first byte of the TZHeader structure, unless otherwise
|
|
* specified.
|
|
*
|
|
* When using the name index table and the offset index table,
|
|
* code can determine whether an indexed zone is a standard
|
|
* zone or a DST zone by examining its delta. If the delta is
|
|
* less than dstDelta, it is a standard zone. Otherwise it
|
|
* is a DST zone.
|
|
*/
|
|
|
|
/* tz.icu data file format
|
|
*
|
|
* Here is the overall structure of the tz.icu file, expressed as a
|
|
* pseudo-C-struct. Refer to actual struct declarations below for
|
|
* more details on each subelement of tz.icu. Many of the elements
|
|
* are of variable size. Padding is used when necessary to align
|
|
* words and longs properly; see structure declarations for details.
|
|
*
|
|
* struct tz.icu {
|
|
*
|
|
* // The header gives offsets to various tables within tz.icu.
|
|
*
|
|
* struct TZHeader header;
|
|
*
|
|
* // The equivalency groups; repeated; one element for each
|
|
* // equivalency group. Each one is of variable size. Typically,
|
|
* // an equivalency group is found given an ID index. The index is
|
|
* // used to find an entry of nameToEquiv[]. That entry is added to
|
|
* // the start of the header to obtain a pointer to one of the
|
|
* // entries equivTable[i]. The number of equivalency groups (n1)
|
|
* // is not stored anywhere; it can be discovered by walking the
|
|
* // table.
|
|
*
|
|
* struct TZEquivalencyGroup equivTable[n1];
|
|
*
|
|
* // An index which groups timezones having the same raw offset
|
|
* // together; repeated; one element for each raw offset struct.
|
|
* // Typically the code walks through this table starting at the
|
|
* // beginning until the desired index is found or the end of the
|
|
* // table is reached. The number of offset groups (n2) is not
|
|
* // stored anywhere; it can be discovered by walking the table.
|
|
*
|
|
* struct OffsetIndex offsetIndex[n2];
|
|
*
|
|
* // An index which groups timezones having the same country
|
|
* // together; repeated; one element for each country. Typically
|
|
* // the code walks through this table starting at the beginning
|
|
* // until the desired country is found or the end of the table is
|
|
* // reached. The number of country groups (n3) is not stored
|
|
* // anywhere; it can be discovered by walking the table.
|
|
*
|
|
* struct CountryIndex countryIndex[n3];
|
|
*
|
|
* // An array of offsets, one for each name. Each offset, when
|
|
* // added to the start of the header, gives a pointer to an entry
|
|
* // equivTable[i], the equivalency group struct for the given zone.
|
|
* // The number of names is given by TZHeader.count. The order of
|
|
* // entries is the same as nameTable[].
|
|
*
|
|
* uint32 nameToEquiv[header.count];
|
|
*
|
|
* // All the time zone IDs, in sorted order, with 0 termination.
|
|
* // The number of entries is given by TZHeader.count. The total
|
|
* // number of characters in this table (n4) is not stored anywhere;
|
|
* // it can be discovered by walking the table. The order of
|
|
* // entries is the same as nameToEquiv[].
|
|
*
|
|
* char nameTable[n4];
|
|
* };
|
|
*/
|
|
|
|
// Information used to identify and validate the data:
// the name and the type of the time zone data item (see the
// "tz.icu data file format" description in this header).
#define TZ_DATA_NAME "tz"
#define TZ_DATA_TYPE "icu"
|
|
|
|
#if !UCONFIG_NO_FORMATTING
|
|
|
|
// Fields in UDataInfo:
|
|
|
|
// Four-byte data signature, spelling "zone" in ASCII.
// TZ_SIG[] is encoded as numeric literals for compatibility with the HP compiler
static const uint8_t TZ_SIG_0 = 0x7a; // z
static const uint8_t TZ_SIG_1 = 0x6f; // o
static const uint8_t TZ_SIG_2 = 0x6e; // n
static const uint8_t TZ_SIG_3 = 0x65; // e
|
|
|
|
// Version of the data layout described in this header.
// This must match the version number at the top of tz.txt as
// well as the version number in the udata header.
static const int8_t TZ_FORMAT_VERSION = 4; // formatVersion[0]
|
|
|
|
/**
 * Header found at the start of the tz.icu data.  It gives the number of
 * zones and the location of every table in the file.  Each field whose
 * name ends in "Delta" is an offset measured from the first byte of
 * this struct.
 */
struct TZHeader {
    uint16_t versionYear;       // e.g. "1999j" -> 1999
    uint16_t versionSuffix;     // e.g. "1999j" -> 10

    uint32_t count;             // standardCount + dstCount; also the
                                // number of entries in the name index
                                // table and in the name table

    uint32_t equivTableDelta;   // delta to equivalency group table
    uint32_t offsetIndexDelta;  // delta to gmtOffset index table

    uint32_t countryIndexDelta; // delta to country code index table

    uint32_t nameIndexDelta;    // delta to name index table
    // The name index table is an array of 'count' 32-bit offsets from
    // the start of this header to equivalency group table entries.

    uint32_t nameTableDelta;    // delta to name (aka ID) table
    // The name table contains all zone IDs, in sort order, each name
    // terminated by a zero byte.
};
|
|
|
|
/**
 * Zone data for a standard zone.  A standard zone carries no rule
 * data, only its GMT offset.
 */
struct StandardZone {
    int32_t gmtOffset; // gmt offset in milliseconds
};
|
|
|
|
/**
 * A single rule of a DST zone; DSTZone stores one of these for the
 * onset and one for the cease of daylight savings.
 *
 * NOTE(review): 'time' follows three 1-byte fields, so a compiler that
 * 2-aligns uint16_t places one padding byte before it -- confirm that
 * the data generator writes the same layout.
 */
struct TZRule {
    uint8_t  month; // month
    int8_t   dowim; // dowim (presumably day-of-week-in-month; confirm)
    int8_t   dow;   // dow (presumably day of week; confirm)
    uint16_t time;  // time in minutes
    int8_t   mode;  // (w/s/u) == TimeZone::TimeMode enum as int
};
|
|
|
|
struct DSTZone {
|
|
int32_t gmtOffset; // gmtoffset in milliseconds
|
|
uint16_t dstSavings; // savings in minutes
|
|
TZRule onsetRule; // onset rule
|
|
TZRule ceaseRule; // cease rule
|
|
};
|
|
|
|
/**
|
|
* This variable-sized struct represents a time zone equivalency group.
|
|
* This is a set of one or more zones that are identical in GMT offset
|
|
* and rules, but differ in ID. The struct has a variable size because
|
|
* the standard zone has no rule data, and also because it contains a
|
|
* variable number of index values listing the zones in the group.
|
|
* The struct is padded to take up 4n bytes so that 4-byte integers
|
|
* within the struct stay 4-aligned (namely, the gmtOffset members of
|
|
* the zone structs).
|
|
*/
|
|
struct TZEquivalencyGroup {
|
|
uint16_t nextEntryDelta; // 0 for last entry
|
|
uint8_t isDST; // != 0 for DSTZone
|
|
uint8_t reserved;
|
|
union {
|
|
struct {
|
|
StandardZone zone;
|
|
uint16_t count;
|
|
uint16_t index; // There are actually 'count' uint16_t's here
|
|
} s;
|
|
struct {
|
|
DSTZone zone;
|
|
uint16_t count;
|
|
uint16_t index; // There are actually 'count' uint16_t's here
|
|
} d;
|
|
} u;
|
|
// There may be two bytes of padding HERE to make the whole struct
|
|
// have size 4n bytes.
|
|
};
|
|
|
|
/**
|
|
* This variable-sized struct makes up the offset index table. To get
|
|
* from one table entry to the next, add the nextEntryDelta. If the
|
|
* nextEntryDelta is zero then this is the last entry. The offset
|
|
* index table is designed for sequential access, not random access.
|
|
* Given the small number of distinct offsets (39 in 1999j), this
|
|
* suffices.
|
|
*
|
|
* The value of defaultZone is the zone within this list that should be
|
|
* selected as the default zone in the absence of any other
|
|
* discriminating information. This information comes from the file
|
|
* tz.default. Note that this is itself a zone number, like
|
|
* those in the array starting at &zoneNumber.
|
|
*
|
|
* The gmtOffset field must be 4-aligned for some architectures. To
|
|
* ensure this, we do three things: 1. The entire struct is 4-aligned.
|
|
* 2. The gmtOffset is placed at a 4-aligned position within the
|
|
* struct. 3. The size of the whole structure is padded out to 4n
|
|
* bytes. We achieve this last condition by adding two bytes of
|
|
* padding after the last zoneNumber, if count is _even_. That is,
|
|
* the struct size is 10+2count+padding, where padding is (count%2==0
|
|
* ? 2:0). See gentz for implementation.
|
|
*/
|
|
struct OffsetIndex {
    int32_t  gmtOffset;      // in ms - 4-aligned
    uint16_t nextEntryDelta; // add to get to the next entry; 0 for last entry
    uint16_t defaultZone;    // a zone number from 0..TZHeader.count-1
    uint16_t count;          // number of zone numbers in this entry
    uint16_t zoneNumber;     // There are actually 'count' uint16_t's here

    // Following the 'count' uint16_t's starting with zoneNumber,
    // there may be two bytes of padding to make the whole struct have
    // a size of 4n.  nextEntryDelta skips over any padding.
};
|
|
|
|
/**
|
|
* This variable-sized struct makes up the country index table. To get
|
|
* from one table entry to the next, add the nextEntryDelta. If the
|
|
* nextEntryDelta is zero then this is the last entry. The country
|
|
* index table is designed for sequential access, not random access.
|
|
*
|
|
* The intcode is an integer representation of the two-letter country
|
|
* code. It is computed as (c1-'A')*32 + (c0-'A') where the country
|
|
* code is a two-character string c1 c0, 'A' <= ci <= 'Z'.
|
|
*
|
|
* There are no 4-byte integers in this table, so we don't 4-align the
|
|
* entries.
|
|
*/
|
|
struct CountryIndex {
    uint16_t intcode;        // two-letter country code c1 c0 encoded
                             // as (c1-'A')*32 + (c0-'A')
    uint16_t nextEntryDelta; // add to get to the next entry; 0 for last entry
    uint16_t count;          // number of zone numbers in this entry
    uint16_t zoneNumber;     // There are actually 'count' uint16_t's here
};
|
|
|
|
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
|
|
#endif
|