#!/usr/bin/perl
######################################################################
# Copyright (C) 1999, International Business Machines
# Corporation and others.  All Rights Reserved.
######################################################################
# See: ftp://elsie.nci.nih.gov/pub/tzdata<version>
# where <version> is "1999b" or a similar string.
######################################################################
# This script takes time zone data in elsie.nci.nih.gov format and
# parses it into a form usable by ICU.  The raw data contains more
# data than we need, since it contains historical zone data.  We
# parse out the current zones and create a listing of current zones.
# Author: Alan Liu
######################################################################
# This script reads an alias table, $TZ_ALIAS, and creates clones of
# standard UNIX zones with alias names.
######################################################################
# To update the zone data, download the latest data from the NIH URL
# listed above into a directory.  Run this script with the directory
# name as an argument.  THE DIRECTORY NAME MUST END IN tzdataYYYYR.
######################################################################
# OUTPUT FILE FORMAT (filename $OUT)
#
# As a matter of policy, this script wants to do as much of
# the parsing, data processing, and error checking as possible,
# leaving the C++ program that parses this file to just do the binary
# translation step.
#
# - The file is line based, with one record per line.
# - Lines may be followed by a comment; the parser must ignore
#   anything of the form /\s+#.*$/ in each line.
#    |3065,14400 # Asia/Dubai GMT+4:00
# - The file contains a header and 3 lists.
# - The header contains the version of this data file:
#      2 original version, without equivalency groups
#      3 current version, described here
#   then the version of the unix data, and other counts:
#    | 3 # format version number of this file
#    | 1999 # (tzdata1999j) version of Olson zone
#    | 10 # data from ftp://elsie.nci.nih.gov
#    | 402 # total zone count
#    | 40 # maximum zones per offset (used by gentz)
# - Lists start with a count of the records to follow, the records
#   themselves (one per line), and a single line with the keyword
#   'end'.
# - The first list is the name table:
#    | 387 # count of names to follow
#    | 34,Africa/Abidjan
#    | 23,Africa/Accra
#    ...
#    | end
#   Each name is terminated by a newline (like all lines in the file).
#   The zone numbers in other lists refer to this table.  The
#   integer that precedes the name is an index into the equivalency
#   table, with the first table entry being entry 0.
# - The second list is the equivalency table.  It lists, in sorted
#   order, the equivalency groups.  Each group represents a
#   set of one or more zones that have the same GMT offset and the
#   same rules.  While there are about 400 zones, there are less than
#   120 equivalency groups (as of this writing).
#    | 120 # count of equivalency groups to follow
#    | s,0,1,0 # GMT+0:00
#    | d,0,8,1,0,0,w,11,31,0,0,w,20,4,15,16,17,18 # GMT+0:00 Sep 1...
#    ...
#    | end
#   Entries start with 's' for standard zones, or 'd' for DST zones.
#   Both zone descriptors start with the GMT offset in SECONDS.  DST
#   zones contain, in addition, data for the onset rule and the cease
#   rule.  Each rule is described by the following integers:
#     month (JAN = 0)
#     dowim } These two values are in SimpleTimeZone encoded
#     dow   } format for DOM, DOWIM, DOW>=DOM, or DOW<=DOM.
#     time MINUTES
#     time mode ('w', 's', 'u')
#   The last rule integer in the record is the DST savings in MINUTES,
#   typically 60.
#   After either a standard or a DST zone, there is a list of the
#   members of the equivalency group.
#   This consists of a number of
#   entries to follow (>=1), then the zone numbers themselves.
# - The third list is an index by GMT offset.  Each line lists the
#   zones with the same offset.  The first number on the line is the
#   GMT offset in seconds.  The second number is the default zone
#   number in the following list, taken from tz.default.  The list
#   consists of a number of entries to follow (>=1), then the zone
#   numbers themselves.
#    | 39 # index by offset entries to follow
#    | -43200,280,1,280 # -12:00 d=Etc/GMT+12 Etc/GMT+12
#    | -39600,374,6,279,366,374,394,396,399 # -11:00 d=Pacific/Apia Etc/GMT+11 MIT Pacific/Apia Pacific/Midway Pacific/Niue Pacific/Pago_Pago
#    ...
#    | end
######################################################################
# As of 1999j, here are the various possible values taken by the
# rule fields.  See code below that generates this data.
#
# at: 0:00, 0:00s, 1:00, 1:00s, 1:00u, 23:00s, 2:00, 2:00s, 2:30, 2:45s,
#     3:00, 3:00s
# in: Apr, Dec, Feb, Jan, Jun, Mar, May, Nov, Oct, Sep
# letter: -, D, GHST, GMT, HS, S, SLST
# on: 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
#     Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
#     Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
# save: 0, 0:20, 0:30, 1:00
# type: -

require 5; # Minimum version of perl needed
use strict;
use Getopt::Long;
use vars qw(@FILES $YEAR $DATA_DIR $OUT $SEP @MONTH
            $VERSION_YEAR $VERSION_SUFFIX $RAW_VERSION
            $TZ_ALIAS $TZ_DEFAULT $URL $HTML_FILE $TZ_TXT_VERSION
            %ZONE_ID_TO_INDEX $END_MARKER);
require 'dumpvar.pl';
use tzparse;
use tzutil;

# Current version of the data file.  Matches formatVersion[0] in the
# binary data file.  SEE tzdat.h
# 1 - unreleased version (?)
# 2 - original version
# 3 - added equivalency groups
$TZ_TXT_VERSION = 3;

# File names
$OUT = 'tz.txt';
$TZ_ALIAS = 'tz.alias';
$TZ_DEFAULT = 'tz.default';

# Source of our data
$URL = "ftp://elsie.nci.nih.gov/pub";

# Separator between fields in the output file
$SEP = ','; # Don't use ':'!

# Marker between sections
$END_MARKER = 'end';

@FILES = qw(africa antarctica asia australasia backward etcetera europe factory northamerica pacificnew solar87 solar88 solar89 southamerica);

# We get the current year from the system here.  Later
# we double check this against the zone data version.
$YEAR = 1900+@{[localtime]}[5]; # Get the current year

$DATA_DIR = shift;

if (!$DATA_DIR || ! -d $DATA_DIR) {
    print STDERR "No data directory or invalid directory specified\n\n";
    usage();
}

# The directory name carries the data version: tzdata + 4-digit year +
# optional revision letter.  The letter is mapped to 1..26 (a=1).
if ($DATA_DIR =~ /(tzdata(\d{4})(\w?))/) {
    $RAW_VERSION = $1;
    $VERSION_YEAR = $2;
    $VERSION_SUFFIX = $3;
    if ($YEAR != $VERSION_YEAR) {
        print STDERR "WARNING: You appear to be building $VERSION_YEAR data. Don't you want to use current $YEAR data?\n";
        usage(); # Add an override option for this check, if needed
    }
    $VERSION_SUFFIX =~ tr/a-z/A-Z/;
    if ($VERSION_SUFFIX =~ /[A-Z]/) {
        $VERSION_SUFFIX = ord($VERSION_SUFFIX) - ord('A') + 1;
    } else {
        if ($VERSION_SUFFIX) {
            print STDERR "Warning: Ignoring version suffix '$VERSION_SUFFIX' for \"$DATA_DIR\"\n";
        }
        $VERSION_SUFFIX = 0;
    }
    print "Time zone version $RAW_VERSION = $VERSION_YEAR($VERSION_SUFFIX)\n";
} else {
    print STDERR "The directory specified doesn't contain \"tzdataNNNNR\", so I can't tell what version the data is. Please rename the directory and try again.\n";
    usage();
}

$HTML_FILE = shift;

@MONTH = qw(jan feb mar apr may jun jul aug sep oct nov dec);

main();
exit();

# Print a usage message to STDERR and exit with status 1.
sub usage {
    print STDERR "Usage: $0 data_dir [html_out]\n\n";
    print STDERR "data_dir contains the unpacked files from\n";
    print STDERR "$URL/tzdataYYYYR,\n";
    print STDERR "where YYYY is the year and R is the revision\n";
    print STDERR "letter.\n";
    print STDERR "\n";
    print STDERR "Files that are expected to be present are:\n";
    print STDERR join(", ", @FILES), "\n";
    print STDERR "\n";
    print STDERR "[html_out] optional name of HTML file to output\n";
    exit 1;
}

# Read the zone source files and the alias and default tables, build
# the equivalency groups and the offset index, and write $OUT (and
# $HTML_FILE, if one was given on the command line).
sub main {
    my (%ZONES, %RULES, @EQUIV);

    print "Reading";
    foreach (@FILES) {
        if (! -e "$DATA_DIR/$_") {
            print STDERR "\nMissing file $DATA_DIR/$_\n\n";
            usage();
        }
        print ".";
        TZ::ParseFile("$DATA_DIR/$_", \%ZONES, \%RULES, $YEAR);
    }
    print "done\n";

    TZ::Postprocess(\%ZONES, \%RULES);

    my $aliases = incorporateAliases($TZ_ALIAS, \%ZONES);

    print
        "Read ", scalar keys %ZONES, " current zones and ",
        scalar keys %RULES, " rules for $YEAR\n";

    # Make sure we have a zone named GMT from either the
    # UNIX data or the alias table.  If not, add one.
    if (!exists $ZONES{GMT}) {
        print "Adding GMT zone\n";
        my %GMT = ('format' => 'GMT',
                   'gmtoff' => '0:00',
                   'rule' => $TZ::STANDARD,
                   'until' => '');
        $ZONES{GMT} = \%GMT;
    }

    # Validate names and count total size
    my $NAME_SIZE = 0;
    foreach my $z (keys %ZONES) {
        # Make sure zone IDs only contain invariant chars
        assertInvariantChars($z);
        $NAME_SIZE += 1 + length($z);
    }

    # Find the maximum number of zones with the same value of
    # gmtOffset.
    my %perOffset; # Hash of offset -> count
    foreach my $z (keys %ZONES) {
        # Use TZ::ParseOffset to normalize values - probably unnecessary
        ++$perOffset{TZ::ParseOffset($ZONES{$z}->{gmtoff})};
    }
    my $maxPerOffset = 0;
    foreach (values %perOffset) {
        $maxPerOffset = $_ if ($_ > $maxPerOffset);
    }

    # Create the offset index table, that includes the zones
    # for each offset and the default zone for each offset.
    # This is a hash{$offset -> array ref}.  Element [0] of
    # the array is the default name.  Elements [1..n] are the
    # zones for the offset, in sorted order, including the default.
    my $offsetIndex = createOffsetIndex(\%ZONES, $TZ_DEFAULT);

    # Group zones into equivalency groups
    my $maxPerEquiv = 0;
    TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
    print
        "Equivalency groups (including unique zones): ",
        scalar @EQUIV, "\n";
    foreach my $eg (@EQUIV) {
        $maxPerEquiv = @$eg if (@$eg > $maxPerEquiv);
    }

    # Sort equivalency table first by GMT offset, then by
    # alphabetic order of encoded rule string.
    @EQUIV = sort { my $x = $ZONES{$a->[0]};
                    my $y = $ZONES{$b->[0]};
                    TZ::ParseOffset($x->{gmtoff}) <=>
                        TZ::ParseOffset($y->{gmtoff}) ||
                    TZ::ZoneCompare($x, $y, \%RULES); } @EQUIV;

    # Sort the zones in each equivalency table entry
    foreach my $eg (@EQUIV) {
        next unless (@$eg > 1); # Skip single-zone entries
        my @zoneList = sort @$eg;
        $eg = \@zoneList;
    }

    # Create an index from zone ID to index number
    my $i = 0;
    foreach my $z (sort keys %ZONES) {
        $ZONE_ID_TO_INDEX{$z} = $i++;
    }

    open(OUT,">$OUT") or die "Can't open $OUT for writing: $!";

    ############################################################
    # EMIT HEADER
    ############################################################
    # Zone data version
    print OUT "#--- Header ---\n";
    print OUT $TZ_TXT_VERSION, " # format version number of this file\n";
    print OUT $VERSION_YEAR, " # ($RAW_VERSION) version of Olson zone\n";
    print OUT $VERSION_SUFFIX, " # data from $URL\n";
    print OUT scalar keys %ZONES, " # total zone count\n";
    # The following counts are all used by gentz during its parse
    # of the tz.txt file and creation of the tz.dat file, even
    # if they don't show up in the tz.dat file header.  For example,
    # gentz needs the maxPerOffset to preallocate the offset index
    # entries.  It needs the NAME_SIZE to allocate the big buffer
    # that will receive all the names.
    print OUT scalar @EQUIV, " # equivalency groups count\n";
    print OUT $maxPerOffset, " # max zones with same gmtOffset\n";
    print OUT $maxPerEquiv, " # max zones in an equivalency group\n";
    print OUT $NAME_SIZE, " # length of name table in bytes\n";
    print OUT $END_MARKER, "\n\n";

    ############################################################
    # EMIT ZONE TABLE
    ############################################################
    # Output the name table, followed by 'end' keyword
    print OUT "#--- Zone table ---\n";
    print OUT "#| equiv_index,name\n";
    print OUT scalar keys %ZONES, " # count of zones to follow\n";

    # IMPORTANT: This sort must correspond to the sort
    #            order of UnicodeString::compare.  That
    #            is, it must be a plain sort.
    foreach my $z (sort keys %ZONES) {
        # Make sure zone IDs only contain invariant chars
        assertInvariantChars($z);

        print OUT equivIndexOf($z, \@EQUIV), ',', $z, "\n";
    }
    print OUT $END_MARKER, "\n\n";

    ############################################################
    # EMIT EQUIVALENCY TABLE
    ############################################################
    print OUT "#--- Equivalency table ---\n";
    print OUT "#| ('s'|'d'),zone_spec,id_count,id_list\n";
    print OUT scalar @EQUIV, " # count of equivalency groups to follow\n";
    $i = 0;
    foreach my $aref (@EQUIV) {
        # $aref is an array ref; the array is full of zone IDs
        # Use the ID of the first array element
        my $z = $aref->[0];

        # Output either 's' or 'd' to indicate standard or DST
        my $isStd = ($ZONES{$z}->{rule} eq $TZ::STANDARD);
        print OUT $isStd ? 's,' : 'd,';

        # Format the zone
        my ($spec, $notes) = formatZone($z, $ZONES{$z}, \%RULES);

        # Now add the equivalency list
        push @$spec, scalar @$aref;
        push @$notes, "[";
        my $min = -1;
        foreach $z (@$aref) {
            my $index = $ZONE_ID_TO_INDEX{$z};
            # Make sure they are in order
            die("Unsorted equiv table indices") if ($index <= $min);
            $min = $index;
            push @$spec, $index;
            push @$notes, $z;
        }
        push @$notes, "]";

        unshift @$notes, $i++; # Insert index of this group at front

        print OUT join($SEP, @$spec) . " # " . join(' ', @$notes), "\n";
    }
    print OUT $END_MARKER, "\n\n";

    ############################################################
    # EMIT INDEX BY GMT OFFSET
    ############################################################
    # Create a hash mapping zone name -> integer, from 0..n-1.
    # Create an array mapping zone number -> name.
    my %zoneNumber;
    my @zoneName;
    $i = 0;
    foreach (sort keys %ZONES) {
        $zoneName[$i] = $_;
        $zoneNumber{$_} = $i++;
    }

    # Emit offset index
    print OUT "#--- Offset index ---\n";
    print OUT "#| gmt_offset,default_id,id_count,id_list\n";
    print OUT scalar keys %{$offsetIndex}, " # index by offset entries to follow\n";
    foreach (sort {$a <=> $b} keys %{$offsetIndex}) {
        my $aref = $offsetIndex->{$_};
        my $def = $aref->[0];
        # Make a slice of 1..n
        my @b = @{$aref}[1..$#{$aref}];
        print OUT
            $_, ",", $zoneNumber{$def}, ",",
            scalar @b, ",",
            join(",", map($zoneNumber{$_}, @b)),
            " # ", formatOffset($_), " d=", $def, " ",
            join(" ", @b), "\n";
    }
    print OUT $END_MARKER, "\n";

    ############################################################
    # END
    ############################################################
    # Buffered write errors only surface at close, so check it.
    close(OUT) or die "Can't close $OUT: $!";
    print "$OUT written.\n";

    # Emit the HTML file
    if ($HTML_FILE) {
        emitHTML($HTML_FILE, \%ZONES, \%RULES, \@EQUIV, $offsetIndex, $aliases);
        print "$HTML_FILE written.\n";
    }

    #::dumpValue($ZONES{"America/Los_Angeles"});
    #::dumpValue($RULES{"US"});
    #::dumpValue($RULES{"Tonga"});

    # Find all the different values of rule fields:
    # in, at, on, save, type, letter
    if (0) {
        my %RULEVALS;
        foreach my $ruleName (keys %RULES) {
            for (my $i=0; $i<2; ++$i) {
                foreach my $key (qw(in on at save type letter)) {
                    if (@{$RULES{$ruleName}} < 2) {
                        print $ruleName, ":";
                        ::dumpValue($RULES{$ruleName});
                    }
                    my $x = $RULES{$ruleName}->[$i]->{$key};
                    $RULEVALS{$key}->{$x} = 1;
                }
            }
        }
        foreach my $key (sort keys %RULEVALS) {
            print "$key: ", join(", ", sort keys %{$RULEVALS{$key}}), "\n";
        }
    }
}

# Create an index of all the zones by GMT offset.  This index will
# list the zones for each offset and also the default zone for that
# offset.
#
# Param: Ref to zone table
# Param: Name of default file
#
# Return: ref to hash; the hash has offset integers as keys and arrays
# of zone names as values.  If there are n zone names at an offset,
# the array contains n+1 items.  The first item, [0], is the default
# zone.  Items [1..n] are the zones sorted lexically.  Thus the
# default appears twice, once in slot [0], and once somewhere in
# [1..n].
sub createOffsetIndex {
    my $zones = shift;
    my $defaultFile = shift;

    # Create an index by gmtoff.
    my %offsetMap;
    foreach (sort keys %{$zones}) {
        my $offset = TZ::ParseOffset($zones->{$_}->{gmtoff});
        push @{$offsetMap{$offset}}, $_;
    }

    # Select defaults.  We do this by reading the file $defaultFile.
    # If there are multiple errors, we want to report them all,
    # so we set a flag and die at the end if there are problems.
    my %defaults; # key=offset integer, value=zone name
    my $ok = 1;
    local *IN; # Don't clobber a caller's IN handle
    open(IN, $defaultFile) or die "Can't open $defaultFile: $!";
    while (<IN>) {
        my $raw = $_;
        s/\#.*//; # Trim comments
        next unless (/\S/); # Skip blank lines
        if (/^\s*(\S+)\s*$/) {
            my $z = $1;
            if (! exists $zones->{$z}) {
                print "Error: Nonexistent zone $z listed in $defaultFile line: $raw";
                $ok = 0;
                next;
            }
            my $offset = TZ::ParseOffset($zones->{$z}->{gmtoff});
            if (exists $defaults{$offset}) {
                print
                    "Error: Offset ", formatOffset($offset),
                    " has both ", $defaults{$offset},
                    " and ", $z, " specified as defaults\n";
                $ok = 0;
                next;
            }
            $defaults{$offset} = $z;
        } else {
            print "Error: Can't parse line in $defaultFile: $raw";
            $ok = 0;
        }
    }
    close(IN);
    die "Error: Aborting due to errors in $defaultFile\n" unless ($ok);
    print "Incorporated ", scalar keys %defaults, " defaults from $defaultFile\n";

    # Go through and record the default for each GMT offset, and unshift
    # it into slot [0].
    # Fill in the blanks, since the default table will typically
    # not list a default for every single offset.
    my $missing;
    foreach my $gmtoff (keys %offsetMap) {
        my $aref = $offsetMap{$gmtoff};
        my $def;
        if (exists $defaults{$gmtoff}) {
            $def = $defaults{$gmtoff};
        } else {
            # If there is an offset for which we have no listed default
            # in $defaultFile, we try to figure out a reasonable default
            # ourselves.  We ignore any zone named Etc/ because that's not
            # a "real" zone; it's just one listed as a POSIX convenience.
            # We take the first (alphabetically) zone of what's left,
            # and if there are more than one of those, we emit a warning.

            my $ambiguous;
            # Ignore zones named Etc/ and take the first one we otherwise see;
            # if there is more than one of those, emit a warning.
            foreach (sort @{$aref}) {
                next if (m|^Etc/|i);
                if (!$def) {
                    $def = $_;
                } else {
                    $ambiguous = 1;
                }
            }
            # Only Etc/ zones at this offset: fall back to the first one.
            $def = $aref->[0] unless ($def);
            if ($ambiguous) {
                $missing = 1;
                print
                    "Warning: No default for GMT", formatOffset($gmtoff),
                    ", using ", $def, "\n";
            }
        }
        # Push $def onto front of list
        unshift @{$aref}, $def;
    }
    print "Defaults may be specified in $defaultFile\n" if ($missing);

    return \%offsetMap;
}

# Given a zone and an offset index, return the gmtoff if the name
# is a default zone, otherwise return ''.
# Param: zone name # Param: zone offset, as a string (that is, raw {gmtoff}) # Param: ref to offset index hash sub isDefault { my $name = shift; my $offset = shift; my $offsetIndex = shift; my $aref = $offsetIndex->{TZ::ParseOffset($offset)}; return ($aref->[0] eq $name); } # Emit an HTML file that contains a description of the system zones. # Param: File name # Param: ref to zone hash # Param: ref to rule hash # Param: ref to equiv table # Param: ref to offset index # Param: ref to alias hash sub emitHTML { my $file = shift; my $zones = shift; my $rules = shift; my $equiv = shift; my $offsetIndex = shift; my $aliases = shift; # These are variables for the template my $_count = scalar keys %{$zones}; my $_equiv = scalar @$equiv; # Build table in order of zone offset my $_offsetTable = "

\n"; foreach (sort {$a <=> $b} keys %{$offsetIndex}) { my $aref = $offsetIndex->{$_}; my $def = $aref->[0]; # Make a slice of 1..n my @b = @{$aref}[1..$#{$aref}]; my $gmtoff = "GMT" . formatOffset($_); $_offsetTable .= "" . "" . "" . "\n"; } $_offsetTable .= "
$gmtoff" . join(", ", map($_ eq $def ? "$_" : "$_", @b)) . "
\n"; # Build table in alphabetical order of zone name my $_nameTable = "

\n"; $_nameTable .= ""; $_nameTable .= ""; $_nameTable .= "\n"; $_nameTable .= ""; $_nameTable .= ""; $_nameTable .= "\n"; # Need a reverse alias table my %revaliases = reverse(%$aliases); foreach my $z (sort keys %$zones) { $_nameTable .= emitHTMLZone($z, $zones->{$z}, $rules, $offsetIndex, $aliases, \%revaliases); } $_nameTable .= "
IDOffsetDST BeginsDST EndsSavings





\n"; # Build equivalency group table my $_equivTable = "

\n"; $_equivTable .= ""; $_equivTable .= "\n"; $_equivTable .= ""; $_equivTable .= ""; $_equivTable .= "\n"; # Equiv table is sorted elsewhere -- output it in native order foreach my $eg (@$equiv) { $_equivTable .= emitHTMLEquiv($eg, $zones, $rules); } $_equivTable .= "
OffsetDST BeginsDST EndsSavingsZones






\n"; # Time stamp my $_timeStamp = localtime; ############################################################ # BEGIN HTML TEMPLATE ############################################################ my $html = <<"END"; ICU System Time Zones

ICU System Time Zones

Version $RAW_VERSION ($VERSION_YEAR.$VERSION_SUFFIX)
Total zone count $_count in $_equiv equivalency groups
Original source $URL
Author Alan Liu <liuas\@us.ibm.com>
This document generated $_timeStamp

Background

A time zone represents an offset applied to Greenwich Mean Time (GMT) to obtain local time. The offset may vary throughout the year, if daylight savings time (DST) is used, or may be the same all year long. Typically, regions closer to the equator do not use DST. If DST is in use, then specific rules define the point at which the offset changes, and the amount by which it changes. Thus, a time zone is described by the following information:

  • An identifying string, or ID. This consists only of invariant characters (see the file utypes.h). It typically has the format continent / city. The city chosen is not the only city in which the zone applies, but rather a representative city for the region. Some IDs consist of three or four uppercase letters; these are legacy zone names that are aliases to standard zone names.
  • An offset from GMT, either positive or negative. Offsets range from approximately minus half a day to plus half a day.

If DST is observed, then three additional pieces of information are needed:

  • The precise date and time during the year when DST begins. This is in the first half of the year in the northern hemisphere, and in the second half of the year in the southern hemisphere.
  • The precise date and time during the year when DST ends. This is in the first half of the year in the southern hemisphere, and in the second half of the year in the northern hemisphere.
  • The amount by which the GMT offset changes when DST is in effect. This is almost always one hour.

System and User Time Zones

ICU supports local time zones through the classes TimeZone and SimpleTimeZone in the C++ API. In the C API, time zones are designated by their ID strings.

Users may construct their own time zone objects by specifying the above information to the C++ API. However, it is more typical for users to use a pre-existing system time zone, since these represent all current international time zones in use. This document lists the system time zones, both in order of GMT offset, and in alphabetical order of ID.

Since this list changes one or more times a year, this document only represents a snapshot. For the current list of ICU system zones, use the method TimeZone::getAvailableIDs().

Notes

The zones are listed in binary sort order. That is, 'A' through 'Z' come before 'a' through 'z'. This is the same order in which the zones are stored internally, and the same order in which they are returned by TimeZone::getAvailableIDs(). The reason for this is that ICU locates zones using a binary search, and the binary search relies on this sort order.

You may notice that zones such as Etc/GMT+1 appear to have the wrong sign for their GMT offset. In fact, their sign is inverted because the Etc zones follow the POSIX sign conventions. This is the way the original Olson data is set up, and ICU reproduces the Olson data faithfully, including this confusing aspect. See the Olson files for more details.

References

The ICU system time zones are derived from the Olson data at $URL. This is the data used by UNIX systems and is updated one or more times each year. Unlike the Olson zone data, ICU only contains data for current zone usage. There is no support for historical zone data in ICU at this time.


Time Zones in order of GMT offset

Zone listed in bold are the default zone for a given GMT offset. This default is used by ICU if it cannot identify the host OS time zone by name. In that case, it uses the default zone for the host zone offset.

$_offsetTable

Time Zones in order of ID

Zone listed in bold are the default zone for their GMT offset. This default is used by ICU if it cannot identify the host OS time zone by name. In that case, it uses the default zone for the host zone offset. See above for a description of columns. See note above for an explanation of the sort order.

Times suffixed with 's' are in standard time. Times suffixed with 'u' are in UTC time. Times without suffixes are in wall time (that is, either standard time or daylight savings time, depending on which is in effect).

$_nameTable

Time Zone Equivalency Groups

ICU groups zones into equivalency groups. These are groups of zones that are identical in GMT offset and in rules, but that have different IDs. Knowledge of equivalency groups allows ICU to reduce the amount of data stored. More importantly, it allows ICU to apply data for one zone to other equivalent zones when appropriate (e.g., in formatting). Equivalency groups are formed at build time, not at runtime, so the runtime cost to lookup the equivalency group of a given zone is negligible.

$_equivTable END ############################################################ # END HTML TEMPLATE ############################################################ open(HTML, ">$file") or die "Can't open $file for writing: $!"; print HTML $html; close(HTML); } # Make a bookmark name out of a string. This just means normalizing # non-word characters. sub bookmark { local $_ = shift; s/-/m/g; s/\+/p/g; s/\W//g; $_; } # Emit an equivalency group as an HTML table row. Return the string. # Param: ref to array of zone IDs # Param: ref to zone hash # Param: ref to rule hash sub emitHTMLEquiv { my $eg = shift; my $zone = shift; my $rule = shift; local $_ = ""; $_ .= _emitHTMLZone($zone->{$eg->[0]}, $rule); # Don't sort @$eg -- output in native order $_ .= "" . join(" ", @$eg) . ""; $_ .= "\n"; $_; } # Emit a zone description without ID, alias info etc. # Param: zone OBJECT hash ref # Param: rule hash ref sub _emitHTMLZone { my ($zone, $rules) = @_; my $gmtoff = "GMT" . formatOffset(TZ::ParseOffset($zone->{gmtoff})); local $_ = "$gmtoff"; if ($zone->{rule} ne $TZ::STANDARD) { my $rule = $rules->{$zone->{rule}}; $_ .= "" . emitHTMLRule($rule->[0]) . ""; $_ .= "" . emitHTMLRule($rule->[1]) . ""; $_ .= "" . $rule->[0]->{save} . ""; } else { $_ .= ""; } $_; } # Emit a single zone description as HTML table row. Return the string. # Param: Zone name # Param: Zone hash object ref # Param: Ref to rules hash # Param: ref to offset index # Param: ref to alias hash # Param: ref to reverse alias hash sub emitHTMLZone { my ($name, $zone, $rules, $offsetIndex, $aliases, $revaliases) = @_; my $isDefault = isDefault($name, $zone->{gmtoff}, $offsetIndex); my $alias = exists $aliases->{$name} ? $aliases->{$name} : ''; my $revalias = exists $revaliases->{$name} ? $revaliases->{$name} : ''; local $_ = "" . ($isDefault?"":"") . "$name" . ($isDefault?"":"") . 
""; $_ .= _emitHTMLZone($zone, $rules); if ($alias) { $_ .= "alias for $alias"; } elsif ($revalias) { $_ .= "alias $revalias"; } else { $_ .= ""; } $_ .= "\n"; $_; } # Emit a zone rule as HTML. Return the string. # Param: Rule hash object ref sub emitHTMLRule { my $rule = shift; $rule->{in} ." ". $rule->{on} ." ". $rule->{at}; } # Read the alias list and create clones with alias names. This # sub should be called AFTER all standard zones have been read in. # Param: File name of alias list # Param: Ref to zone hash # Return: Ref to hash of {alias name -> zone name} sub incorporateAliases { my $aliasFile = shift; my $zones = shift; my $n = 0; my %hash; local *IN; open(IN,$aliasFile) or die "Can't open $aliasFile: $!"; while () { s/\#.*//; # Trim comments next unless (/\S/); # Skip blank lines if (/^\s*(\S+)\s+(\S+)\s*$/) { my ($alias, $original) = ($1, $2); if (exists $zones->{$alias}) { die "Bad alias in $aliasFile: $alias is a standard UNIX zone. " . "Please remove $alias from the alias table.\n"; } if (!exists $zones->{$original}) { die "Bad alias in $aliasFile: $alias maps to the nonexistent " . "zone $original. Please fix this entry in the alias table.\n"; } # Create the alias! $zones->{$alias} = $zones->{$original}; $hash{$alias} = $original; $n++; } else { die "Bad line in alias table $aliasFile: $_\n"; } } print "Incorporated $n aliases from $aliasFile\n"; close(IN); \%hash; } # Format a time zone as a machine-readable line of text. Another # tool will read this line to construct a binary data structure # representing this zone. # Param: Zone name # Param: Zone hash # Param: Ref to hash of all rules # Return: Two array refs, one to the specs, one to the notes sub formatZone { # ($z, $ZONES{$z}, \%RULES) my $name = shift; my $zone = shift; my $rules = shift; my @spec; #my @notes = ( $name ); my @notes; # GMT offset push @notes, ($zone->{gmtoff}=~/^-/?"GMT":"GMT+") . 
$zone->{gmtoff}; push @spec, TZ::ParseOffset($zone->{gmtoff}); #|rawOffset The new SimpleTimeZone's raw GMT offset #|ID The new SimpleTimeZone's time zone ID. #|startMonth The daylight savings starting month. Month is #| 0-based. eg, 0 for January. #|startDay The daylight savings starting #| day-of-week-in-month. See setStartRule() for a #| complete explanation. #|startDayOfWeek The daylight savings starting day-of-week. See #| setStartRule() for a complete explanation. #|startTime The daylight savings starting time, expressed as the #| number of milliseconds after midnight. #|endMonth The daylight savings ending month. Month is #| 0-based. eg, 0 for January. #|endDay The daylight savings ending day-of-week-in-month. #| See setStartRule() for a complete explanation. #|endDayOfWeek The daylight savings ending day-of-week. See #| setStartRule() for a complete explanation. #|endTime The daylight savings ending time, expressed as the #| number of milliseconds after midnight. my $rule = $zone->{rule}; if ($rule ne $TZ::STANDARD) { $rule = $rules->{$rule}; # $rule is now an array ref, with [0] being the onset and # [1] being the cease. formatRule($rule->[0], \@spec, \@notes); # Onset formatRule($rule->[1], \@spec, \@notes); # Cease my @a = parseTime($rule->[0]->{save}); if ($a[1] ne 'w') { die "Strange DST savings value: \"$rule->[0]->{save}\""; } push @notes, $rule->[0]->{save}; push @spec, $a[0]; } (\@spec, \@notes); } # Format a rule and return the string # Param: reference to rule hash # Param: ref to spec array (this is a result param) # Param: ref to annotation array (this is a result param) sub formatRule { my $rule = shift; my $spec = shift; my $notes = shift; push @$notes, $rule->{in}, $rule->{on}, $rule->{at}; push @$spec, parseMonth($rule->{in}); # Month push @$spec, parseDaySpecifier($rule->{on}); # Day push @$spec, parseTime($rule->{at}); # Time } # Format an offset in seconds and return a string of the form # /[+-]\d{1,2}:\d\d(:\d\d)?/. 
# Param: Offset in seconds
# Return: String
sub formatOffset {
    my $offset = shift;

    my $sign = ($offset < 0) ? "-" : "+";
    $offset = -$offset if ($offset < 0);

    # Peel off seconds, then minutes; what remains is whole hours.
    my $sec = $offset % 60;
    my $totalMin = ($offset - $sec) / 60;
    my $min = $totalMin % 60;
    my $hours = ($totalMin - $min) / 60;

    $min = "0$min" if ($min < 10);
    # The seconds field is only emitted when it is nonzero.
    my $secPart = $sec ? ($sec < 10 ? ":0$sec" : ":$sec") : "";

    return $sign . $hours . ":" . $min . $secPart;
}

# Parse a time of the format dd:dds, where s is an optional suffix
# character.  Only the suffixes 's' and 'u' are recognized.
# Dies if the string does not have this format.
# Param: String, with optional suffix
# Return: Array ( minutes, suffix ).  If no suffix, 'w' is used.
sub parseTime {
    my $time = shift;
    if ($time =~ /^(\d{1,2}):(\d\d)([su])?$/) {
        my $minutes = ($1*60) + $2;
        my $suffix = defined $3 ? $3 : 'w';
        return ( $minutes, $suffix );
    }
    die "Cannot parse time \"$time\"";
}

# Given a month string, return an integer from 0 (Jan) to 11 (Dec).
# The string is matched case-insensitively against the three-letter
# names in the global @MONTH.  Dies if no month name matches.
# Param: Str
# Return: Int 0..11.
sub parseMonth {
    my $str = shift;
    for (my $i=0; $i<12; $i++) {
        return $i if ($str =~ /$MONTH[$i]/i);
    }
    die "Can't parse month \"$str\"";
}

# Given a specifier for the day of the month on which a rule triggers,
# return an array of two integers encoding that information.  We use
# the ICU/java.util.SimpleTimeZone encoding scheme using two integers.
# We return the two integers in an array of ( dowim dow ).
# Param: String, such as
#   1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
#   Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
#   Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
#   This is the {on} field of the rule hash.
# Return: Array of two integers, ( dowim dow ).
#   The dow has Sunday = 1 .. Saturday = 7.
sub parseDaySpecifier {
    my $spec = shift;

    #|+If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
    #| day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
    #| of the month).
    #|+If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
    #| the day of week in the month counting backward from the end of the month.
    #| (e.g., (-1, MONDAY) is the last Monday in the month)
    #|+If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
    #| specifies the day of the month, regardless of what day of the week it is.
    #| (e.g., (10, 0) is the tenth day of the month)
    #|+If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
    #| specifies the day of the month counting backward from the end of the
    #| month, regardless of what day of the week it is (e.g., (-2, 0) is the
    #| next-to-last day of the month).
    #|+If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
    #| first specified day of the week on or after the specified day of the month.
    #| (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
    #| [or the 15th itself if the 15th is a Sunday].)
    #|+If dayOfWeek and dayOfWeekInMonth are both negative, they specify the
    #| last specified day of the week on or before the specified day of the month.
    #| (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
    #| [or the 20th itself if the 20th is a Tuesday].)

    # dowim dow
    # >0    >0   day of week in month
    # <0    >0   day of week in month (from end)
    # >0    0    day of month
    # <0    0    day of month (from end; -1 is last dom)
    # >0    <0   first dow on or after dom
    # <0    <0   last dow on or before dom

    # Check for straight DOM
    if ($spec =~ /^\d+$/) {
        return ( $spec, 0 );
    }

    # Anything else must have a dow embedded in it; parse it out.
    # Work on a copy so error messages can show the caller's string.
    my @DOW = ( 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat' );
    my $dow = 0;
    my $rest = $spec;
    for (my $i=0; $i<@DOW; $i++) {
        if ($rest =~ s/$DOW[$i]//) {
            $dow = $i + 1;
            last;
        }
    }
    if ($dow == 0) {
        die "Cannot parse day specifier \"$spec\"";
    }

    # Now we have either >=n, <=n, last, or first.
    my $dowim;
    if ($rest =~ /^last$/) {
        $dowim = -1;
    } elsif ($rest =~ /^first$/) {
        $dowim = 1;
    } elsif ($rest =~ /^>=(\d+)$/) {
        $dowim = $1;
        $dow = -$dow;
    } elsif ($rest =~ /^<=(\d+)$/) {
        $dowim = -$1;
        $dow = -$dow;
    } else {
        die "Cannot parse day specifier \"$spec\"";
    }

    return ( $dowim, $dow );
}

# Confirm that the given ID contains only invariant characters.
# See utypes.h for an explanation.  Dies if any other character
# is present.
# Param: string to be checked
sub assertInvariantChars {
    my $id = shift;
    # '-' is escaped so it is a literal, not a range operator.
    if ($id =~ /[^A-Za-z0-9 "%&'()*+,\-.\/:;<=>?_]/) {
        die "Error: Zone ID \"$id\" contains non-invariant characters\n";
    }
}

# Map ID to equivalency table index.  Return the index of the given ID
# in the equivalency array.  The array contains array refs.  Each ref
# points to an array of strings.
# Param: ID to find
# Param: Ref to equiv array (ref to array of refs to arrays of IDs)
# Return: Index into array where ID is found, or -1 if not found
# NOTE: This function can be eliminated by generating a reverse
# mapping hash when we create the equivalency table.
sub equivIndexOf {
    my $id = shift;
    my $equiv = shift;
    for (my $i=0; $i < scalar @{$equiv}; ++$i) {
        foreach my $member (@{$equiv->[$i]}) {
            return $i if ($member eq $id);
        }
    }
    return -1;
}

__END__