f5f944dee8
X-SVN-Rev: 357
761 lines
28 KiB
Perl
Executable File
761 lines
28 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
######################################################################
|
|
# Copyright (C) 1999, International Business Machines
|
|
# Corporation and others. All Rights Reserved.
|
|
######################################################################
|
|
# See: ftp://elsie.nci.nih.gov/pub/tzdata<year>
|
|
# where <year> is "1999b" or a similar string.
|
|
######################################################################
|
|
# This script takes time zone data in elsie.nci.nih.gov format and
|
|
# parses it into a form usable by ICU. The raw data contains more
|
|
# data than we need, since it contains historical zone data. We
|
|
# parse out the current zones and create a listing of current zones.
|
|
# Author: Alan Liu
|
|
######################################################################
|
|
# This script reads an alias table, $TZ_ALIAS, and creates clones of
|
|
# standard UNIX zones with alias names.
|
|
######################################################################
|
|
# To update the zone data, download the latest data from the NIH URL
|
|
# listed above into a directory. Run this script with the directory
|
|
# name as an argument. THE DIRECTORY NAME MUST END IN tzdataYYYYR.
|
|
######################################################################
|
|
# OUTPUT FILE FORMAT (filename $OUT)
|
|
#
|
|
# As a matter of policy, this script wants to do as much of
|
|
# the parsing, data processing, and error checking as possible,
|
|
# leaving the C++ program that parses this file to just do the binary
|
|
# translation step.
|
|
#
|
|
# - The file is line based, with one record per line.
|
|
# - Lines may be followed by a comment; the parser must ignore
|
|
# anything of the form /\s+#.*$/ in each line.
|
|
# |3065,14400 # Asia/Dubai GMT+4:00
|
|
# - The file contains a header and 5 lists.
|
|
# - The header contains the version of the unix data, the total
|
|
# zone count, the maximum number of zones sharing the same value
|
|
# of gmtOffset, the length of the name table in bytes, and
|
|
# the length of the longest name (not including the terminating
|
|
# zero byte).
|
|
# | 1999 # (tzdata1999j) version of Olson zone
|
|
# | 10 # data from ftp://elsie.nci.nih.gov
|
|
# | 387 # total zone count
|
|
# | 40 # max count of zones with same gmtOffset
|
|
# | 25 # max name length not incl final zero
|
|
# | 5906 # length of name table in bytes
|
|
# - Lists start with a count of the records to follow, the records
|
|
# themselves (one per line), and a single line with the keyword
|
|
# 'end'.
|
|
# - The first list is the list of standard zones:
|
|
# | 208 # count of standard zones to follow
|
|
# | 0,0 # Africa/Abidjan GMT+0:00
|
|
# | 28,10800 # Africa/Addis_Ababa GMT+3:00
|
|
# ...
|
|
# | end
|
|
# Each standard zone record contains two integers. The first
|
|
# is a byte offset into the name table for the name of the zone.
|
|
# The second integer is the GMT offset in SECONDS for this zone.
|
|
# - The second list is the list of DST zones:
|
|
# | 179 # count of dst zones to follow
|
|
# | 15,0,8,1,0,0,w,11,31,0,0,w,20 # Africa/Accra GMT+0:00 Sep 1...
|
|
# | 184,7200,3,-1,6,0,s,8,-1,5,1380,s,60 # Africa/Cairo GMT+2:0...
|
|
# ...
|
|
# | end
|
|
# Each record starts with the same two integers as a standard
|
|
# zone record. Following this are data for the onset rule and
|
|
# the cease rule. Each rule is described by the following integers:
|
|
# month (JAN = 0)
|
|
# dowim } These two values are in SimpleTimeZone encoded
|
|
# dow } format for DOM, DOWIM, DOW>=DOM, or DOW<=DOM.
|
|
# time MINUTES
|
|
# time mode ('w', 's', 'u')
|
|
# The last integer in the record is the DST savings in MINUTES,
|
|
# typically 60.
|
|
# - The third list is the name table:
|
|
# | 387 # count of names to follow
|
|
# | Africa/Abidjan
|
|
# | Africa/Accra
|
|
# ...
|
|
# | end
|
|
# Each name is terminated by a newline (like all lines in the file).
|
|
# The offsets in the first two lists refer to this table.
|
|
# - The fourth list is an index list by name. The index entries
|
|
# themselves are of the form /[sd]\d+/, where the first character
|
|
# indicates standard or DST, and the number that follows indexes
|
|
# into the correpsonding array.
|
|
# | 416 # count of name index table entries to follow
|
|
# | d0 # ACT
|
|
# | d1 # AET
|
|
# | d2 # AGT
|
|
# | d3 # ART
|
|
# | d4 # AST
|
|
# | s0 # Africa/Abidjan
|
|
# ...
|
|
# | end
|
|
# - The fifth list is an index by GMT offset. Each line lists the
|
|
# zones with the same offset. The first number on the line
|
|
# is the GMT offset in seconds. The second number is the default
|
|
# zone number in the following list, taken from tz.default. The
|
|
# third number is the count
|
|
# of zone numbers to follow. Each zone number is an integer from
|
|
# 0..n-1, where n is the total number of zones. The zone numbers
|
|
# refer to the zone list in alphabetical order.
|
|
# | 39 # index by offset entries to follow
|
|
# | -43200,280,1,280 # -12:00 d=Etc/GMT+12 Etc/GMT+12
|
|
# | -39600,374,6,279,366,374,394,396,399 # -11:00 d=Pacific/Apia Etc/GMT+11 MIT Pacific/Apia Pacific/Midway Pacific/Niue Pacific/Pago_Pago
|
|
# ...
|
|
# | end
|
|
######################################################################
|
|
# As of 1999j, here are the various possible values taken by the
|
|
# rule fields. See code below that generates this data.
|
|
#
|
|
# at: 0:00, 0:00s, 1:00, 1:00s, 1:00u, 23:00s, 2:00, 2:00s, 2:30, 2:45s,
|
|
# 3:00, 3:00s
|
|
# in: Apr, Dec, Feb, Jan, Jun, Mar, May, Nov, Oct, Sep
|
|
# letter: -, D, GHST, GMT, HS, S, SLST
|
|
# on: 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
|
|
# Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
|
|
# Sun>=16, Sun>=23, S un>=8, Sun>=9, lastFri, lastSun, lastThu
|
|
# save: 0, 0:20, 0:30, 1:00
|
|
# type: -
|
|
|
|
require 5; # Minimum version of perl needed
|
|
use strict;
|
|
use Getopt::Long;
|
|
use vars qw(@FILES $YEAR $DATA_DIR $OUT $SEP @MONTH
|
|
$VERSION_YEAR $VERSION_SUFFIX $RAW_VERSION
|
|
$TZ_ALIAS $TZ_DEFAULT);
|
|
require 'dumpvar.pl';
|
|
use tzparse;
|
|
use tzutil;
|
|
|
|
# File names
|
|
$OUT = 'tz.txt';
|
|
$TZ_ALIAS = 'tz.alias';
|
|
$TZ_DEFAULT = 'tz.default';
|
|
|
|
# Separator between fields in the output file
|
|
$SEP = ','; # Don't use ':'!
|
|
|
|
@FILES = qw(africa
|
|
antarctica
|
|
asia
|
|
australasia
|
|
backward
|
|
etcetera
|
|
europe
|
|
factory
|
|
northamerica
|
|
pacificnew
|
|
solar87
|
|
solar88
|
|
solar89
|
|
southamerica);
|
|
|
|
# We get the current year from the system here. Later
|
|
# we double check this against the zone data version.
|
|
$YEAR = 1900+@{[localtime]}[5]; # Get the current year
|
|
|
|
$DATA_DIR = shift;
|
|
|
|
if (!$DATA_DIR || ! -d $DATA_DIR) {
|
|
print STDERR "No data directory or invalid directory specified\n\n";
|
|
usage();
|
|
}
|
|
|
|
if ($DATA_DIR =~ /(tzdata(\d{4})(\w?))/) {
|
|
$RAW_VERSION = $1;
|
|
$VERSION_YEAR = $2;
|
|
$VERSION_SUFFIX = $3;
|
|
if ($YEAR != $VERSION_YEAR) {
|
|
print STDERR "WARNING: You appear to be building $VERSION_YEAR data. Don't you want to use current $YEAR data?\n";
|
|
usage(); # Add an override option for this check, if needed
|
|
}
|
|
$VERSION_SUFFIX =~ tr/a-z/A-Z/;
|
|
if ($VERSION_SUFFIX =~ /[A-Z]/) {
|
|
$VERSION_SUFFIX = ord($VERSION_SUFFIX) - ord('A') + 1;
|
|
} else {
|
|
if ($VERSION_SUFFIX) {
|
|
print STDERR "Warning: Ignoring version suffix '$VERSION_SUFFIX' for \"$DATA_DIR\"\n";
|
|
}
|
|
$VERSION_SUFFIX = 0;
|
|
}
|
|
print "Time zone version $RAW_VERSION = $VERSION_YEAR($VERSION_SUFFIX)\n";
|
|
} else {
|
|
print STDERR "The directory specified doesn't contain \"tzdataNNNNR\", so I can't tell what version the data is. Please rename the directory and try again.\n";
|
|
usage();
|
|
}
|
|
|
|
|
|
@MONTH = qw(jan feb mar apr may jun
|
|
jul aug sep oct nov dec);
|
|
|
|
main();
|
|
|
|
sub usage {
|
|
print STDERR "Usage: $0 data_dir\n\n";
|
|
print STDERR "data_dir contains the unpacked files from\n";
|
|
print STDERR "ftp://elsie.nci.nih.gov/pub/tzdataYYYYR,\n";
|
|
print STDERR "where YYYY is the year and R is the revision\n";
|
|
print STDERR "letter.\n";
|
|
print STDERR "\n";
|
|
print STDERR "Files that are expected to be present are:\n";
|
|
print STDERR join(", ", @FILES), "\n";
|
|
exit 1;
|
|
}
|
|
|
|
sub main {
|
|
my (%ZONES, %RULES, @EQUIV);
|
|
|
|
print "Reading";
|
|
foreach (@FILES) {
|
|
if (! -e "$DATA_DIR/$_") {
|
|
print STDERR "\nMissing file $DATA_DIR/$_\n\n";
|
|
usage();
|
|
}
|
|
print ".";
|
|
TZ::ParseFile("$DATA_DIR/$_", \%ZONES, \%RULES, $YEAR);
|
|
}
|
|
print "done\n";
|
|
|
|
TZ::Postprocess(\%ZONES, \%RULES);
|
|
|
|
incorporateAliases($TZ_ALIAS, \%ZONES);
|
|
|
|
print
|
|
"Read ", scalar keys %ZONES, " current zones and ",
|
|
scalar keys %RULES, " rules for $YEAR\n";
|
|
|
|
# Make sure we have a zone named GMT from either the
|
|
# UNIX data or the alias table. If not, add one.
|
|
if (!exists $ZONES{GMT}) {
|
|
print "Adding GMT zone\n";
|
|
my %GMT = ('format' => 'GMT',
|
|
'gmtoff' => '0:00',
|
|
'rule' => $TZ::STANDARD,
|
|
'until' => '');
|
|
$ZONES{GMT} = \%GMT;
|
|
}
|
|
|
|
# Write out the zone data in a compact readable format.
|
|
|
|
# Create a name table from the zone names. The format of
|
|
# the name table is:
|
|
#
|
|
# The names are listed in lexical order, and each name
|
|
# is assigned an offset. The first name's offset is 0.
|
|
# The offset of name i+1 is the offset of name i + the
|
|
# length of name i + 1 (for the zero byte).
|
|
#
|
|
# Store the offsets in a hash %NAME_OFFSET. Store the
|
|
# names in a big scalar, $NAME_LIST, with "\n" between
|
|
# each name and after the last.
|
|
#
|
|
# Store the length of the entire name table in $NAME_SIZE.
|
|
#
|
|
# Also, count the number of standard and DST zones.
|
|
my $offset = 0;
|
|
my $NAME_LIST = '';
|
|
my %NAME_OFFSET;
|
|
my $STD_COUNT = 0; # Count of standard zones
|
|
my $DST_COUNT = 0; # Count of DST zones
|
|
my $maxNameLen = 0;
|
|
foreach my $z (sort keys %ZONES) {
|
|
# Make sure zone IDs only contain invariant chars
|
|
assertInvariantChars($z);
|
|
|
|
my $len = length($z);
|
|
$NAME_OFFSET{$z} = $offset;
|
|
$offset += $len + 1;
|
|
$NAME_LIST .= "$z\n";
|
|
$maxNameLen = $len if ($len > $maxNameLen);
|
|
if ($ZONES{$z}->{rule} eq $TZ::STANDARD) {
|
|
$STD_COUNT++;
|
|
} else {
|
|
$DST_COUNT++;
|
|
}
|
|
}
|
|
my $NAME_SIZE = $offset;
|
|
|
|
# Find the maximum number of zones with the same value of
|
|
# gmtOffset.
|
|
my %perOffset; # Hash of offset -> count
|
|
foreach my $z (keys %ZONES) {
|
|
# Use parseOffset to normalize values - probably unnecessary
|
|
++$perOffset{parseOffset($ZONES{$z}->{gmtoff})};
|
|
}
|
|
my $maxPerOffset = 0;
|
|
foreach (values %perOffset) {
|
|
$maxPerOffset = $_ if ($_ > $maxPerOffset);
|
|
}
|
|
|
|
open(OUT,">$OUT") or die "Can't open $OUT for writing: $!";
|
|
|
|
############################################################
|
|
# EMIT HEADER
|
|
############################################################
|
|
# Zone data version
|
|
print OUT $VERSION_YEAR, " # ($RAW_VERSION) version of Olson zone\n";
|
|
print OUT $VERSION_SUFFIX, " # data from ftp://elsie.nci.nih.gov\n";
|
|
print OUT scalar keys %ZONES, " # total zone count\n";
|
|
print OUT $maxPerOffset, " # max count of zones with same gmtOffset\n";
|
|
print OUT $maxNameLen, " # max name length not incl final zero\n";
|
|
print OUT $NAME_SIZE, " # length of name table in bytes\n";
|
|
|
|
############################################################
|
|
# EMIT ZONE TABLES
|
|
############################################################
|
|
# Output first the standard zones, then the dst zones.
|
|
# Precede each list with the count of zones to follow,
|
|
# and follow it with the keyword 'end'.
|
|
for my $type (qw(standard dst)) {
|
|
print OUT ($type eq 'standard'
|
|
? $STD_COUNT : $DST_COUNT), " # count of $type zones to follow\n";
|
|
foreach my $z (sort keys %ZONES) {
|
|
my $isStd = ($ZONES{$z}->{rule} eq $TZ::STANDARD);
|
|
next if ($isStd ne ($type eq 'standard'));
|
|
print OUT $NAME_OFFSET{$z}, ",";
|
|
print OUT formatZone($z, $ZONES{$z}, \%RULES), "\n";
|
|
}
|
|
print OUT "end\n"; # 'end' keyword for error checking
|
|
}
|
|
|
|
############################################################
|
|
# EMIT NAME TABLE
|
|
############################################################
|
|
# Output the name table, followed by 'end' keyword
|
|
print OUT scalar keys %ZONES, " # count of names to follow\n";
|
|
print OUT $NAME_LIST, "end\n";
|
|
|
|
############################################################
|
|
# EMIT INDEX BY NAME
|
|
############################################################
|
|
# Output the name index table. Since we don't know structure
|
|
# sizes, we output the index number of each zone. For example,
|
|
# "s0" is the first standard zone, "s1" is the second, etc.
|
|
# Likewise, "d0" is the first DST zone, "d1" is the second, etc.
|
|
|
|
# First compute index IDs, as described above.
|
|
my %indexID;
|
|
my $s = 0;
|
|
my $d = 0;
|
|
foreach my $z (sort keys %ZONES) {
|
|
if ($ZONES{$z}->{rule} eq $TZ::STANDARD) {
|
|
$indexID{$z} = "s$s";
|
|
$s++;
|
|
} else {
|
|
$indexID{$z} = "d$d";
|
|
$d++;
|
|
}
|
|
}
|
|
|
|
# Now emit table sorted by name
|
|
print OUT scalar keys %ZONES, " # count of name index table entries to follow\n";
|
|
foreach my $z (sort keys %ZONES) {
|
|
print OUT $indexID{$z}, " # $z\n";
|
|
}
|
|
print OUT "end\n";
|
|
|
|
############################################################
|
|
# EMIT INDEX BY GMT OFFSET
|
|
############################################################
|
|
# Create a hash mapping zone name -> integer, from 0..n-1.
|
|
# Create an array mapping zone number -> name.
|
|
my %zoneNumber;
|
|
my @zoneName;
|
|
my $i = 0;
|
|
foreach (sort keys %ZONES) {
|
|
$zoneName[$i] = $_;
|
|
$zoneNumber{$_} = $i++;
|
|
}
|
|
|
|
# Create a hash by index. The hash has offset integers as keys
|
|
# and arrays of index numbers as values.
|
|
my %offsetMap;
|
|
foreach (sort keys %ZONES) {
|
|
my $offset = parseOffset($ZONES{$_}->{gmtoff});
|
|
push @{$offsetMap{$offset}}, $zoneNumber{$_};
|
|
}
|
|
|
|
# Select defaults. We do this by reading the file $TZ_DEFAULT.
|
|
# If there are multiple errors, we want to report them all,
|
|
# so we set a flag and die at the end if there are problems.
|
|
my %defaults; # key=offset integer, value=zone #
|
|
my $ok = 1;
|
|
open(IN, $TZ_DEFAULT) or die "Can't open $TZ_DEFAULT: $!";
|
|
while (<IN>) {
|
|
my $raw = $_;
|
|
s/\#.*//; # Trim comments
|
|
next unless (/\S/); # Skip blank lines
|
|
if (/^\s*(\S+)\s*$/) {
|
|
my $z = $1;
|
|
if (! exists $ZONES{$z}) {
|
|
print "Error: Nonexistent zone $z listed in $TZ_DEFAULT line: $raw";
|
|
$ok = 0;
|
|
next;
|
|
}
|
|
my $offset = parseOffset($ZONES{$z}->{gmtoff});
|
|
if (exists $defaults{$offset}) {
|
|
print
|
|
"Error: Offset ", formatOffset($offset), " has both ",
|
|
$zoneName[$defaults{$offset}], " and ", $z,
|
|
" specified as defaults\n";
|
|
$ok = 0;
|
|
next;
|
|
}
|
|
$defaults{$offset} = $zoneNumber{$z};
|
|
} else {
|
|
print "Error: Can't parse line in $TZ_DEFAULT: $raw";
|
|
$ok = 0;
|
|
}
|
|
}
|
|
close(IN);
|
|
die "Error: Aborting due to errors in $TZ_DEFAULT\n" unless ($ok);
|
|
print "Incorporated ", scalar keys %defaults, " defaults from $TZ_DEFAULT\n";
|
|
|
|
# Emit it
|
|
my $missing = 0;
|
|
print OUT scalar keys %offsetMap, " # index by offset entries to follow\n";
|
|
foreach (sort {$a <=> $b} keys %offsetMap) {
|
|
my $aref = $offsetMap{$_};
|
|
my $def = -1;
|
|
if (exists $defaults{$_}) {
|
|
$def = $defaults{$_};
|
|
} else {
|
|
# If there is an offset for which we have no listed default
|
|
# in $TZ_DEFAULT, we try to figure out a reasonable default
|
|
# ourselves. We ignore any zone named Etc/ because that's not
|
|
# a "real" zone; it's just one listed as a POSIX convience.
|
|
# We take the first (alphabetically) zone of what's left,
|
|
# and if there are more than one of those, we emit a warning.
|
|
|
|
my $ambiguous = 0;
|
|
# Ignore zones named Etc/ and take the first one we otherwise see;
|
|
# if there is more than one of those, emit a warning.
|
|
foreach (sort map($zoneName[$_], @{$aref})) {
|
|
next if (m|^Etc/|);
|
|
if ($def < 0) {
|
|
$def = $zoneNumber{$_}
|
|
} else {
|
|
$ambiguous = 1;
|
|
}
|
|
}
|
|
$def = $aref->[0] if ($def < 0);
|
|
if ($ambiguous) {
|
|
$missing = 1;
|
|
print
|
|
"Warning: No default for GMT", formatOffset($_),
|
|
", using ", $zoneName[$def], "\n";
|
|
}
|
|
}
|
|
# Emit the actual line, including a descriptive comment
|
|
print OUT
|
|
$_, ",", $def, ",",
|
|
scalar @{$aref}, ",", join(",", @{$aref}),
|
|
" # ", formatOffset($_), " d=", $zoneName[$def], " ",
|
|
join(" ", map($zoneName[$_], @{$aref})), "\n";
|
|
}
|
|
print "Defaults may be specified in $TZ_DEFAULT\n" if ($missing);
|
|
|
|
print OUT "end\n";
|
|
|
|
############################################################
|
|
# END
|
|
############################################################
|
|
close(OUT);
|
|
print "$OUT written.\n";
|
|
|
|
|
|
if (0) {
|
|
TZ::FormZoneEquivalencyGroups(\%ZONES, \%RULES, \@EQUIV);
|
|
print
|
|
"Equivalency groups (including unique zones): ",
|
|
scalar @EQUIV, "\n";
|
|
}
|
|
|
|
#::dumpValue($ZONES{"America/Los_Angeles"});
|
|
#::dumpValue($RULES{"US"});
|
|
#::dumpValue($RULES{"Tonga"});
|
|
|
|
# Find all the different values of rule fields:
|
|
# in, at, on, save, type, letter
|
|
if (0) {
|
|
my %RULEVALS;
|
|
foreach my $ruleName (keys %RULES) {
|
|
for (my $i=0; $i<2; ++$i) {
|
|
foreach my $key (qw(in on at save type letter)) {
|
|
if (@{$RULES{$ruleName}} < 2) {
|
|
print $ruleName, ":";
|
|
::dumpValue($RULES{$ruleName});
|
|
}
|
|
my $x = $RULES{$ruleName}->[$i]->{$key};
|
|
$RULEVALS{$key}->{$x} = 1;
|
|
}
|
|
}
|
|
}
|
|
foreach my $key (sort keys %RULEVALS) {
|
|
print "$key: ", join(", ", sort keys %{$RULEVALS{$key}}), "\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
# Read the alias list and create clones with alias names. This
|
|
# sub should be called AFTER all standard zones have been read in.
|
|
# Param: File name of alias list
|
|
# Param: Ref to zone hash
|
|
sub incorporateAliases {
|
|
my $aliasFile = shift;
|
|
my $zones = shift;
|
|
my $n = 0;
|
|
local *IN;
|
|
open(IN,$aliasFile) or die "Can't open $aliasFile: $!";
|
|
while (<IN>) {
|
|
s/\#.*//; # Trim comments
|
|
next unless (/\S/); # Skip blank lines
|
|
if (/^\s*(\S+)\s+(\S+)\s*$/) {
|
|
my ($alias, $original) = ($1, $2);
|
|
if (exists $zones->{$alias}) {
|
|
die "Bad alias in $aliasFile: $alias is a standard UNIX zone. " .
|
|
"Please remove $alias from the alias table.\n";
|
|
}
|
|
if (!exists $zones->{$original}) {
|
|
die "Bad alias in $aliasFile: $alias maps to the nonexistent " .
|
|
"zone $original. Please fix this entry in the alias table.\n";
|
|
}
|
|
# Create the alias!
|
|
$zones->{$alias} = $zones->{$original};
|
|
$n++;
|
|
} else {
|
|
die "Bad line in alias table $aliasFile: $_\n";
|
|
}
|
|
}
|
|
print "Incorporated $n aliases from $aliasFile\n";
|
|
close(IN);
|
|
}
|
|
|
|
# Format a time zone as a machine-readable line of text. Another
|
|
# tool will read this line to construct a binary data structure
|
|
# representing this zone.
|
|
# Param: Zone name
|
|
# Param: Zone hash
|
|
# Param: Ref to hash of all rules
|
|
# Return: One line description of this zone.
|
|
sub formatZone { # ($z, $ZONES{$z}, \%RULES)
|
|
my $name = shift;
|
|
my $zone = shift;
|
|
my $rules = shift;
|
|
|
|
my @spec;
|
|
my @notes = ( $name );
|
|
|
|
# GMT offset
|
|
push @notes, ($zone->{gmtoff}=~/^-/?"GMT":"GMT+") . $zone->{gmtoff};
|
|
push @spec, parseOffset($zone->{gmtoff});
|
|
|
|
#|rawOffset The new SimpleTimeZone's raw GMT offset
|
|
#|ID The new SimpleTimeZone's time zone ID.
|
|
#|startMonth The daylight savings starting month. Month is
|
|
#| 0-based. eg, 0 for January.
|
|
#|startDay The daylight savings starting
|
|
#| day-of-week-in-month. See setStartRule() for a
|
|
#| complete explanation.
|
|
#|startDayOfWeek The daylight savings starting day-of-week. See
|
|
#| setStartRule() for a complete explanation.
|
|
#|startTime The daylight savings starting time, expressed as the
|
|
#| number of milliseconds after midnight.
|
|
#|endMonth The daylight savings ending month. Month is
|
|
#| 0-based. eg, 0 for January.
|
|
#|endDay The daylight savings ending day-of-week-in-month.
|
|
#| See setStartRule() for a complete explanation.
|
|
#|endDayOfWeek The daylight savings ending day-of-week. See
|
|
#| setStartRule() for a complete explanation.
|
|
#|endTime The daylight savings ending time, expressed as the
|
|
#| number of milliseconds after midnight.
|
|
|
|
my $rule = $zone->{rule};
|
|
if ($rule ne $TZ::STANDARD) {
|
|
$rule = $rules->{$rule};
|
|
# $rule is now an array ref, with [0] being the onset and
|
|
# [1] being the cease.
|
|
|
|
formatRule($rule->[0], \@spec, \@notes); # Onset
|
|
formatRule($rule->[1], \@spec, \@notes); # Cease
|
|
|
|
my @a = parseTime($rule->[0]->{save});
|
|
if ($a[1] ne 'w') {
|
|
die "Strange DST savings value: \"$rule->[0]->{save}\"";
|
|
}
|
|
push @notes, $rule->[0]->{save};
|
|
push @spec, $a[0];
|
|
}
|
|
|
|
join($SEP, @spec) . " # " . join(' ', @notes);
|
|
}
|
|
|
|
# Format a rule and return the string
|
|
# Param: reference to rule hash
|
|
# Param: ref to spec array (this is a result param)
|
|
# Param: ref to annotation array (this is a result param)
|
|
sub formatRule {
|
|
my $rule = shift;
|
|
my $spec = shift;
|
|
my $notes = shift;
|
|
push @$notes, $rule->{in}, $rule->{on}, $rule->{at};
|
|
push @$spec, parseMonth($rule->{in}); # Month
|
|
push @$spec, parseDaySpecifier($rule->{on}); # Day
|
|
push @$spec, parseTime($rule->{at}); # Time
|
|
}
|
|
|
|
# Parse an offset of the form d, d:dd, or d:dd:dd, or any of the above
|
|
# preceded by a '-'. Return the total number of seconds represented.
|
|
# Param: String
|
|
# Return: Integer number of seconds
|
|
sub parseOffset {
|
|
local $_ = shift;
|
|
if (/^(-)?(\d{1,2})(:(\d\d))?(:(\d\d))?$/) {
|
|
# 1 2 4 6
|
|
my $a = (($2 * 60) + (defined $4?$4:0)) * 60 + (defined $6?$6:0);
|
|
$a = -$a if (defined $1 && $1 eq '-');
|
|
return $a;
|
|
} else {
|
|
die "Cannot parse offset \"$_\"";
|
|
}
|
|
}
|
|
|
|
# Format an offset in seconds and return a string of the form
|
|
# /[+-]\d{1,2}:\d\d(:\d\d)?/.
|
|
# Param: Offset in seconds
|
|
# Return: String
|
|
sub formatOffset {
|
|
local $_ = shift;
|
|
my $result = $_<0 ? "-":"+";
|
|
$_ = -$_ if ($_ < 0);
|
|
my $sec = $_ % 60; $_ = ($_ - $sec) / 60;
|
|
my $min = $_ % 60; $_ = ($_ - $min) / 60;
|
|
$min = "0$min" if ($min < 10);
|
|
$sec = $sec ? ($sec < 10 ? ":0$sec" : ":$sec") : "";
|
|
$result . $_ . ":" . $min . $sec;
|
|
}
|
|
|
|
# Parse a time of the format dd:dds, where s is a suffix character.
|
|
# Return the time, in minutes, and the suffix, in an array.
|
|
# Only the suffixes 's' and 'u' are recognized.
|
|
# Param: String, with optional suffix
|
|
# Return: Array ( seconds, suffix ). If no suffix, 'w' is used.
|
|
sub parseTime {
|
|
local $_ = shift;
|
|
if (/^(\d{1,2}):(\d\d)([su])?$/) {
|
|
my $a = ($1*60) + $2;
|
|
my $s = defined $3?$3:'w';
|
|
return ( $a, $s );
|
|
} else {
|
|
die "Cannot parse time \"$_\"";
|
|
}
|
|
}
|
|
|
|
# Given a month string, return an integer from 0 (Jan) to 11 (Dec).
|
|
# Param: Str
|
|
# Return: Int 0..11.
|
|
sub parseMonth {
|
|
local $_ = shift;
|
|
for (my $i=0; $i<12; $i++) {
|
|
return $i if (/$MONTH[$i]/i);
|
|
}
|
|
die "Can't parse month \"$_\"";
|
|
}
|
|
|
|
# Given a specifier for the day of the month on which a rule triggers,
|
|
# return an array of two integers encoding that information. We use
|
|
# the ICU/java.util.SimpleTimeZone encoding scheme using two integers.
|
|
# We return the two integers in an array of ( dowim dow ).
|
|
# Param: String, such as
|
|
# 1, 12, 15, 18, 2, 20, 21, 22, 23, 25, 28, 3, 30, 31, 4, 7, Fri>=1,
|
|
# Fri>=15, Sat>=1, Sat>=15, Sun<=14, Sun>=1, Sun>=10, Sun>=11, Sun>=15,
|
|
# Sun>=16, Sun>=23, Sun>=8, Sun>=9, lastFri, lastSun, lastThu
|
|
# This is the {on} field of the rule hash.
|
|
# Return: Array of two integers, ( dowim dow ).
|
|
# The dow has Sunday = 1 .. Saturday = 7.
|
|
sub parseDaySpecifier {
|
|
local $_ = shift;
|
|
|
|
#|+If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
|
|
#| day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
|
|
#| of the month).
|
|
#|+If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
|
|
#| the day of week in the month counting backward from the end of the month.
|
|
#| (e.g., (-1, MONDAY) is the last Monday in the month)
|
|
#|+If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
|
|
#| specifies the day of the month, regardless of what day of the week it is.
|
|
#| (e.g., (10, 0) is the tenth day of the month)
|
|
#|+If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
|
|
#| specifies the day of the month counting backward from the end of the
|
|
#| month, regardless of what day of the week it is (e.g., (-2, 0) is the
|
|
#| next-to-last day of the month).
|
|
#|+If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
|
|
#| first specified day of the week on or after the specfied day of the month.
|
|
#| (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
|
|
#| [or the 15th itself if the 15th is a Sunday].)
|
|
#|+If dayOfWeek and dayOfWeekInMonth are both negative, they specify the
|
|
#| last specified day of the week on or before the specified day of the month.
|
|
#| (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
|
|
#| [or the 20th itself if the 20th is a Tuesday].)
|
|
|
|
# dowim dow
|
|
# >0 >0 day of week in month
|
|
# <0 >0 day of week in month (from end)
|
|
# >0 0 day of month
|
|
# <0 0 day of month (from end; -1 is last dom)
|
|
# >0 <0 first dow on or after dom
|
|
# <0 <0 last dow on or before dom
|
|
|
|
my $dowim;
|
|
my $dow = 0;
|
|
|
|
# Check for straight DOM
|
|
if (/^\d+$/) {
|
|
$dowim = $_;
|
|
$dow = 0;
|
|
return ( $dowim, $dow );
|
|
}
|
|
|
|
# Anything else must have a dow embedded in it; parse it out
|
|
my @DOW = ( 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat' );
|
|
for (my $i=0; $i<@DOW; $i++) {
|
|
if (s/$DOW[$i]//) {
|
|
$dow = $i + 1;
|
|
last;
|
|
}
|
|
}
|
|
if ($dow == 0) {
|
|
die "Cannot parse day specifier \"$_\"";
|
|
}
|
|
|
|
# Now we have either >=n, <=n, last, or first.
|
|
if (/^last$/) {
|
|
$dowim = -1;
|
|
} elsif (/^first$/) {
|
|
$dowim = 1;
|
|
} elsif (/^>=(\d+)$/) {
|
|
$dowim = $1;
|
|
$dow = -$dow;
|
|
} elsif (/^<=(\d+)$/) {
|
|
$dowim = -$1;
|
|
$dow = -$dow;
|
|
} else {
|
|
die "Cannot parse day specifier \"$_\"";
|
|
}
|
|
|
|
( $dowim, $dow );
|
|
}
|
|
|
|
# Confirm that the given ID contains only invariant characters.
|
|
# See utypes.h for an explanation.
|
|
# Param: string to be checked
|
|
sub assertInvariantChars {
|
|
local $_ = shift;
|
|
if (/[^A-Za-z0-9 \"%&\'()*+,-.\/:;<=>?_]/) {
|
|
die "Error: Zone ID \"$_\" contains non-invariant characters\n";
|
|
}
|
|
}
|
|
|
|
__END__
|