scuffed-code/icu4c/source/tools/gentz/tzutil.pm

######################################################################
# Copyright (C) 1999, International Business Machines
# Corporation and others.  All Rights Reserved.
######################################################################
# See: ftp://elsie.nci.nih.gov/pub/tzdata<year>
# where <year> is "1999b" or a similar string.
######################################################################
# This package contains utility functions for time zone data.
# Author: Alan Liu

######################################################################
# Zones - A time zone object is a hash with the following keys:
# {gmtoff} The offset from GMT, e.g. "-5:00"
# {rule}   The name of the rule, e.g. "-", "Canada", "EU", "US"
# {format} The local abbreviation, e.g. "E%sT"
# {until}  Data is good until this year, e.g., "2000".  Often blank.

# These correspond to file entries:
#|# Zone NAME           GMTOFF  RULES   FORMAT  [UNTIL]
#|Zone America/Montreal -4:54:16 -      LMT     1884
#|                      -5:00   Mont    E%sT

# Optionally, a zone may also have the key:
# {link}   An old name for this zone, e.g. "HST" (for Pacific/Honolulu)
# Links come from the file entries:
#|# Link    LINK-FROM               LINK-TO
#|Link      America/New_York        EST5EDT
#|Link      America/Chicago         CST6CDT

# The name of the zone itself is not kept in the zone object.
# Instead, zones are kept in a big hash.  The keys are the names; the
# values are references to the zone objects.  The big hash of all
# zones is referred to in all caps: %ZONES ($ZONES if it's a
# reference).

# Example: $ZONES->{"America/Los_Angeles"} =
#   'format' => 'P%sT'
#   'gmtoff' => '-8:00'
#   'link' => 'US/Pacific-New'
#   'rule' => 'US'
#   'until' => ''

######################################################################
# Rules - A time zone rule is an array with the following elements:
# [0] Onset rule
# [1] Cease rule
# [2] Encoded string

# The onset rule and cease rule have the same format.  They are each
# references to a hash with keys:
# {from}   Start year
# {to}     End year, or "only" or "max"
# {type}   Unknown, usually "-"
# {in}     Month, 3 letters
# {on}     Day specifier, e.g. "lastSun", "Sun>=1", "23"
# {at}     Time, e.g. "2:00", "1:00u"
# {save}   Amount of savings, for the onset; 0 for the cease
# {letter} Guess: the letter that goes into %s in the zone {format}

# These correspond to the file entries thus:
#|# Rule NAME FROM TO   TYPE IN  ON      AT   SAVE LETTER/S
#|Rule   US   1942 only -    Feb 9       2:00 1:00 W # War
#|Rule   US   1945 only -    Sep 30      2:00 0    S
#|Rule   US   1967 max  -    Oct lastSun 2:00 0    S
#|Rule   US   1967 1973 -    Apr lastSun 2:00 1:00 D
#|Rule   US   1974 only -    Jan 6       2:00 1:00 D
#|Rule   US   1975 only -    Feb 23      2:00 1:00 D
#|Rule   US   1976 1986 -    Apr lastSun 2:00 1:00 D
#|Rule   US   1987 max  -    Apr Sun>=1  2:00 1:00 D

# Entry [2], the encoded string, is used to see if two rules are the
# same.  It consists of "[0]->{in},[0]->{on},[0]->{at},[0]->{save};
# [1]->{in},[1]->{on},[1]->{at}".  Note that the separator between
# values is a comma, between onset and cease is a semicolon.  Also
# note that the cease {save} is not used as this is always 0.  The
# whole string is forced to lowercase.

# Rules don't contain their own name.  Like zones, rules are kept in a
# big hash; the keys are the names, the values the references to the
# arrays.  This hash of all rules is referred to in all caps, %RULES
# or for a reference, $RULES.

# Example: $RULES->{"US"} =
#   0  HASH(0x8fa03c)
#      'at' => '2:00'
#      'from' => 1987
#      'in' => 'Apr'
#      'letter' => 'D'
#      'on' => 'Sun>=1'
#      'save' => '1:00'
#      'to' => 'max'
#      'type' => '-'
#   1  HASH(0x8f9fc4)
#      'at' => '2:00'
#      'from' => 1967
#      'in' => 'Oct'
#      'letter' => 'S'
#      'on' => 'lastSun'
#      'save' => 0
#      'to' => 'max'
#      'type' => '-'
#   2  'apr,sun>=1,2:00,1:00;oct,lastsun,2:00'

package TZ;
use strict;
use Carp;
use vars qw(@ISA @EXPORT $VERSION $STANDARD);
require 'dumpvar.pl';

@ISA = qw(Exporter);
@EXPORT = qw(ZoneEquals
             RuleEquals
             FormZoneEquivalencyGroups
             );
$VERSION = '0.1';

$STANDARD = '-'; # Name of the Standard Time rule

######################################################################
# Param: zone object (hash ref)
# Param: zone object (hash ref)
# Param: ref to hash of all rules
# Return: true if two zones are equivalent
sub ZoneEquals {
    my $z1 = shift;
    my $z2 = shift;
    my $RULES = shift;

    ($z1, $z2) = ($z1->{rule}, $z2->{rule});

    return ($z1 eq $z2) || 
        RuleEquals($RULES->{$z1}, $RULES->{$z2});
}

######################################################################
# Param: rule object (hash ref)
# Param: rule object (hash ref)
# Return: true if two rules are equivalent
sub RuleEquals {
    my $r1 = shift;
    my $r2 = shift;

    # Just compare the precomputed encoding strings.
    # defined() catches undefined rules.  The only undefined
    # rule is $STANDARD; any others would be cause by
    # Postprocess().
    return defined($r1) && defined($r2) && $r1->[2] eq $r2->[2];

    # There's actually one more level of equivalency analysis we could
    # do.  This is to recognize that Sun >=1 is the same as First Sun.
    # We don't do this yet.
}

######################################################################
# Given a hash of all zones and a hash of all rules, create a list
# of equivalency groups.  These are groups of zones with the same
# offset and equivalent rules.   Equivalency is tested with
# ZoneEquals and RuleEquals.  The resultant equivalency list is an
# array of refs to groups.  Each group is an array of one or more
# zone names.
# Param: IN  ref to hash of all zones
# Param: IN  ref to hash of all rules
# Param: OUT ref to array to receive group refs
sub FormZoneEquivalencyGroups {
    my ($ZONES, $RULES, $EQUIV) = @_;

    # Group the zones by offset.  This improves efficiency greatly;
    # instead of an n^2 computation, we just need to do n^2 within
    # each offset; a much smaller total number.
    my %ZONES_BY_OFFSET;
    foreach (keys %$ZONES) {
        push @{$ZONES_BY_OFFSET{$ZONES->{$_}->{gmtoff}}}, $_;
    }

    # Find equivalent rules
    foreach my $gmtoff (keys %ZONES_BY_OFFSET) {
        # Make an array of equivalency groups
        # (array of refs to array of names)
        my @equiv;
        foreach my $name1 (@{$ZONES_BY_OFFSET{$gmtoff}}) {
            my $found = 0;
            foreach my $group (@equiv) {
                my $name2 = $group->[0];
                if (ZoneEquals($ZONES->{$name1}, $ZONES->{$name2}, $RULES)) {
                    push @$group, $name1;
                    $found = 1;
                    last;
                }
            }
            if (!$found) {
                my @newGroup = ( $name1 );
                push @equiv, \@newGroup;
            }
        }
        push @$EQUIV, @equiv;
    }
}
ICU-65 update gentz for new binary format and alias table; make pm file names 8.3 X-SVN-Rev: 300 1999-12-05 05:55:28 +00:00			`######################################################################`
			`# Copyright (C) 1999, International Business Machines`
			`# Corporation and others. All Rights Reserved.`
			`######################################################################`
			`# See: ftp://elsie.nci.nih.gov/pub/tzdata<year>`
			`# where <year> is "1999b" or a similar string.`
			`######################################################################`
			`# This package contains utility functions for time zone data.`
			`# Author: Alan Liu`

			`######################################################################`
			`# Zones - A time zone object is a hash with the following keys:`
			`# {gmtoff} The offset from GMT, e.g. "-5:00"`
			`# {rule} The name of the rule, e.g. "-", "Canada", "EU", "US"`
			`# {format} The local abbreviation, e.g. "E%sT"`
			`# {until} Data is good until this year, e.g., "2000". Often blank.`

			`# These correspond to file entries:`
			`#\|# Zone NAME GMTOFF RULES FORMAT [UNTIL]`
			`#\|Zone America/Montreal -4:54:16 - LMT 1884`
			`#\| -5:00 Mont E%sT`

			`# Optionally, a zone may also have the key:`
			`# {link} An old name for this zone, e.g. "HST" (for Pacific/Honolulu)`
			`# Links come from the file entries:`
			`#\|# Link LINK-FROM LINK-TO`
			`#\|Link America/New_York EST5EDT`
			`#\|Link America/Chicago CST6CDT`

			`# The name of the zone itself is not kept in the zone object.`
			`# Instead, zones are kept in a big hash. The keys are the names; the`
			`# values are references to the zone objects. The big hash of all`
			`# zones is referred to in all caps: %ZONES ($ZONES if it's a`
			`# reference).`

			`# Example: $ZONES->{"America/Los_Angeles"} =`
			`# 'format' => 'P%sT'`
			`# 'gmtoff' => '-8:00'`
			`# 'link' => 'US/Pacific-New'`
			`# 'rule' => 'US'`
			`# 'until' => ''`

			`######################################################################`
			`# Rules - A time zone rule is an array with the following elements:`
			`# [0] Onset rule`
			`# [1] Cease rule`
			`# [2] Encoded string`

			`# The onset rule and cease rule have the same format. They are each`
			`# references to a hash with keys:`
			`# {from} Start year`
			`# {to} End year, or "only" or "max"`
			`# {type} Unknown, usually "-"`
			`# {in} Month, 3 letters`
			`# {on} Day specifier, e.g. "lastSun", "Sun>=1", "23"`
			`# {at} Time, e.g. "2:00", "1:00u"`
			`# {save} Amount of savings, for the onset; 0 for the cease`
			`# {letter} Guess: the letter that goes into %s in the zone {format}`

			`# These correspond to the file entries thus:`
			`#\|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S`
			`#\|Rule US 1942 only - Feb 9 2:00 1:00 W # War`
			`#\|Rule US 1945 only - Sep 30 2:00 0 S`
			`#\|Rule US 1967 max - Oct lastSun 2:00 0 S`
			`#\|Rule US 1967 1973 - Apr lastSun 2:00 1:00 D`
			`#\|Rule US 1974 only - Jan 6 2:00 1:00 D`
			`#\|Rule US 1975 only - Feb 23 2:00 1:00 D`
			`#\|Rule US 1976 1986 - Apr lastSun 2:00 1:00 D`
			`#\|Rule US 1987 max - Apr Sun>=1 2:00 1:00 D`

			`# Entry [2], the encoded string, is used to see if two rules are the`
			`# same. It consists of "[0]->{in},[0]->{on},[0]->{at},[0]->{save};`
			`# [1]->{in},[1]->{on},[1]->{at}". Note that the separator between`
			`# values is a comma, between onset and cease is a semicolon. Also`
			`# note that the cease {save} is not used as this is always 0. The`
			`# whole string is forced to lowercase.`

			`# Rules don't contain their own name. Like zones, rules are kept in a`
			`# big hash; the keys are the names, the values the references to the`
			`# arrays. This hash of all rules is referred to in all caps, %RULES`
			`# or for a reference, $RULES.`

			`# Example: $RULES->{"US"} =`
			`# 0 HASH(0x8fa03c)`
			`# 'at' => '2:00'`
			`# 'from' => 1987`
			`# 'in' => 'Apr'`
			`# 'letter' => 'D'`
			`# 'on' => 'Sun>=1'`
			`# 'save' => '1:00'`
			`# 'to' => 'max'`
			`# 'type' => '-'`
			`# 1 HASH(0x8f9fc4)`
			`# 'at' => '2:00'`
			`# 'from' => 1967`
			`# 'in' => 'Oct'`
			`# 'letter' => 'S'`
			`# 'on' => 'lastSun'`
			`# 'save' => 0`
			`# 'to' => 'max'`
			`# 'type' => '-'`
			`# 2 'apr,sun>=1,2:00,1:00;oct,lastsun,2:00'`

			`package TZ;`
			`use strict;`
			`use Carp;`
			`use vars qw(@ISA @EXPORT $VERSION $STANDARD);`
			`require 'dumpvar.pl';`

			`@ISA = qw(Exporter);`
			`@EXPORT = qw(ZoneEquals`
			`RuleEquals`
			`FormZoneEquivalencyGroups`
			`);`
			`$VERSION = '0.1';`

			`$STANDARD = '-'; # Name of the Standard Time rule`

			`######################################################################`
			`# Param: zone object (hash ref)`
			`# Param: zone object (hash ref)`
			`# Param: ref to hash of all rules`
			`# Return: true if two zones are equivalent`
			`sub ZoneEquals {`
			`my $z1 = shift;`
			`my $z2 = shift;`
			`my $RULES = shift;`

			`($z1, $z2) = ($z1->{rule}, $z2->{rule});`

			`return ($z1 eq $z2) \|\|`
			`RuleEquals($RULES->{$z1}, $RULES->{$z2});`
			`}`

			`######################################################################`
			`# Param: rule object (hash ref)`
			`# Param: rule object (hash ref)`
			`# Return: true if two rules are equivalent`
			`sub RuleEquals {`
			`my $r1 = shift;`
			`my $r2 = shift;`

			`# Just compare the precomputed encoding strings.`
			`# defined() catches undefined rules. The only undefined`
			`# rule is $STANDARD; any others would be cause by`
			`# Postprocess().`
			`return defined($r1) && defined($r2) && $r1->[2] eq $r2->[2];`

			`# There's actually one more level of equivalency analysis we could`
			`# do. This is to recognize that Sun >=1 is the same as First Sun.`
			`# We don't do this yet.`
			`}`

			`######################################################################`
			`# Given a hash of all zones and a hash of all rules, create a list`
			`# of equivalency groups. These are groups of zones with the same`
			`# offset and equivalent rules. Equivalency is tested with`
			`# ZoneEquals and RuleEquals. The resultant equivalency list is an`
			`# array of refs to groups. Each group is an array of one or more`
			`# zone names.`
			`# Param: IN ref to hash of all zones`
			`# Param: IN ref to hash of all rules`
			`# Param: OUT ref to array to receive group refs`
			`sub FormZoneEquivalencyGroups {`
			`my ($ZONES, $RULES, $EQUIV) = @_;`

			`# Group the zones by offset. This improves efficiency greatly;`
			`# instead of an n^2 computation, we just need to do n^2 within`
			`# each offset; a much smaller total number.`
			`my %ZONES_BY_OFFSET;`
			`foreach (keys %$ZONES) {`
			`push @{$ZONES_BY_OFFSET{$ZONES->{$_}->{gmtoff}}}, $_;`
			`}`

			`# Find equivalent rules`
			`foreach my $gmtoff (keys %ZONES_BY_OFFSET) {`
			`# Make an array of equivalency groups`
			`# (array of refs to array of names)`
			`my @equiv;`
			`foreach my $name1 (@{$ZONES_BY_OFFSET{$gmtoff}}) {`
			`my $found = 0;`
			`foreach my $group (@equiv) {`
			`my $name2 = $group->[0];`
			`if (ZoneEquals($ZONES->{$name1}, $ZONES->{$name2}, $RULES)) {`
			`push @$group, $name1;`
			`$found = 1;`
			`last;`
			`}`
			`}`
			`if (!$found) {`
			`my @newGroup = ( $name1 );`
			`push @equiv, \@newGroup;`
			`}`
			`}`
			`push @$EQUIV, @equiv;`
			`}`
			`}`