2006-10-27 00:03:21 +00:00
|
|
|
#!/usr/bin/perl
|
|
|
|
|
2017-10-03 02:32:50 +00:00
|
|
|
#
|
|
|
|
# Copyright (C) 2017 and later: Unicode, Inc. and others.
|
|
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
#
|
|
|
|
|
2006-10-27 00:03:21 +00:00
|
|
|
use strict;
|
|
|
|
use Unicode::UCD 'charinfo';
|
|
|
|
use Unicode::Normalize;
|
|
|
|
use utf8;
|
|
|
|
use open ':utf8';
|
|
|
|
|
|
|
|
my $printout = 0;
|
|
|
|
my $braces = 0;
|
|
|
|
my $colls = 0;
|
|
|
|
my $aliased = 0;
|
|
|
|
my $newName = "";
|
|
|
|
my $filename;
|
|
|
|
my $suffix;
|
|
|
|
my $locale;
|
|
|
|
|
|
|
|
NEW_FILE:
|
|
|
|
foreach my $arg (@ARGV) {
|
|
|
|
if($newName =~ /^$/) {
|
|
|
|
$locale = $arg;
|
|
|
|
$locale =~ s#^.*/##g;
|
|
|
|
$locale =~ s/\.txt//;
|
|
|
|
} else {
|
|
|
|
$newName = "";
|
|
|
|
}
|
|
|
|
my $command = "/home/weiv/build/current/bin/uconv -x hex-any/Java -f utf8 -t utf8 $arg";
|
|
|
|
print $command."\n";
|
|
|
|
my @bundle = `$command`;
|
|
|
|
foreach $_ (@bundle) {
|
|
|
|
#while(<>) {
|
|
|
|
#print $ARGV if eof;
|
|
|
|
if(/^\/\//) {
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
if(/collations/) {
|
|
|
|
print "found Collations\n";
|
|
|
|
$colls = 1;
|
|
|
|
if(/alias/) {
|
|
|
|
print "collations are aliased\n";
|
|
|
|
$aliased = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if($aliased) {
|
|
|
|
print "processing aliased data: $_\n";
|
|
|
|
if(/\{/) {
|
|
|
|
print "Braces opened\n";
|
|
|
|
$braces = 1;
|
|
|
|
}
|
|
|
|
if($braces && /\"(.*)\"/) {
|
|
|
|
$newName = $1;
|
|
|
|
print "Aliasing to $newName\n";
|
|
|
|
}
|
|
|
|
if($braces && /\}/) {
|
|
|
|
$braces = 0;
|
|
|
|
print "Braces closed\n";
|
|
|
|
$aliased = 0;
|
|
|
|
print "Switching from $filename to $newName\n";
|
|
|
|
$arg =~ s/$locale\.txt$/$newName\.txt/;
|
|
|
|
print "$arg\n";
|
|
|
|
redo NEW_FILE;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
if(/standard|phonebook|traditional|pinyin|stroke|direct/ && $colls) {
|
|
|
|
print "found $& collation\n";
|
|
|
|
$suffix = "_".uc($&);
|
|
|
|
if(/standard/) {
|
|
|
|
$suffix = "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(/Sequence/ && $colls) {
|
|
|
|
#binmode ARGV, ":utf8";
|
|
|
|
$printout = 1;
|
|
|
|
#$filename = $ARGV;
|
|
|
|
$filename = $locale;
|
|
|
|
if($suffix) {
|
|
|
|
$filename .= "_".$suffix;
|
|
|
|
}
|
|
|
|
$filename .= "_collation.html";
|
|
|
|
print "filename is $filename\n";
|
|
|
|
#open(OUT, ">:utf8", "$filename");
|
|
|
|
open(OUT, ">$filename");
|
|
|
|
printHeading($arg);
|
|
|
|
#next;
|
|
|
|
}
|
|
|
|
my $line = $_;
|
|
|
|
if($line =~ /\{/ && $printout) {
|
|
|
|
$braces++;
|
|
|
|
}
|
|
|
|
if($printout) {
|
|
|
|
print OUT processLine($line);
|
|
|
|
print OUT "\n";
|
|
|
|
}
|
|
|
|
if( $line =~ /\}/ && $printout) {
|
|
|
|
$braces--;
|
|
|
|
if($braces == 0) {
|
|
|
|
$printout = 0;
|
|
|
|
printFooting();
|
|
|
|
close(OUT);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sub processLine {
|
|
|
|
my $line = shift;
|
|
|
|
$_ = $line;
|
|
|
|
my $i = 0;
|
|
|
|
my $j = 0;
|
|
|
|
my $result;
|
|
|
|
# remove comments
|
|
|
|
s#//.*$##g;
|
|
|
|
# remove "Sequence" if present
|
|
|
|
s/Sequence\s*//;
|
|
|
|
# remove leading brace if present
|
|
|
|
s/^\s*{//;
|
|
|
|
# remove trailing brace if present
|
|
|
|
s/}\s*$//;
|
|
|
|
# remove trailing quote
|
|
|
|
s/"\s*$//;
|
|
|
|
#remove lead quote
|
|
|
|
s/^\s*"//;
|
|
|
|
#separate options
|
|
|
|
s/(\[.*\])/\n\1/g;
|
|
|
|
#separate resets
|
|
|
|
s/\s*\&\s*/\n\& /g;
|
|
|
|
#separate strengths and insert spaces
|
|
|
|
s/\s*(<{1,4})\s*/\n\1 /g;
|
|
|
|
#separate equals and insert spaces
|
|
|
|
s/\s*=\s*/\n= /g;
|
|
|
|
|
|
|
|
# break into individual reset/strength/setting lines
|
|
|
|
my @lines = split(/\n/);
|
|
|
|
|
|
|
|
my $line;
|
|
|
|
my $name;
|
|
|
|
my $spanEnd = "";
|
|
|
|
my $result = "";
|
|
|
|
my $names = "";
|
|
|
|
my $codes = "";
|
|
|
|
my $lrm = "";
|
|
|
|
|
|
|
|
foreach $line (@lines) {
|
|
|
|
# skip empty lines
|
|
|
|
if($line =~ /^$/) {
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
$spanEnd = "";
|
|
|
|
$name = "";
|
|
|
|
$lrm = "";
|
|
|
|
$line = NFC($line);
|
|
|
|
# for resets and strengths we will get name for elements
|
|
|
|
if($line =~ /<{1,4} |= |& \[.*\]|& /) {
|
|
|
|
$name = "<span title=\"";
|
|
|
|
$names = "";
|
|
|
|
$codes = "";
|
|
|
|
my $start = $&;
|
|
|
|
my $rest = $';
|
|
|
|
for ($j = 0; $j < length($rest); $j++) {
|
|
|
|
my $char = substr($rest, $j, 1);
|
|
|
|
my $charVal = ord($char);
|
|
|
|
# some of elements are part of the syntax, so they are
|
|
|
|
# entered without translation to the name
|
|
|
|
if($charVal == 0x002F || $charVal == 0x007C) {
|
|
|
|
$name .= $codes.$names." $char ";
|
|
|
|
$codes = "";
|
|
|
|
$names = "";
|
|
|
|
} elsif($charVal == 0x0027) { #quote requires more processing
|
|
|
|
#$name .= "'";
|
|
|
|
} else {
|
|
|
|
my $charinfo = charinfo($charVal);
|
|
|
|
$codes .= $charinfo->{'code'}." ";
|
|
|
|
$names .= "{".$charinfo->{'name'}."} ";
|
|
|
|
if($charinfo->{'bidi'} eq "R" || $charinfo->{'bidi'} eq "AL") {
|
|
|
|
$lrm = "‎";
|
|
|
|
}
|
|
|
|
#$name .= $charinfo->{'code'}." {".$charinfo->{'name'}."} ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$name .= $codes.$names."\" >";
|
|
|
|
$spanEnd = "</span>";
|
|
|
|
}
|
|
|
|
#print $name."\n";
|
|
|
|
if($line =~ /^<<<</) {
|
|
|
|
$line = " $line";
|
|
|
|
} elsif($line =~ /^<<</) {
|
|
|
|
$line = " $line";
|
|
|
|
} elsif($line =~ /^<</) {
|
|
|
|
$line = " $line";
|
|
|
|
} elsif($line =~ /^</) {
|
|
|
|
$line = " $line";
|
|
|
|
} elsif($line =~ /^=/) {
|
|
|
|
$line = " $line";
|
|
|
|
}
|
|
|
|
# insert spaces around vertical bars (fix prefixes)
|
|
|
|
|
|
|
|
# insert spaces around slashes (fix expansions)
|
|
|
|
$line =~ s#/# / #g;
|
|
|
|
# replace &
|
|
|
|
$line =~ s/\&/&/g;
|
|
|
|
# replace spaces
|
|
|
|
$line =~ s/ / /g;
|
|
|
|
# replace <
|
|
|
|
$line =~ s/</</g;
|
|
|
|
# replace >
|
|
|
|
$line =~ s/>/>/g;
|
|
|
|
|
|
|
|
#$lines[$i] = $name.$lrm.$line."</span><br>";
|
|
|
|
#$i++;
|
|
|
|
$result .= $name.$lrm.$line.$spanEnd."<br>\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
#$_ = join("\n", @lines);
|
|
|
|
return $result;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
sub printHeading {
|
|
|
|
my $filename = shift;
|
|
|
|
$filename =~ s/\.txt//;
|
|
|
|
print OUT <<"EndOfHeading";
|
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
|
|
|
</head>
|
|
|
|
# Collation data resource bundle generated for locale: $filename<br>
|
|
|
|
# For platform icu reference platform UCA<br><br>
|
|
|
|
|
|
|
|
|
|
|
|
$filename {<br>
|
|
|
|
CollationElements {<br>
|
|
|
|
Sequence {<br>
|
|
|
|
EndOfHeading
|
|
|
|
}
|
|
|
|
|
|
|
|
sub printFooting {
|
|
|
|
print OUT <<"EndOfFooting";
|
|
|
|
}<br>
|
|
|
|
}<br>
|
|
|
|
}<br>
|
|
|
|
|
|
|
|
</pre>
|
|
|
|
</html>
|
|
|
|
EndOfFooting
|
|
|
|
}
|