scuffed-code/tools/unicode/c/genpname/gensvpa.pl

161 lines
4.9 KiB
Perl
Raw Normal View History

#!/usr/bin/perl
#*
#*******************************************************************************
#* Copyright (C) 2006, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
#*
#* file name: gensvpa.pl
#* encoding: US-ASCII
#* tab size: 8 (not used)
#* indentation:4
#*
#* Created by: Ram Viswanadha
#*
#* This file filters iso15924-utf8-<date>.txt
#*
use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;
# Run the program. main() is defined further down in this file; the sub is
# resolved at run time, so calling it before its definition works.
main();
#---------------------------------------------------------------------
# The main program
# Reads the script codes from the ISO 15924 file (--iso15924) and, for every
# code that does not already occur as an "sc ; <code>" entry in the property
# value aliases file (--prop), writes a synthetic alias line
#     sc ; <code> ; <code>
# to <destdir>/SyntheticPropertyValueAliases.txt.  Codes matching
# Qa[ab][a-z] (private use) are skipped.
#
# When --code-start is given, a matching USCRIPT_* enum entry is also printed
# to STDOUT for every alias written, numbered upwards from that start value.
#
# Takes no arguments (options come from @ARGV via GetOptions).
# Dies if any of the three files cannot be opened or the output file cannot
# be closed; calls usage() (which exits) if a required option is missing.
sub main {
    my ($destdir, $iso, $prop, $code);
    GetOptions(
        "--destdir=s"    => \$destdir,
        "--iso15924=s"   => \$iso,
        "--prop=s"       => \$prop,
        "--code-start=s" => \$code,
    );

    # --destdir, --iso15924 and --prop are mandatory; --code-start is optional.
    usage() unless defined $destdir;
    usage() unless defined $iso;
    usage() unless defined $prop;

    my $outfile = "$destdir/SyntheticPropertyValueAliases.txt";

    my $propFH = IO::File->new($prop, "r")
        or die "could not open the file $prop for reading: $! \n";
    my $isoFH = IO::File->new($iso, "r")
        or die "could not open the file $iso for reading: $! \n";
    my $outFH = IO::File->new($outfile, "w")
        or die "could not open the file $outfile for writing: $! \n";

    # Keep only the Script (sc) entries of the property value aliases file.
    my @propLines;
    while (my $propLine = <$propFH>) {
        push(@propLines, $propLine) if $propLine =~ /sc ; /;
    }
    close($propFH);

    printHeader($outFH);

    if (defined $code) {
        print "Please add the following to UScriptCode enum in uscript.h.\n";
        print "#ifndef U_HIDE_DRAFT_API\n";
    }

    ISO_LINE:
    while (my $isoLine = <$isoFH>) {
        # skip if the line starts with a comment char
        next ISO_LINE if $isoLine =~ /^#/;
        chomp($isoLine);

        # First field is the script code; the third field is used as the
        # name for the enum constant (presumably the English script name --
        # verify against the ISO 15924 file layout).
        my ($script, undef, $name) = split(/;/, $isoLine, 4);

        # Blank lines and lines without a code would otherwise produce a
        # bogus "sc ;  ; " entry.
        next ISO_LINE unless defined $script && $script =~ /\S/;

        # ignore private use codes
        next ISO_LINE if $script =~ /Qa[ab][a-z]/;

        # e.g. "sc ; Arab"
        my $aliasPrefix = "sc ; $script";

        # Search the propLines to make sure that this script code is not
        # already encoded in Unicode.  index() does a literal substring
        # search, so nothing in the code is treated as a regex metacharacter.
        my $encoded = 0;
        foreach my $propLine (@propLines) {
            if (index($propLine, $aliasPrefix) >= 0) {
                $encoded = 1;
                last;
            }
        }
        next ISO_LINE if $encoded;

        # Short and long alias are deliberately the same script code.
        print {$outFH} "$aliasPrefix ; $script \n";

        # print to console
        if (defined $code) {
            # Fall back to the script code when the name is missing or is
            # not usable as a C identifier (parentheses, whitespace, commas
            # or non-ASCII bytes).
            if (!defined $name || $name eq '' || $name =~ /[(\s,\x80-\xFF]/) {
                $name = $script;
            }
            $name =~ s/-/_/g;
            my $scriptcode = "USCRIPT_" . uc($name);
            print " $scriptcode = $code, /* $script */\n";
            $code++;
        }
    }

    if (defined $code) {
        print "#endif /* U_HIDE_DRAFT_API */\n";
    }

    close($isoFH);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Writes the fixed banner of SyntheticPropertyValueAliases.txt (copyright
# notice, file description and the "Script (sc)" section heading) to the
# given handle.
#
#   $outFH - writable file handle that receives the banner
#
# The copyright range ends with the current year, taken from localtime.
# Returns whatever print returns.
sub printHeader {
    my ($outFH) = @_;

    # localtime reports the year as an offset from 1900.
    my $year = (localtime)[5] + 1900;

    #We will print our copyright here + warnings
    print {$outFH} <<END_HEADER_COMMENT;
########################################################################
# Copyright (c) 2006-$year, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation: 4
# created by: gensvpa.pl
########################################################################
# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD. Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.
########################################################################
# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################
# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever
# Script (sc)
END_HEADER_COMMENT
}
#-----------------------------------------------------------------------
# Prints the command-line help to STDOUT and terminates the script.
# main() calls this when a required option is missing, so the script exits
# with a non-zero status to let calling scripts and makefiles see the
# failure.  Never returns.
sub usage {
    print <<"END";
Usage:
gensvpa.pl
Options:
--destdir=<directory>
--iso15924=<file name>
--prop=<PropertyValueAliases.txt>
--code-start=<number> (optional)
e.g.: gensvpa.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata/PropertyValueAliases.txt --code-start=60
END
    exit(1);
}