ICU-7144 adjust to IdnaMappingTable.txt separating reserved and assigned ranges

X-SVN-Rev: 27949
This commit is contained in:
Markus Scherer 2010-04-19 21:05:15 +00:00
parent f3c6395224
commit 9bbee2c4ba

View File

@ -1,3 +1,4 @@
#!/usr/bin/python2.4
# Copyright (C) 2010, International Business Machines # Copyright (C) 2010, International Business Machines
# Corporation and others. All Rights Reserved. # Corporation and others. All Rights Reserved.
# #
@ -22,7 +23,8 @@ replacements = [
(re.compile(r"; mapped ; "), ">"), (re.compile(r"; mapped ; "), ">"),
(re.compile(r"; deviation ; "), ">"), (re.compile(r"; deviation ; "), ">"),
(re.compile(r" +(\# [^\#]+)$"), r" \1"), (re.compile(r" +(\# [^\#]+)$"), r" \1"),
(re.compile(r"\.\.FFFF"), "..FFFC") (re.compile(r"\.\.FFFD"), "..FFFC"),
(re.compile(r"(FFF[^E])\.\.FFFF"), r"\1..FFFC")
] ]
in_file = open("IdnaMappingTable.txt", "r") in_file = open("IdnaMappingTable.txt", "r")
@ -30,6 +32,7 @@ out_file = open("uts46.txt", "w")
out_file.write("# Original file:\n") out_file.write("# Original file:\n")
for line in in_file: for line in in_file:
orig_line = line
if line.startswith("# For documentation, see"): if line.startswith("# For documentation, see"):
out_file.write(line) out_file.write(line)
out_file.write(r""" out_file.write(r"""
@ -44,8 +47,8 @@ for line in in_file:
# s/; deviation ; />/ # s/; deviation ; />/
# s/ +(\# [^\#]+)$/ \1/ # s/ +(\# [^\#]+)$/ \1/
# #
# A circular mapping FFFD>FFFD is avoided by rewriting the line that starts with # A circular mapping FFFD>FFFD is avoided by rewriting the line that contains
# FFEF..FFFF to two lines, splitting this range and omitting FFFD. # ..FFFD to contain ..FFFC instead.
# #
# Use this file as the second gennorm2 input file after nfc.txt. # Use this file as the second gennorm2 input file after nfc.txt.
# ================================================ # ================================================
@ -53,7 +56,7 @@ for line in in_file:
continue continue
for rep in replacements: line = rep[0].sub(rep[1], line) for rep in replacements: line = rep[0].sub(rep[1], line)
out_file.write(line) out_file.write(line)
if "..FFFC" in line: if "..FFFF" in orig_line and "..FFFC" in line:
out_file.write("FFFE..FFFF >FFFD\n"); out_file.write("FFFE..FFFF >FFFD\n");
in_file.close() in_file.close()
out_file.close() out_file.close()