harfbuzz/test/shaping/hb-unicode-prettyname

53 lines
1.3 KiB
Plaintext
Raw Normal View History

#!/usr/bin/python
import sys
import re
import unicodedata
# Conventional abbreviations, keyed by the full Unicode character name as
# returned by unicodedata.name ().  pretty_name () consults this table last,
# after its regex-based shortening, so a key only takes effect if the name
# reaches that point unchanged.
shorthands = {
    "ZERO WIDTH NON-JOINER": "ZWNJ",
    "ZERO WIDTH JOINER": "ZWJ",
    "NARROW NO-BREAK SPACE": "NNBSP",
    "COMBINING GRAPHEME JOINER": "CGJ",
    "LEFT-TO-RIGHT MARK": "LRM",
    "RIGHT-TO-LEFT MARK": "RLM",
    "LEFT-TO-RIGHT EMBEDDING": "LRE",
    "RIGHT-TO-LEFT EMBEDDING": "RLE",
    "POP DIRECTIONAL FORMATTING": "PDF",
    "LEFT-TO-RIGHT OVERRIDE": "LRO",
    "RIGHT-TO-LEFT OVERRIDE": "RLO",
}


def pretty_name (x):
    """Return a short, human-friendly name for the single character x.

    The Unicode character name is shortened as follows, in order:
      * everything up to and including " LETTER " is dropped;
      * "... VOWEL SIGN FOO" becomes "FOO-MATRA";
      * everything up to and including " SIGN " is dropped;
      * everything up to and including " COMBINING " is dropped;
      * any resulting name that starts with, or contains a word starting
        with, "VIRAMA" after a space becomes "HALANT";
      * names listed in `shorthands` are replaced by their abbreviation.

    Returns "XXX" when unicodedata has no name for the character.
    """
    try:
        s = unicodedata.name (x)
    except ValueError:
        # Raised by unicodedata.name () for characters without a name.
        return "XXX"

    s = re.sub (r".* LETTER ", "", s)
    s = re.sub (r".* VOWEL SIGN (.*)", r"\1-MATRA", s)
    s = re.sub (r".* SIGN ", "", s)
    s = re.sub (r".* COMBINING ", "", s)

    # The " SIGN " substitution above already reduces e.g.
    # "DEVANAGARI SIGN VIRAMA" to a bare "VIRAMA", so the "<prefix> " part
    # must be optional; names such as "BRAHMI VIRAMA" still carry a prefix.
    if re.match (r"(?:.* )?VIRAMA", s):
        s = "HALANT"

    return shorthands.get (s, s)
def pretty_names (s):
    """Return the pretty names of all code points listed in the string s.

    s holds hexadecimal code points separated by commas and/or whitespace.
    The characters "<", ">", "+", "\\", "u" and "U" are treated as
    decoration and ignored, so spellings such as "U+0915", "\\u0915" and
    "<U+0915,U+094D>" are all accepted.  The individual names, as produced
    by pretty_name (), are joined with " + ".  A string without any code
    point yields the empty string.

    Raises ValueError if a token is not a valid hexadecimal number.
    """
    # unichr () exists only in Python 2; Python 3 spells it chr ().
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # Blank out the decoration so that only hex digits and separators remain.
    s = re.sub (r"[<+>\\uU]", " ", s)
    # Split on commas and any whitespace (tabs and carriage returns too);
    # empty tokens from leading, trailing or repeated separators are skipped.
    chars = [to_char (int (x, 16)) for x in re.split (r"[,\s]+", s) if x]
    return ' + '.join (pretty_name (c) for c in chars)
if __name__ == '__main__':
    # Two mutually exclusive modes: code points given as arguments, or
    # "--stdin" as the sole argument to read them line by line from stdin.
    use_stdin = '--stdin' in sys.argv
    if len (sys.argv) == 1 or (use_stdin and len (sys.argv) != 2):
        # Single parenthesised argument: valid both as a Python 2 print
        # statement and as a Python 3 print () call.
        print ("Usage:\n %s UNICODE_CODEPOINTS...\nor:\n %s --stdin" % (sys.argv[0], sys.argv[0]))
        sys.exit (1)

    if use_stdin:
        # Iterate lazily rather than via readlines (), so each line can be
        # answered without first buffering the whole input.
        for line in sys.stdin:
            print (pretty_names (line))
    else:
        # All arguments together form one comma-separated sequence.
        print (pretty_names (','.join (sys.argv[1:])))