forked from AuroraMiddleware/gtk
193 lines
6.4 KiB
Python
Executable File
193 lines
6.4 KiB
Python
Executable File
#!/usr/bin/python3
|
|
#
|
|
# Call pandoc to convert markdown to docbook, then expand gtk-doc
|
|
# abbreviations (|[ ]|, function(), #object, %constant, etc)
|
|
|
|
import sys
|
|
import re
|
|
import tempfile
|
|
import os.path
|
|
import subprocess
|
|
|
|
# The following code is taken from gtk-doc
|
|
|
|
def ExpandAbbreviations(symbol, text):
    """Expand gtk-doc abbreviations found in *text* into DocBook markup.

    The following shorthands are rewritten, in this order:

    * ``@param()`` and ``@param`` become ``<parameter>`` elements;
    * ``function()`` and ``#Object.func()`` become cross-reference links
      wrapping a ``<function>`` element;
    * ``%constant`` becomes a link wrapping a ``<literal>`` element;
    * ``#symbol`` (only at the start of the text or after a space) becomes
      a link produced by MakeHashXRef().

    A backslash in front of ``@``, ``%`` or ``#`` suppresses the expansion;
    the backslash itself is removed from the output.

    Args:
        symbol (str): not used by this function; kept so that existing
            callers' positional arguments continue to work.
        text (str): the text to expand.

    Returns:
        str: the text with the abbreviations expanded.
    """
    def f1(m):
        # group(1) is the character preceding the name and must be kept.
        return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2) + "()", "function"))

    def f2(m):
        return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2), "literal"))

    def f3(m):
        return m.group(1) + MakeHashXRef(m.group(2), "type")

    # Convert '@param()'
    text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)\s*\(\)', r'\1<parameter>\2()</parameter>', text)

    # Convert 'function()' or 'macro()'.
    # if there is abc_*_def() we don't want to make a link to _def()
    # FIXME: also handle abc(def(....)) : but that would need to be done recursively :/
    text = re.sub(r'([^\*.\w])(\w+)\s*\(\)', f1, text)
    # handle #Object.func()
    text = re.sub(r'(\A|[^\\])#([\w\-:\.]+[\w]+)\s*\(\)', f1, text)

    # Convert '@param', but not '\@param'.
    text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)', r'\1<parameter>\2</parameter>', text)
    # Unescape '\@'.  A plain string replace is used on purpose: in an
    # re.sub() replacement template '\@' is an unknown escape that is left
    # alone, so the backslash would survive.
    text = text.replace('\\@', '@')

    # Convert '%constant', but not '\%constant'.
    # Also allow negative numbers, e.g. %-1.
    text = re.sub(r'(\A|[^\\])\%(-?\w+)', f2, text)
    # Unescape '\%' (same reasoning as for '\@' above).
    text = text.replace('\\%', '%')

    # Convert '#symbol', but not '\#symbol'.
    # Only convert #foo after a space to avoid interfering with
    # fragment identifiers in urls
    text = re.sub(r'(\A|[ ])#([\w\-:\.]+[\w]+)', f3, text)
    text = text.replace('\\#', '#')

    return text
|
|
|
|
# Names of the standard C preprocessor directives.  A '#' followed by one of
# these words is source code, not a gtk-doc '#symbol' abbreviation, so it is
# never turned into a cross-reference.
PreProcessorDirectives = {
    'assert',
    'define',
    'elif',
    'else',
    'endif',
    'error',
    'if',
    'ifdef',
    'ifndef',
    'include',
    'line',
    'pragma',
    'unassert',
    'undef',
    'warning',
}
|
|
|
|
def MakeHashXRef(symbol, tag):
    """Build the cross-reference for a '#symbol' abbreviation.

    Args:
        symbol (str): the name that followed the '#'.
        tag (str): element name to wrap the link text in; pass '' to leave
            the link text unwrapped.

    Returns:
        str: '#' + symbol unchanged when the symbol is a C preprocessor
        directive, otherwise the result of MakeXRef().
    """
    # Things like '#include' and '#define' are code, not references.
    if symbol in PreProcessorDirectives:
        return "#%s" % symbol

    # The visible label drops the special '-struct' / '-enum' suffixes.
    label = re.sub(r'-enum$', '', re.sub(r'-struct$', '', symbol))

    # "Object::signal" links to "Object-signal" and displays the quoted
    # signal name.
    if '::' in symbol:
        owner, _, signal = symbol.partition('::')
        symbol = '%s-%s' % (owner, signal)
        label = u'“' + signal + u'”'

    # "Object:property" links to "Object--property" and displays the quoted
    # property name.  This check runs on the possibly rewritten symbol.
    if ':' in symbol:
        owner, _, prop = symbol.partition(':')
        symbol = '%s--%s' % (owner, prop)
        label = u'“' + prop + u'”'

    link_text = label if tag == '' else tagify(label, tag)
    return MakeXRef(symbol, link_text)
|
|
|
|
def MakeXRef(symbol, text=None):
    """Return a DocBook cross-reference link to *symbol*.

    No link is produced when the (whitespace-stripped) symbol contains a
    space; in that case only the link text is returned.

    Args:
        symbol (str): the symbol to link to.
        text (str): the text shown inside the link; when empty or None the
            stripped symbol is used.

    Returns:
        str: a ``<link linkend="...">`` element, or the bare text.
    """
    symbol = symbol.strip()
    label = text or symbol

    # Drop the special '-struct' / '-enum' suffixes from the visible text.
    for suffix_pattern in (r'-struct$', r'-enum$'):
        label = re.sub(suffix_pattern, '', label)

    # A symbol containing a space cannot be linked.
    if ' ' in symbol:
        return label

    return "<link linkend=\"%s\">%s</link>" % (CreateValidSGMLID(symbol), label)
|
|
|
|
def CreateValidSGMLID(xml_id):
    """Turn a symbol name into a string usable as an SGML 'id'.

    According to http://www.w3.org/TR/html4/types.html#type-id, ID and NAME
    tokens must begin with a letter and may be followed by letters, digits,
    hyphens, underscores, colons and periods.

    SGML ids are case-insensitive, so ":CAPS" is appended to identifiers
    whose letters are all upper-case to keep them from clashing with a
    lower-case twin.

    Args:
        xml_id (str): the text to convert.

    Returns:
        str: the converted id.
    """
    # A lone '_' would be reduced to '' below, so it gets a fixed id.
    if xml_id == '_':
        return "gettext-macro"

    # One pass: drop ',' and ';', turn '_' and ' ' into '-'.
    converted = xml_id.translate(str.maketrans('_ ', '--', ',;'))
    # Leading hyphens are dropped.
    converted = converted.lstrip('-')
    # '::' must be handled before ':' so signals and properties differ.
    converted = converted.replace('::', '-').replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    needs_caps_marker = converted.isupper() and not converted.endswith('-CAPS')
    return converted + ':CAPS' if needs_caps_marker else converted
|
|
|
|
def tagify(text, elem):
    """Wrap *text* in an opening and closing *elem* tag.

    Example: tagify("Text", "literal") returns "<literal>Text</literal>".
    """
    return ''.join(('<', elem, '>', text, '</', elem, '>'))
|
|
|
|
# End of gtk-doc excerpts
|
|
|
|
# Pandoc markdown extension switches; each entry is appended to the
# "markdown" input format name ('+' enables, '-' disables an extension).
MarkdownExtensions = {
    # Disabled extensions
    '-auto_identifiers',          # we use explicit identifiers where needed
    '-raw_html',                  # to escape literal tags like <child> in input

    # Enabled extensions
    '+header_attributes',         # for explicit identifiers
    '+blank_before_header',       # helps with gtk-doc #Object abbreviations
    '+compact_definition_lists',  # to replace <variablelist>
    '+pipe_tables',
    '+backtick_code_blocks',      # to replace |[ ]|
    '+fenced_code_attributes',    # to add language annotations
    '+startnum',                  # to have interrupted lists in the q&a part
}
|
|
|
|
def ConvertToDocbook(infile, outfile):
    """Run pandoc to convert the markdown file *infile* to DocBook.

    The top-level division is 'section' when the base name of *infile*
    starts with 'section', and 'chapter' otherwise.

    Args:
        infile (str): path of the markdown input file.
        outfile (str): path pandoc writes the DocBook output to.

    Raises:
        subprocess.CalledProcessError: if pandoc exits with a non-zero status.
    """
    is_section = os.path.basename(infile).startswith('section')
    division = 'section' if is_section else 'chapter'

    pandoc_command = [
        "pandoc", infile, "-o", outfile,
        "--from=" + "markdown" + "".join(MarkdownExtensions),
        "--to=" + "docbook",
        "--top-level-division=" + division,
    ]
    subprocess.check_call(pandoc_command)
|
|
|
|
def ExpandGtkDocAbbreviations(infile, outfile):
    """Expand the gtk-doc abbreviations of *infile* and write *outfile*.

    Both files are handled as UTF-8 text.  The input is read completely
    before the output is opened.

    Args:
        infile (str): path of the file to read.
        outfile (str): path of the file to write the expanded text to.
    """
    # Use a context manager so the input handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(infile, 'r', encoding='utf-8') as src:
        contents = src.read()

    with open(outfile, 'w', encoding='utf-8') as out:
        out.write(ExpandAbbreviations("file", contents))
|
|
|
|
|
|
if __name__ == '__main__':
    # Usage: <script> INPUT.md OUTPUT.xml
    #
    # mkstemp() creates the intermediate file atomically; the deprecated
    # mktemp() only returns a name, which another process could claim before
    # pandoc writes to it.  Only the path is needed, so the descriptor is
    # closed right away.
    fd, tmp = tempfile.mkstemp()
    os.close(fd)
    try:
        ConvertToDocbook(sys.argv[1], tmp)
        ExpandGtkDocAbbreviations(tmp, sys.argv[2])
    finally:
        # Remove the intermediate file even when a step above failed.
        os.remove(tmp)
|