#!/usr/bin/python3
#
# Call pandoc to convert markdown to docbook, then expand gtk-doc
# abbreviations (|[ ]|, function(), #object, %constant, etc)

import sys
import re
import tempfile
import os.path
import subprocess


# The following code is taken from gtk-doc

def ExpandAbbreviations(symbol, text):
    r"""Expand gtk-doc abbreviations found in a chunk of DocBook text.

    The substitutions are applied in a fixed order; each one rewrites the
    whole text before the next one runs.

    Args:
        symbol (str): name of the symbol being documented. Unused here, kept
                      for signature compatibility with the gtk-doc original.
        text (str): the text to expand.

    Returns:
        str: the text with '@param', 'function()', '%constant' and '#symbol'
             abbreviations replaced by DocBook markup, and the escaped forms
             '\@', '\%' and '\#' reduced to the bare character.
    """
    # Convert '@param()'
    text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)\s*\(\)',
                  r'\1<parameter>\2()</parameter>', text)

    # Convert 'function()' or 'macro()'.
    # if there is abc_*_def() we don't want to make a link to _def()
    # FIXME: also handle abc(def(....)) : but that would need to be done recursively :/
    def f1(m):
        return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2) + "()", "function"))
    text = re.sub(r'([^\*.\w])(\w+)\s*\(\)', f1, text)
    # handle #Object.func()
    text = re.sub(r'(\A|[^\\])#([\w\-:\.]+[\w]+)\s*\(\)', f1, text)

    # Convert '@param', but not '\@param'.
    text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)',
                  r'\1<parameter>\2</parameter>', text)
    # Drop the escaping backslash from '\@' now that '@' handling is done.
    text = re.sub(r'\\@', '@', text)

    # Convert '%constant', but not '\%constant'.
    # Also allow negative numbers, e.g. %-1.
    def f2(m):
        return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2), "literal"))
    text = re.sub(r'(\A|[^\\])\%(-?\w+)', f2, text)
    # Drop the escaping backslash from '\%'.
    text = re.sub(r'\\%', '%', text)

    # Convert '#symbol', but not '\#symbol'.
    # Only convert #foo after a space to avoid interfering with
    # fragment identifiers in urls
    def f3(m):
        return m.group(1) + MakeHashXRef(m.group(2), "type")
    text = re.sub(r'(\A|[ ])#([\w\-:\.]+[\w]+)', f3, text)
    text = re.sub(r'\\#', '#', text)

    return text


# Standard C preprocessor directives, which we ignore for '#' abbreviations.
PreProcessorDirectives = {
    'assert', 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef',
    'include', 'line', 'pragma', 'unassert', 'undef', 'warning'
}


def MakeHashXRef(symbol, tag):
    """Build the cross-reference for a '#symbol' abbreviation.

    Args:
        symbol (str): the text that followed the '#'.
        tag (str): element to wrap the link text in; '' for no wrapping.

    Returns:
        str: '#symbol' unchanged for C preprocessor directives, otherwise the
             result of MakeXRef() for the (possibly rewritten) symbol.
    """
    text = symbol

    # Check for things like '#include', '#define', and skip them.
    if symbol in PreProcessorDirectives:
        return "#%s" % symbol

    # Get rid of special suffixes ('-struct','-enum').
    text = re.sub(r'-struct$', '', text)
    text = re.sub(r'-enum$', '', text)

    # If the symbol is in the form "Object::signal", then change the symbol to
    # "Object-signal" and use "signal" as the text.
    if '::' in symbol:
        o, s = symbol.split('::', 1)
        symbol = '%s-%s' % (o, s)
        text = u'“' + s + u'”'

    # If the symbol is in the form "Object:property", then change the symbol to
    # "Object--property" and use "property" as the text.
    if ':' in symbol:
        o, p = symbol.split(':', 1)
        symbol = '%s--%s' % (o, p)
        text = u'“' + p + u'”'

    if tag != '':
        text = tagify(text, tag)

    return MakeXRef(symbol, text)


def MakeXRef(symbol, text=None):
    """This returns a cross-reference link to the given symbol.

    Symbols containing a space are not linked; the text is returned as is.

    Args:
        symbol (str): the symbol to try to create a XRef to.
        text (str): text to put inside the XRef, defaults to symbol

    Returns:
        str: a docbook link
    """
    symbol = symbol.strip()
    if not text:
        text = symbol

        # Get rid of special suffixes ('-struct','-enum').
        text = re.sub(r'-struct$', '', text)
        text = re.sub(r'-enum$', '', text)

    if ' ' in symbol:
        return text

    symbol_id = CreateValidSGMLID(symbol)
    return '<link linkend="%s">%s</link>' % (symbol_id, text)


def CreateValidSGMLID(xml_id):
    """Creates a valid SGML 'id' from the given string.

    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
    and periods (".")."

    When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
    prevent name clashes (SGML ids are case-insensitive). (It basically never is
    the case that mixed-case identifiers would collide.)

    Args:
        xml_id (str): The text to be converted into a valid SGML id.

    Returns:
        str: The converted id.
    """

    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
    if xml_id == '_':
        return "gettext-macro"

    xml_id = re.sub(r'[,;]', '', xml_id)
    xml_id = re.sub(r'[_ ]', '-', xml_id)
    xml_id = re.sub(r'^-+', '', xml_id)
    xml_id = xml_id.replace('::', '-')
    xml_id = xml_id.replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    if xml_id.isupper() and not xml_id.endswith('-CAPS'):
        xml_id += ':CAPS'

    return xml_id


def tagify(text, elem):
    """Wrap some text in an opening and closing tag.

    e.g. tagify("Text", "literal") => "<literal>Text</literal>".

    Args:
        text (str): the content to wrap.
        elem (str): the element name.

    Returns:
        str: the text enclosed in the element.
    """
    return '<' + elem + '>' + text + '</' + elem + '>'

# End of gtk-doc excerpts


MarkdownExtensions = {
    '-auto_identifiers',          # we use explicit identifiers where needed
    '+header_attributes',         # for explicit identifiers
    '+blank_before_header',       # helps with gtk-doc #Object abbreviations
    '+compact_definition_lists',  # to replace <variablelist> markup
    '+pipe_tables',
    '+backtick_code_blocks',      # to replace |[ ]|
    '+fenced_code_attributes',    # to add language annotations
    '-raw_html',                  # to escape literal angle-bracket tags in input
    '+startnum',                  # to have interrupted lists in the q&a part
}


def ConvertToDocbook(infile, outfile):
    """Run pandoc to convert a markdown file to standalone DocBook.

    Files whose base name starts with 'section' use 'section' as the
    top-level division; everything else uses 'chapter'.

    Args:
        infile (str): path of the markdown input.
        outfile (str): path pandoc writes the DocBook output to.

    Raises:
        subprocess.CalledProcessError: if pandoc exits with a non-zero status.
    """
    basename = os.path.basename(infile)
    division = 'section' if basename.startswith('section') else 'chapter'

    # Sets have no defined order; sort so the command line is reproducible.
    input_format = "markdown" + "".join(sorted(MarkdownExtensions))
    output_format = "docbook"
    subprocess.check_call(["pandoc", infile, "-o", outfile,
                           "--from=" + input_format,
                           "--to=" + output_format,
                           "--standalone",
                           "--top-level-division=" + division])


def ExpandGtkDocAbbreviations(infile, outfile):
    """Read infile, expand gtk-doc abbreviations, and write the result.

    Both files are handled as UTF-8 text.

    Args:
        infile (str): path of the DocBook file to read.
        outfile (str): path of the file to write.
    """
    with open(infile, 'r', encoding='utf-8') as src:
        contents = src.read()
    with open(outfile, 'w', encoding='utf-8') as out:
        out.write(ExpandAbbreviations("file", contents))


def main(argv):
    """Convert argv[1] (markdown) into argv[2] (DocBook with expanded abbreviations).

    Args:
        argv (list): the command line, program name first.

    Returns:
        int: process exit status; 2 when the argument count is wrong.
    """
    if len(argv) != 3:
        sys.stderr.write("Usage: %s INFILE OUTFILE\n" % argv[0])
        return 2

    # mkstemp() creates the file atomically, unlike the race-prone mktemp().
    fd, tmp = tempfile.mkstemp()
    os.close(fd)
    try:
        ConvertToDocbook(argv[1], tmp)
        ExpandGtkDocAbbreviations(tmp, argv[2])
    finally:
        # Clean up the intermediate file even when a step fails.
        os.remove(tmp)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))