QTextMarkdownWriter: write fenced code blocks with language declaration
MD4C now makes it possible to detect indented and fenced code blocks: https://github.com/mity/md4c/issues/81 Fenced code blocks have the advantages of being easier to write by hand and of having an "info string" following the opening fence, which is commonly used to declare the language. Also, the HTML parser now recognizes tags of the form <pre class="language-foo">, which is one convention for declaring the programming language (as opposed to a human language, for which the lang attribute would be used): https://stackoverflow.com/questions/5134242/semantics-standards-and-using-the-lang-attribute-for-source-code-in-markup So it's possible to read HTML and write Markdown without losing this information. It's also possible to read Markdown with any type of code block (fenced with ``` or ~~~, or indented) and rewrite it the same way. Change-Id: I33c2bf7d7b66c8f3ba5bdd41ab32572f09349c47 Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
This commit is contained in:
parent
57f38bc49d
commit
b3cc9403c4
@ -564,6 +564,9 @@ Q_GUI_EXPORT QDataStream &operator>>(QDataStream &stream, QTextFormat &fmt)
|
||||
\value BlockTrailingHorizontalRulerWidth The width of a horizontal ruler element.
|
||||
\value HeadingLevel The level of a heading, for example 1 corresponds to an HTML H1 tag; otherwise 0.
|
||||
This enum value has been added in Qt 5.12.
|
||||
\value BlockCodeFence The character that was used in the "fences" around a Markdown code block.
|
||||
If the code block was indented rather than fenced, the block should not have this property.
|
||||
This enum value has been added in Qt 5.14.
|
||||
|
||||
\value BlockQuoteLevel The depth of nested quoting on this block: 1 means the block is a top-level block quote.
|
||||
Blocks that are not block quotes should not have this property.
|
||||
|
@ -178,6 +178,7 @@ public:
|
||||
HeadingLevel = 0x1070,
|
||||
BlockQuoteLevel = 0x1080,
|
||||
BlockCodeLanguage = 0x1090,
|
||||
BlockCodeFence = 0x1091,
|
||||
BlockMarker = 0x10A0,
|
||||
|
||||
// character properties
|
||||
|
@ -1635,6 +1635,10 @@ void QTextHtmlParser::applyAttributes(const QStringList &attributes)
|
||||
else if (key == QLatin1String("type"))
|
||||
linkType = value;
|
||||
break;
|
||||
case Html_pre:
|
||||
if (key == QLatin1String("class") && value.startsWith(QLatin1String("language-")))
|
||||
node->blockFormat.setProperty(QTextFormat::BlockCodeLanguage, value.mid(9));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -165,12 +165,13 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det)
|
||||
MD_BLOCK_CODE_DETAIL *detail = static_cast<MD_BLOCK_CODE_DETAIL *>(det);
|
||||
m_codeBlock = true;
|
||||
m_blockCodeLanguage = QLatin1String(detail->lang.text, int(detail->lang.size));
|
||||
m_blockCodeFence = detail->fence_char;
|
||||
QString info = QLatin1String(detail->info.text, int(detail->info.size));
|
||||
m_needsInsertBlock = true;
|
||||
if (m_blockQuoteDepth)
|
||||
qCDebug(lcMD, "CODE lang '%s' info '%s' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockQuoteDepth);
|
||||
qCDebug(lcMD, "CODE lang '%s' info '%s' fenced with '%c' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockCodeFence, m_blockQuoteDepth);
|
||||
else
|
||||
qCDebug(lcMD, "CODE lang '%s' info '%s'", qPrintable(m_blockCodeLanguage), qPrintable(info));
|
||||
qCDebug(lcMD, "CODE lang '%s' info '%s' fenced with '%c'", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockCodeFence);
|
||||
} break;
|
||||
case MD_BLOCK_H: {
|
||||
MD_BLOCK_H_DETAIL *detail = static_cast<MD_BLOCK_H_DETAIL *>(det);
|
||||
@ -326,6 +327,7 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail)
|
||||
case MD_BLOCK_CODE: {
|
||||
m_codeBlock = false;
|
||||
m_blockCodeLanguage.clear();
|
||||
m_blockCodeFence = 0;
|
||||
if (m_blockQuoteDepth)
|
||||
qCDebug(lcMD, "CODE ended inside QUOTE %d", m_blockQuoteDepth);
|
||||
else
|
||||
@ -540,6 +542,8 @@ void QTextMarkdownImporter::insertBlock()
|
||||
}
|
||||
if (m_codeBlock) {
|
||||
blockFormat.setProperty(QTextFormat::BlockCodeLanguage, m_blockCodeLanguage);
|
||||
if (m_blockCodeFence)
|
||||
blockFormat.setProperty(QTextFormat::BlockCodeFence, QString(QLatin1Char(m_blockCodeFence)));
|
||||
charFormat.setFont(m_monoFont);
|
||||
} else {
|
||||
blockFormat.setTopMargin(m_paragraphMargin);
|
||||
|
@ -124,6 +124,7 @@ private:
|
||||
int m_tableCol = -1; // because relative cell movements (e.g. m_cursor->movePosition(QTextCursor::NextCell)) don't work
|
||||
int m_paragraphMargin = 0;
|
||||
int m_blockType = 0;
|
||||
char m_blockCodeFence = 0;
|
||||
Features m_features;
|
||||
QTextImageFormat m_imageFormat;
|
||||
QTextListFormat m_listFormat;
|
||||
|
@ -134,6 +134,24 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
|
||||
writeFrame(iterator.currentFrame());
|
||||
else { // no frame, it's a block
|
||||
QTextBlock block = iterator.currentBlock();
|
||||
// Look ahead to detect the cases in which needless blank lines should be suppressed:
|
||||
// at the end of the frame, or when the next block's indent or code language differs
|
||||
bool nextIsDifferent = false;
|
||||
bool ending = false;
|
||||
{
|
||||
QTextFrame::iterator next = iterator;
|
||||
++next;
|
||||
if (next.atEnd()) {
|
||||
nextIsDifferent = true;
|
||||
ending = true;
|
||||
} else {
|
||||
QTextBlockFormat format = iterator.currentBlock().blockFormat();
|
||||
QTextBlockFormat nextFormat = next.currentBlock().blockFormat();
|
||||
if (nextFormat.indent() != format.indent() ||
|
||||
nextFormat.property(QTextFormat::BlockCodeLanguage) != format.property(QTextFormat::BlockCodeLanguage))
|
||||
nextIsDifferent = true;
|
||||
}
|
||||
}
|
||||
if (table) {
|
||||
QTextTableCell cell = table->cellAt(block.position());
|
||||
if (tableRow < cell.row()) {
|
||||
@ -150,7 +168,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
|
||||
if (lastWasList)
|
||||
m_stream << Newline;
|
||||
}
|
||||
int endingCol = writeBlock(block, !table, table && tableRow == 0);
|
||||
int endingCol = writeBlock(block, !table, table && tableRow == 0, nextIsDifferent);
|
||||
m_doubleNewlineWritten = false;
|
||||
if (table) {
|
||||
QTextTableCell cell = table->cellAt(block.position());
|
||||
@ -162,11 +180,19 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
|
||||
m_stream << QString(paddingLen, Space);
|
||||
for (int col = cell.column(); col < spanEndCol; ++col)
|
||||
m_stream << "|";
|
||||
} else if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
|
||||
} else if (m_fencedCodeBlock && ending) {
|
||||
m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
|
||||
<< m_codeBlockFence << Newline << Newline;
|
||||
m_codeBlockFence.clear();
|
||||
} else if (m_indentedCodeBlock && nextIsDifferent) {
|
||||
m_stream << Newline;
|
||||
} else if (endingCol > 0) {
|
||||
m_stream << Newline << Newline;
|
||||
m_doubleNewlineWritten = true;
|
||||
if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
|
||||
m_stream << Newline;
|
||||
} else {
|
||||
m_stream << Newline << Newline;
|
||||
m_doubleNewlineWritten = true;
|
||||
}
|
||||
}
|
||||
lastWasList = block.textList();
|
||||
}
|
||||
@ -259,11 +285,13 @@ static void maybeEscapeFirstChar(QString &s)
|
||||
}
|
||||
}
|
||||
|
||||
int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat)
|
||||
int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat, bool ignoreEmpty)
|
||||
{
|
||||
if (block.text().isEmpty() && ignoreEmpty)
|
||||
return 0;
|
||||
const int ColumnLimit = 80;
|
||||
QTextBlockFormat blockFmt = block.blockFormat();
|
||||
bool indentedCodeBlock = false;
|
||||
bool missedBlankCodeBlockLine = false;
|
||||
if (block.textList()) { // it's a list-item
|
||||
auto fmt = block.textList()->format();
|
||||
const int listLevel = fmt.indent();
|
||||
@ -324,7 +352,28 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
|
||||
} else if (blockFmt.hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) {
|
||||
m_stream << "- - -\n"; // unambiguous horizontal rule, not an underline under a heading
|
||||
return 0;
|
||||
} else if (blockFmt.hasProperty(QTextFormat::BlockCodeFence) || blockFmt.stringProperty(QTextFormat::BlockCodeLanguage).length() > 0) {
|
||||
// It's important to preserve blank lines in code blocks. But blank lines in code blocks
|
||||
// inside block quotes are getting preserved anyway (along with the "> " prefix).
|
||||
if (!blockFmt.hasProperty(QTextFormat::BlockQuoteLevel))
|
||||
missedBlankCodeBlockLine = true; // only if we don't get any fragments below
|
||||
if (!m_fencedCodeBlock) {
|
||||
QString fenceChar = blockFmt.stringProperty(QTextFormat::BlockCodeFence);
|
||||
if (fenceChar.isEmpty())
|
||||
fenceChar = QLatin1String("`");
|
||||
m_codeBlockFence = QString(3, fenceChar.at(0));
|
||||
// A block quote can contain an indented code block, but not vice-versa.
|
||||
m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space) << m_codeBlockFence
|
||||
<< Space << blockFmt.stringProperty(QTextFormat::BlockCodeLanguage) << Newline;
|
||||
m_fencedCodeBlock = true;
|
||||
}
|
||||
} else if (!blockFmt.indent()) {
|
||||
if (m_fencedCodeBlock) {
|
||||
m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
|
||||
<< m_codeBlockFence << Newline;
|
||||
m_fencedCodeBlock = false;
|
||||
m_codeBlockFence.clear();
|
||||
}
|
||||
m_wrappedLineIndent = 0;
|
||||
m_linePrefix.clear();
|
||||
if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) {
|
||||
@ -337,7 +386,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
|
||||
if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) {
|
||||
// A block quote can contain an indented code block, but not vice-versa.
|
||||
m_linePrefix += QString(4, Space);
|
||||
indentedCodeBlock = true;
|
||||
m_indentedCodeBlock = true;
|
||||
}
|
||||
}
|
||||
if (blockFmt.headingLevel())
|
||||
@ -358,6 +407,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
|
||||
bool strikeOut = false;
|
||||
QString backticks(Backtick);
|
||||
for (QTextBlock::Iterator frag = block.begin(); !frag.atEnd(); ++frag) {
|
||||
missedBlankCodeBlockLine = false;
|
||||
QString fragmentText = frag.fragment().text();
|
||||
while (fragmentText.endsWith(Newline))
|
||||
fragmentText.chop(1);
|
||||
@ -401,7 +451,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
|
||||
bool monoFrag = fontInfo.fixedPitch();
|
||||
QString markers;
|
||||
if (!ignoreFormat) {
|
||||
if (monoFrag != mono && !indentedCodeBlock) {
|
||||
if (monoFrag != mono && !m_indentedCodeBlock && !m_fencedCodeBlock) {
|
||||
if (monoFrag)
|
||||
backticks = QString(adjacentBackticksCount(fragmentText) + 1, Backtick);
|
||||
markers += backticks;
|
||||
@ -501,6 +551,8 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
|
||||
m_stream << "~~";
|
||||
col += 2;
|
||||
}
|
||||
if (missedBlankCodeBlockLine)
|
||||
m_stream << Newline;
|
||||
return col;
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
bool writeAll(const QTextDocument *document);
|
||||
void writeTable(const QAbstractItemModel *table);
|
||||
|
||||
int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat);
|
||||
int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat, bool ignoreEmpty);
|
||||
void writeFrame(const QTextFrame *frame);
|
||||
|
||||
private:
|
||||
@ -82,9 +82,12 @@ private:
|
||||
QTextDocument::MarkdownFeatures m_features;
|
||||
QMap<QTextList *, ListInfo> m_listInfo;
|
||||
QString m_linePrefix;
|
||||
QString m_codeBlockFence;
|
||||
int m_wrappedLineIndent = 0;
|
||||
int m_lastListIndent = 1;
|
||||
bool m_doubleNewlineWritten = false;
|
||||
bool m_indentedCodeBlock = false;
|
||||
bool m_fencedCodeBlock = false;
|
||||
};
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -20,21 +20,43 @@ MacFarlane writes:
|
||||
> equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc
|
||||
> manual:
|
||||
|
||||
> 1. List item one.
|
||||
> +
|
||||
> List item one continued with a second paragraph followed by an
|
||||
> Indented block.
|
||||
> +
|
||||
> .................
|
||||
> $ ls *.sh
|
||||
> $ mv *.sh ~/tmp
|
||||
> .................
|
||||
> +
|
||||
> List item continued with a third paragraph.
|
||||
>
|
||||
> 2. List item two continued with an open block.
|
||||
> ...
|
||||
>
|
||||
> ``` AsciiDoc
|
||||
> 1. List item one.
|
||||
> +
|
||||
> List item one continued with a second paragraph followed by an
|
||||
> Indented block.
|
||||
> +
|
||||
> .................
|
||||
> $ ls *.sh
|
||||
> $ mv *.sh ~/tmp
|
||||
> .................
|
||||
> +
|
||||
> List item continued with a third paragraph.
|
||||
>
|
||||
> 2. List item two continued with an open block.
|
||||
> ...
|
||||
> ```
|
||||
The quotation includes an embedded quotation and a code quotation and ends with
|
||||
an ellipsis due to being incomplete.
|
||||
|
||||
Now let's have an indented code block:
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
printf("# hello markdown\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
and end with a fenced code block:
|
||||
~~~ pseudocode
|
||||
#include <something.h>
|
||||
#include <else.h>
|
||||
|
||||
a block {
|
||||
a statement;
|
||||
another statement;
|
||||
}
|
||||
~~~
|
||||
|
||||
|
@ -419,6 +419,9 @@ void tst_QTextMarkdownWriter::fromHtml_data()
|
||||
QTest::newRow("image") <<
|
||||
"<img src=\"/url\" alt=\"foo\" title=\"title\"/>" <<
|
||||
"![foo](/url \"title\")\n\n";
|
||||
QTest::newRow("code") <<
|
||||
"<pre class=\"language-pseudocode\">\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n</pre>" <<
|
||||
"``` pseudocode\n#include \"foo.h\"\n\nblock {\n statement();\n}\n```\n\n";
|
||||
// TODO
|
||||
// QTest::newRow("escaped number and paren after double newline") <<
|
||||
// "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<
|
||||
|
Loading…
Reference in New Issue
Block a user