2017-12-19 14:25:55 +00:00
/****************************************************************************
* *
* * Copyright ( C ) 2019 The Qt Company Ltd .
* * Contact : https : //www.qt.io/licensing/
* *
* * This file is part of the test suite of the Qt Toolkit .
* *
* * $ QT_BEGIN_LICENSE : GPL - EXCEPT $
* * Commercial License Usage
* * Licensees holding valid commercial Qt licenses may use this file in
* * accordance with the commercial license agreement provided with the
* * Software or , alternatively , in accordance with the terms contained in
* * a written agreement between you and The Qt Company . For licensing terms
* * and conditions see https : //www.qt.io/terms-conditions. For further
* * information use the contact form at https : //www.qt.io/contact-us.
* *
* * GNU General Public License Usage
* * Alternatively , this file may be used under the terms of the GNU
* * General Public License version 3 as published by the Free Software
* * Foundation with exceptions as appearing in the file LICENSE . GPL3 - EXCEPT
* * included in the packaging of this file . Please review the following
* * information to ensure the GNU General Public License requirements will
* * be met : https : //www.gnu.org/licenses/gpl-3.0.html.
* *
* * $ QT_END_LICENSE $
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# include <QtTest/QtTest>
# include <QTextDocument>
# include <QTextCursor>
# include <QTextBlock>
# include <QTextList>
# include <QTextTable>
# include <QBuffer>
# include <QDebug>
# include <private/qtextmarkdownwriter_p.h>
// #define DEBUG_WRITE_OUTPUT
// Test fixture for QTextMarkdownWriter.
// init() allocates a QTextDocument into the document member and cleanup()
// deletes it again. The paragraph, list and table tests fill that document
// through a QTextCursor and compare the text returned by
// documentToUnixMarkdown() with an expected Markdown string, while
// rewriteDocument() works on a local QTextDocument of its own.
// Members declared at the end of the class:
//   documentToUnixMarkdown() - helper defined outside this view; presumably
//                              converts the document member to Markdown text
//                              (TODO confirm against its definition).
//   document                 - raw pointer to the document under test; it has
//                              no in-class initializer.
// NOTE(review): the date stamps and the commit-message paragraphs interleaved
// with the declarations below are not C++. They look like blame/annotation
// residue and are left untouched here.
class tst_QTextMarkdownWriter : public QObject
{
Q_OBJECT
public slots :
// Setup and teardown of the shared document (presumably invoked by QtTest
// around each test function - confirm against the Qt Test documentation).
void init ( ) ;
void cleanup ( ) ;
private slots :
// Test functions; a foo_data() slot supplies the data rows for foo().
void testWriteParagraph_data ( ) ;
void testWriteParagraph ( ) ;
void testWriteList ( ) ;
2019-10-14 15:59:16 +00:00
void testWriteEmptyList ( ) ;
2019-04-26 05:40:34 +00:00
void testWriteNestedBulletLists_data ( ) ;
2017-12-19 14:25:55 +00:00
void testWriteNestedBulletLists ( ) ;
void testWriteNestedNumericLists ( ) ;
void testWriteTable ( ) ;
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
void rewriteDocument_data ( ) ;
2017-12-19 14:25:55 +00:00
void rewriteDocument ( ) ;
void fromHtml_data ( ) ;
void fromHtml ( ) ;
private :
QString documentToUnixMarkdown ( ) ;
private :
QTextDocument * document ;
} ;
void tst_QTextMarkdownWriter : : init ( )
{
document = new QTextDocument ( ) ;
}
void tst_QTextMarkdownWriter : : cleanup ( )
{
delete document ;
}
void tst_QTextMarkdownWriter : : testWriteParagraph_data ( )
{
QTest : : addColumn < QString > ( " input " ) ;
QTest : : addColumn < QString > ( " output " ) ;
QTest : : newRow ( " empty " ) < < " " < <
" " ;
QTest : : newRow ( " spaces " ) < < " foobar word " < <
" foobar word \n \n " ;
QTest : : newRow ( " starting spaces " ) < < " starting spaces " < <
" starting spaces \n \n " ;
QTest : : newRow ( " trailing spaces " ) < < " trailing spaces " < <
" trailing spaces \n \n " ;
QTest : : newRow ( " tab " ) < < " word \t tab x " < <
" word \t tab x \n \n " ;
QTest : : newRow ( " tab2 " ) < < " word \t \t tab \t x " < <
" word \t \t tab \t x \n \n " ;
QTest : : newRow ( " misc " ) < < " foobar word \t tab x " < <
" foobar word \t tab x \n \n " ;
QTest : : newRow ( " misc2 " ) < < " \t \t Foo " < <
" \t \t Foo \n \n " ;
}
void tst_QTextMarkdownWriter : : testWriteParagraph ( )
{
QFETCH ( QString , input ) ;
QFETCH ( QString , output ) ;
QTextCursor cursor ( document ) ;
cursor . insertText ( input ) ;
QCOMPARE ( documentToUnixMarkdown ( ) , output ) ;
}
void tst_QTextMarkdownWriter : : testWriteList ( )
{
QTextCursor cursor ( document ) ;
QTextList * list = cursor . createList ( QTextListFormat : : ListDisc ) ;
cursor . insertText ( " ListItem 1 " ) ;
list - > add ( cursor . block ( ) ) ;
cursor . insertBlock ( ) ;
cursor . insertText ( " ListItem 2 " ) ;
list - > add ( cursor . block ( ) ) ;
QCOMPARE ( documentToUnixMarkdown ( ) , QString : : fromLatin1 (
" - ListItem 1 \n - ListItem 2 \n " ) ) ;
}
2019-10-14 15:59:16 +00:00
void tst_QTextMarkdownWriter : : testWriteEmptyList ( )
{
QTextCursor cursor ( document ) ;
cursor . createList ( QTextListFormat : : ListDisc ) ;
QCOMPARE ( documentToUnixMarkdown ( ) , QString : : fromLatin1 ( " - \n " ) ) ;
}
2019-04-26 05:40:34 +00:00
void tst_QTextMarkdownWriter : : testWriteNestedBulletLists_data ( )
{
QTest : : addColumn < bool > ( " checkbox " ) ;
QTest : : addColumn < bool > ( " checked " ) ;
QTest : : addColumn < bool > ( " continuationLine " ) ;
QTest : : addColumn < bool > ( " continuationParagraph " ) ;
QTest : : addColumn < QString > ( " expectedOutput " ) ;
QTest : : newRow ( " plain bullets " ) < < false < < false < < false < < false < <
" - ListItem 1 \n * ListItem 2 \n + ListItem 3 \n - ListItem 4 \n * ListItem 5 \n " ;
QTest : : newRow ( " bullets with continuation lines " ) < < false < < false < < true < < false < <
" - ListItem 1 \n * ListItem 2 \n + ListItem 3 with text that won't fit on one line and thus needs a \n continuation \n - ListItem 4 \n * ListItem 5 with text that won't fit on one line and thus needs a \n continuation \n " ;
QTest : : newRow ( " bullets with continuation paragraphs " ) < < false < < false < < false < < true < <
" - ListItem 1 \n \n * ListItem 2 \n + ListItem 3 \n \n continuation \n \n - ListItem 4 \n \n * ListItem 5 \n \n continuation \n \n " ;
QTest : : newRow ( " unchecked " ) < < true < < false < < false < < false < <
" - [ ] ListItem 1 \n * [ ] ListItem 2 \n + [ ] ListItem 3 \n - [ ] ListItem 4 \n * [ ] ListItem 5 \n " ;
QTest : : newRow ( " checked " ) < < true < < true < < false < < false < <
" - [x] ListItem 1 \n * [x] ListItem 2 \n + [x] ListItem 3 \n - [x] ListItem 4 \n * [x] ListItem 5 \n " ;
QTest : : newRow ( " checked with continuation lines " ) < < true < < true < < true < < false < <
" - [x] ListItem 1 \n * [x] ListItem 2 \n + [x] ListItem 3 with text that won't fit on one line and thus needs a \n continuation \n - [x] ListItem 4 \n * [x] ListItem 5 with text that won't fit on one line and thus needs a \n continuation \n " ;
QTest : : newRow ( " checked with continuation paragraphs " ) < < true < < true < < false < < true < <
" - [x] ListItem 1 \n \n * [x] ListItem 2 \n + [x] ListItem 3 \n \n continuation \n \n - [x] ListItem 4 \n \n * [x] ListItem 5 \n \n continuation \n \n " ;
}
2017-12-19 14:25:55 +00:00
void tst_QTextMarkdownWriter : : testWriteNestedBulletLists ( )
{
2019-04-26 05:40:34 +00:00
QFETCH ( bool , checkbox ) ;
QFETCH ( bool , checked ) ;
QFETCH ( bool , continuationParagraph ) ;
QFETCH ( bool , continuationLine ) ;
QFETCH ( QString , expectedOutput ) ;
2017-12-19 14:25:55 +00:00
QTextCursor cursor ( document ) ;
2019-04-26 05:40:34 +00:00
QTextBlockFormat blockFmt = cursor . blockFormat ( ) ;
if ( checkbox ) {
2019-10-02 10:19:11 +00:00
blockFmt . setMarker ( checked ? QTextBlockFormat : : MarkerType : : Checked : QTextBlockFormat : : MarkerType : : Unchecked ) ;
2019-04-26 05:40:34 +00:00
cursor . setBlockFormat ( blockFmt ) ;
}
2017-12-19 14:25:55 +00:00
QTextList * list1 = cursor . createList ( QTextListFormat : : ListDisc ) ;
cursor . insertText ( " ListItem 1 " ) ;
list1 - > add ( cursor . block ( ) ) ;
QTextListFormat fmt2 ;
fmt2 . setStyle ( QTextListFormat : : ListCircle ) ;
fmt2 . setIndent ( 2 ) ;
QTextList * list2 = cursor . insertList ( fmt2 ) ;
cursor . insertText ( " ListItem 2 " ) ;
QTextListFormat fmt3 ;
fmt3 . setStyle ( QTextListFormat : : ListSquare ) ;
fmt3 . setIndent ( 3 ) ;
cursor . insertList ( fmt3 ) ;
2019-04-26 05:40:34 +00:00
cursor . insertText ( continuationLine ?
" ListItem 3 with text that won't fit on one line and thus needs a continuation " :
" ListItem 3 " ) ;
if ( continuationParagraph ) {
QTextBlockFormat blockFmt ;
blockFmt . setIndent ( 2 ) ;
cursor . insertBlock ( blockFmt ) ;
cursor . insertText ( " continuation " ) ;
}
2017-12-19 14:25:55 +00:00
2019-04-26 05:40:34 +00:00
cursor . insertBlock ( blockFmt ) ;
2017-12-19 14:25:55 +00:00
cursor . insertText ( " ListItem 4 " ) ;
list1 - > add ( cursor . block ( ) ) ;
cursor . insertBlock ( ) ;
2019-04-26 05:40:34 +00:00
cursor . insertText ( continuationLine ?
" ListItem 5 with text that won't fit on one line and thus needs a continuation " :
" ListItem 5 " ) ;
2017-12-19 14:25:55 +00:00
list2 - > add ( cursor . block ( ) ) ;
2019-04-26 05:40:34 +00:00
if ( continuationParagraph ) {
QTextBlockFormat blockFmt ;
blockFmt . setIndent ( 2 ) ;
cursor . insertBlock ( blockFmt ) ;
cursor . insertText ( " continuation " ) ;
}
2017-12-19 14:25:55 +00:00
2019-04-26 05:40:34 +00:00
QString output = documentToUnixMarkdown ( ) ;
# ifdef DEBUG_WRITE_OUTPUT
{
QFile out ( " /tmp/ " + QLatin1String ( QTest : : currentDataTag ( ) ) + " .md " ) ;
out . open ( QFile : : WriteOnly ) ;
out . write ( output . toUtf8 ( ) ) ;
out . close ( ) ;
}
# endif
QCOMPARE ( documentToUnixMarkdown ( ) , expectedOutput ) ;
2017-12-19 14:25:55 +00:00
}
void tst_QTextMarkdownWriter : : testWriteNestedNumericLists ( )
{
QTextCursor cursor ( document ) ;
QTextList * list1 = cursor . createList ( QTextListFormat : : ListDecimal ) ;
cursor . insertText ( " ListItem 1 " ) ;
list1 - > add ( cursor . block ( ) ) ;
QTextListFormat fmt2 ;
fmt2 . setStyle ( QTextListFormat : : ListLowerAlpha ) ;
fmt2 . setNumberSuffix ( QLatin1String ( " ) " ) ) ;
fmt2 . setIndent ( 2 ) ;
QTextList * list2 = cursor . insertList ( fmt2 ) ;
cursor . insertText ( " ListItem 2 " ) ;
QTextListFormat fmt3 ;
fmt3 . setStyle ( QTextListFormat : : ListDecimal ) ;
fmt3 . setIndent ( 3 ) ;
cursor . insertList ( fmt3 ) ;
cursor . insertText ( " ListItem 3 " ) ;
cursor . insertBlock ( ) ;
cursor . insertText ( " ListItem 4 " ) ;
list1 - > add ( cursor . block ( ) ) ;
cursor . insertBlock ( ) ;
cursor . insertText ( " ListItem 5 " ) ;
list2 - > add ( cursor . block ( ) ) ;
// There's no QTextList API to set the starting number so we hard-coded all lists to start at 1 (QTBUG-65384)
QCOMPARE ( documentToUnixMarkdown ( ) , QString : : fromLatin1 (
2019-04-26 05:40:34 +00:00
" 1. ListItem 1 \n 1) ListItem 2 \n 1. ListItem 3 \n 2. ListItem 4 \n 2) ListItem 5 \n " ) ) ;
2017-12-19 14:25:55 +00:00
}
void tst_QTextMarkdownWriter : : testWriteTable ( )
{
QTextCursor cursor ( document ) ;
QTextTable * table = cursor . insertTable ( 4 , 3 ) ;
cursor = table - > cellAt ( 0 , 0 ) . firstCursorPosition ( ) ;
// valid Markdown tables need headers, but QTextTable doesn't make that distinction
// so QTextMarkdownWriter assumes the first row of any table is a header
cursor . insertText ( " one " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " two " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " three " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " alice " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " bob " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " carl " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " dennis " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " eric " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " fiona " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " gina " ) ;
/*
| one | two | three |
| - - - - - - | - - - - | - - - - - |
| alice | bob | carl |
| dennis | eric | fiona |
| gina | | |
*/
QString md = documentToUnixMarkdown ( ) ;
# ifdef DEBUG_WRITE_OUTPUT
{
QFile out ( " /tmp/table.md " ) ;
out . open ( QFile : : WriteOnly ) ;
out . write ( md . toUtf8 ( ) ) ;
out . close ( ) ;
}
# endif
QString expected = QString : : fromLatin1 (
" \n |one |two |three| \n |------|----|-----| \n |alice |bob |carl | \n |dennis|eric|fiona| \n |gina | | | \n \n " ) ;
QCOMPARE ( md , expected ) ;
// create table with merged cells
document - > clear ( ) ;
cursor = QTextCursor ( document ) ;
table = cursor . insertTable ( 3 , 3 ) ;
table - > mergeCells ( 0 , 0 , 1 , 2 ) ;
table - > mergeCells ( 1 , 1 , 1 , 2 ) ;
cursor = table - > cellAt ( 0 , 0 ) . firstCursorPosition ( ) ;
cursor . insertText ( " a " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " b " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " c " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " d " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " e " ) ;
cursor . movePosition ( QTextCursor : : NextCell ) ;
cursor . insertText ( " f " ) ;
/*
+ - - - + - +
| a | b |
+ - - - + - +
| c | d |
+ - + - + - +
| e | f | |
+ - + - + - +
generates
| a | | b |
| - | - | - |
| c | d | |
| e | f | |
*/
md = documentToUnixMarkdown ( ) ;
# ifdef DEBUG_WRITE_OUTPUT
{
QFile out ( " /tmp/table-merged-cells.md " ) ;
out . open ( QFile : : WriteOnly ) ;
out . write ( md . toUtf8 ( ) ) ;
out . close ( ) ;
}
# endif
QCOMPARE ( md , QString : : fromLatin1 ( " \n |a ||b| \n |-|-|-| \n |c|d || \n |e|f| | \n \n " ) ) ;
}
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
// Data rows for rewriteDocument(): declares a single QString column holding an
// input file name and adds one row per Markdown fixture file (block quotes,
// a general example, headings with lists, word wrap, links, lists with code
// blocks).
// NOTE(review): the date stamps and the commit-message text inside this body
// are annotation residue rather than C++ and are left byte-identical here.
void tst_QTextMarkdownWriter : : rewriteDocument_data ( )
{
QTest : : addColumn < QString > ( " inputFile " ) ;
QTest : : newRow ( " block quotes " ) < < " blockquotes.md " ;
QTest : : newRow ( " example " ) < < " example.md " ;
2019-05-13 10:58:37 +00:00
QTest : : newRow ( " list items after headings " ) < < " headingsAndLists.md " ;
2019-05-20 09:50:26 +00:00
QTest : : newRow ( " word wrap " ) < < " wordWrap.md " ;
2020-01-31 13:32:24 +00:00
QTest : : newRow ( " links " ) < < " links.md " ;
2020-04-18 22:39:32 +00:00
QTest : : newRow ( " lists and code blocks " ) < < " listsAndCodeBlocks.md " ;
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
}
2017-12-19 14:25:55 +00:00
// Round-trip test driven by the inputFile column.
// The named fixture is located with QFINDTESTDATA below the data directory,
// opened read-only in text mode (the test stops via QVERIFY if that fails) and
// read as UTF-8. The text is loaded into a local QTextDocument with
// setMarkdown(), written back with toMarkdown(), and the regenerated Markdown
// must equal the original text. With DEBUG_WRITE_OUTPUT defined, the
// regenerated Markdown is additionally written to a rewrite- file under /tmp;
// the result of that open() call is not checked.
// This test uses its own local document rather than the fixture member.
// NOTE(review): the date stamps and the three commit-message copies inside
// this body are annotation residue rather than C++ and are left byte-identical
// here.
void tst_QTextMarkdownWriter : : rewriteDocument ( )
{
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
QFETCH ( QString , inputFile ) ;
2017-12-19 14:25:55 +00:00
QTextDocument doc ;
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
QFile f ( QFINDTESTDATA ( " data/ " + inputFile ) ) ;
2017-12-19 14:25:55 +00:00
QVERIFY ( f . open ( QFile : : ReadOnly | QIODevice : : Text ) ) ;
QString orig = QString : : fromUtf8 ( f . readAll ( ) ) ;
f . close ( ) ;
doc . setMarkdown ( orig ) ;
QString md = doc . toMarkdown ( ) ;
# ifdef DEBUG_WRITE_OUTPUT
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
QFile out ( " /tmp/rewrite- " + inputFile ) ;
2017-12-19 14:25:55 +00:00
out . open ( QFile : : WriteOnly ) ;
out . write ( md . toUtf8 ( ) ) ;
out . close ( ) ;
# endif
QCOMPARE ( md , orig ) ;
}
// Data table for fromHtml(): every row pairs an HTML fragment (column
// expectedInput) with the Markdown text that fromHtml() expects to get back
// after loading that fragment into the document (column expectedOutput).
void tst_QTextMarkdownWriter : : fromHtml_data ( )
{
2019-04-26 05:40:34 +00:00
QTest : : addColumn < QString > ( " expectedInput " ) ;
QTest : : addColumn < QString > ( " expectedOutput " ) ;
2017-12-19 14:25:55 +00:00
// An italic span holding one long URL: expected as a single *emphasized* run.
QTest : : newRow ( " long URL " ) < <
" <span style= \" font-style:italic; \" >https://www.example.com/dir/subdir/subsubdir/subsubsubdir/subsubsubsubdir/subsubsubsubsubdir/</span> " < <
" *https://www.example.com/dir/subdir/subsubdir/subsubsubdir/subsubsubsubdir/subsubsubsubsubdir/* \n \n " ;
// Asterisks used as arithmetic operators are expected to come through unescaped.
QTest : : newRow ( " non-emphasis inline asterisk " ) < < " 3 * 4 " < < " 3 * 4 \n \n " ;
QTest : : newRow ( " arithmetic " ) < < " (2 * a * x + b)^2 = b^2 - 4 * a * c " < < " (2 * a * x + b)^2 = b^2 - 4 * a * c \n \n " ;
// A '*', '+' or '-' that ends up first on a wrapped line is expected to be
// backslash-escaped in the output, as the three rows below show.
QTest : : newRow ( " escaped asterisk after newline " ) < <
" The first sentence of this paragraph holds 80 characters, then there's a star. * This is wrapped, but is <em>not</em> a bullet point. " < <
" The first sentence of this paragraph holds 80 characters, then there's a star. \n \\ * This is wrapped, but is *not* a bullet point. \n \n " ;
QTest : : newRow ( " escaped plus after newline " ) < <
" The first sentence of this paragraph holds 80 characters, then there's a plus. + This is wrapped, but is <em>not</em> a bullet point. " < <
" The first sentence of this paragraph holds 80 characters, then there's a plus. \n \\ + This is wrapped, but is *not* a bullet point. \n \n " ;
QTest : : newRow ( " escaped hyphen after newline " ) < <
" The first sentence of this paragraph holds 80 characters, then there's a minus. - This is wrapped, but is <em>not</em> a bullet point. " < <
" The first sentence of this paragraph holds 80 characters, then there's a minus. \n \\ - This is wrapped, but is *not* a bullet point. \n \n " ;
2019-04-26 05:40:34 +00:00
// List items followed by a continuation paragraph (<p>) or a continuation
// line (<br/>), first in a flat list, then with a nested list.
QTest : : newRow ( " list items with indented continuations " ) < <
" <ul><li>bullet<p>continuation paragraph</p></li><li>another bullet<br/>continuation line</li></ul> " < <
" - bullet \n \n continuation paragraph \n \n - another bullet \n continuation line \n " ;
QTest : : newRow ( " nested list items with continuations " ) < <
" <ul><li>bullet<p>continuation paragraph</p></li><li>another bullet<br/>continuation line</li><ul><li>bullet<p>continuation paragraph</p></li><li>another bullet<br/>continuation line</li></ul></ul> " < <
" - bullet \n \n continuation paragraph \n \n - another bullet \n continuation line \n \n - bullet \n \n continuation paragraph \n \n - another bullet \n continuation line \n " ;
// Ordered variant: the expected output numbers the nested list from 1 again
// and continues the outer list with 3 and 4 afterwards.
QTest : : newRow ( " nested ordered list items with continuations " ) < <
" <ol><li>item<p>continuation paragraph</p></li><li>another item<br/>continuation line</li><ol><li>item<p>continuation paragraph</p></li><li>another item<br/>continuation line</li></ol><li>another</li><li>another</li></ol> " < <
" 1. item \n \n continuation paragraph \n \n 2. another item \n continuation line \n \n 1. item \n \n continuation paragraph \n \n 2. another item \n continuation line \n \n 3. another \n 4. another \n " ;
2019-04-26 06:29:07 +00:00
// <hr/> between two text runs is expected as a "- - -" thematic break.
QTest : : newRow ( " thematic break " ) < <
" something<hr/>something else " < <
" something \n \n - - - \n something else \n \n " ;
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise detecting quote level by its margins
didn't make sense; markdown supports nesting quotes; and indenting
nested quotes via 40 pixels may be a bit too much, so we should consider
it subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 )
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
It will be necessary to add another attribute to remember whether the
code block is indented or fenced (assuming that's necessary).
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers, if we copy it.
But we can create a new one each time and it's OK.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
2019-04-26 06:12:18 +00:00
// A paragraph followed by a <blockquote>: each wrapped line of the quote is
// expected to start with "> ".
QTest : : newRow ( " block quote " ) < <
" <p>In 1958, Mahatma Gandhi was quoted as follows:</p><blockquote>The Earth provides enough to satisfy every man's need but not for every man's greed.</blockquote> " < <
" In 1958, Mahatma Gandhi was quoted as follows: \n \n > The Earth provides enough to satisfy every man's need but not for every man's \n > greed. \n \n " ;
2019-05-20 10:58:36 +00:00
// <img> with src, alt and title is expected as ![alt](src "title").
QTest : : newRow ( " image " ) < <
" <img src= \" /url \" alt= \" foo \" title= \" title \" /> " < <
" ![foo](/url \" title \" ) \n \n " ;
2019-05-20 15:40:12 +00:00
// <pre class="language-pseudocode"> is expected as a ``` fenced block whose
// opening fence carries the language name.
QTest : : newRow ( " code " ) < <
" <pre class= \" language-pseudocode \" > \n #include \" foo.h \" \n \n block { \n statement(); \n } \n \n </pre> " < <
2020-04-18 23:24:05 +00:00
" ```pseudocode \n #include \" foo.h \" \n \n block { \n statement(); \n } \n ``` \n \n " ;
2017-12-19 14:25:55 +00:00
// TODO
// QTest::newRow("escaped number and paren after double newline") <<
// "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<
// "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n";
// QTest::newRow("preformats with embedded backticks") <<
// "<pre>none `one` ``two``</pre><pre>```three``` ````four````</pre>plain" <<
// "``` none `one` ``two`` ```\n\n````` ```three``` ````four```` `````\n\nplain\n\n";
}
void tst_QTextMarkdownWriter : : fromHtml ( )
{
2019-04-26 05:40:34 +00:00
QFETCH ( QString , expectedInput ) ;
QFETCH ( QString , expectedOutput ) ;
2017-12-19 14:25:55 +00:00
2019-04-26 05:40:34 +00:00
document - > setHtml ( expectedInput ) ;
QString output = documentToUnixMarkdown ( ) ;
# ifdef DEBUG_WRITE_OUTPUT
{
QFile out ( " /tmp/ " + QLatin1String ( QTest : : currentDataTag ( ) ) + " .md " ) ;
out . open ( QFile : : WriteOnly ) ;
out . write ( output . toUtf8 ( ) ) ;
out . close ( ) ;
}
# endif
QCOMPARE ( output , expectedOutput ) ;
2017-12-19 14:25:55 +00:00
}
// Writes the whole test document out as Markdown in the GitHub dialect and
// returns the generated text.
QString tst_QTextMarkdownWriter::documentToUnixMarkdown()
{
    QString markdown;
    // The stream appends to `markdown`, which is the string handed back below.
    QTextStream markdownStream(&markdown, QIODevice::WriteOnly);
    QTextMarkdownWriter markdownWriter(markdownStream, QTextDocument::MarkdownDialectGitHub);
    markdownWriter.writeAll(document);
    return markdown;
}
// Generates the main() entry point that runs the tst_QTextMarkdownWriter test object.
QTEST_MAIN ( tst_QTextMarkdownWriter )
// The Q_OBJECT test class is declared in this source file rather than in a
// header, so its moc-generated code is pulled in here.
# include "tst_qtextmarkdownwriter.moc"