Skip to content

Commit cf1207c

Browse files
committed
feat(html): add paragraph_attributes extension
- Introduce `Ext_paragraph_attributes` to control preservation of attributes on paragraph tags.
- Enable the extension by default for all HTML-based formats.
- The HTML reader now wraps attributed paragraphs in a `Div` when the `native_divs` extension is enabled.
- The HTML writer unwraps the `Div` back to an attributed `<p>` tag.
- Add tests to verify attribute handling with and without the new extension.
1 parent 7ad3573 commit cf1207c

File tree

4 files changed

+37
-8
lines changed

4 files changed

+37
-8
lines changed

src/Text/Pandoc/Extensions.hs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ data Extension =
109109
| Ext_ntb -- ^ ConTeXt Natural Tables
110110
| Ext_old_dashes -- ^ -- = em, - before number = en
111111
| Ext_pandoc_title_block -- ^ Pandoc title block
112+
| Ext_paragraph_attributes -- ^ Preserve attributes on paragraphs (without this extension, pandoc strips all attributes from paragraphs)
112113
| Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
113114
| Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines
114115
| Ext_raw_html -- ^ Allow raw HTML
@@ -213,6 +214,7 @@ pandocExtensions = extensionsFromList
213214
[ Ext_footnotes
214215
, Ext_inline_notes
215216
, Ext_pandoc_title_block
217+
, Ext_paragraph_attributes
216218
, Ext_yaml_metadata_block
217219
, Ext_table_captions
218220
, Ext_implicit_figures
@@ -435,7 +437,8 @@ getDefaultExtensions "html" = extensionsFromList
435437
[Ext_auto_identifiers,
436438
Ext_native_divs,
437439
Ext_line_blocks,
438-
Ext_native_spans]
440+
Ext_native_spans,
441+
Ext_paragraph_attributes]
439442
getDefaultExtensions "html4" = getDefaultExtensions "html"
440443
getDefaultExtensions "html5" = getDefaultExtensions "html"
441444
getDefaultExtensions "epub" = extensionsFromList

src/Text/Pandoc/Readers/HTML.hs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ import Text.Pandoc.Error
5959
import Text.Pandoc.Logging
6060
import Text.Pandoc.Options (
6161
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
62-
Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
62+
Ext_native_spans, Ext_raw_html,
63+
Ext_line_blocks, Ext_raw_tex),
6364
ReaderOptions (readerExtensions, readerStripComments),
6465
extensionEnabled)
6566
import Text.Pandoc.Parsing hiding ((<|>))
@@ -632,8 +633,7 @@ pParaWithWrapper (ident, classes, kvs) = do
632633
guard (null contents)
633634
return mempty) <|> do
634635
let wrapperAttr = ("wrapper", "1")
635-
let finalKVs = wrapperAttr : kvs
636-
let finalAttrs = (ident, classes, finalKVs)
636+
let finalAttrs = (ident, classes, wrapperAttr : kvs)
637637
return $ B.divWith finalAttrs (B.para contents)
638638

639639
-- Helper function for pPara when no significant attributes are present
@@ -648,11 +648,11 @@ pParaSimple = do
648648
pPara :: PandocMonad m => TagParser m Blocks
649649
pPara = do
650650
TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "p" [])
651+
exts <- getOption readerExtensions
651652
let attr@(ident, classes, kvs) = toAttr attr'
652653
-- "Significant" attributes are any id, class, or key-value pair.
653654
let hasSignificantAttributes = not (T.null ident) || not (null classes) || not (null kvs)
654-
655-
if hasSignificantAttributes
655+
if hasSignificantAttributes && extensionEnabled Ext_native_divs exts
656656
then pParaWithWrapper attr
657657
else pParaSimple
658658

src/Text/Pandoc/Writers/HTML.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -750,7 +750,8 @@ blockToHtmlInner opts (Para lst) = do
750750
blockToHtmlInner opts (LineBlock lns) = do
751751
htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns
752752
return $ H.div ! A.class_ "line-block" $ htmlLines
753-
blockToHtmlInner opts (Div (ident, classes, kvs) [Para pans]) | Just "1" <- lookup "wrapper" kvs = do
753+
blockToHtmlInner opts (Div (ident, classes, kvs) [Para pans])
754+
| Just "1" <- lookup "wrapper" kvs = do
754755
-- This is a paragraph that was wrapped in a Div by the reader
755756
-- Unwrap it back to a <p> tag, transferring attributes from the Div
756757
let pKVs = filter (\(k,_) -> k /= "wrapper") kvs

test/Tests/Readers/HTML.hs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import Text.Pandoc.Shared (isHeaderBlock)
2323
import Text.Pandoc.Arbitrary ()
2424
import Text.Pandoc.Builder
2525
import Text.Pandoc.Walk (walk)
26+
import Test.Tasty.HUnit (testCase, assertEqual)
2627

2728
html :: Text -> Pandoc
2829
html = purely $ readHtml def
@@ -55,6 +56,16 @@ roundTrip b = d'' == d'''
5556
purely (writeHtml5String def
5657
{ writerWrapText = WrapPreserve })
5758

59+
-- | Build an HTML-to-HTML test case: read @input@ with 'readHtml' using the
-- reader extensions @readerExts@, write the resulting document back out with
-- 'writeHtml5String' using the writer extensions @writerExts@ (and 'WrapAuto'
-- text wrapping), then assert that the output equals @expected@.
-- The test name is also used as the assertion's failure-message prefix.
htmlHtmlTest :: TestName -> Extensions -> Extensions -> Text -> Text -> TestTree
60+
htmlHtmlTest name readerExts writerExts input expected =
61+
testCase name $ do
62+
let readerOpts = def { readerExtensions = readerExts }
63+
let writerOpts = def { writerExtensions = writerExts, writerWrapText = WrapAuto }
64+
actual <- runIOorExplode $ do
65+
pandoc <- readHtml readerOpts input
66+
writeHtml5String writerOpts pandoc
67+
-- Both sides are stripped so leading/trailing whitespace does not affect the comparison.
assertEqual name (T.strip expected) (T.strip actual)
68+
5869
tests :: [TestTree]
5970
tests = [ testGroup "base tag"
6071
[ test html "simple" $
@@ -175,7 +186,21 @@ tests = [ testGroup "base tag"
175186
"<p id=\"baz\" align=\"invalid\">Invalid align with id.</p>" =?>
176187
doc (divWith ("baz", [], [("wrapper", "1"), ("align", "invalid")]) (para (text "Invalid align with id.")))
177188
]
178-
, askOption $ \(QuickCheckTests numtests) ->
189+
, testGroup "paragraph-attributes-roundtrip"
190+
[ htmlHtmlTest
191+
"strip attributes when extension is disabled"
192+
(readerExtensions def)
193+
(writerExtensions def)
194+
"<p id=\"foo\" class=\"bar\">Hello</p>"
195+
"<p>Hello</p>"
196+
, htmlHtmlTest
197+
"keep attributes when extension is enabled"
198+
(enableExtension Ext_native_divs $ enableExtension Ext_paragraph_attributes (readerExtensions def))
199+
(enableExtension Ext_paragraph_attributes (writerExtensions def))
200+
"<p id=\"foo\" class=\"bar\">Hello</p>"
201+
"<p id=\"foo\" class=\"bar\">Hello</p>"
202+
]
203+
, askOption $ \(QuickCheckTests numtests) ->
179204
testProperty "Round trip" $
180205
withMaxSuccess (if QuickCheckTests numtests == defaultValue
181206
then 25

0 commit comments

Comments
 (0)