From cfcd442b2fcdac4d5f49b0850f73de0153fa287f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 22 Jun 2025 16:33:42 -0700 Subject: [PATCH] Extract citationSuffix, citationPrefix. In transforming pandoc Cite to citeproc Citation, extract a `citationSuffix` and `citationPrefix` from the last item's suffix and first item's prefix, respectively, if they contain a `|` character which separates the item's suffix or prefix from the whole Citation's. for example: [for example, see |@C1; @A3; @B4|, and others] Here "for example, see" acts as a prefix for the whole group and will remain at the beginning even if the citation items are reordered by citeproc. Similarly, ", and others" will be a suffix for the whole group. Closes #10894. Notes: 1. The org reader now adds global prefixes and suffixes the same way as the Markdown reader: as affixes to the first item's prefix or the last item's suffix, separated by a pipe (`|`). 2. The org writer, however, has not been modified to convert the `|` to a `;`, as required by org-cite syntax. 3. This change doesn't currently do what one would expect, because of changes that were made to citeproc to prevent citation items with prefixes and suffixes from being sorted. Hence in `test/command/10894.md`, we have test output ``` (Doe, 2020; Smith, 2021) ``` without affixes, but ``` (see Smith, 2021; Doe, 2020, and others) ``` with affixes. To make this work well, we'd need to remove the citeproc code that prevented bad results before we had proper global prefixes and suffixes. However, removing this code would mean that existing documents would render differently, unless the new pipe syntax for citation affixes were used. That may be something we want to avoid. 4. The use of pipes to separate out global affixes from item-level affixes is a kludge that could be avoided if we added additional fields to Cite in the pandoc AST. However, AST changes are disruptive, so perhaps it's not worth doing that. --- MANUAL.txt | 13 +++++++++ src/Text/Pandoc/Citeproc.hs | 40 +++++++++++++++++++++++--- src/Text/Pandoc/Citeproc/Locator.hs | 5 ++-- src/Text/Pandoc/Readers/Markdown.hs | 19 ++++-------- src/Text/Pandoc/Readers/Org/Inlines.hs | 13 ++++++--- test/command/10894.md | 34 ++++++++++++++++++++++ 6 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 test/command/10894.md diff --git a/MANUAL.txt b/MANUAL.txt index 1f9f04362226..24b3429bc1bc 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5865,6 +5865,19 @@ the suffix as locator by prepending curly braces: [@smith, {pp. iv, vi-xi, (xv)-(xvii)} with suffix here] [@smith{}, 99 years later] +A prefix or suffix can be marked as going with the whole citation +rather than an individual item. A horizontal bar (`|`) is used +to separate the global citation prefix or suffix from the individual +items: + + [for example, see |@C1; @A3; @B4, in part|, and others] + +The global prefix/suffix will always be at the beginning/end +of the rendered citation, even if the style causes the +items to be sorted in a different order. By contrast, the +prefixes and suffixes on each item will move with the items when +they are sorted. + A minus sign (`-`) before the `@` will suppress mention of the author in the citation. This can be useful when the author is already mentioned in the text: diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 5ab245e1697a..954953fdb6f3 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -299,17 +299,49 @@ getCitations locale otherIdsMap = Foldable.toList . query getCitation where getCitation (Cite cs _fallback) = Seq.singleton $ Citeproc.Citation { Citeproc.citationId = Nothing - , Citeproc.citationPrefix = Nothing - , Citeproc.citationSuffix = Nothing + , Citeproc.citationPrefix = pref + , Citeproc.citationSuffix = suff , Citeproc.citationNoteNumber = case cs of [] -> Nothing (Pandoc.Citation{ Pandoc.citationNoteNum = n }: _) | n > 0 -> Just n | otherwise -> Nothing - , Citeproc.citationItems = - fromPandocCitations locale otherIdsMap cs + , Citeproc.citationItems = items } + where + (pref, suff, items) = + case fromPandocCitations locale otherIdsMap cs of + [] -> (Nothing, Nothing, []) + (i:is) -> + let (pref', i') = case citationItemPrefix i of + Nothing -> (Nothing, i) + Just p -> + case splitInlinesOnPipe (B.toList p) of + (_,[]) -> (Nothing, i) + (as,bs) -> (Just (B.fromList as), + i{ citationItemPrefix = Just (B.fromList bs) }) + (suff', is') = case reverse is of + [] -> (Nothing, []) + (i'':is'') -> + case Citeproc.citationItemSuffix i'' of + Nothing -> (Nothing, is) + Just s -> + case splitInlinesOnPipe (B.toList s) of + (_,[]) -> (Nothing, is) + (as,bs) -> (Just (B.fromList bs), reverse + (i''{ citationItemSuffix = Just (B.fromList as) }:is'')) + in (pref', suff', i':is') + splitInlinesOnPipe ils = + case break isStrWithPipe ils of + (xs,Str s : ys) -> + let (as,bs) = T.break (=='|') s + bs' = T.drop 1 bs + in (xs ++ [Str as | not (T.null as)], + [Str bs' | not (T.null bs')] ++ ys) + _ -> (ils,[]) + isStrWithPipe (Str s) = T.any (=='|') s + isStrWithPipe _ = False getCitation _ = mempty fromPandocCitations :: Locale diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index ee5459e3d7ac..9ee58506b0dd 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -19,7 +19,6 @@ import Control.Monad (mzero) import qualified Data.Map as M import Data.Char (isSpace, isPunctuation, isDigit) - data LocatorInfo = LocatorInfo{ locatorRaw :: Text , locatorLabel :: Text @@ -57,9 +56,11 @@ pLocatorWords locMap = do maybeAddComma :: [Inline] -> [Inline] maybeAddComma [] = [] maybeAddComma ils@(Space : _) = ils +maybeAddComma ils@(SoftBreak : _) = ils +maybeAddComma ils@(LineBreak : _) = ils maybeAddComma ils@(Str t : _) | Just (c, _) <- T.uncons t - , isPunctuation c = ils + , isPunctuation c || c == '|' = ils maybeAddComma ils = Str "," : Space : ils pLocatorDelimited :: LocatorMap -> LocatorParser LocatorInfo diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d81c4e3c1135..248bf0146c37 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -55,7 +55,6 @@ import Text.Pandoc.Shared import Text.Pandoc.URI (escapeURI, isURI, pBase64DataURI) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) --- import Debug.Trace (traceShowId) type MarkdownParser m = ParsecT Sources ParserState m @@ -2247,14 +2246,8 @@ normalCite = try $ do return citations suffix :: PandocMonad m => MarkdownParser m (F Inlines) -suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - spnl - ils <- many (notFollowedBy (oneOf ";]") >> inline) - let rest = trimInlinesF (mconcat ils) - return $ if hasSpace && not (null ils) - then (B.space <>) <$> rest - else rest +suffix = try $ + mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) prefix :: PandocMonad m => MarkdownParser m (F Inlines) prefix = trimInlinesF . mconcat <$> @@ -2274,11 +2267,11 @@ citation = try $ do suff <- suffix noteNum <- stateNoteNumber <$> getState return $ do - x <- pref - y <- suff + pref' <- B.toList <$> pref + suff' <- B.toList <$> suff return Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y + , citationPrefix = pref' + , citationSuffix = suff' , citationMode = if suppress_author then SuppressAuthor else NormalCitation diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 4d901ffc4bbc..99f71f306f3a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -182,21 +182,26 @@ adjustCiteStyle sty cs = do addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) addPrefixToFirstItem aff cs = do cs' <- cs - aff' <- aff + aff' <- B.toList <$> aff case cs' of [] -> return [] (d:ds) -> return (d{ citationPrefix = - B.toList aff' <> citationPrefix d }:ds) + if null aff' + then citationPrefix d + else aff' ++ (Str "|" : citationPrefix d) }:ds) addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) addSuffixToLastItem aff cs = do cs' <- cs - aff' <- aff + aff' <- B.toList <$> aff case lastMay cs' of Nothing -> return cs' Just d -> return (init cs' ++ [d{ citationSuffix = - citationSuffix d <> B.toList aff' }]) + citationSuffix d <> + if null aff' + then [] + else Str "|" : aff' }]) citeItems :: PandocMonad m => OrgParser m (F [Citation]) citeItems = sequence <$> sepBy1' citeItem (char ';' <* void (many spaceChar)) diff --git a/test/command/10894.md b/test/command/10894.md new file mode 100644 index 000000000000..25d68c9f7d8b --- /dev/null +++ b/test/command/10894.md @@ -0,0 +1,34 @@ +``` +% pandoc --citeproc -t plain --csl command/apa.csl +--- +references: +- author: + - family: Doe + given: John + container-title: Journal of Examples + id: doe2020 + issue: 1 + issued: 2020 + page: 1-10 + title: An example article + type: article-journal + volume: 1 +- author: + - family: Smith + given: Jane + id: smith2021 + issued: 2021 + publisher: Example Press + title: A sample book + type: book +suppress-bibliography: true +--- +[@smith2021; @doe2020] + +[see |@smith2021; @doe2020|, and others] +^D +(Doe, 2020; Smith, 2021) + +(see Smith, 2021; Doe, 2020, and others) +``` +