-
-
Notifications
You must be signed in to change notification settings - Fork 74
Smart Quotation Marks in Formattable Strings. #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
37fdec8
2e0bfcb
d73310c
7d6aa4e
59fe560
aa474f7
f5db8ee
6f128f0
7f74716
efb5309
074c2e7
b69d1b5
6bca476
380093a
c54ff8a
d2713c9
d03dc60
69a2d67
2a9f57b
85f0d6b
db812db
72a2cc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,225 @@ | ||
| use citationberg::TermForm; | ||
| use citationberg::taxonomy::OtherTerm; | ||
|
|
||
| use super::Context; | ||
| use super::taxonomy::EntryLike; | ||
|
|
||
| pub fn apply_quotes(s: &str, quotes: &SmartQuotes, inner: bool) -> String { | ||
| let mut res = String::with_capacity(s.len()); | ||
| let mut before = None; | ||
| let mut quoter = SmartQuoter::new(); | ||
| let mut escape = false; | ||
| for c in s.chars() { | ||
| match c { | ||
| '"' | '\'' if escape => { | ||
| res.push(c); | ||
| escape = false | ||
| } | ||
| '"' => res.push_str(quoter.quote(before, quotes, !inner)), | ||
| '\'' => res.push_str(quoter.quote(before, quotes, inner)), | ||
| '\\' if escape => { | ||
| res.push('\\'); | ||
| escape = false | ||
| } | ||
| '\\' => escape = true, | ||
| c => res.push(c), | ||
| } | ||
| before = Some(c); | ||
| } | ||
| res | ||
| } | ||
|
|
||
| /// A smart quote substitutor with zero lookahead. | ||
/// Zero-lookahead state for turning straight quotes into smart quotes.
#[derive(Debug, Clone)]
pub struct SmartQuoter {
    /// Number of quotations that are currently open (the stack height).
    depth: u8,
    /// Bit stack of quote kinds: bit `i` is set when the quotation opened at
    /// nesting level `i` is a double quote. Being a `u32`, it can track at
    /// most 32 levels.
    kinds: u32,
}
|
|
||
| impl SmartQuoter { | ||
| /// Start quoting. | ||
| pub fn new() -> Self { | ||
| Self { depth: 0, kinds: 0 } | ||
| } | ||
|
|
||
| /// Determine which smart quote to substitute given this quoter's nesting | ||
| /// state and the character immediately preceding the quote. | ||
| pub fn quote<'a>( | ||
| &mut self, | ||
| before: Option<char>, | ||
| quotes: &SmartQuotes<'a>, | ||
| double: bool, | ||
| ) -> &'a str { | ||
| let opened = self.top(); | ||
| let before = before.unwrap_or(' '); | ||
|
|
||
| // If we are after a number and haven't most recently opened a quote of | ||
| // this kind, produce a prime. Otherwise, we prefer a closing quote. | ||
| if before.is_numeric() && opened != Some(double) { | ||
| return if double { "″" } else { "′" }; | ||
| } | ||
|
|
||
| // If we have a single smart quote, didn't recently open a single | ||
| // quotation, and are after an alphabetic char or an object (e.g. a | ||
| // math equation), interpret this as an apostrophe. | ||
| if !double | ||
| && opened != Some(false) | ||
| && (before.is_alphabetic() || before == '\u{FFFC}') | ||
| { | ||
| return "’"; | ||
| } | ||
PgBiel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // If the most recently opened quotation is of this kind and the | ||
| // previous char does not indicate a nested quotation, close it. | ||
| if opened == Some(double) | ||
| && !before.is_whitespace() | ||
| && !is_opening_bracket(before) | ||
| { | ||
| self.pop(); | ||
| return quotes.close(double); | ||
| } | ||
|
|
||
| // Otherwise, open a new the quotation. | ||
| self.push(double); | ||
| quotes.open(double) | ||
| } | ||
|
|
||
| /// The top of our quotation stack. Returns `Some(double)` for the most | ||
| /// recently opened quote or `None` if we didn't open one. | ||
| fn top(&self) -> Option<bool> { | ||
| self.depth.checked_sub(1).map(|i| (self.kinds >> i) & 1 == 1) | ||
| } | ||
|
|
||
| /// Push onto the quotation stack. | ||
| fn push(&mut self, double: bool) { | ||
| if self.depth < 32 { | ||
| self.kinds |= (double as u32) << self.depth; | ||
| self.depth += 1; | ||
| } | ||
| } | ||
|
|
||
| /// Pop from the quotation stack. | ||
| fn pop(&mut self) { | ||
| self.depth -= 1; | ||
| self.kinds &= (1 << self.depth) - 1; | ||
| } | ||
| } | ||
|
|
||
| impl Default for SmartQuoter { | ||
| fn default() -> Self { | ||
| Self::new() | ||
| } | ||
| } | ||
|
|
||
| /// Whether the character is an opening bracket, parenthesis, or brace. | ||
/// Returns `true` for `(`, `{` and `[`, and `false` for every other character.
#[inline]
fn is_opening_bracket(c: char) -> bool {
    ['(', '{', '['].contains(&c)
}
|
|
||
| /// Decides which quotes to substitute smart quotes with. | ||
/// Decides which quotes to substitute smart quotes with.
///
/// All four entries are borrowed strings, so the struct is cheap to copy and
/// can be compared and printed for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SmartQuotes<'s> {
    /// The opening single quote.
    pub single_open: &'s str,
    /// The closing single quote.
    pub single_close: &'s str,
    /// The opening double quote.
    pub double_open: &'s str,
    /// The closing double quote.
    pub double_close: &'s str,
}
|
|
||
| impl<'s> SmartQuotes<'s> { | ||
| /// Create a new `Quotes` struct with quotes taken from the current CSL locale's | ||
| /// terms, falling back to `"` and `'` when not available. | ||
| pub fn get<T: EntryLike>(ctx: &'s Context<'s, T>) -> Self { | ||
| let default = ("'", "\""); | ||
|
|
||
| Self { | ||
| single_open: ctx | ||
| .term(OtherTerm::OpenInnerQuote.into(), TermForm::default(), false) | ||
| .unwrap_or(default.0), | ||
| single_close: ctx | ||
| .term(OtherTerm::CloseInnerQuote.into(), TermForm::default(), false) | ||
| .unwrap_or(default.0), | ||
| double_open: ctx | ||
| .term(OtherTerm::OpenQuote.into(), TermForm::default(), false) | ||
| .unwrap_or(default.1), | ||
| double_close: ctx | ||
| .term(OtherTerm::CloseQuote.into(), TermForm::default(), false) | ||
| .unwrap_or(default.1), | ||
| } | ||
| } | ||
|
|
||
| /// The opening quote. | ||
| pub fn open(&self, double: bool) -> &'s str { | ||
| if double { self.double_open } else { self.single_open } | ||
| } | ||
|
|
||
| /// The closing quote. | ||
| pub fn close(&self, double: bool) -> &'s str { | ||
| if double { self.double_close } else { self.single_close } | ||
| } | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Quotation marks as used in US English.
    const US_MARKS: SmartQuotes = SmartQuotes {
        single_open: "‘",
        single_close: "’",
        double_open: "“",
        double_close: "”",
    };
    /// Quotation marks as used in German.
    const DE_MARKS: SmartQuotes = SmartQuotes {
        single_open: "‚",
        single_close: "‘",
        double_open: "„",
        double_close: "“",
    };

    /// Table-driven check of `apply_quotes` with `inner == false`. Every row
    /// is `(expected output, raw input, quote set)`.
    #[test]
    fn typst_tests() {
        let cases = [
            (
                "“The horse eats no cucumber salad” was the first sentence ever uttered on the ‘telephone.’",
                r#""The horse eats no cucumber salad" was the first sentence ever uttered on the 'telephone.'"#,
                &US_MARKS,
            ),
            (
                "„Das Pferd frisst keinen Gurkensalat“ war der erste jemals am ‚Fernsprecher‘ gesagte Satz.",
                r#""Das Pferd frisst keinen Gurkensalat" war der erste jemals am 'Fernsprecher' gesagte Satz."#,
                &DE_MARKS,
            ),
            ("“”", r#""""#, &US_MARKS),
            (
                "The 5′11″ ‘quick’ brown fox jumps over the “lazy” dog’s ear.",
                r#"The 5'11" 'quick' brown fox jumps over the "lazy" dog's ear."#,
                &US_MARKS,
            ),
            ("He said “I’m a big fella.”", r#"He said "I'm a big fella.""#, &US_MARKS),
            (
                r#"The 5'11" ‘quick' brown fox jumps over the "lazy’ dog's ear."#,
                r#"The 5\'11\" 'quick\' brown fox jumps over the \"lazy' dog\'s ear."#,
                &US_MARKS,
            ),
            ("“Hello”/“World”", r#""Hello"/"World""#, &US_MARKS),
            ("‘“Hello”/“World”’", r#"'"Hello"/"World"'"#, &US_MARKS),
            ("“”Hello“/”World“”", r#"""Hello"/"World"""#, &US_MARKS),
            ("Straight “A”s and “B”s", r#"Straight "A"s and "B"s"#, &US_MARKS),
            ("A 2″ nail.", r#"A 2" nail."#, &US_MARKS),
            ("‘A 2″ nail.’", r#"'A 2" nail.'"#, &US_MARKS),
            ("“A 2” nail.“", r#""A 2" nail.""#, &US_MARKS),
            ("“a [“b”] c”", r#""a ["b"] c""#, &US_MARKS),
            ("“a b”c“d e”", r#""a b"c"d e""#, &US_MARKS),
        ];

        for (want, raw, marks) in cases {
            let got = apply_quotes(raw, marks, false);
            assert_eq!(want, got);
        }
    }
}
Drodt marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| >>==== MODE ====>> | ||
| citation | ||
| <<==== MODE ====<< | ||
|
|
||
| >>==== RESULT ====>> | ||
| Nation of “Positive Obligations “ of State under the European Convention on Human Rights (1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interesting that the original test, which expected
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On main the test passes. Hayagriva handles this correctly (aside from this branch using fancy quotation marks) |
||
| <<==== RESULT ====<< | ||
|
|
||
| >>==== CITATION-ITEMS ====>> | ||
| [ | ||
| [ | ||
| { | ||
| "id": "ITEM-1" | ||
| } | ||
| ] | ||
| ] | ||
| <<==== CITATION-ITEMS ====<< | ||
|
|
||
| >>==== CSL ====>> | ||
| <style | ||
| xmlns="http://purl.org/net/xbiblio/csl" | ||
| class="note" | ||
| version="1.0"> | ||
| <info> | ||
| <id /> | ||
| <title /> | ||
| <updated>2009-08-10T04:49:00+09:00</updated> | ||
| </info> | ||
| <citation> | ||
| <layout delimiter="; "> | ||
| <text variable="title" text-case="title"/> | ||
| </layout> | ||
| </citation> | ||
| </style> | ||
| <<==== CSL ====<< | ||
|
|
||
| >>==== INPUT ====>> | ||
| [ | ||
| { | ||
| "id": "ITEM-1", | ||
| "title": "Nation of \"Positive Obligations \" of State under the European Convention on Human Rights (1)", | ||
| "type": "book" | ||
| } | ||
| ] | ||
| <<==== INPUT ====<< | ||
|
|
||
|
|
||
|
|
||
| >>===== VERSION =====>> | ||
| 1.0 | ||
| <<===== VERSION =====<< | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do wonder whether Typst should somehow wrap this in a `smartquote` element, which could make your changes problematic... I'll ask the rest of the team about this.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After some initial discussion with part of the team, it's possible that we could want the smartquoter to emit an `ElemChild::SmartQuote` so that Typst can later emit a `smartquote` element, which would allow customization via show and set rules. The idea is then that the default plain text renderer would just convert it into text with the surrounding quotes. In addition, the chosen quotes stored in that element would be `None` if the CSL locale doesn't override the quote terms, to allow for set rules to work on the Typst side.

However, we're still not sure regarding the algorithm. We would like to know how BibLaTeX approaches smart quoting so we can make sure existing `.bib` files behave at least somewhat as expected, converting one approach to another if necessary. It would be nice to have some further investigation in this regard.

Still, we won't make a decision before feedback from @reknih (who is currently unavailable), so don't worry about making too many changes right now, although feel free to investigate BibLaTeX, as that would be helpful.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good.
I already looked into how biblatex does this; I’ll write it up some time next week.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Biblatex does not handle `"` in any special way. For quotation marks, they provide the `\mkbibquote` command, which either produces American quotation marks (and, typically, moves punctuation like `.` or `,` inside the quote) or uses `\enquote` from the csquotes package. The latter uses language-dependent quotation marks.

For conversion, we could translate `\mkbibquote` and `\enquote` to use normal quotation marks, which are then handled by hayagriva.