From 2a00aeb812c0a69831995b5b5b6e9e3cac401fc7 Mon Sep 17 00:00:00 2001 From: "William J. Bowman" Date: Tue, 18 Aug 2020 23:21:14 -0700 Subject: [PATCH] Added support for latex escapes in bibtex titles and authors This allows using certain escape sequences, such as \", in titles and authors of bibtex files. Titles are wrapped with the 'exact-chars style, so the set of escapes handled is large. A different method is used for authors, which basically converts certain well-known escapes into Unicode. This seems necessary to support parsing of author names. --- scribble-lib/scriblib/bibtex.rkt | 46 +++++++++++++++---- .../tests/scriblib/bibtex.latex-escapes.txt | 6 +++ scribble-test/tests/scriblib/bibtex.rkt | 6 ++- 3 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 scribble-test/tests/scriblib/bibtex.latex-escapes.txt diff --git a/scribble-lib/scriblib/bibtex.rkt b/scribble-lib/scriblib/bibtex.rkt index 2b62393091..9eb546aeef 100644 --- a/scribble-lib/scriblib/bibtex.rkt +++ b/scribble-lib/scriblib/bibtex.rkt @@ -214,13 +214,35 @@ (define ~cite-id (make-citer autobib-cite)) (define citet-id (make-citer autobib-citet)))) +;; Seems a little redundant to convert latex escapes into unicode only to +;; convert them back into latex, but we need to sort authors so we can't +;; leave them as 'exact-chars. +(define (latex-to-unicode str) + ; This is probably defined somewhere... + ; NOTE: Incomplete. Please file PR if you need more. + (define converts + '(("\\'\\i" . "ı́") + ("\\\"u" . "ü") + ("\\\"o" . "ö") + ("\\\"i" . "ï") + ("\\'i" . "í") + ("\\i" . "ı") + ("\\'a" . "á") + ("\\'A" . "Á") + ("\\~a" . "ã") + ("\\`a" . "à") + ("\\~A" . 
"Ã"))) + (for/fold ([str str]) + ([p converts]) + (string-replace str (car p) (cdr p)))) + (define (parse-author as) (and as (apply authors (for/list ([a (in-list (regexp-split #px"\\s+and\\s+" as))]) (define (trim s) (string-trim (regexp-replace #px"\\s+" s " "))) - (match a + (match (latex-to-unicode a) [(pregexp #px"^(.*),(.*),(.*)$" (list _ two suffix one)) (author-name (trim one) (trim two) #:suffix (trim suffix))] [(pregexp #px"^(.*),(.*)$" (list _ two one)) @@ -422,6 +444,10 @@ [_ (error 'parse-pages "Invalid page format ~e" ps)])) +(require scribble/core) +(define (support-escapes s) + (elem #:style (make-style #f '(exact-chars)) s)) + (define (generate-bib db key) (match-define (bibdb raw bibs) db) (hash-ref! bibs key @@ -435,18 +461,18 @@ key a the-raw)))) (match (raw-attr 'type) ["misc" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:url (raw-attr "url"))] ["book" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:is-book? 
#t #:url (raw-attr "url"))] ["article" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (journal-location (raw-attr* "journal") @@ -455,31 +481,31 @@ #:volume (raw-attr "volume")) #:url (raw-attr "url"))] ["inproceedings" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (proceedings-location (raw-attr "booktitle")) #:url (raw-attr "url"))] ["webpage" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") - #:url (raw-attr "url"))] + #:url (raw-attr "url"))] ["mastersthesis" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (raw-attr "school") #:url (raw-attr "url"))] ["phdthesis" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location (dissertation-location #:institution (raw-attr "school") #:degree "PhD") #:url (raw-attr "url"))] ["techreport" - (make-bib #:title (raw-attr "title") + (make-bib #:title (support-escapes (raw-attr "title")) #:author (parse-author (raw-attr "author")) #:date (raw-attr "year") #:location diff --git a/scribble-test/tests/scriblib/bibtex.latex-escapes.txt b/scribble-test/tests/scriblib/bibtex.latex-escapes.txt new file mode 100644 index 0000000000..246aa7e0e0 --- /dev/null +++ b/scribble-test/tests/scriblib/bibtex.latex-escapes.txt @@ -0,0 +1,6 @@ +Bibliography + +[1]Vı́ctor Braberman, Federico Fernández, Diego Garbervetsky, and Sergio + Yovine. Parametric prediction of heap memory requirements. In Proc. 
+ Proceedings of the 7th international symposium on Memory management, + 2008. http://doi.acm.org/10.1145/1375634.1375655 diff --git a/scribble-test/tests/scriblib/bibtex.rkt b/scribble-test/tests/scriblib/bibtex.rkt index a73089ef9a..b145a2d351 100644 --- a/scribble-test/tests/scriblib/bibtex.rkt +++ b/scribble-test/tests/scriblib/bibtex.rkt @@ -10,6 +10,7 @@ (define-runtime-path normal-expected-path "bibtex.normal.txt") (define-runtime-path number-expected-path "bibtex.number.txt") +(define-runtime-path latex-escapes-path "bibtex.latex-escapes.txt") (define-syntax-rule (test-render* definer expected-path body generate-bibliography-id) (let () @@ -73,4 +74,7 @@ (λ (~cite-id citet-id) (citet-id "salib:starkiller") (citet-id "cryptoeprint:2000:067") - (citet-id "Tobin-Hochstadt:2011fk")))) + (citet-id "Tobin-Hochstadt:2011fk"))) + (test-render latex-escapes-path (#:style number-style) + (λ (~cite-id citet-id) + (citet-id "Braberman:2008:PPH:1375634.1375655"))))