From 2b4fc21f6b2436766f5dc093bdd0ba8612a864a7 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 6 Feb 2025 15:29:57 -0500 Subject: [PATCH 1/2] fix(find.py): Correct span calculation for short-form citations The span and full span of short-form citations were being computed incorrectly. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example: And Twombly, 550 U. S., at 555 … Currently, when an antecedent guess is identified, it is not factored into the full span calculation. Additionally, the pin-cite is not correctly incorporated into the offset. This fix ensures both are properly accounted for. --- eyecite/find.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/eyecite/find.py b/eyecite/find.py index 90985be..5e50d7b 100644 --- a/eyecite/find.py +++ b/eyecite/find.py @@ -246,7 +246,10 @@ def _extract_shortform_citation( strings_only=True, forward=False, ) + offset = 0 if m: + ante_start, ante_end = m.span() + offset = ante_end - ante_start antecedent_guess = m["antecedent"].strip() # Get pin_cite @@ -262,6 +265,8 @@ def _extract_shortform_citation( exact_editions=cite_token.exact_editions, variation_editions=cite_token.variation_editions, span_end=span_end, + full_span_start=cite_token.start - offset, + full_span_end=max([span_end, cite_token.end]), metadata={ "antecedent_guess": antecedent_guess, "pin_cite": pin_cite, From e0cff410e0ab6a2d449540729f6f954af5513fce Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 6 Feb 2025 15:34:58 -0500 Subject: [PATCH 2/2] fix(lint): Fix lint issue --- eyecite/find.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eyecite/find.py b/eyecite/find.py index 5e50d7b..6efeda9 100644 --- a/eyecite/find.py +++ b/eyecite/find.py @@ -257,6 +257,7 @@ def _extract_shortform_citation( pin_cite, span_end, parenthetical = extract_pin_cite( words, index, prefix=cite_token.groups["page"] ) + span_end = span_end if span_end else 0 # make ShortCaseCitation citation = 
ShortCaseCitation(