Skip to content

Commit

Permalink
Merge pull request #202 from freelawproject/improve-reference-citations
Browse files Browse the repository at this point in the history
Improve reference citations
  • Loading branch information
flooie authored Feb 7, 2025
2 parents 40619c7 + 9ca0665 commit 6e7ec51
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 0 deletions.
2 changes: 2 additions & 0 deletions eyecite/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)
from eyecite.regexes import SHORT_CITE_ANTECEDENT_REGEX, SUPRA_ANTECEDENT_REGEX
from eyecite.tokenizers import Tokenizer, default_tokenizer
from eyecite.utils import DISALLOWED_NAMES


def get_citations(
Expand Down Expand Up @@ -153,6 +154,7 @@ def is_valid_name(name: str) -> bool:
and name[0].isupper()
and not name.endswith(".")
and not name.isdigit()
and name.lower() not in DISALLOWED_NAMES
)

regexes = [
Expand Down
101 changes: 101 additions & 0 deletions eyecite/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,107 @@

from lxml import etree

# Names that must never be treated as the party name of a reference citation.
# The list is partially taken from juriscraper.
#
# Callers compare a *lower-cased* candidate against this list
# (``name.lower() not in DISALLOWED_NAMES``), so every entry is normalized to
# lower case when the list is built. The entries are written in their natural
# capitalization below purely for readability; adding a capitalized name here
# is therefore safe.
DISALLOWED_NAMES = [
    name.lower()
    for name in (
        # Common options
        "state",
        "united states",
        "people",
        "commonwealth",
        "mass",
        # NOTE: the trailing comma matters -- without it Python silently
        # concatenates this literal with the next one.
        "commissioner",
        # AGs
        "Akerman",
        "Ashcroft",
        "Barr",
        "Bates",
        "Bell",
        "Berrien",
        "Biddle",
        "Black",
        "Bonaparte",
        "Bork",
        "Bondi",
        "Bradford",
        "Breckinridge",
        "Brewster",
        "Brownell",
        "Butler",
        "Civiletti",
        "Clark",
        "Clement",
        "Clifford",
        "Crittenden",
        "Cummings",
        "Cushing",
        "Daugherty",
        "Devens",
        "Evarts",
        "Filip",
        "Garland",
        "Gerson",
        "Gilpin",
        "Gonzales",
        "Gregory",
        "Griggs",
        "Grundy",
        "Harmon",
        "Hoar",
        "Holder",
        "Jackson",
        "Johnson",
        "Katzenbach",
        "Keisler",
        "Kennedy",
        "Kleindienst",
        "Knox",
        "Lee",
        "Legaré",
        "Levi",
        "Lincoln",
        "Lynch",
        "MacVeagh",
        "Mason",
        "McGranery",
        "McGrath",
        "McKenna",
        "McReynolds",
        "Meese",
        "Miller",
        "Mitchell",
        "Moody",
        "Mukasey",
        "Murphy",
        "Nelson",
        "Olney",
        "Palmer",
        "Pierrepont",
        "Pinkney",
        "Randolph",
        "Reno",
        "Richardson",
        "Rodney",
        "Rogers",
        "Rush",
        "Sargent",
        "Saxbe",
        "Sessions",
        "Smith",
        "Speed",
        "Stanbery",
        "Stanton",
        "Stone",
        "Taft",
        "Taney",
        "Thornburgh",
        "Toucey",
        # NOTE(review): "Whitacker" is the spelling the list originally
        # carried and is kept for backward compatibility; "Whitaker" is
        # presumably the intended surname -- confirm against juriscraper.
        "Whitacker",
        "Whitaker",
        "Wickersham",
        "Williams",
        "Wirt",
    )
]


def strip_punct(text: str) -> str:
"""Strips punctuation from a given string
Expand Down
6 changes: 6 additions & 0 deletions tests/test_FindTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,12 @@ def test_find_citations(self):
'defendant': 'Bar',
'pin_cite': '347-348'}),
reference_citation('Foo at 62', metadata={'plaintiff': 'Foo', 'pin_cite': '62'})]),
('Foo v. United States 1 U.S. 12, 347-348. something something ... the United States at 1776 we see that and Foo at 62',
[case_citation(page='12',
metadata={'plaintiff': 'Foo',
'defendant': 'United States',
'pin_cite': '347-348'}),
reference_citation('Foo at 62', metadata={'plaintiff': 'Foo', 'pin_cite': '62'})]),
# Test that reference citation must occur after full case citation
('In Foo at 62 we see that, Foo v. Bar 1 U.S. 12, 347-348. something something,',
[case_citation(page='12',
Expand Down

0 comments on commit 6e7ec51

Please sign in to comment.