From 7671f3d883af31cb43fcb8b15056f30cddafd58b Mon Sep 17 00:00:00 2001 From: UVMvmfee <186379858+UVMvmfee@users.noreply.github.com> Date: Sat, 7 Dec 2024 13:42:07 +0100 Subject: [PATCH] Fixes eager doctype regex matching when doctype is not followed by a newline. --- tests/unit_tests.py | 7 +++++++ trafilatura/utils.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests.py b/tests/unit_tests.py index b793b074..29887a6f 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -140,6 +140,13 @@ def test_input(): == '\n\n
Foo\n
\n' ) + htmlstring = '