From 1f287dda82ff14e67ca8ef13c895db1de7b59a1b Mon Sep 17 00:00:00 2001 From: CodyInnowhere Date: Mon, 2 Dec 2024 20:33:12 +0800 Subject: [PATCH] fix type check --- tests/unit_tests.py | 1 + trafilatura/main_extractor.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 81f74301..9ae9a3fe 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -483,6 +483,7 @@ def test_images(): assert is_image_file('test.txt') is False assert is_image_file('test.jpg'*2000) is False # length threshold # tag with attributes + assert handle_image(None) is None assert handle_image(html.fromstring('')) is not None assert handle_image(html.fromstring('text')) is not None assert handle_image(html.fromstring('')) is None diff --git a/trafilatura/main_extractor.py b/trafilatura/main_extractor.py index bba79e53..2a950bec 100644 --- a/trafilatura/main_extractor.py +++ b/trafilatura/main_extractor.py @@ -444,6 +444,9 @@ def handle_table(table_elem: _Element, potential_tags: Set[str], options: Extrac def handle_image(element: Optional[_Element]) -> Optional[_Element]: "Process image elements and their relevant attributes." + if element is None: + return None + processed_element = Element(element.tag) for attr in ("data-src", "src"):