Skip to content

Commit

Permalink
Fix type check: handle_image returns None when the element is None
Browse files (browse the repository at this point in the history)
  • Loading branch information
CodyInnowhere authored and committed Dec 2, 2024
1 parent 742fede commit 1f287dd
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 0 deletions.
1 change: 1 addition & 0 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ def test_images():
assert is_image_file('test.txt') is False
assert is_image_file('test.jpg'*2000) is False # length threshold
# tag with attributes
assert handle_image(None) is None
assert handle_image(html.fromstring('<img src="test.jpg"/>')) is not None
assert handle_image(html.fromstring('<img data-src="test.jpg" alt="text" title="a title"/>')) is not None
assert handle_image(html.fromstring('<img other="test.jpg"/>')) is None
Expand Down
3 changes: 3 additions & 0 deletions trafilatura/main_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,9 @@ def handle_table(table_elem: _Element, potential_tags: Set[str], options: Extrac

def handle_image(element: Optional[_Element]) -> Optional[_Element]:
"Process image elements and their relevant attributes."
if element is None:
return None

processed_element = Element(element.tag)

for attr in ("data-src", "src"):
Expand Down

0 comments on commit 1f287dd

Please sign in to comment.