Skip to content

Commit

Permalink
Fix tests
Browse files: browse the repository at this point in the history
  • Loading branch information
tarun-menta committed Feb 11, 2025
1 parent 5cd0f3a commit 4153056
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions tests/builders/test_ocr_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@
from marker.schema.text.line import Line


@pytest.mark.config({"force_ocr": True, "page_range": [0]})
@pytest.mark.config({"force_ocr": True, "page_range": [0], "disable_inline_math_detection":True})
def test_ocr_pipeline(pdf_document):
first_page = pdf_document.pages[0]
assert first_page.structure[0] == '/page/0/SectionHeader/0'
Expand All @@ -24,14 +24,14 @@ def test_ocr_pipeline(pdf_document):
# Makes sure the OCR bbox is being scaled to the same scale as the layout boxes
text_lines = first_page.contained_blocks(pdf_document, (BlockTypes.Line,))
text_blocks = first_page.contained_blocks(pdf_document, (BlockTypes.Text,BlockTypes.TextInlineMath))
assert len(text_lines) == 80
assert len(text_lines) == 75

# Ensure the bbox sizes match up
max_line_position = max([line.polygon.y_end for line in text_lines])
max_block_position = max([block.polygon.y_end for block in text_blocks if block.source == "layout"])
assert max_line_position <= (max_block_position * 1.02)

@pytest.mark.config({"force_ocr": True, "page_range": [0], "enable_inline_math_detection": True})
@pytest.mark.config({"force_ocr": True, "page_range": [0]})
def test_ocr_with_inline_pipeline(pdf_document):
first_page = pdf_document.pages[0]
assert first_page.structure[0] == '/page/0/SectionHeader/0'
Expand All @@ -51,7 +51,7 @@ def test_ocr_with_inline_pipeline(pdf_document):
# Makes sure the OCR bbox is being scaled to the same scale as the layout boxes
text_lines = first_page.contained_blocks(pdf_document, (BlockTypes.Line,))
text_blocks = first_page.contained_blocks(pdf_document, (BlockTypes.Text,BlockTypes.TextInlineMath))
assert len(text_lines) == 89
assert len(text_lines) == 79

# Ensure the bbox sizes match up
max_line_position = max([line.polygon.y_end for line in text_lines])
Expand Down
2 changes: 1 addition & 1 deletion tests/builders/test_rotated_bboxes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_rotated_bboxes(pdf_document):
# Ensure we match all text lines up properly
text_lines = first_page.contained_blocks(pdf_document, (BlockTypes.Line,))
text_blocks = first_page.contained_blocks(pdf_document, (BlockTypes.Text, BlockTypes.TextInlineMath))
assert len(text_lines) == 90
assert len(text_lines) == 88

# Ensure the bbox sizes match up
max_line_position = max([line.polygon.x_end for line in text_lines])
Expand Down

0 comments on commit 4153056

Please sign in to comment.