Skip to content

Commit

Permalink
Switch to code formula model v1.0.1 and new test PDF
Browse files: browse the repository at this point in the history
Signed-off-by: Matteo-Omenetti <[email protected]>
  • Loading branch information
Matteo-Omenetti authored and Matteo-Omenetti committed Feb 4, 2025
1 parent 89844a5 commit 68d1713
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docling/models/code_formula_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
"""

images_scale = 1.66 # = 120 dpi, aligned with training data resolution
expansion_factor = 0.05
expansion_factor = 0.03

def __init__(
self,
Expand Down
Binary file modified tests/data/code_and_formula.pdf
Binary file not shown.
8 changes: 4 additions & 4 deletions tests/test_code_formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def test_code_and_formula_conversion():
code_blocks = [el for el in results if isinstance(el, CodeItem)]
assert len(code_blocks) == 1

gt = 'int main() {\n printf("Hello, World!");\n return 0;\n}'
gt = 'function add(a, b) {\n return a + b;\n}\nconsole.log(add(3, 5));'

predicted = code_blocks[0].text.strip()
predicted = code_blocks[0].text.strip()
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
assert code_blocks[0].code_language == CodeLanguageLabel.C_PLUS_PLUS
assert code_blocks[0].code_language == CodeLanguageLabel.JAVASCRIPT

formula_blocks = [
el
Expand All @@ -61,6 +61,6 @@ def test_code_and_formula_conversion():
]
assert len(formula_blocks) == 1

gt = "a ^ { 2 } + 8 = 1 2"
gt = "x ^ { 2 } + 8 = 1 2"
predicted = formula_blocks[0].text
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"

0 comments on commit 68d1713

Please sign in to comment.