diff --git a/marker/providers/pdf.py b/marker/providers/pdf.py index a63723ff..c07b5649 100644 --- a/marker/providers/pdf.py +++ b/marker/providers/pdf.py @@ -232,7 +232,9 @@ def pdftext_extraction(self, doc: PdfDocument) -> ProviderPageLines: ) if self.check_line_spans(lines): page_lines[page_id] = lines - self.page_refs[page_id] = page["refs"] + refs = page.get("refs") + if refs is not None: + self.page_refs[page_id] = page["refs"] return page_lines