docs: Introduce example with custom models for RapidOCR (#874)

* docs: Introduce example with custom models for RapidOCR

  Signed-off-by: Nikos Livathinos <[email protected]>

* chore: Exclude the example with custom RapidOCR models from the examples to run in GitHub Actions

  Signed-off-by: Nikos Livathinos <[email protected]>

---------

Signed-off-by: Nikos Livathinos <[email protected]>
DS4SD · Feb 4, 2025 · 6d3fea0 · 6d3fea0
1 parent b5da408
commit 6d3fea0
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 1 deletion.
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -28,7 +28,7 @@ jobs:
         run: |
           for file in docs/examples/*.py; do
             # Skip the examples that are excluded from this check
-            if [[ "$(basename "$file")" =~ ^(batch_convert|minimal|export_multimodal|custom_convert|develop_picture_enrichment).py ]]; then
+            if [[ "$(basename "$file")" =~ ^(batch_convert|minimal|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models).py ]]; then
                 echo "Skipping $file"
                 continue
             fi

diff --git a/docs/examples/rapidocr_with_custom_models.py b/docs/examples/rapidocr_with_custom_models.py
@@ -0,0 +1,58 @@
+import os
+
+from huggingface_hub import snapshot_download
+
+from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
+from docling.document_converter import (
+    ConversionResult,
+    DocumentConverter,
+    InputFormat,
+    PdfFormatOption,
+)
+
+
+def main() -> None:
+    """Convert a sample PDF with RapidOCR using custom ONNX models.
+
+    Downloads the ``SWHL/RapidOCR`` snapshot from the HuggingFace Hub, builds
+    ``RapidOcrOptions`` from three model files inside that snapshot, converts
+    the source document with a ``DocumentConverter`` and prints the Markdown
+    export of the result.
+    """
+    # Source document to convert
+    source = "https://arxiv.org/pdf/2408.09869v4"
+
+    # Download RapidOCR models from HuggingFace
+    print("Downloading RapidOCR models")
+    # download_path is used below as the root directory of the model files
+    download_path = snapshot_download(repo_id="SWHL/RapidOCR")
+
+    # Set up RapidOcrOptions for English detection: the det file is the "en_"
+    # one, while the rec and cls files are "ch_"-prefixed files taken from the
+    # same downloaded snapshot.
+    det_model_path = os.path.join(
+        download_path, "PP-OCRv4", "en_PP-OCRv3_det_infer.onnx"
+    )
+    rec_model_path = os.path.join(
+        download_path, "PP-OCRv4", "ch_PP-OCRv4_rec_server_infer.onnx"
+    )
+    cls_model_path = os.path.join(
+        download_path, "PP-OCRv3", "ch_ppocr_mobile_v2.0_cls_train.onnx"
+    )
+    ocr_options = RapidOcrOptions(
+        det_model_path=det_model_path,
+        rec_model_path=rec_model_path,
+        cls_model_path=cls_model_path,
+    )
+
+    # Only the OCR options are passed; all other PdfPipelineOptions fields are
+    # left unset here.
+    pipeline_options = PdfPipelineOptions(
+        ocr_options=ocr_options,
+    )
+
+    # Convert the document; the custom pipeline options are registered for
+    # PDF input only.
+    converter = DocumentConverter(
+        format_options={
+            InputFormat.PDF: PdfFormatOption(
+                pipeline_options=pipeline_options,
+            ),
+        },
+    )
+
+    conversion_result: ConversionResult = converter.convert(source=source)
+    doc = conversion_result.document
+    # Export the converted document to Markdown and print it to stdout
+    md = doc.export_to_markdown()
+    print(md)
+
+
+# Run the example only when the file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -77,6 +77,7 @@ nav:
       - "Multimodal export": examples/export_multimodal.py
       - "Force full page OCR": examples/full_page_ocr.py
       - "Automatic OCR language detection with tesseract": examples/tesseract_lang_detection.py
+      - "RapidOCR with custom OCR models": examples/rapidocr_with_custom_models.py
       - "Accelerator options": examples/run_with_accelerator.py
       - "Simple translation": examples/translate.py
       - examples/backend_xml_rag.ipynb