run-llama · hexapode · Jan 10, 2025 · Jan 10, 2025
diff --git a/llama_parse/base.py b/llama_parse/base.py
@@ -140,6 +140,14 @@ class LlamaParse(BasePydanticReader):
         default=None,
         description="The top margin of the bounding box to use to extract text from documents expressed as a float between 0 and 1 representing the percentage of the page height.",
     )
+    complemental_formatting_instruction: Optional[str] = Field(
+        default=None,
+        description="The complemental formatting instruction for the parser. Tell LlamaParse how something should be formatted, while retaining the markdown output.",
+    )
+    content_guideline_instruction: Optional[str] = Field(
+        default=None,
+        description="The content guideline for the parser. Tell LlamaParse how the content should be changed / transformed.",
+    )
     continuous_mode: Optional[bool] = Field(
         default=False,
         description="Parse documents continuously, leading to better results on documents where tables span across two pages.",
@@ -172,6 +180,10 @@ class LlamaParse(BasePydanticReader):
         default=False,
         description="Note: Non compatible with gpt-4o. If set to true, the parser will use a faster mode to extract text from documents. This mode will skip OCR of images, and table/heading reconstruction.",
     )
+    formatting_instruction: Optional[str] = Field(
+        default=None,
+        description="The formatting instruction for the parser. Overrides the default LlamaParse behavior. In most cases you want to use complemental_formatting_instruction instead.",
+    )
     guess_xlsx_sheet_names: Optional[bool] = Field(
         default=False,
         description="Whether to guess the sheet names of the xlsx file.",
@@ -196,10 +208,6 @@ class LlamaParse(BasePydanticReader):
         default=False,
         description="If set to true, the cache will be ignored and the document re-processes. All document are kept in cache for 48hours after the job was completed to avoid processing the same document twice.",
     )
-    is_formatting_instruction: Optional[bool] = Field(
-        default=False,
-        description="Allow the parsing instruction to also format the output. Disable to have a cleaner markdown output.",
-    )
     language: Optional[str] = Field(
         default="en", description="The language of the text to parse."
     )
@@ -227,9 +235,6 @@ class LlamaParse(BasePydanticReader):
         default=None,
         description="A templated suffix to add to the beginning of each page. If it contain `{page_number}`, it will be replaced by the page number.",
     )
-    parsing_instruction: Optional[str] = Field(
-        default="", description="The parsing instruction for the parser."
-    )
     premium_mode: Optional[bool] = Field(
         default=False,
         description="Use our best parser mode if set to True.",
@@ -288,6 +293,13 @@ class LlamaParse(BasePydanticReader):
         default=None,
         description="The API key for the GPT-4o API. Lowers the cost of parsing.",
     )
+    is_formatting_instruction: Optional[bool] = Field(
+        default=False,
+        description="Allow the parsing instruction to also format the output. Disable to have a cleaner markdown output.",
+    )
+    parsing_instruction: Optional[str] = Field(
+        default="", description="The parsing instruction for the parser."
+    )
 
     @field_validator("api_key", mode="before", check_fields=True)
     @classmethod
@@ -467,6 +479,14 @@ async def _create_job(
         if self.bbox_top is not None:
             data["bbox_top"] = self.bbox_top
 
+        if self.complemental_formatting_instruction:
+            data[
+                "complemental_formatting_instruction"
+            ] = self.complemental_formatting_instruction
+
+        if self.content_guideline_instruction:
+            data["content_guideline_instruction"] = self.content_guideline_instruction
+
         if self.continuous_mode:
             data["continuous_mode"] = self.continuous_mode
 
@@ -491,6 +511,9 @@ async def _create_job(
         if self.fast_mode:
             data["fast_mode"] = self.fast_mode
 
+        if self.formatting_instruction:
+            data["formatting_instruction"] = self.formatting_instruction
+
         if self.guess_xlsx_sheet_names:
             data["guess_xlsx_sheet_names"] = self.guess_xlsx_sheet_names
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "llama-parse"
-version = "0.5.19"
+version = "0.5.20"
 description = "Parse files into RAG-Optimized formats."
 authors = ["Logan Markewich <[email protected]>"]
 license = "MIT"