feat(document content): support external id and instance id #2046

Open · wants to merge 8 commits into base: master
33 changes: 27 additions & 6 deletions cognite/client/_api/documents.py
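This change extends DocumentsAPI.retrieve_content and DocumentsAPI.retrieve_content_buffer so a document can be addressed by external_id or by a data-modeling instance_id (a NodeId), in addition to the server-generated id. A minimal usage sketch of the new call signatures, assuming a configured CogniteClient; the document identifiers below are hypothetical:

    from cognite.client import CogniteClient
    from cognite.client.data_classes.data_modeling.ids import NodeId

    client = CogniteClient()

    # Existing behaviour: address the document by its server-generated id.
    content = client.documents.retrieve_content(id=123)

    # New in this PR: address the document by external id or by instance id.
    content = client.documents.retrieve_content(external_id="my-document")
    content = client.documents.retrieve_content(
        instance_id=NodeId(space="my-space", external_id="my-document-node")
    )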
@@ -8,6 +8,7 @@
from cognite.client._constants import DEFAULT_LIMIT_READ
from cognite.client.data_classes import filters
from cognite.client.data_classes.aggregations import AggregationFilter, UniqueResultList
from cognite.client.data_classes.data_modeling.ids import NodeId
from cognite.client.data_classes.documents import (
Document,
DocumentHighlightList,
@@ -19,6 +20,7 @@
TemporaryLink,
)
from cognite.client.data_classes.filters import _BASIC_FILTERS, Filter, _validate_filter
from cognite.client.utils._identifier import IdentifierSequence

if TYPE_CHECKING:
from cognite.client import ClientConfig, CogniteClient
@@ -475,7 +477,12 @@ def aggregate_unique_properties(
limit=limit,
)

def retrieve_content(self, id: int) -> bytes:
def retrieve_content(
self,
id: int | None = None,
external_id: str | None = None,
instance_id: NodeId | None = None,
) -> bytes:
"""`Retrieve document content <https://developer.cognite.com/api#tag/Documents/operation/documentsContent>`_

Returns extracted textual information for the given document.
@@ -487,7 +494,9 @@ def retrieve_content(self, id: int) -> bytes:


Args:
id (int): The server-generated ID for the document you want to retrieve the content of.
id (int | None): The server-generated ID for the document you want to retrieve the content of.
external_id (str | None): The external ID of the document you want to retrieve the content of.
instance_id (NodeId | None): The instance ID (data modeling node) of the document you want to retrieve the content of.

Returns:
bytes: The content of the document.
@@ -500,10 +509,18 @@ def retrieve_content(self, id: int) -> bytes:
>>> client = CogniteClient()
>>> content = client.documents.retrieve_content(id=123)
"""
response = self._do_request("GET", f"{self._RESOURCE_PATH}/{id}/content", accept="text/plain")
identifiers = IdentifierSequence.load(ids=id, external_ids=external_id, instance_ids=instance_id).as_singleton()
identifier = identifiers.as_dicts()[0]
response = self._do_request("POST", f"{self._RESOURCE_PATH}/content", accept="text/plain", json=identifier)
return response.content
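For context, the new body above no longer interpolates the id into a GET URL; it loads whichever single identifier was given and POSTs it as the JSON body to the /content endpoint. A rough sketch of the identifier dicts this should produce; the exact camelCase field names are an assumption here, not taken from this diff:

    from cognite.client.data_classes.data_modeling.ids import NodeId
    from cognite.client.utils._identifier import IdentifierSequence

    # Illustration only: the single-identifier dict that becomes the JSON body of
    # POST {resource_path}/content. Expected shapes (assumed):
    #   {"id": 123}
    #   {"externalId": "my-document"}
    #   {"instanceId": {"space": "my-space", "externalId": "my-document-node"}}
    for kwargs in (
        {"ids": 123},
        {"external_ids": "my-document"},
        {"instance_ids": NodeId(space="my-space", external_id="my-document-node")},
    ):
        identifier = IdentifierSequence.load(**kwargs).as_singleton().as_dicts()[0]
        print(identifier)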

def retrieve_content_buffer(self, id: int, buffer: BinaryIO) -> None:
def retrieve_content_buffer(
self,
buffer: BinaryIO,
id: int | None = None,
external_id: str | None = None,
instance_id: NodeId | None = None,
) -> None:
"""`Retrieve document content into buffer <https://developer.cognite.com/api#tag/Documents/operation/documentsContent>`_

Returns extracted textual information for the given document.
@@ -515,8 +532,10 @@ def retrieve_content_buffer(self, id: int, buffer: BinaryIO) -> None:


Args:
id (int): The server-generated ID for the document you want to retrieve the content of.
buffer (BinaryIO): The document content is streamed directly into the buffer. This is useful for retrieving large documents.
id (int | None): The server-generated ID for the document you want to retrieve the content of.
external_id (str | None): The external ID of the document you want to retrieve the content of.
instance_id (NodeId | None): The instance ID (data modeling node) of the document you want to retrieve the content of.

Examples:

@@ -528,8 +547,10 @@ def retrieve_content_buffer(self, id: int, buffer: BinaryIO) -> None:
>>> with Path("my_file.txt").open("wb") as buffer:
... client.documents.retrieve_content_buffer(id=123, buffer=buffer)
"""
identifiers = IdentifierSequence.load(ids=id, external_ids=external_id, instance_ids=instance_id).as_singleton()
identifier = identifiers.as_dicts()[0]
with self._do_request(
"GET", f"{self._RESOURCE_PATH}/{id}/content", stream=True, accept="text/plain"
"POST", f"{self._RESOURCE_PATH}/content", stream=True, accept="text/plain", json=identifier
) as response:
for chunk in response.iter_content(chunk_size=2**21):
if chunk: # filter out keep-alive new chunks
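Similarly, a hedged sketch of how the buffer variant could be used with the new identifiers; the file name and identifiers are hypothetical:

    from pathlib import Path

    from cognite.client import CogniteClient
    from cognite.client.data_classes.data_modeling.ids import NodeId

    client = CogniteClient()

    # Stream the document content directly into a file, addressing the document
    # by its data-modeling instance id instead of the numeric id.
    with Path("my_file.txt").open("wb") as buffer:
        client.documents.retrieve_content_buffer(
            buffer=buffer,
            instance_id=NodeId(space="my-space", external_id="my-document-node"),
        )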