Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(document content): support external id and instance id #2046

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Changes are grouped as follows
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [7.70.0] - 2024-11-24
### Added
- The documents content endpoint now supports `external_id` and `instance_id`, in addition to `id`.

## [7.69.0] - 2024-11-23
### Added
- Synthetic Datapoints API has better support for `instance_id`. Previously you had to specify these directly
Expand Down
33 changes: 27 additions & 6 deletions cognite/client/_api/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cognite.client._constants import DEFAULT_LIMIT_READ
from cognite.client.data_classes import filters
from cognite.client.data_classes.aggregations import AggregationFilter, UniqueResultList
from cognite.client.data_classes.data_modeling.ids import NodeId
from cognite.client.data_classes.documents import (
Document,
DocumentHighlightList,
Expand All @@ -19,6 +20,7 @@
TemporaryLink,
)
from cognite.client.data_classes.filters import _BASIC_FILTERS, Filter, _validate_filter
from cognite.client.utils._identifier import IdentifierSequence

if TYPE_CHECKING:
from cognite.client import ClientConfig, CogniteClient
Expand Down Expand Up @@ -467,7 +469,12 @@ def aggregate_unique_properties(
limit=limit,
)

def retrieve_content(self, id: int) -> bytes:
def retrieve_content(
self,
id: int | None = None,
external_id: str | None = None,
instance_id: NodeId | None = None,
) -> bytes:
"""`Retrieve document content <https://developer.cognite.com/api#tag/Documents/operation/documentsContent>`_

Returns extracted textual information for the given document.
Expand All @@ -479,7 +486,9 @@ def retrieve_content(self, id: int) -> bytes:


Args:
id (int): The server-generated ID for the document you want to retrieve the content of.
id (int | None): The server-generated ID for the document you want to retrieve the content of.
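external_id (str | None): The external ID of the document you want to retrieve the content of.
instance_id (NodeId | None): The instance ID of the document you want to retrieve the content of.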

Returns:
bytes: The content of the document.
Expand All @@ -492,10 +501,18 @@ def retrieve_content(self, id: int) -> bytes:
>>> client = CogniteClient()
>>> content = client.documents.retrieve_content(id=123)
"""
response = self._do_request("GET", f"{self._RESOURCE_PATH}/{id}/content", accept="text/plain")
identifiers = IdentifierSequence.load(ids=id, external_ids=external_id, instance_ids=instance_id).as_singleton()
identifier = identifiers.as_dicts()[0]
response = self._do_request("POST", f"{self._RESOURCE_PATH}/content", accept="text/plain", json=identifier)
return response.content

def retrieve_content_buffer(self, id: int, buffer: BinaryIO) -> None:
def retrieve_content_buffer(
self,
buffer: BinaryIO,
id: int | None = None,
external_id: str | None = None,
instance_id: NodeId | None = None,
) -> None:
"""`Retrieve document content into buffer <https://developer.cognite.com/api#tag/Documents/operation/documentsContent>`_

Returns extracted textual information for the given document.
Expand All @@ -507,8 +524,10 @@ def retrieve_content_buffer(self, id: int, buffer: BinaryIO) -> None:


Args:
id (int): The server-generated ID for the document you want to retrieve the content of.
buffer (BinaryIO): The document content is streamed directly into the buffer. This is useful for retrieving large documents.
id (int | None): The server-generated ID for the document you want to retrieve the content of.
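external_id (str | None): The external ID of the document you want to retrieve the content of.
instance_id (NodeId | None): The instance ID of the document you want to retrieve the content of.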

Examples:

Expand All @@ -520,8 +539,10 @@ def retrieve_content_buffer(self, id: int, buffer: BinaryIO) -> None:
>>> with Path("my_file.txt").open("wb") as buffer:
... client.documents.retrieve_content_buffer(id=123, buffer=buffer)
"""
identifiers = IdentifierSequence.load(ids=id, external_ids=external_id, instance_ids=instance_id).as_singleton()
identifier = identifiers.as_dicts()[0]
with self._do_request(
"GET", f"{self._RESOURCE_PATH}/{id}/content", stream=True, accept="text/plain"
"POST", f"{self._RESOURCE_PATH}/content", stream=True, accept="text/plain", json=identifier
) as response:
for chunk in response.iter_content(chunk_size=2**21):
if chunk: # filter out keep-alive new chunks
Expand Down
2 changes: 1 addition & 1 deletion cognite/client/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import annotations

__version__ = "7.69.0"
__version__ = "7.70.0"
__api_subversion__ = "20230101"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "cognite-sdk"

version = "7.69.0"
version = "7.70.0"
description = "Cognite Python SDK"
readme = "README.md"
documentation = "https://cognite-sdk-python.readthedocs-hosted.com"
Expand Down
Loading