feat: log wrap_openai runs with unified usage_metadata (#1071)
nfcampos authored Oct 14, 2024
2 parents 6a510ee + 610de6d commit 5cfe416
Showing 8 changed files with 1,263 additions and 2 deletions.
2 changes: 1 addition & 1 deletion js/src/tests/evaluate.int.test.ts
@@ -625,7 +625,7 @@ test("max concurrency works with summary evaluators", async () => {
expect(receivedCommentStrings).toEqual(expectedCommentString);
});

test("Target func can be a runnable", async () => {
test.skip("Target func can be a runnable", async () => {
const targetFunc = RunnableSequence.from([
RunnableLambda.from((input: Record<string, any>) => ({
foo: input.input + 1,
63 changes: 62 additions & 1 deletion python/langsmith/schemas.py
@@ -17,7 +17,7 @@
)
from uuid import UUID

-from typing_extensions import TypedDict
+from typing_extensions import NotRequired, TypedDict

try:
from pydantic.v1 import ( # type: ignore[import]
@@ -891,3 +891,64 @@ class PromptSortField(str, Enum):
"""Last updated time."""
num_likes = "num_likes"
"""Number of likes."""


class InputTokenDetails(TypedDict, total=False):
"""Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
"""

audio: int
"""Audio input tokens."""
cache_creation: int
"""Input tokens that were cached and there was a cache miss.
Since there was a cache miss, the cache was created from these tokens.
"""
cache_read: int
"""Input tokens that were cached and there was a cache hit.
Since there was a cache hit, the tokens were read from the cache. More precisely,
the model state given these tokens was read from the cache.
"""


class OutputTokenDetails(TypedDict, total=False):
"""Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""

audio: int
"""Audio output tokens."""
reasoning: int
"""Reasoning output tokens.
Tokens generated by the model in a chain of thought process (e.g. by OpenAI's o1
models) that are not returned as part of model output.
"""


class UsageMetadata(TypedDict):
"""Usage metadata for a message, such as token counts.
This is a standard representation of token usage that is consistent across models.
"""

input_tokens: int
"""Count of input (or prompt) tokens. Sum of all input token types."""
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count. Sum of input_tokens + output_tokens."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
"""
output_token_details: NotRequired[OutputTokenDetails]
"""Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""
57 changes: 57 additions & 0 deletions python/langsmith/wrappers/_openai.py
Expand Up @@ -21,6 +21,7 @@

from langsmith import client as ls_client
from langsmith import run_helpers
from langsmith.schemas import InputTokenDetails, OutputTokenDetails, UsageMetadata

if TYPE_CHECKING:
from openai import AsyncOpenAI, OpenAI
@@ -141,6 +142,12 @@ def _reduce_chat(all_chunks: List[ChatCompletionChunk]) -> dict:
]
else:
d = {"choices": [{"message": {"role": "assistant", "content": ""}}]}
# streamed outputs don't go through `process_outputs`
# so we need to flatten metadata here
oai_token_usage = d.pop("usage", None)
d["usage_metadata"] = (
_create_usage_metadata(oai_token_usage) if oai_token_usage else None
)
return d


@@ -160,12 +167,59 @@ def _reduce_completions(all_chunks: List[Completion]) -> dict:
return d


def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata:
input_tokens = oai_token_usage.get("prompt_tokens") or 0
output_tokens = oai_token_usage.get("completion_tokens") or 0
total_tokens = oai_token_usage.get("total_tokens") or input_tokens + output_tokens
input_token_details: dict = {
"audio": (oai_token_usage.get("prompt_tokens_details") or {}).get(
"audio_tokens"
),
"cache_read": (oai_token_usage.get("prompt_tokens_details") or {}).get(
"cached_tokens"
),
}
output_token_details: dict = {
"audio": (oai_token_usage.get("completion_tokens_details") or {}).get(
"audio_tokens"
),
"reasoning": (oai_token_usage.get("completion_tokens_details") or {}).get(
"reasoning_tokens"
),
}
return UsageMetadata(
input_tokens=input_tokens,
output_tokens=output_tokens,
total_tokens=total_tokens,
input_token_details=InputTokenDetails(
**{k: v for k, v in input_token_details.items() if v is not None}
),
output_token_details=OutputTokenDetails(
**{k: v for k, v in output_token_details.items() if v is not None}
),
)


def _process_chat_completion(outputs: Any):
try:
rdict = outputs.model_dump()
oai_token_usage = rdict.pop("usage", None)
rdict["usage_metadata"] = (
_create_usage_metadata(oai_token_usage) if oai_token_usage else None
)
return rdict
except BaseException as e:
logger.debug(f"Error processing chat completion: {e}")
return {"output": outputs}


def _get_wrapper(
original_create: Callable,
name: str,
reduce_fn: Callable,
tracing_extra: Optional[TracingExtra] = None,
invocation_params_fn: Optional[Callable] = None,
process_outputs: Optional[Callable] = None,
) -> Callable:
textra = tracing_extra or {}

@@ -177,6 +231,7 @@ def create(*args, stream: bool = False, **kwargs):
reduce_fn=reduce_fn if stream else None,
process_inputs=_strip_not_given,
_invocation_params_fn=invocation_params_fn,
process_outputs=process_outputs,
**textra,
)

@@ -191,6 +246,7 @@ async def acreate(*args, stream: bool = False, **kwargs):
reduce_fn=reduce_fn if stream else None,
process_inputs=_strip_not_given,
_invocation_params_fn=invocation_params_fn,
process_outputs=process_outputs,
**textra,
)
return await decorator(original_create)(*args, stream=stream, **kwargs)
@@ -232,6 +288,7 @@ def wrap_openai(
_reduce_chat,
tracing_extra=tracing_extra,
invocation_params_fn=functools.partial(_infer_invocation_params, "chat"),
process_outputs=_process_chat_completion,
)
client.completions.create = _get_wrapper( # type: ignore[method-assign]
client.completions.create,
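Taken together, the wiring above flattens usage on both paths: non-streamed chat completions go through the new process_outputs hook (_process_chat_completion), while streamed ones are flattened inside _reduce_chat, since reduced outputs bypass process_outputs. The mapping itself can be exercised directly; a minimal sketch with a made-up OpenAI-style usage payload (illustrative values only; _create_usage_metadata is a module-internal helper shown above):

    from langsmith.wrappers._openai import _create_usage_metadata

    # Hypothetical payload mirroring the shape of OpenAI's `usage` field.
    oai_usage = {
        "prompt_tokens": 9,
        "completion_tokens": 9,
        "total_tokens": 18,
        "prompt_tokens_details": {"cached_tokens": 0},
        "completion_tokens_details": {"reasoning_tokens": 0},
    }

    print(_create_usage_metadata(oai_usage))
    # {'input_tokens': 9, 'output_tokens': 9, 'total_tokens': 18,
    #  'input_token_details': {'cache_read': 0},
    #  'output_token_details': {'reasoning': 0}}

Detail keys the provider omits (e.g. audio tokens) are dropped rather than emitted as None, which is why the test fixture below carries only cache_read and reasoning.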
@@ -0,0 +1,120 @@
{
  "post": [
    {
      "id": "d0d84d31-923d-4cb5-94a8-40a0a0087578",
      "start_time": "2024-10-11T20:58:23.298773+00:00",
      "extra": {
        "metadata": {
          "ls_method": "traceable",
          "ls_provider": "openai",
          "ls_model_type": "chat",
          "ls_model_name": "gpt-4o-mini",
          "revision_id": "v0.1.82-381-g03d9e1a-dirty"
        },
        "runtime": {
          "sdk": "langsmith-py",
          "sdk_version": "0.1.131",
          "library": "langsmith",
          "platform": "macOS-13.2-arm64-arm-64bit",
          "runtime": "python",
          "py_implementation": "CPython",
          "runtime_version": "3.11.7",
          "langchain_version": "0.2.9",
          "langchain_core_version": "0.2.21"
        }
      },
      "serialized": {
        "name": "ChatOpenAI",
        "signature": "(*, messages: 'Iterable[ChatCompletionMessageParam]', model: 'Union[str, ChatModel]', frequency_penalty: 'Optional[float] | NotGiven' = NOT_GIVEN, function_call: 'completion_create_params.FunctionCall | NotGiven' = NOT_GIVEN, functions: 'Iterable[completion_create_params.Function] | NotGiven' = NOT_GIVEN, logit_bias: 'Optional[Dict[str, int]] | NotGiven' = NOT_GIVEN, logprobs: 'Optional[bool] | NotGiven' = NOT_GIVEN, max_completion_tokens: 'Optional[int] | NotGiven' = NOT_GIVEN, max_tokens: 'Optional[int] | NotGiven' = NOT_GIVEN, n: 'Optional[int] | NotGiven' = NOT_GIVEN, parallel_tool_calls: 'bool | NotGiven' = NOT_GIVEN, presence_penalty: 'Optional[float] | NotGiven' = NOT_GIVEN, response_format: 'completion_create_params.ResponseFormat | NotGiven' = NOT_GIVEN, seed: 'Optional[int] | NotGiven' = NOT_GIVEN, service_tier: \"Optional[Literal['auto', 'default']] | NotGiven\" = NOT_GIVEN, stop: 'Union[Optional[str], List[str]] | NotGiven' = NOT_GIVEN, stream: 'Optional[Literal[False]] | Literal[True] | NotGiven' = NOT_GIVEN, stream_options: 'Optional[ChatCompletionStreamOptionsParam] | NotGiven' = NOT_GIVEN, temperature: 'Optional[float] | NotGiven' = NOT_GIVEN, tool_choice: 'ChatCompletionToolChoiceOptionParam | NotGiven' = NOT_GIVEN, tools: 'Iterable[ChatCompletionToolParam] | NotGiven' = NOT_GIVEN, top_logprobs: 'Optional[int] | NotGiven' = NOT_GIVEN, top_p: 'Optional[float] | NotGiven' = NOT_GIVEN, user: 'str | NotGiven' = NOT_GIVEN, extra_headers: 'Headers | None' = None, extra_query: 'Query | None' = None, extra_body: 'Body | None' = None, timeout: 'float | httpx.Timeout | None | NotGiven' = NOT_GIVEN) -> 'ChatCompletion | AsyncStream[ChatCompletionChunk]'",
        "doc": null
      },
      "events": [],
      "tags": [],
      "attachments": {},
      "dotted_order": "20241011T205823298773Zd0d84d31-923d-4cb5-94a8-40a0a0087578",
      "trace_id": "d0d84d31-923d-4cb5-94a8-40a0a0087578",
      "outputs": {},
      "session_name": "default",
      "name": "ChatOpenAI",
      "inputs": {
        "messages": [
          {
            "role": "user",
            "content": "howdy"
          }
        ],
        "model": "gpt-4o-mini",
        "stream": false,
        "extra_headers": null,
        "extra_query": null,
        "extra_body": null
      },
      "run_type": "llm"
    }
  ],
  "patch": [
    {
      "id": "d0d84d31-923d-4cb5-94a8-40a0a0087578",
      "name": "ChatOpenAI",
      "trace_id": "d0d84d31-923d-4cb5-94a8-40a0a0087578",
      "parent_run_id": null,
      "dotted_order": "20241011T205823298773Zd0d84d31-923d-4cb5-94a8-40a0a0087578",
      "tags": [],
      "extra": {
        "metadata": {
          "ls_method": "traceable",
          "ls_provider": "openai",
          "ls_model_type": "chat",
          "ls_model_name": "gpt-4o-mini",
          "revision_id": "v0.1.82-381-g03d9e1a-dirty"
        },
        "runtime": {
          "sdk": "langsmith-py",
          "sdk_version": "0.1.131",
          "library": "langsmith",
          "platform": "macOS-13.2-arm64-arm-64bit",
          "runtime": "python",
          "py_implementation": "CPython",
          "runtime_version": "3.11.7",
          "langchain_version": "0.2.9",
          "langchain_core_version": "0.2.21"
        }
      },
      "end_time": "2024-10-11T20:58:24.417106+00:00",
      "outputs": {
        "id": "chatcmpl-AHH0KBvLG7Wq3wfSEGQuxh0xE07Fl",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Howdy! How can I assist you today?",
              "refusal": null,
              "role": "assistant",
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 1728680304,
        "model": "gpt-4o-mini-2024-07-18",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_e2bde53e6e",
        "usage_metadata": {
          "input_tokens": 9,
          "output_tokens": 9,
          "total_tokens": 18,
          "input_token_details": {
            "cache_read": 0
          },
          "output_token_details": {
            "reasoning": 0
          }
        }
      },
      "events": []
    }
  ]
}