fix: send error instead of extra #1430

Open · wants to merge 2 commits into main
10 changes: 5 additions & 5 deletions python/langsmith/client.py
@@ -1,4 +1,4 @@
"""Client for interacting with the LangSmith API.

Use the client to customize API keys / workspace connections, SSL certs,
etc. for tracing.
@@ -5047,8 +5047,8 @@
),
feedback_source_type=ls_schemas.FeedbackSourceType.MODEL,
project_id=project_id,
extra=res.extra,
trace_id=run.trace_id if run else None,
error=res.error,
)
return results

@@ -5116,7 +5116,7 @@
project_id: Optional[ID_TYPE] = None,
comparative_experiment_id: Optional[ID_TYPE] = None,
feedback_group_id: Optional[ID_TYPE] = None,
extra: Optional[Dict] = None,
error: Optional[bool] = None,
trace_id: Optional[ID_TYPE] = None,
**kwargs: Any,
) -> ls_schemas.Feedback:
@@ -5162,8 +5162,8 @@
feedback_group_id (Optional[Union[UUID, str]]):
When logging preferences, ranking runs, or other comparative feedback,
this is used to group feedback together.
extra (Optional[Dict]):
Metadata for the feedback.
error (Optional[bool]):
Whether the evaluator run errored.
trace_id (Optional[Union[UUID, str]]):
The trace ID of the run to provide feedback for. Enables batch ingestion.
**kwargs (Any):
@@ -5234,7 +5234,7 @@
comparative_experiment_id, accept_null=True
),
feedback_group_id=_ensure_uuid(feedback_group_id, accept_null=True),
extra=extra,
error=error,
)

use_multipart = (self.info.batch_ingest_config or {}).get(
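With this change, callers flag an errored evaluator run directly on the feedback call instead of tucking it into `extra`. A minimal sketch, assuming the PR's new `error` keyword is available; the run ID, key, and comment below are placeholders, not taken from this PR:

```python
# Hypothetical usage of the new `error` flag on create_feedback (placeholder
# run ID and key; requires this PR's change to the client signature).
import uuid

from langsmith import Client

client = Client()
run_id = uuid.uuid4()  # placeholder: the run the feedback is attached to
client.create_feedback(
    run_id,
    key="correctness",
    comment="ValueError('boom')",  # repr() of the evaluator's exception
    error=True,                    # replaces the old extra={"error": True}
)
```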
38 changes: 22 additions & 16 deletions python/langsmith/evaluation/_arunner.py
@@ -8,6 +8,7 @@
import logging
import pathlib
import uuid
from contextlib import ExitStack
from typing import (
TYPE_CHECKING,
Any,
@@ -784,15 +785,21 @@ async def _arun_evaluators(
**(current_context["metadata"] or {}),
**{"experiment": self.experiment_name},
}
with rh.tracing_context(
**{
**current_context,
"project_name": "evaluators",
"metadata": metadata,
"enabled": "local" if not self._upload_results else True,
"client": self.client,
}
):
stack = ExitStack()

stack.enter_context(
rh.tracing_context(
**{
**current_context,
"project_name": "evaluators",
"metadata": metadata,
"enabled": "local" if not self._upload_results else True,
"client": self.client,
}
)
)
run_collector = stack.enter_context(rh._on_run_set())
with stack:
run = current_results["run"]
example = current_results["example"]
eval_results = current_results["evaluation_results"]
@@ -812,14 +819,17 @@
except Exception as e:
try:
feedback_keys = _extract_feedback_keys(evaluator)
source_run_id = (
run_collector.runs[-1].id if run_collector.runs else None
)

error_response = EvaluationResults(
results=[
EvaluationResult(
key=key,
source_run_id=run.id,
comment=repr(e),
extra={"error": True},
source_run_id=source_run_id,
error=True,
)
for key in feedback_keys
]
@@ -839,11 +849,7 @@
f" run {run.id}: {repr(e)}",
exc_info=True,
)
logger.error(
f"Error running evaluator {repr(evaluator)} on"
f" run {run.id}: {repr(e)}",
exc_info=True,
)
run_collector.runs.clear()
if example.attachments is not None:
for attachment in example.attachments:
reader = example.attachments[attachment]["reader"]
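The switch from a plain `with` block to `ExitStack` is what lets the run collector be entered alongside the tracing context, so the evaluator's own run ID can be recovered when it raises. A self-contained sketch of that pattern, using stand-in context managers rather than the real `rh.tracing_context` / `rh._on_run_set`:

```python
# Sketch of the ExitStack pattern: enter several context managers up front,
# keep a handle yielded by one of them, then run the shared body under `with stack:`.
from contextlib import ExitStack, contextmanager


@contextmanager
def fake_tracing_context(**kwargs):
    # stand-in for rh.tracing_context
    yield


@contextmanager
def fake_run_collector():
    # stand-in for rh._on_run_set(); yields a list that collects "runs"
    runs = []
    yield runs


stack = ExitStack()
stack.enter_context(fake_tracing_context(project_name="evaluators"))
run_collector = stack.enter_context(fake_run_collector())
with stack:  # both contexts exit together here
    run_collector.append("evaluator-run")
    source_run_id = run_collector[-1] if run_collector else None
```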
34 changes: 23 additions & 11 deletions python/langsmith/evaluation/_runner.py
@@ -17,6 +17,7 @@
import textwrap
import threading
import uuid
from contextlib import ExitStack
from contextvars import copy_context
from typing import (
TYPE_CHECKING,
@@ -1556,15 +1557,21 @@ def _run_evaluators(
"reference_run_id": current_results["run"].id,
},
}
with rh.tracing_context(
**{
**current_context,
"project_name": "evaluators",
"metadata": metadata,
"enabled": "local" if not self._upload_results else True,
"client": self.client,
}
):
stack = ExitStack()
stack.enter_context(
rh.tracing_context(
**{
**current_context,
"project_name": "evaluators",
"metadata": metadata,
"enabled": "local" if not self._upload_results else True,
"client": self.client,
}
)
)
run_collector = stack.enter_context(rh._on_run_set())

with stack:
run = current_results["run"]
example = current_results["example"]
eval_results = current_results["evaluation_results"]
@@ -1586,14 +1593,17 @@
except Exception as e:
try:
feedback_keys = _extract_feedback_keys(evaluator)
source_run_id = (
run_collector.runs[-1].id if run_collector.runs else None
)

error_response = EvaluationResults(
results=[
EvaluationResult(
key=key,
source_run_id=run.id,
Contributor Author commented:
This was just setting the evaluator trace to be the run itself (the one the feedback is associated with). is there a way to get the evaluator trace here @hinthornw ?

Collaborator replied:
lol

comment=repr(e),
extra={"error": True},
source_run_id=source_run_id,
error=True,
)
for key in feedback_keys
]
@@ -1614,6 +1624,8 @@
f" run {run.id if run else ''}: {repr(e)}",
exc_info=True,
)
run_collector.runs.clear()

if example.attachments is not None:
for attachment in example.attachments:
reader = example.attachments[attachment]["reader"]
4 changes: 2 additions & 2 deletions python/langsmith/evaluation/evaluator.py
@@ -94,8 +94,8 @@ class EvaluationResult(BaseModel):

If none provided, the evaluation feedback is applied to the
root trace being evaluated."""
extra: Optional[Dict] = None
"""Metadata for the evaluator run."""
error: Optional[bool] = None
"""If the evaluator run errored."""

class Config:
"""Pydantic model configuration."""
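A small sketch of what an evaluator's error result looks like under the renamed field; the key and exception are illustrative, and this assumes the schema as changed in this PR:

```python
# EvaluationResult with the new error flag instead of extra={"error": True}.
from langsmith.evaluation import EvaluationResult

result = EvaluationResult(
    key="helpfulness",                              # illustrative feedback key
    comment="TimeoutError('evaluator timed out')",  # repr() of the exception
    error=True,
)
```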
65 changes: 56 additions & 9 deletions python/langsmith/run_helpers.py
@@ -59,8 +59,6 @@
_PROJECT_NAME = contextvars.ContextVar[Optional[str]]("_PROJECT_NAME", default=None)
_TAGS = contextvars.ContextVar[Optional[List[str]]]("_TAGS", default=None)
_METADATA = contextvars.ContextVar[Optional[Dict[str, Any]]]("_METADATA", default=None)


_TRACING_ENABLED = contextvars.ContextVar[Optional[Union[bool, Literal["local"]]]](
"_TRACING_ENABLED", default=None
)
@@ -74,6 +72,10 @@
"client": _CLIENT,
}

_ON_RUN_SET = contextvars.ContextVar[Optional["_Collector"]](
"_ON_RUN_SET", default=None
)


def get_current_run_tree() -> Optional[run_trees.RunTree]:
"""Get the current run tree."""
@@ -945,11 +947,15 @@ def _setup(self) -> run_trees.RunTree:
if enabled is True:
self.new_run.post()
if enabled:
_TAGS.set(tags_)
_METADATA.set(metadata)
_PARENT_RUN_TREE.set(self.new_run)
_PROJECT_NAME.set(project_name_)
_CLIENT.set(client_)
_set_tracing_context(
{
"tags": tags_,
"metadata": metadata,
"parent": self.new_run,
"project_name": project_name_,
"client": client_,
}
)

return self.new_run

@@ -1434,8 +1440,13 @@ def _setup_run(
on_end=langsmith_extra.get("on_end"),
context=context,
)
context.run(_PROJECT_NAME.set, response_container["project_name"])
context.run(_PARENT_RUN_TREE.set, response_container["new_run"])
context.run(
_set_tracing_context,
{
"project_name": response_container["project_name"],
"parent": response_container["new_run"],
},
)
return response_container


@@ -1537,6 +1548,10 @@ def _get_inputs_and_attachments_safe(
def _set_tracing_context(context: Dict[str, Any]):
"""Set the tracing context."""
for k, v in context.items():
if k == "parent":
cb = _ON_RUN_SET.get()
if cb is not None:
cb(v)
var = _CONTEXT_KEYS[k]
var.set(v)

@@ -1774,3 +1789,35 @@ def _get_function_result(results: list, reduce_fn: Callable) -> Any:
return results
else:
return results


class _Collector:
"""Collect runs set in contex."""

__slots__ = ("parent_run_id", "runs")

def __init__(self, parent_run_id: Optional[uuid.UUID]):
"""Construct callback."""
self.parent_run_id = parent_run_id
self.runs = []

def __call__(self, run: Optional[schemas.Run]):
"""Add a run."""
try:
if run is None:
return
if run.parent_run_id == self.parent_run_id:
self.runs.append(run)
except Exception:
pass


@contextlib.contextmanager
def _on_run_set() -> Generator[_Collector, None, None]:
parent_run_tree = _PARENT_RUN_TREE.get()
parent_run_id = parent_run_tree.run_id if parent_run_tree else None
collector = _Collector(parent_run_id)
prev = _ON_RUN_SET.get()
_ON_RUN_SET.set(collector)
yield collector
_ON_RUN_SET.set(prev)
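The `_on_run_set` helper registers a per-context callback that fires whenever the parent run is set, which is how the evaluator's run gets captured above. A self-contained sketch of that contextvar-callback pattern, with illustrative names rather than the module's private ones:

```python
# Minimal illustration of a contextvar-registered callback that observes
# every "parent run" assignment made while the collector is active.
import contextlib
import contextvars
from typing import Callable, List, Optional

_ON_SET: contextvars.ContextVar[Optional[Callable[[str], None]]] = contextvars.ContextVar(
    "_ON_SET", default=None
)
_PARENT: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
    "_PARENT", default=None
)


def set_parent(run_name: str) -> None:
    cb = _ON_SET.get()
    if cb is not None:
        cb(run_name)      # notify the active collector, if any
    _PARENT.set(run_name)


@contextlib.contextmanager
def on_parent_set():
    collected: List[str] = []
    prev = _ON_SET.get()
    _ON_SET.set(collected.append)
    try:
        yield collected
    finally:
        _ON_SET.set(prev)  # restore whatever was registered before


with on_parent_set() as runs:
    set_parent("evaluator-run-1")
print(runs)  # ['evaluator-run-1']
```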
4 changes: 2 additions & 2 deletions python/langsmith/schemas.py
@@ -583,8 +583,8 @@ class FeedbackBase(BaseModel):
"""For preference scoring, this group ID is shared across feedbacks for each

run in the group that was being compared."""
extra: Optional[Dict] = None
"""The metadata of the feedback."""
error: Optional[bool] = None
"""Whether the evaluator run errored."""

class Config:
"""Configuration class for the schema."""