Skip to content

Commit

Permalink
[Core] Add span metrics for model_forward, scheduler and sampler time
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-mkeralapura committed Aug 12, 2024
1 parent 7ff2727 commit f61b37d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
4 changes: 2 additions & 2 deletions tests/tracing/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,5 @@ def test_traces(trace_service):
SpanAttributes.LLM_LATENCY_TIME_TO_FIRST_TOKEN) == ttft
e2e_time = metrics.finished_time - metrics.arrival_time
assert attributes.get(SpanAttributes.LLM_LATENCY_E2E) == e2e_time
- assert attributes.get(
- SpanAttributes.LLM_LATENCY_TIME_IN_SCHEDULER) == metrics.scheduler_time
+ assert attributes.get(SpanAttributes.LLM_LATENCY_TIME_IN_SCHEDULER
+ ) == metrics.scheduler_time
7 changes: 3 additions & 4 deletions vllm/worker/worker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,9 @@ def _get_driver_input_and_broadcast(
def prepare_input(
self,
execute_model_req: Optional[ExecuteModelRequest] = None
- ) -> Optional[Tuple[ModelRunnerInputBase, WorkerInput]]:
- """
- Prepare the inputs to ModelRunner and workers.
- """
+ ) -> Optional[List[SamplerOutput]]:
+ """Executes at least one model step on the given sequences, unless no
+ sequences are provided."""
if self.is_driver_worker:
if execute_model_req is None:
if self.do_metadata_broadcast:
Expand Down

0 comments on commit f61b37d

Please sign in to comment.