diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index daf6aeb3a736b..7040900aeef78 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -221,12 +221,12 @@ def stream(
             generation: Optional[ChatGenerationChunk] = None
             try:
                 for chunk in self._stream(messages, stop=stop, **kwargs):
-                    run_manager.on_llm_new_token(
-                        cast(str, chunk.message.content), chunk=chunk
-                    )
                     if chunk.message.id is None:
                         chunk.message.id = f"run-{run_manager.run_id}"
                     chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                    run_manager.on_llm_new_token(
+                        cast(str, chunk.message.content), chunk=chunk
+                    )
                     yield chunk.message
                     if generation is None:
                         generation = chunk
@@ -293,12 +293,12 @@ async def astream(
                     stop=stop,
                     **kwargs,
                 ):
-                    await run_manager.on_llm_new_token(
-                        cast(str, chunk.message.content), chunk=chunk
-                    )
                     if chunk.message.id is None:
                         chunk.message.id = f"run-{run_manager.run_id}"
                     chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
+                    await run_manager.on_llm_new_token(
+                        cast(str, chunk.message.content), chunk=chunk
+                    )
                     yield chunk.message
                     if generation is None:
                         generation = chunk
@@ -610,13 +610,13 @@ def _generate_with_cache(
         ):
             chunks: List[ChatGenerationChunk] = []
             for chunk in self._stream(messages, stop=stop, **kwargs):
+                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                 if run_manager:
                     if chunk.message.id is None:
                         chunk.message.id = f"run-{run_manager.run_id}"
                     run_manager.on_llm_new_token(
                         cast(str, chunk.message.content), chunk=chunk
                     )
-                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                 chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         else:
@@ -691,13 +691,13 @@ async def _agenerate_with_cache(
         ):
             chunks: List[ChatGenerationChunk] = []
             async for chunk in self._astream(messages, stop=stop, **kwargs):
+                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                 if run_manager:
                     if chunk.message.id is None:
                         chunk.message.id = f"run-{run_manager.run_id}"
                     await run_manager.on_llm_new_token(
                         cast(str, chunk.message.content), chunk=chunk
                     )
-                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                 chunks.append(chunk)
             result = generate_from_stream(iter(chunks))
         else:
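
For context, a minimal sketch (not part of this diff) of what the reordering buys downstream consumers: in all four hunks, `chunk.message.id` and `chunk.message.response_metadata` are now populated *before* `on_llm_new_token` fires, so a callback handler can rely on both fields at token time. `ChunkMetadataLogger` is a hypothetical name for illustration; `BaseCallbackHandler`, `on_llm_new_token`, and the chunk types are existing `langchain_core` APIs.

```python
from typing import Any, Optional, Union

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk


class ChunkMetadataLogger(BaseCallbackHandler):
    """Hypothetical handler that inspects per-token chunks.

    With the reordering above, chunk.message.id and
    chunk.message.response_metadata are already set by the time
    on_llm_new_token is invoked; previously the handler could
    observe a chunk whose id was still None.
    """

    def on_llm_new_token(
        self,
        token: str,
        *,
        chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
        **kwargs: Any,
    ) -> None:
        if isinstance(chunk, ChatGenerationChunk):
            # Both fields are populated before this callback runs.
            print(chunk.message.id, chunk.message.response_metadata, repr(token))
```

A handler like this would be attached in the usual way, e.g. `model.stream(prompt, config={"callbacks": [ChunkMetadataLogger()]})`.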