Commit
Improved support for older o1 models (o1-preview/o1-mini) and improved tools instruction/configuration (#311)
bkrabach authored Jan 29, 2025
1 parent d9913c7 commit 99ca554
Showing 7 changed files with 115 additions and 28 deletions.
50 changes: 39 additions & 11 deletions assistants/codespace-assistant/assistant/config.py
@@ -1,3 +1,4 @@
from textwrap import dedent
from typing import Annotated

from assistant_extensions.ai_clients.config import (
@@ -55,16 +56,39 @@ class AssistantConfigModel(BaseModel):
description="The prompt used to instruct the behavior of the AI assistant.",
),
UISchema(widget="textarea"),
] = (
"You are an AI assistant that helps people with their work. In addition to text, you can also produce markdown,"
" code snippets, and other types of content. If you wrap your response in triple backticks, you can specify the"
" language for syntax highlighting. For example, ```python print('Hello, World!')``` will produce a code"
" snippet in Python. Mermaid markdown is supported if you wrap the content in triple backticks and specify"
' \'mermaid\' as the language. For example, ```mermaid graph TD; A["A"]-->B["B"];``` will render a flowchart for the'
" user.ABC markdown is supported if you wrap the content in triple backticks and specify 'abc' as the"
" language.For example, ```abc C4 G4 A4 F4 E4 G4``` will render a music score and an inline player with a link"
" to download the midi file."
)
] = dedent("""
You are an AI assistant that helps people with their coding projects.
In addition to text, you can also produce markdown, code snippets, and other types of content.
If you wrap your response in triple backticks, you can specify the language for syntax highlighting.
For example, ```python print('Hello, World!')``` will produce a code snippet in Python.
Mermaid markdown is supported if you wrap the content in triple backticks and specify 'mermaid' as
the language. For example, ```mermaid graph TD; A["A"]-->B["B"];``` will render a flowchart for the
user.
ABC markdown is supported if you wrap the content in triple backticks and specify 'abc' as the
language. For example, ```abc C4 G4 A4 F4 E4 G4``` will render a music score and an inline player
with a link to download the midi file.
Coding project guidance:
- Create core files and folders for the project as needed, such as README.md, .gitignore, etc.
- Create language specific files and folders as needed, such as package.json, pyproject.toml, etc.
- Provide instructions for the user on installing dependencies via the CLI instead of writing these
directly to the project files; this will ensure the user has the most up-to-date versions.
- Offer to keep the README and other documentation up-to-date with the latest project information; if
the user would like this behavior, be consistent about making these updates each turn as needed.
- Python projects:
- Use 'uv' for managing virtual environments and dependencies (do not use 'poetry')
- Typescript projects:
- Use 'pnpm' for managing dependencies (do not use 'npm' or 'yarn')
- It is ok to update '.vscode' folder contents and 'package.json' scripts as needed for adding run
and debug configurations, but do not add or remove any other files or folders.
Ultimately, however, the user is in control of the project and can override the above guidance as needed.
""").strip()

guardrails_prompt: Annotated[
str,
@@ -134,7 +158,11 @@ class AssistantConfigModel(BaseModel):
] = AzureOpenAIClientConfigModel(
service_config=AzureOpenAIServiceConfig.model_construct(),
request_config=OpenAIRequestConfig(
max_tokens=128_000, response_tokens=65_536, model="o1-mini", is_reasoning_model=True
max_tokens=128_000,
response_tokens=65_536,
model="o1-mini",
is_reasoning_model=True,
reasoning_effort="medium",
),
)

@@ -72,10 +72,29 @@ class ToolsConfigModel(BaseModel):
),
] = "[ Maximum steps reached for this turn, engage with assistant to continue ]"

tools_instructions: Annotated[
additional_instructions: Annotated[
str,
Field(
title="Tools Instructions",
description=dedent("""
General instructions for using tools. No need to include a list of tools or instructions
on how to use them in general; that will be handled automatically. Instead, use this
space to provide any additional instructions for using specific tools, such as folders to
exclude in file searches, or instructions to always re-read a file before using it.
""").strip(),
),
UISchema(widget="textarea", enable_markdown_in_description=True),
] = dedent("""
- When searching or browsing for files, consider the kinds of folders and files that should be avoided:
- For example, for coding projects exclude folders like `.git`, `.vscode`, `node_modules`, and `dist`.
- For each turn, always re-read a file before using it to ensure the most up-to-date information, especially
when writing or editing files.
""").strip()

instructions_for_non_tool_models: Annotated[
str,
Field(
title="Tools Instructions for Models Without Tools Support",
description=dedent("""
Some models don't support tools (like OpenAI reasoning models), so these instructions
are only used to implement tool support through custom instruction and injection of
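The description above is cut off by the diff viewer, but it refers to emulating tool support for models that cannot call tools natively (such as the OpenAI reasoning models): tool definitions and calling conventions are injected as plain instructions, and the assistant parses a structured reply back out of the model's text. The commit does not show that machinery here; the following is only a rough, self-contained sketch of the general idea, with entirely hypothetical helper names:

```python
# Hypothetical sketch, not part of this commit: the general pattern of giving
# tool support to models that lack native tool calling, by injecting tool
# descriptions into the prompt and parsing a structured reply back out.
import json
from typing import Any


def build_tool_instructions(tools: list[dict[str, Any]], extra_instructions: str) -> str:
    """Render tool descriptions into prompt text for models without tool support."""
    tool_lines = "\n".join(f"- {tool['name']}: {tool.get('description', '')}" for tool in tools)
    return (
        "To call a tool, reply with a single JSON object of the form "
        '{"tool": "<name>", "arguments": {...}}.\n\n'
        f"Available tools:\n{tool_lines}\n\n{extra_instructions}"
    )


def try_parse_tool_call(response_text: str) -> dict[str, Any] | None:
    """Return a parsed tool call if the model emitted one, otherwise None."""
    try:
        parsed = json.loads(response_text.strip())
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) and "tool" in parsed else None


print(build_tool_instructions(
    [{"name": "read_file", "description": "Read a file from the workspace"}],
    "Always re-read a file before editing it.",
))
```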
@@ -82,6 +82,10 @@ async def handle_error(error_message: str) -> StepResult:
])
if ai_context is not None and ai_context.strip() != "":
response_content.append(ai_context)
else:
response_content.append(
f"[Assistant is calling tools: {', '.join([tool_call.name for tool_call in tool_calls])}]"
)

content = "\n\n".join(response_content)

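For reference, the fallback added above simply lists the names of the pending tool calls when the model returns no accompanying text. A tiny illustration of the resulting notice (the tool-call objects and names here are invented for the example):

```python
# Tiny illustration of the fallback notice format added above; the tool-call
# objects and names are invented for the example.
from types import SimpleNamespace

tool_calls = [SimpleNamespace(name="read_file"), SimpleNamespace(name="write_file")]
notice = f"[Assistant is calling tools: {', '.join(tc.name for tc in tool_calls)}]"
print(notice)  # [Assistant is calling tools: read_file, write_file]
```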
@@ -35,6 +35,13 @@ async def build_request(
# Build system message content
system_message_content = build_system_message_content(config, context, participants, silence_token)

# Add any additional tools instructions to the system message content
if config.extensions_config.tools.enabled:
system_message_content = "\n\n".join([
system_message_content,
config.extensions_config.tools.additional_instructions,
])

# Add MCP Server prompts to the system message content
if len(mcp_prompts) > 0:
system_message_content = "\n\n".join([system_message_content, *mcp_prompts])
26 changes: 20 additions & 6 deletions assistants/codespace-assistant/assistant/response/response.py
@@ -28,7 +28,8 @@ async def respond_to_conversation(
metadata: dict[str, Any] = {},
) -> None:
"""
Respond to a conversation message using dynamically loaded MCP servers with support for multiple tool invocations.
Perform a multi-step response to a conversation message using dynamically loaded MCP servers with
support for multiple tool invocations.
"""

async with AsyncExitStack() as stack:
@@ -54,14 +55,26 @@

# Initialize a loop control variable
max_steps = config.extensions_config.tools.max_steps
interrupted = False
encountered_error = False
completed_within_max_steps = False
step_count = 0

# Loop until the conversation is complete or the maximum number of steps is reached
# Loop until the response is complete or the maximum number of steps is reached
while step_count < max_steps:
step_count += 1

# Check to see if we should interrupt our flow
last_message = await context.get_messages(limit=1, message_types=[MessageType.chat])
if step_count > 1 and last_message.messages[0].sender.participant_id != context.assistant.id:
# The last message was from a sender other than the assistant, so we should
# interrupt our flow as this would have kicked off a new response from this
# assistant with the new message in mind and that process can decide if it
# should continue with the current flow or not.
interrupted = True
logger.info("Response interrupted.")
break

step_result = await next_step(
mcp_sessions=mcp_sessions,
mcp_tools=mcp_tools,
@@ -81,15 +94,16 @@
completed_within_max_steps = True
break

# If the conversation did not complete within the maximum number of steps, send a message to the user
if not completed_within_max_steps and not encountered_error:
# If the response did not complete within the maximum number of steps, send a message to the user
if not completed_within_max_steps and not encountered_error and not interrupted:
await context.send_messages(
NewConversationMessage(
content=config.extensions_config.tools.max_steps_truncation_message,
message_type=MessageType.notice,
metadata=metadata,
)
)
logger.info("Response stopped early due to maximum steps.")

# Log the completion of the conversation
logger.info("Conversation completed.")
# Log the completion of the response
logger.info("Response completed.")
@@ -111,8 +111,20 @@ async def get_response(
async with openai_client.create_client(self.service_config) as client:
try:
if self.request_config.is_reasoning_model:
# reasoning models do not support system messages, so replace them with appropriate role
if self.request_config.model != "o1-preview":
# due to variations in the API response for reasoning models, we need to adjust the messages
# and completion request based on the model type

# initialize the completion parameters
# for reasoning models, use max_completion_tokens instead of max_tokens
completion_params = {
"model": self.request_config.model,
"max_completion_tokens": self.request_config.response_tokens,
}

legacy_models = ["o1-preview", "o1-mini"]

# set the role of the messages based on the model type
if self.request_config.model not in legacy_models:
chat_message_params = [
ChatCompletionDeveloperMessageParam({
"role": "developer",
@@ -123,6 +135,7 @@
for message in chat_message_params
]
else:
# fallback for older reasoning models
chat_message_params = [
ChatCompletionUserMessageParam({
"role": "user",
@@ -133,13 +146,15 @@
for message in chat_message_params
]

# for reasoning models, use max_completion_tokens instead of max_tokens
completion = await client.chat.completions.create(
messages=chat_message_params,
model=self.request_config.model,
max_completion_tokens=self.request_config.response_tokens,
reasoning_effort=self.request_config.reasoning_effort,
)
# set the reasoning effort for the completion for newer reasoning models
if self.request_config.model not in legacy_models:
completion_params["reasoning_effort"] = self.request_config.reasoning_effort

completion_params["messages"] = chat_message_params

# cast the completion to a ChatCompletion for reasoning models
reasoning_completion: ChatCompletion = await client.chat.completions.create(**completion_params)
completion = reasoning_completion

response_result.content = completion.choices[0].message.content

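The hunks above are fragmented by the diff viewer, so here is a condensed sketch of the request assembly for reasoning models as it appears after this change: legacy models (o1-preview, o1-mini) get instruction content sent with the user role and no reasoning_effort, while newer reasoning models use the developer role and accept reasoning_effort; both use max_completion_tokens instead of max_tokens. The helper function and the uniform role swap below are simplifications for illustration only; the exact message handling is partly elided in the diff.

```python
# Condensed sketch (not verbatim source) of the reasoning-model request
# assembly after this change. build_completion_params is a hypothetical helper.
from types import SimpleNamespace

LEGACY_REASONING_MODELS = ["o1-preview", "o1-mini"]


def build_completion_params(request_config, chat_message_params: list[dict]) -> dict:
    """Assemble kwargs for client.chat.completions.create(...) for a reasoning model."""
    params = {
        "model": request_config.model,
        # Reasoning models take max_completion_tokens rather than max_tokens.
        "max_completion_tokens": request_config.response_tokens,
    }
    if request_config.model not in LEGACY_REASONING_MODELS:
        # Newer reasoning models accept the 'developer' role and a reasoning_effort setting.
        role = "developer"
        params["reasoning_effort"] = request_config.reasoning_effort
    else:
        # o1-preview / o1-mini accept neither, so instruction content goes in a user message.
        role = "user"
    # Simplification: re-role every message; the diff elides exactly which messages are converted.
    params["messages"] = [{**message, "role": role} for message in chat_message_params]
    return params


cfg = SimpleNamespace(model="o1-mini", response_tokens=65_536, reasoning_effort="medium")
print(build_completion_params(cfg, [{"role": "system", "content": "You are a helpful assistant."}]))
```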
2 changes: 1 addition & 1 deletion libraries/python/openai-client/openai_client/config.py
@@ -183,7 +183,7 @@ class OpenAIRequestConfig(RequestConfigBaseModel):
description=(
"Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and"
" high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in"
" a response."
" a response. (Does not apply to o1-preview or o1-mini models)"
),
),
] = "medium"
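As a usage note, overriding the effort level in a request config might look like the snippet below. The field names follow the OpenAIRequestConfig usage in config.py above, the import path is assumed from the repository layout, and the model name is just an example of a newer reasoning model that honors reasoning_effort (o1-preview and o1-mini ignore it).

```python
# Illustrative only: field names follow the OpenAIRequestConfig usage shown in
# config.py above; the import path is assumed from the repository layout.
from openai_client.config import OpenAIRequestConfig

request_config = OpenAIRequestConfig(
    max_tokens=128_000,
    response_tokens=65_536,
    model="o1",               # example newer reasoning model; o1-preview/o1-mini ignore reasoning_effort
    is_reasoning_model=True,
    reasoning_effort="low",   # low | medium | high
)
```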
