Commit
Improved support for older o1 models (o1-preview/o1-mini) and improved tools instruction/configuration (#311)
bkrabach authored Jan 29, 2025
1 parent d9913c7 commit 99ca554
Showing 7 changed files with 115 additions and 28 deletions.
50 changes: 39 additions & 11 deletions assistants/codespace-assistant/assistant/config.py
@@ -1,3 +1,4 @@
from textwrap import dedent
from typing import Annotated

from assistant_extensions.ai_clients.config import (
@@ -55,16 +56,39 @@ class AssistantConfigModel(BaseModel):
description="The prompt used to instruct the behavior of the AI assistant.",
),
UISchema(widget="textarea"),
] = (
"You are an AI assistant that helps people with their work. In addition to text, you can also produce markdown,"
" code snippets, and other types of content. If you wrap your response in triple backticks, you can specify the"
" language for syntax highlighting. For example, ```python print('Hello, World!')``` will produce a code"
" snippet in Python. Mermaid markdown is supported if you wrap the content in triple backticks and specify"
' \'mermaid\' as the language. For example, ```mermaid graph TD; A["A"]-->B["B"];``` will render a flowchart for the'
" user.ABC markdown is supported if you wrap the content in triple backticks and specify 'abc' as the"
" language.For example, ```abc C4 G4 A4 F4 E4 G4``` will render a music score and an inline player with a link"
" to download the midi file."
)
] = dedent("""
You are an AI assistant that helps people with their coding projects.
In addition to text, you can also produce markdown, code snippets, and other types of content.
If you wrap your response in triple backticks, you can specify the language for syntax highlighting.
For example, ```python print('Hello, World!')``` will produce a code snippet in Python.
Mermaid markdown is supported if you wrap the content in triple backticks and specify 'mermaid' as
the language. For example, ```mermaid graph TD; A["A"]-->B["B"];``` will render a flowchart for the
user.
ABC markdown is supported if you wrap the content in triple backticks and specify 'abc' as the
language. For example, ```abc C4 G4 A4 F4 E4 G4``` will render a music score and an inline player
with a link to download the midi file.
Coding project guidance:
- Create core files and folders for the project as needed, such as README.md, .gitignore, etc.
- Create language specific files and folders as needed, such as package.json, pyproject.toml, etc.
- Provide instructions for the user on installing dependencies via the CLI instead of writing these
directly to the project files; this will ensure the user has the most up-to-date versions.
- Offer to keep the README and other documentation up-to-date with the latest project information; if
the user would like this behavior, be consistent about making these updates each turn as needed.
- Python projects:
- Use 'uv' for managing virtual environments and dependencies (do not use 'poetry')
- Typescript projects:
- Use 'pnpm' for managing dependencies (do not use 'npm' or 'yarn')
- It is ok to update '.vscode' folder contents and 'package.json' scripts as needed for adding run
and debug configurations, but do not add or remove any other files or folders.
Ultimately, however, the user is in control of the project and can override the above guidance as needed.
""").strip()

guardrails_prompt: Annotated[
str,
@@ -134,7 +158,11 @@ class AssistantConfigModel(BaseModel):
] = AzureOpenAIClientConfigModel(
service_config=AzureOpenAIServiceConfig.model_construct(),
request_config=OpenAIRequestConfig(
max_tokens=128_000, response_tokens=65_536, model="o1-mini", is_reasoning_model=True
max_tokens=128_000,
response_tokens=65_536,
model="o1-mini",
is_reasoning_model=True,
reasoning_effort="medium",
),
)

@@ -72,10 +72,29 @@ class ToolsConfigModel(BaseModel):
),
] = "[ Maximum steps reached for this turn, engage with assistant to continue ]"

tools_instructions: Annotated[
additional_instructions: Annotated[
str,
Field(
title="Tools Instructions",
description=dedent("""
General instructions for using tools. No need to include a list of tools or instructions
on how to use them in general; that will be handled automatically. Instead, use this
space to provide any additional instructions for using specific tools, such as folders to
exclude in file searches, or instructions to always re-read a file before using it.
""").strip(),
),
UISchema(widget="textarea", enable_markdown_in_description=True),
] = dedent("""
- When searching or browsing for files, consider the kinds of folders and files that should be avoided:
- For example, for coding projects exclude folders like `.git`, `.vscode`, `node_modules`, and `dist`.
- For each turn, always re-read a file before using it to ensure the most up-to-date information, especially
when writing or editing files.
""").strip()

instructions_for_non_tool_models: Annotated[
str,
Field(
title="Tools Instructions for Models Without Tools Support",
description=dedent("""
Some models don't support tools (like OpenAI reasoning models), so these instructions
are only used to implement tool support through custom instruction and injection of
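The description above is cut off by the diff viewer, but it refers to emulating tool support for models that cannot call tools natively (such as the OpenAI reasoning models): tool definitions and calling conventions are injected as plain instructions, and the assistant parses a structured reply back out of the model's text. The commit does not show that machinery here; the following is only a rough, self-contained sketch of the general idea, with entirely hypothetical helper names:

```python
# Hypothetical sketch, not part of this commit: the general pattern of giving
# tool support to models that lack native tool calling, by injecting tool
# descriptions into the prompt and parsing a structured reply back out.
import json
from typing import Any


def build_tool_instructions(tools: list[dict[str, Any]], extra_instructions: str) -> str:
    """Render tool descriptions into prompt text for models without tool support."""
    tool_lines = "\n".join(f"- {tool['name']}: {tool.get('description', '')}" for tool in tools)
    return (
        "To call a tool, reply with a single JSON object of the form "
        '{"tool": "<name>", "arguments": {...}}.\n\n'
        f"Available tools:\n{tool_lines}\n\n{extra_instructions}"
    )


def try_parse_tool_call(response_text: str) -> dict[str, Any] | None:
    """Return a parsed tool call if the model emitted one, otherwise None."""
    try:
        parsed = json.loads(response_text.strip())
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) and "tool" in parsed else None


print(build_tool_instructions(
    [{"name": "read_file", "description": "Read a file from the workspace"}],
    "Always re-read a file before editing it.",
))
```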
@@ -82,6 +82,10 @@ async def handle_error(error_message: str) -> StepResult:
])
if ai_context is not None and ai_context.strip() != "":
response_content.append(ai_context)
else:
response_content.append(
f"[Assistant is calling tools: {', '.join([tool_call.name for tool_call in tool_calls])}]"
)

content = "\n\n".join(response_content)

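For reference, the fallback added above simply lists the names of the pending tool calls when the model returns no accompanying text. A tiny illustration of the resulting notice (the tool-call objects and names here are invented for the example):

```python
# Tiny illustration of the fallback notice format added above; the tool-call
# objects and names are invented for the example.
from types import SimpleNamespace

tool_calls = [SimpleNamespace(name="read_file"), SimpleNamespace(name="write_file")]
notice = f"[Assistant is calling tools: {', '.join(tc.name for tc in tool_calls)}]"
print(notice)  # [Assistant is calling tools: read_file, write_file]
```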
@@ -35,6 +35,13 @@ async def build_request(
# Build system message content
system_message_content = build_system_message_content(config, context, participants, silence_token)

# Add any additional tools instructions to the system message content
if config.extensions_config.tools.enabled:
system_message_content = "\n\n".join([
system_message_content,
config.extensions_config.tools.additional_instructions,
])

# Add MCP Server prompts to the system message content
if len(mcp_prompts) > 0:
system_message_content = "\n\n".join([system_message_content, *mcp_prompts])
26 changes: 20 additions & 6 deletions assistants/codespace-assistant/assistant/response/response.py
@@ -28,7 +28,8 @@ async def respond_to_conversation(
metadata: dict[str, Any] = {},
) -> None:
"""
Respond to a conversation message using dynamically loaded MCP servers with support for multiple tool invocations.
Perform a multi-step response to a conversation message using dynamically loaded MCP servers with
support for multiple tool invocations.
"""

async with AsyncExitStack() as stack:
@@ -54,14 +55,26 @@

# Initialize a loop control variable
max_steps = config.extensions_config.tools.max_steps
interrupted = False
encountered_error = False
completed_within_max_steps = False
step_count = 0

# Loop until the conversation is complete or the maximum number of steps is reached
# Loop until the response is complete or the maximum number of steps is reached
while step_count < max_steps:
step_count += 1

# Check to see if we should interrupt our flow
last_message = await context.get_messages(limit=1, message_types=[MessageType.chat])
if step_count > 1 and last_message.messages[0].sender.participant_id != context.assistant.id:
# The last message was from a sender other than the assistant, so we should
# interrupt our flow as this would have kicked off a new response from this
# assistant with the new message in mind and that process can decide if it
# should continue with the current flow or not.
interrupted = True
logger.info("Response interrupted.")
break

step_result = await next_step(
mcp_sessions=mcp_sessions,
mcp_tools=mcp_tools,
@@ -81,15 +94,16 @@
completed_within_max_steps = True
break

# If the conversation did not complete within the maximum number of steps, send a message to the user
if not completed_within_max_steps and not encountered_error:
# If the response did not complete within the maximum number of steps, send a message to the user
if not completed_within_max_steps and not encountered_error and not interrupted:
await context.send_messages(
NewConversationMessage(
content=config.extensions_config.tools.max_steps_truncation_message,
message_type=MessageType.notice,
metadata=metadata,
)
)
logger.info("Response stopped early due to maximum steps.")

# Log the completion of the conversation
logger.info("Conversation completed.")
# Log the completion of the response
logger.info("Response completed.")
@@ -111,8 +111,20 @@ async def get_response(
async with openai_client.create_client(self.service_config) as client:
try:
if self.request_config.is_reasoning_model:
# reasoning models do not support system messages, so replace them with appropriate role
if self.request_config.model != "o1-preview":
# due to variations in the API response for reasoning models, we need to adjust the messages
# and completion request based on the model type

# initialize the completion parameters
# for reasoning models, use max_completion_tokens instead of max_tokens
completion_params = {
"model": self.request_config.model,
"max_completion_tokens": self.request_config.response_tokens,
}

legacy_models = ["o1-preview", "o1-mini"]

# set the role of the messages based on the model type
if self.request_config.model not in legacy_models:
chat_message_params = [
ChatCompletionDeveloperMessageParam({
"role": "developer",
@@ -123,6 +135,7 @@
for message in chat_message_params
]
else:
# fallback for older reasoning models
chat_message_params = [
ChatCompletionUserMessageParam({
"role": "user",
@@ -133,13 +146,15 @@
for message in chat_message_params
]

# for reasoning models, use max_completion_tokens instead of max_tokens
completion = await client.chat.completions.create(
messages=chat_message_params,
model=self.request_config.model,
max_completion_tokens=self.request_config.response_tokens,
reasoning_effort=self.request_config.reasoning_effort,
)
# set the reasoning effort for the completion for newer reasoning models
if self.request_config.model not in legacy_models:
completion_params["reasoning_effort"] = self.request_config.reasoning_effort

completion_params["messages"] = chat_message_params

# cast the completion to a ChatCompletion for reasoning models
reasoning_completion: ChatCompletion = await client.chat.completions.create(**completion_params)
completion = reasoning_completion

response_result.content = completion.choices[0].message.content

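The hunks above are fragmented by the diff viewer, so here is a condensed sketch of the request assembly for reasoning models as it appears after this change: legacy models (o1-preview, o1-mini) get instruction content sent with the user role and no reasoning_effort, while newer reasoning models use the developer role and accept reasoning_effort; both use max_completion_tokens instead of max_tokens. The helper function and the uniform role swap below are simplifications for illustration only; the exact message handling is partly elided in the diff.

```python
# Condensed sketch (not verbatim source) of the reasoning-model request
# assembly after this change. build_completion_params is a hypothetical helper.
from types import SimpleNamespace

LEGACY_REASONING_MODELS = ["o1-preview", "o1-mini"]


def build_completion_params(request_config, chat_message_params: list[dict]) -> dict:
    """Assemble kwargs for client.chat.completions.create(...) for a reasoning model."""
    params = {
        "model": request_config.model,
        # Reasoning models take max_completion_tokens rather than max_tokens.
        "max_completion_tokens": request_config.response_tokens,
    }
    if request_config.model not in LEGACY_REASONING_MODELS:
        # Newer reasoning models accept the 'developer' role and a reasoning_effort setting.
        role = "developer"
        params["reasoning_effort"] = request_config.reasoning_effort
    else:
        # o1-preview / o1-mini accept neither, so instruction content goes in a user message.
        role = "user"
    # Simplification: re-role every message; the diff elides exactly which messages are converted.
    params["messages"] = [{**message, "role": role} for message in chat_message_params]
    return params


cfg = SimpleNamespace(model="o1-mini", response_tokens=65_536, reasoning_effort="medium")
print(build_completion_params(cfg, [{"role": "system", "content": "You are a helpful assistant."}]))
```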
2 changes: 1 addition & 1 deletion libraries/python/openai-client/openai_client/config.py
@@ -183,7 +183,7 @@ class OpenAIRequestConfig(RequestConfigBaseModel):
description=(
"Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and"
" high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in"
" a response."
" a response. (Does not apply to o1-preview or o1-mini models)"
),
),
] = "medium"
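As a usage note, overriding the effort level in a request config might look like the snippet below. The field names follow the OpenAIRequestConfig usage in config.py above, the import path is assumed from the repository layout, and the model name is just an example of a newer reasoning model that honors reasoning_effort (o1-preview and o1-mini ignore it).

```python
# Illustrative only: field names follow the OpenAIRequestConfig usage shown in
# config.py above; the import path is assumed from the repository layout.
from openai_client.config import OpenAIRequestConfig

request_config = OpenAIRequestConfig(
    max_tokens=128_000,
    response_tokens=65_536,
    model="o1",               # example newer reasoning model; o1-preview/o1-mini ignore reasoning_effort
    is_reasoning_model=True,
    reasoning_effort="low",   # low | medium | high
)
```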
