From 32520cf9c1ee4e80a8ba3b278875bba5b5c3c0e4 Mon Sep 17 00:00:00 2001
From: Raj Kumar
Date: Sat, 22 Feb 2025 00:43:39 +0530
Subject: [PATCH] Enhance create_image_block to Dynamically Detect Image Type (#15)

---
Reviewer revision notes (this section is ignored by git am):

* The image format is now detected from the leading magic bytes instead of
  imghdr. imghdr was deprecated in Python 3.11 and removed in 3.13, so the
  original "import imghdr" would make the module fail at import time there.
  The "import imghdr" hunk is dropped accordingly.
* Formats other than PNG/JPEG/GIF/BMP/TIFF/WebP fall back to "png", the same
  fallback the original patch used for undetected data.
* The commented-out debug print was removed.
* NOTE(review): create_image_block now takes raw bytes instead of a base64
  string. Confirm no other caller or override relies on the old contract.
* Line structure and context indentation were reconstructed; the post-image
  blob id on the index line is carried over from the original patch and no
  longer matches this revised content.

 os_computer_use/llm_provider.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/os_computer_use/llm_provider.py b/os_computer_use/llm_provider.py
index 4482249..8785295 100644
--- a/os_computer_use/llm_provider.py
+++ b/os_computer_use/llm_provider.py
@@ -68,8 +68,8 @@ def create_tool_call(self, name, parameters):
     # Wrap a content block in a text or an image object
     def wrap_block(self, block):
         if isinstance(block, bytes):
-            encoded_image = base64.b64encode(block).decode("utf-8")
-            return self.create_image_block(encoded_image)
+            # Pass raw bytes; create_image_block detects the type and encodes.
+            return self.create_image_block(block)
         else:
             return Text(block)
 
@@ -117,10 +117,32 @@ def create_function_def(self, name, details, properties, required):
             },
         }
 
-    def create_image_block(self, base64_image):
+    def create_image_block(self, image_data):
+        # Build an image_url block from raw image bytes. The format is sniffed
+        # from the leading magic bytes rather than with imghdr, which was
+        # deprecated in Python 3.11 and removed in 3.13.
+        signatures = (
+            (b"\x89PNG\r\n\x1a\n", "png"),
+            (b"\xff\xd8\xff", "jpeg"),
+            (b"GIF87a", "gif"),
+            (b"GIF89a", "gif"),
+            (b"BM", "bmp"),
+            (b"II*\x00", "tiff"),
+            (b"MM\x00*", "tiff"),
+        )
+        image_type = next(
+            (name for magic, name in signatures if image_data.startswith(magic)),
+            "png",  # fallback if type cannot be detected
+        )
+        # WebP is a RIFF container; its format tag sits at bytes 8-11.
+        if image_data[:4] == b"RIFF" and image_data[8:12] == b"WEBP":
+            image_type = "webp"
+
+        # Base64-encode the raw image bytes.
+        encoded = base64.b64encode(image_data).decode("utf-8")
         return {
             "type": "image_url",
-            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+            "image_url": {"url": f"data:image/{image_type};base64,{encoded}"},
         }
 
     def call(self, messages, functions=None):