From be483785f23c3e2a738c85028cbac3a390ec2bab Mon Sep 17 00:00:00 2001
From: Michael Feil <63565275+michaelfeil@users.noreply.github.com>
Date: Thu, 5 Dec 2024 17:45:05 -0800
Subject: [PATCH 01/10] add max size (#489)

* add max size

* bump version
---
 docs/assets/openapi.json                            |  2 +-
 libs/client_infinity/infinity_client/pyproject.toml |  2 +-
 libs/infinity_emb/infinity_emb/env.py               | 10 +++++++++-
 .../infinity_emb/fastapi_schemas/pydantic_v2.py     |  5 +++--
 libs/infinity_emb/pyproject.toml                    |  2 +-
 5 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json
index f57d1148..4689745a 100644
--- a/docs/assets/openapi.json
+++ b/docs/assets/openapi.json
@@ -1 +1 @@
-{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.71"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n    dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. 
Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. 
Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus 
metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audi
o"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.70"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. 
Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. 
Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus 
metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audi
o"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/libs/client_infinity/infinity_client/pyproject.toml b/libs/client_infinity/infinity_client/pyproject.toml index 15187214..cd3f6861 100644 --- a/libs/client_infinity/infinity_client/pyproject.toml +++ b/libs/client_infinity/infinity_client/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_client" -version = "0.0.71" +version = "0.0.72" description = "A client library for accessing ♾️ Infinity - Embedding Inference Server" authors = [] readme = "README.md" diff --git a/libs/infinity_emb/infinity_emb/env.py b/libs/infinity_emb/infinity_emb/env.py index 6c66121c..a3261624 100644 --- a/libs/infinity_emb/infinity_emb/env.py +++ b/libs/infinity_emb/infinity_emb/env.py @@ -189,7 +189,15 @@ def cache_dir(self) -> Path: @cached_property def queue_size(self) -> int: - return int(self._optional_infinity_var("queue_size", default="32000")) + size = int(self._optional_infinity_var("queue_size", default="32000")) + assert size > 0, "INFINITY_QUEUE_SIZE must be a positive number" + return size + + @cached_property + def max_client_batch_size(self) -> int: + size = int(self._optional_infinity_var("max_client_batch_size", default="2048")) + assert size > 0, "INFINITY_MAX_CLIENT_BATCH_SIZE must be a positive number" + return size @cached_property def permissive_cors(self): diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pydantic_v2.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pydantic_v2.py index 12554330..ff2f9dd7 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pydantic_v2.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pydantic_v2.py @@ -1,4 +1,5 @@ from pydantic import 
AnyUrl, HttpUrl, StringConstraints +from infinity_emb.env import MANAGER __all__ = [ "INPUT_STRING", @@ -14,9 +15,9 @@ INPUT_STRING = StringConstraints(max_length=8192 * 15, strip_whitespace=True) ITEMS_LIMIT = { "min_length": 1, - "max_length": 2048, + "max_length": MANAGER.max_client_batch_size, } ITEMS_LIMIT_SMALL = { "min_length": 1, - "max_length": 32, + "max_length": min(32, MANAGER.max_client_batch_size), } diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 208336d8..654cdf13 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "infinity_emb" -version = "0.0.71" +version = "0.0.72" description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip." authors = ["michaelfeil "] license = "MIT" From a0b5cc41135c1ee5775a2114d5b7b0767df11108 Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 6 Dec 2024 05:08:13 +0100 Subject: [PATCH 02/10] initial commits for matryoshka_dim --- libs/infinity_emb/infinity_emb/engine.py | 41 +++++--- .../infinity_emb/fastapi_schemas/pymodels.py | 1 + .../infinity_emb/inference/batch_handler.py | 18 ++-- libs/infinity_emb/infinity_emb/sync_engine.py | 27 ++++-- .../end_to_end/test_api_with_dummymodel.py | 23 +++++ .../end_to_end/test_openapi_client_compat.py | 10 ++ .../tests/unit_test/test_engine.py | 95 +++++++++++++++++++ 7 files changed, 190 insertions(+), 25 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/engine.py b/libs/infinity_emb/infinity_emb/engine.py index 73867a7d..68b2ca8f 100644 --- a/libs/infinity_emb/infinity_emb/engine.py +++ b/libs/infinity_emb/infinity_emb/engine.py @@ -130,11 +130,14 @@ def capabilities(self) -> set[ModelCapabilites]: def engine_args(self) -> EngineArgs: return self._engine_args - async def embed(self, sentences: list[str]) -> tuple[list["EmbeddingReturnType"], int]: + async def embed( + self, sentences: list[str], matryoshka_dim: int | None = None + ) -> tuple[list["EmbeddingReturnType"], int]: """embed multiple sentences Kwargs: sentences (list[str]): sentences to be embedded + matryoshka_dim (int): Length of matryoshka embedding Raises: ValueError: raised if engine is not started yet @@ -148,7 +151,9 @@ async def embed(self, sentences: list[str]) -> tuple[list["EmbeddingReturnType"] """ self._assert_running() - embeddings, usage = await self._batch_handler.embed(sentences=sentences) + embeddings, usage = await self._batch_handler.embed( + sentences=sentences, matryoshka_dim=matryoshka_dim + ) return embeddings, usage async def rerank( @@ -213,12 +218,16 @@ async def classify( return scores, usage async def image_embed( - self, *, images: list[Union[str, "ImageClassType", bytes]] + self, + *, + images: list[Union[str, "ImageClassType", bytes]], + matryoshka_dim: int | None = None, ) -> tuple[list["EmbeddingReturnType"], int]: """embed multiple images Kwargs: images (list[Union[str, ImageClassType]]): list of image urls or ImageClassType objects, to be embedded + matryoshka_dim (int): Length of matryoshka embedding Raises: ValueError: raised if engine is not started yet @@ -232,16 +241,19 @@ async def image_embed( """ self._assert_running() - embeddings, usage = await self._batch_handler.image_embed(images=images) + embeddings, usage = await self._batch_handler.image_embed( + images=images, matryoshka_dim=matryoshka_dim + ) return embeddings, usage async def audio_embed( - self, *, audios: list[Union[str, bytes]] + self, *, audios: list[Union[str, 
bytes]], matryoshka_dim: int | None = None ) -> tuple[list["EmbeddingReturnType"], int]: """embed multiple audios Kwargs: audios (list[Union[str, Audiobytes]]): list of audio data, to be embedded + matryoshka_dim (int): Length of matryoshka embedding Raises: ValueError: raised if engine is not started yet @@ -255,7 +267,9 @@ async def audio_embed( """ self._assert_running() - embeddings, usage = await self._batch_handler.audio_embed(audios=audios) + embeddings, usage = await self._batch_handler.audio_embed( + audios=audios, matryoshka_dim=matryoshka_dim + ) return embeddings, usage def _assert_running(self): @@ -304,13 +318,14 @@ async def astop(self): await engine.astop() async def embed( - self, *, model: str, sentences: list[str] + self, *, model: str, sentences: list[str], matryoshka_dim=None ) -> tuple[list["EmbeddingReturnType"], int]: """embed multiple sentences Kwargs: model (str): model name to be used sentences (list[str]): sentences to be embedded + matryoshka_dim (int): Length of matryoshka embedding Raises: ValueError: raised if engine is not started yet @@ -322,7 +337,7 @@ async def embed( 2D list-array of shape( len(sentences),embed_dim ) int: token usage """ - return await self[model].embed(sentences) + return await self[model].embed(sentences, matryoshka_dim=matryoshka_dim) def is_running(self) -> bool: return all(engine.is_running for engine in self.engines_dict.values()) @@ -378,13 +393,14 @@ async def classify( return await self[model].classify(sentences=sentences, raw_scores=raw_scores) async def image_embed( - self, *, model: str, images: list[Union[str, "ImageClassType"]] + self, *, model: str, images: list[Union[str, "ImageClassType"]], matryoshka_dim=None ) -> tuple[list["EmbeddingReturnType"], int]: """embed multiple images Kwargs: model (str): model name to be used images (list[Union[str, ImageClassType]]): list of image urls or ImageClassType objects, to be embedded + matryoshka_dim (int): Length of matryoshka embedding Raises: ValueError: raised if engine is not started yet @@ -396,7 +412,7 @@ async def image_embed( 2D list-array of shape( len(sentences),embed_dim ) int: token usage """ - return await self[model].image_embed(images=images) + return await self[model].image_embed(images=images, matryoshka_dim=matryoshka_dim) def __getitem__(self, index_or_name: Union[str, int]) -> "AsyncEmbeddingEngine": """resolve engine by model name -> Auto resolve if only one engine is present @@ -416,13 +432,14 @@ def __getitem__(self, index_or_name: Union[str, int]) -> "AsyncEmbeddingEngine": ) async def audio_embed( - self, *, model: str, audios: list[Union[str, bytes]] + self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim=None ) -> tuple[list["EmbeddingReturnType"], int]: """embed multiple audios Kwargs: model (str): model name to be used audios (list[Union[str, bytes]]): list of audio data, to be embedded + matryoshka_dim (int): Length of matryoshka embedding Raises: ValueError: raised if engine is not started yet @@ -434,4 +451,4 @@ async def audio_embed( 2D list-array of shape( len(sentences),embed_dim ) int: token usage """ - return await self[model].audio_embed(audios=audios) + return await self[model].audio_embed(audios=audios, matryoshka_dim=matryoshka_dim) diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index 3a623471..a706f090 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py 
@@ -54,6 +54,7 @@ class _OpenAIEmbeddingInput(BaseModel): model: str = "default/not-specified" encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float user: Optional[str] = None + dimensions: Optional[int] = None class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 7ed4b83d..49696599 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -136,7 +136,9 @@ def __init__( " Consider increasing queue size" ) - async def embed(self, sentences: list[str]) -> tuple[list["EmbeddingReturnType"], int]: + async def embed( + self, sentences: list[str], matryoshka_dim=None + ) -> tuple[list["EmbeddingReturnType"], int]: """Schedule a sentence to be embedded. Awaits until embedded. Args: @@ -157,6 +159,8 @@ async def embed(self, sentences: list[str]) -> tuple[list["EmbeddingReturnType"] input_sentences = [EmbeddingSingle(sentence=s) for s in sentences] embeddings, usage = await self._schedule(input_sentences) + if matryoshka_dim: + embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] return embeddings, usage async def rerank( @@ -236,9 +240,7 @@ async def classify( return classifications, usage async def image_embed( - self, - *, - images: list[Union[str, "ImageClassType", bytes]], + self, *, images: list[Union[str, "ImageClassType", bytes]], matryoshka_dim=None ) -> tuple[list["EmbeddingReturnType"], int]: """Schedule a images and sentences to be embedded. Awaits until embedded. @@ -262,12 +264,12 @@ async def image_embed( items = await resolve_images(images) embeddings, usage = await self._schedule(items) + if matryoshka_dim: + embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] return embeddings, usage async def audio_embed( - self, - *, - audios: list[Union[str, bytes]], + self, *, audios: list[Union[str, bytes]], matryoshka_dim=None ) -> tuple[list["EmbeddingReturnType"], int]: """Schedule audios and sentences to be embedded. Awaits until embedded. 
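The `matryoshka_dim` handling above is deliberately simple: after the batch is scheduled and the model returns full-size vectors, each embedding is sliced down to its leading components. A minimal standalone sketch of that post-processing step (illustrative only, not the library code itself; it assumes numpy arrays, which is what the engine returns):

```python
# Sketch of the slice applied in BatchHandler.embed / image_embed / audio_embed:
# keep only the first `matryoshka_dim` components of each full embedding.
from typing import Optional

import numpy as np


def truncate_matryoshka(
    embeddings: list[np.ndarray], matryoshka_dim: Optional[int] = None
) -> list[np.ndarray]:
    if matryoshka_dim:
        return [embedding[:matryoshka_dim] for embedding in embeddings]
    return embeddings


# a 768-dim vector is reduced to its first 64 components
assert truncate_matryoshka([np.zeros(768)], matryoshka_dim=64)[0].shape == (64,)
```

Note that the slice is taken as-is, with no re-normalization: models trained with Matryoshka Representation Learning are constructed so that the leading dimensions remain useful after truncation.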
diff --git a/libs/infinity_emb/infinity_emb/sync_engine.py b/libs/infinity_emb/infinity_emb/sync_engine.py
index c6a907d8..a398488a 100644
--- a/libs/infinity_emb/infinity_emb/sync_engine.py
+++ b/libs/infinity_emb/infinity_emb/sync_engine.py
@@ -171,9 +171,14 @@ def stop(self):
         self.async_run(self.async_engine_array.astop).result()

     @add_start_docstrings(AsyncEngineArray.embed.__doc__)
-    def embed(self, *, model: str, sentences: list[str]):
+    def embed(self, *, model: str, sentences: list[str], matryoshka_dim=None):
         """sync interface of AsyncEngineArray"""
-        return self.async_run(self.async_engine_array.embed, model=model, sentences=sentences)
+        return self.async_run(
+            self.async_engine_array.embed,
+            model=model,
+            sentences=sentences,
+            matryoshka_dim=matryoshka_dim,
+        )

     @add_start_docstrings(AsyncEngineArray.rerank.__doc__)
     def rerank(
@@ -206,14 +211,24 @@ def classify(self, *, model: str, sentences: list[str], raw_scores: bool = False
         )

     @add_start_docstrings(AsyncEngineArray.image_embed.__doc__)
-    def image_embed(self, *, model: str, images: list[Union[str, bytes]]):
+    def image_embed(self, *, model: str, images: list[Union[str, bytes]], matryoshka_dim=None):
         """sync interface of AsyncEngineArray"""
-        return self.async_run(self.async_engine_array.image_embed, model=model, images=images)
+        return self.async_run(
+            self.async_engine_array.image_embed,
+            model=model,
+            images=images,
+            matryoshka_dim=matryoshka_dim,
+        )

     @add_start_docstrings(AsyncEngineArray.audio_embed.__doc__)
-    def audio_embed(self, *, model: str, audios: list[Union[str, bytes]]):
+    def audio_embed(self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim=None):
         """sync interface of AsyncEngineArray"""
-        return self.async_run(self.async_engine_array.audio_embed, model=model, audios=audios)
+        return self.async_run(
+            self.async_engine_array.audio_embed,
+            model=model,
+            audios=audios,
+            matryoshka_dim=matryoshka_dim,
+        )

     def __del__(self):
         self.stop()

diff --git a/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py b/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py
index 73e68e2f..1e8d1aa4 100644
--- a/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py
+++ b/libs/infinity_emb/tests/end_to_end/test_api_with_dummymodel.py
@@ -170,3 +170,26 @@ async def test_openapi_same_as_docs_file(client):
     tc.assertDictEqual(openapi_json["info"], openapi_json_expected["info"])
     tc.assertDictEqual(openapi_json["paths"], openapi_json_expected["paths"])
     # tc.assertDictEqual(openapi_json["components"], openapi_json_expected["components"])
+
+
+@pytest.mark.anyio
+async def test_matryoshka_embedding(client):
+    matryoshka_dim = 10
+
+    possible_inputs = [
+        ["This is a test sentence."],
+        ["This is a test sentence.", "This is another test sentence."],
+    ]
+    for inp in possible_inputs:
+        response = await client.post(
+            f"{PREFIX}/embeddings",
+            json=dict(input=inp, model=MODEL_NAME, dimensions=matryoshka_dim),
+        )
+        assert response.status_code == 200, f"{response.status_code}, {response.text}"
+        rdata = response.json()
+        assert "data" in rdata and isinstance(rdata["data"], list)
+        assert all("embedding" in d for d in rdata["data"])
+        assert len(rdata["data"]) == len(inp)
+        for embedding, sentence in zip(rdata["data"], inp):
+            assert len(sentence) == embedding["embedding"][0]
+            assert len(embedding["embedding"]) == matryoshka_dim

diff --git a/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py b/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py
index 79b5ebab..19f5a385 100644
--- a/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py
+++ b/libs/infinity_emb/tests/end_to_end/test_openapi_client_compat.py
@@ -120,6 +120,16 @@ async def test_openai(client: AsyncClient):
         extra_body={"modality": "text"},
     )

+    # test: text matryoshka
+    emb_1_text_matryoshka_dim = await client_oai.embeddings.create(
+        model=pytest.DEFAULT_BERT_MODEL,
+        input=["a cat", "a cat", "a bird"],
+        encoding_format="float",
+        dimensions=64,
+        extra_body={"modality": "text"},
+    )
+    assert len(emb_1_text_matryoshka_dim.data[0].embedding) == 64
+
     # test AUDIO: cosine distance of beep to cat and dog
     np.testing.assert_allclose(
         emb1_audio.data[0].embedding, emb1_1_audio.data[0].embedding, rtol=1e-5

diff --git a/libs/infinity_emb/tests/unit_test/test_engine.py b/libs/infinity_emb/tests/unit_test/test_engine.py
index c3bf054b..99cecbbb 100644
--- a/libs/infinity_emb/tests/unit_test/test_engine.py
+++ b/libs/infinity_emb/tests/unit_test/test_engine.py
@@ -380,3 +380,98 @@ def test_args_between_array_and_engine_same(method_name: str):
     assert sorted(array_method.args + array_method.kwonlyargs) == sorted(
         engine_method.args + engine_method.kwonlyargs + ["model"]
     )
+
+
+@pytest.mark.anyio
+async def test_async_api_torch_matryoshka():
+    matryoshka_dim = 64
+
+    sentences = ["Hi", "how"]
+    engine = AsyncEmbeddingEngine.from_args(
+        EngineArgs(
+            model_name_or_path="nomic-ai/nomic-embed-text-v1.5",
+            engine=InferenceEngine.torch,
+            revision="main",
+            device="cpu",
+        )
+    )
+    assert engine.capabilities == {"embed"}
+    async with engine:
+        embeddings, usage = await engine.embed(sentences=sentences, matryoshka_dim=matryoshka_dim)
+        assert isinstance(embeddings, list)
+        assert isinstance(embeddings[0], np.ndarray)
+        embeddings = np.array(embeddings)
+        assert usage == sum([len(s) for s in sentences])
+        assert embeddings.shape[0] == len(sentences)
+        assert embeddings.shape[1] >= 10
+
+        assert len(embeddings[0]) == 64
+
+        # test if model denies classification and reranking
+        with pytest.raises(ModelNotDeployedError):
+            await engine.classify(sentences=sentences)
+        with pytest.raises(ModelNotDeployedError):
+            await engine.rerank(query="dummy", docs=sentences)
+
+
+@pytest.mark.anyio
+async def test_torch_clip_embed_matryoshka():
+    matryoshka_dim = 128
+
+    image_urls = ["http://images.cocodataset.org/val2017/000000039769.jpg"]  # a photo of two cats
+    sentences = [
+        "a photo of two cats",
+        "a photo of a cat",
+        "a photo of a dog",
+        "a photo of a car",
+    ]
+    engine = AsyncEmbeddingEngine.from_args(
+        EngineArgs(
+            model_name_or_path="jinaai/jina-clip-v2",
+            engine=InferenceEngine.torch,
+            model_warmup=True,
+        )
+    )
+    async with engine:
+        t1, t2 = (
+            asyncio.create_task(engine.embed(sentences=sentences, matryoshka_dim=matryoshka_dim)),
+            asyncio.create_task(
+                engine.image_embed(images=image_urls, matryoshka_dim=matryoshka_dim)
+            ),
+        )
+        emb_text, usage_text = await t1
+        emb_image, usage_image = await t2
+        emb_text_np = np.array(emb_text)  # type: ignore
+        emb_image_np = np.array(emb_image)  # type: ignore
+
+        assert len(emb_text_np[0]) == matryoshka_dim
+        assert len(emb_image_np[0]) == matryoshka_dim
+
+        # check if cat image and two cats are most similar
+        for i in range(1, len(sentences)):
+            assert np.dot(emb_text_np[0], emb_image_np[0]) > np.dot(emb_text_np[i], emb_image_np[0])
+
+
+@pytest.mark.anyio
+async def test_clap_like_model_matryoshka(audio_sample):
+    matryoshka_dim = 64
+
+    model_name = pytest.DEFAULT_AUDIO_MODEL
+    engine = AsyncEmbeddingEngine.from_args(
+        EngineArgs(model_name_or_path=model_name, dtype="float32")
+    )
+    url = audio_sample[1]
+    bytes_url = audio_sample[0].content
+
+    inputs = ["a sound of a cat", "a sound of a cat"]
+    audios = [url, bytes_url]
+    async with engine:
+        embeddings_text, usage_1 = await engine.embed(
+            sentences=inputs, matryoshka_dim=matryoshka_dim
+        )
+        embeddings_audio, usage_2 = await engine.audio_embed(
+            audios=audios, matryoshka_dim=matryoshka_dim
+        )
+
+        assert len(embeddings_text[0]) == matryoshka_dim
+        assert len(embeddings_audio[0]) == matryoshka_dim
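Patch 02 (together with the type hints added in patch 03 below) wires `matryoshka_dim` through every embedding entry point: HTTP schema, batch handler, async engine, engine array, and sync engine. A condensed usage sketch, distilled from the tests above (the model name and arguments are taken from those tests and are illustrative, not prescriptive):

```python
import asyncio

import numpy as np
from infinity_emb import AsyncEmbeddingEngine, EngineArgs


async def main() -> None:
    # nomic-embed-text-v1.5 is trained for matryoshka-style truncation
    engine = AsyncEmbeddingEngine.from_args(
        EngineArgs(model_name_or_path="nomic-ai/nomic-embed-text-v1.5", device="cpu")
    )
    async with engine:
        embeddings, usage = await engine.embed(sentences=["Hi", "how"], matryoshka_dim=64)
    # truncated from the model's full 768 dimensions down to 64
    assert np.array(embeddings).shape[1] == 64


asyncio.run(main())
```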
@@ -269,7 +269,7 @@ async def image_embed( return embeddings, usage async def audio_embed( - self, *, audios: list[Union[str, bytes]], matryoshka_dim=None + self, *, audios: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None ) -> tuple[list["EmbeddingReturnType"], int]: """Schedule audios and sentences to be embedded. Awaits until embedded. diff --git a/libs/infinity_emb/infinity_emb/sync_engine.py b/libs/infinity_emb/infinity_emb/sync_engine.py index a398488a..2475269d 100644 --- a/libs/infinity_emb/infinity_emb/sync_engine.py +++ b/libs/infinity_emb/infinity_emb/sync_engine.py @@ -171,7 +171,7 @@ def stop(self): self.async_run(self.async_engine_array.astop).result() @add_start_docstrings(AsyncEngineArray.embed.__doc__) - def embed(self, *, model: str, sentences: list[str], matryoshka_dim=None): + def embed(self, *, model: str, sentences: list[str], matryoshka_dim:Optional[int]=None): """sync interface of AsyncEngineArray""" return self.async_run( self.async_engine_array.embed, @@ -211,7 +211,7 @@ def classify(self, *, model: str, sentences: list[str], raw_scores: bool = False ) @add_start_docstrings(AsyncEngineArray.image_embed.__doc__) - def image_embed(self, *, model: str, images: list[Union[str, bytes]], matryoshka_dim=None): + def image_embed(self, *, model: str, images: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None): """sync interface of AsyncEngineArray""" return self.async_run( self.async_engine_array.image_embed, @@ -221,7 +221,7 @@ def image_embed(self, *, model: str, images: list[Union[str, bytes]], matryoshka ) @add_start_docstrings(AsyncEngineArray.audio_embed.__doc__) - def audio_embed(self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim=None): + def audio_embed(self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None): """sync interface of AsyncEngineArray""" return self.async_run( self.async_engine_array.audio_embed, From d8ad01009937b0fcbbc012f24fcf9e2c295c8616 Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 6 Dec 2024 05:27:25 +0100 Subject: [PATCH 04/10] format. 
Use future annotations

---
 libs/infinity_emb/infinity_emb/engine.py      | 11 ++++++++---
 .../infinity_emb/inference/batch_handler.py   |  9 ++++++---
 libs/infinity_emb/infinity_emb/sync_engine.py | 10 +++++++---
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/libs/infinity_emb/infinity_emb/engine.py b/libs/infinity_emb/infinity_emb/engine.py
index d98a1877..153e15ba 100644
--- a/libs/infinity_emb/infinity_emb/engine.py
+++ b/libs/infinity_emb/infinity_emb/engine.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (c) 2023-now michaelfeil
+from __future__ import annotations
 
 from asyncio import Semaphore
 from typing import Iterable, Iterator, Optional, Union
@@ -318,7 +319,7 @@ async def astop(self):
             await engine.astop()
 
     async def embed(
-        self, *, model: str, sentences: list[str], matryoshka_dim: Optional[int]=None
+        self, *, model: str, sentences: list[str], matryoshka_dim: Optional[int] = None
     ) -> tuple[list["EmbeddingReturnType"], int]:
         """embed multiple sentences
 
@@ -393,7 +394,11 @@ async def classify(
         return await self[model].classify(sentences=sentences, raw_scores=raw_scores)
 
     async def image_embed(
-        self, *, model: str, images: list[Union[str, "ImageClassType"]], matryoshka_dim:Optional[int]=None
+        self,
+        *,
+        model: str,
+        images: list[Union[str, "ImageClassType"]],
+        matryoshka_dim: Optional[int] = None,
     ) -> tuple[list["EmbeddingReturnType"], int]:
         """embed multiple images
 
@@ -432,7 +437,7 @@ def __getitem__(self, index_or_name: Union[str, int]) -> "AsyncEmbeddingEngine":
         )
 
     async def audio_embed(
-        self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None
+        self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None
     ) -> tuple[list["EmbeddingReturnType"], int]:
         """embed multiple audios
 
diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
index 94d2ecaa..1edda315 100644
--- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py
+++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -137,7 +137,7 @@ def __init__(
         )
 
     async def embed(
-        self, sentences: list[str], matryoshka_dim:Optional[int]=None
+        self, sentences: list[str], matryoshka_dim: Optional[int] = None
     ) -> tuple[list["EmbeddingReturnType"], int]:
         """Schedule a sentence to be embedded. Awaits until embedded.
 
@@ -240,7 +240,10 @@ async def classify(
         return classifications, usage
 
     async def image_embed(
-        self, *, images: list[Union[str, "ImageClassType", bytes]], matryoshka_dim:Optional[int]=None
+        self,
+        *,
+        images: list[Union[str, "ImageClassType", bytes]],
+        matryoshka_dim: Optional[int] = None,
     ) -> tuple[list["EmbeddingReturnType"], int]:
         """Schedule images and sentences to be embedded. Awaits until embedded.
 
@@ -269,7 +272,7 @@ async def image_embed(
         return embeddings, usage
 
     async def audio_embed(
-        self, *, audios: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None
+        self, *, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None
     ) -> tuple[list["EmbeddingReturnType"], int]:
         """Schedule audios and sentences to be embedded. Awaits until embedded.
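The same parameter is exposed over HTTP by the patches that follow: patch 05 below forwards the OpenAI-compatible `dimensions` request field into `engine.embed(matryoshka_dim=...)`, and patch 06 constrains it to a strict integer with `0 < dimensions < 8193`. A hedged request sketch against a locally running server — URL and model name are illustrative assumptions, not prescribed by the patches:

```python
# Sketch: requesting truncated embeddings via the /embeddings endpoint.
import requests

resp = requests.post(
    "http://localhost:7997/embeddings",  # assumes an infinity_emb server running locally
    json={
        "model": "nomic-ai/nomic-embed-text-v1.5",  # any matryoshka-capable model
        "input": ["a cat", "a bird"],
        "dimensions": 64,  # forwarded server-side as matryoshka_dim=64
    },
)
resp.raise_for_status()
assert all(len(item["embedding"]) == 64 for item in resp.json()["data"])
```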
diff --git a/libs/infinity_emb/infinity_emb/sync_engine.py b/libs/infinity_emb/infinity_emb/sync_engine.py index 2475269d..deda66d9 100644 --- a/libs/infinity_emb/infinity_emb/sync_engine.py +++ b/libs/infinity_emb/infinity_emb/sync_engine.py @@ -171,7 +171,7 @@ def stop(self): self.async_run(self.async_engine_array.astop).result() @add_start_docstrings(AsyncEngineArray.embed.__doc__) - def embed(self, *, model: str, sentences: list[str], matryoshka_dim:Optional[int]=None): + def embed(self, *, model: str, sentences: list[str], matryoshka_dim: Optional[int] = None): """sync interface of AsyncEngineArray""" return self.async_run( self.async_engine_array.embed, @@ -211,7 +211,9 @@ def classify(self, *, model: str, sentences: list[str], raw_scores: bool = False ) @add_start_docstrings(AsyncEngineArray.image_embed.__doc__) - def image_embed(self, *, model: str, images: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None): + def image_embed( + self, *, model: str, images: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None + ): """sync interface of AsyncEngineArray""" return self.async_run( self.async_engine_array.image_embed, @@ -221,7 +223,9 @@ def image_embed(self, *, model: str, images: list[Union[str, bytes]], matryoshka ) @add_start_docstrings(AsyncEngineArray.audio_embed.__doc__) - def audio_embed(self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim:Optional[int]=None): + def audio_embed( + self, *, model: str, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None + ): """sync interface of AsyncEngineArray""" return self.async_run( self.async_engine_array.audio_embed, From 1de6c5278fa94b1c7c3cbc03027fdd1e9b1b2bb1 Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 6 Dec 2024 17:46:22 +0100 Subject: [PATCH 05/10] add dims to server --- libs/infinity_emb/infinity_emb/infinity_server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 0ac1c835..582d7fa3 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -354,21 +354,21 @@ def url_to_base64(url, modality = "image"): "[📝] Received request with %s input texts ", len(input_), # type: ignore ) - embedding, usage = await engine.embed(sentences=input_) + embedding, usage = await engine.embed(sentences=input_,matryoshka_dim=data_root.dimensions) elif modality == Modality.audio: urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore logger.debug( "[📝] Received request with %s input audios ", len(urls_or_bytes), # type: ignore ) - embedding, usage = await engine.audio_embed(audios=urls_or_bytes) + embedding, usage = await engine.audio_embed(audios=urls_or_bytes,matryoshka_dim=data_root.dimensions) elif modality == Modality.image: urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore logger.debug( "[📝] Received request with %s input images ", len(urls_or_bytes), # type: ignore ) - embedding, usage = await engine.image_embed(images=urls_or_bytes) + embedding, usage = await engine.image_embed(images=urls_or_bytes,matryoshka_dim=data_root.dimensions) duration = (time.perf_counter() - start) * 1000 logger.debug("[✅] Done in %s ms", duration) From 9c811fbc7bebb4c09f26fe3e7ca19a27ced52c1a Mon Sep 17 00:00:00 2001 From: wirthual Date: Mon, 9 Dec 2024 17:42:28 +0100 Subject: [PATCH 06/10] add constraints for dimensions --- libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index a706f090..d7c392e9 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -54,7 +54,7 @@ class _OpenAIEmbeddingInput(BaseModel): model: str = "default/not-specified" encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float user: Optional[str] = None - dimensions: Optional[int] = None + dimensions: Optional[Annotated[int, Field(strict=True, gt=0, lt=8193)]] = None class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): From c335df818087158bcda5620d776d3cd4a7b87743 Mon Sep 17 00:00:00 2001 From: Wendy Mak <6398157+wwymak@users.noreply.github.com> Date: Tue, 10 Dec 2024 07:09:00 +0000 Subject: [PATCH 07/10] Adding optimum option for PredictEngine (#492) * adding optimum onnx option for classification * removing a few unneeded bits from optimum.py * adding missing required args from optimum pipeline to make sure it gives the same output as torch implementation * adding unit test * remove a few stray things that are not needed * fixing code styles * minor linting fix * fix failing test --------- Co-authored-by: wendy mak --- .../transformer/classifier/optimum.py | 91 +++++++++++++++++++ .../infinity_emb/transformer/utils.py | 4 + .../classifier/test_optimum_classifier.py | 49 ++++++++++ 3 files changed, 144 insertions(+) create mode 100644 libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py create mode 100644 libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py new file mode 100644 index 00000000..51edef0f --- /dev/null +++ b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2023-now michaelfeil + +import copy +import os + +from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_TRANSFORMERS +from infinity_emb.args import EngineArgs +from infinity_emb.transformer.abstract import BaseClassifer +from infinity_emb.transformer.utils_optimum import ( + device_to_onnx, + get_onnx_files, + optimize_model, +) + +if CHECK_ONNXRUNTIME.is_available: + try: + from optimum.onnxruntime import ( # type: ignore[import-untyped] + ORTModelForSequenceClassification, + ) + + except (ImportError, RuntimeError, Exception) as ex: + CHECK_ONNXRUNTIME.mark_dirty(ex) + +if CHECK_TRANSFORMERS.is_available: + from transformers import AutoTokenizer, pipeline # type: ignore[import-untyped] + + +class OptimumClassifier(BaseClassifer): + def __init__(self, *, engine_args: EngineArgs): + CHECK_ONNXRUNTIME.mark_required() + CHECK_TRANSFORMERS.mark_required() + provider = device_to_onnx(engine_args.device) + + onnx_file = get_onnx_files( + model_name_or_path=engine_args.model_name_or_path, + revision=engine_args.revision, + use_auth_token=True, + prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()), + ) + + self.model = optimize_model( + model_name_or_path=engine_args.model_name_or_path, + model_class=ORTModelForSequenceClassification, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, + execution_provider=provider, + file_name=onnx_file.as_posix(), + optimize_model=not 
os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), + ) + self.model.use_io_binding = False + + self.tokenizer = AutoTokenizer.from_pretrained( + engine_args.model_name_or_path, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, + ) + + self._infinity_tokenizer = copy.deepcopy(self.tokenizer) + + self._pipe = pipeline( + task="text-classification", + model=self.model, + trust_remote_code=engine_args.trust_remote_code, + top_k=None, + revision=engine_args.revision, + tokenizer=self.tokenizer, + device=engine_args.device, + ) + + def encode_pre(self, sentences: list[str]): + return sentences + + def encode_core(self, sentences: list[str]) -> dict: + outputs = self._pipe(sentences) + return outputs + + def encode_post(self, classes) -> dict[str, float]: + """runs post encoding such as normalization""" + return classes + + def tokenize_lengths(self, sentences: list[str]) -> list[int]: + """gets the lengths of each sentences according to tokenize/len etc.""" + tks = self._infinity_tokenizer.batch_encode_plus( + sentences, + add_special_tokens=False, + return_token_type_ids=False, + return_attention_mask=False, + return_length=False, + ).encodings + return [len(t.tokens) for t in tks] diff --git a/libs/infinity_emb/infinity_emb/transformer/utils.py b/libs/infinity_emb/infinity_emb/transformer/utils.py index 5ed56786..e17de3a5 100644 --- a/libs/infinity_emb/infinity_emb/transformer/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/utils.py @@ -7,6 +7,7 @@ from infinity_emb.primitives import InferenceEngine from infinity_emb.transformer.audio.torch import TorchAudioModel from infinity_emb.transformer.classifier.torch import SentenceClassifier +from infinity_emb.transformer.classifier.optimum import OptimumClassifier from infinity_emb.transformer.crossencoder.optimum import OptimumCrossEncoder from infinity_emb.transformer.crossencoder.torch import ( CrossEncoderPatched as CrossEncoderTorch, @@ -87,11 +88,14 @@ def from_inference_engine(engine: InferenceEngine): class PredictEngine(Enum): torch = SentenceClassifier + optimum = OptimumClassifier @staticmethod def from_inference_engine(engine: InferenceEngine): if engine == InferenceEngine.torch: return PredictEngine.torch + elif engine == InferenceEngine.optimum: + return PredictEngine.optimum else: raise NotImplementedError(f"PredictEngine for {engine} not implemented") diff --git a/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py b/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py new file mode 100644 index 00000000..386c3061 --- /dev/null +++ b/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py @@ -0,0 +1,49 @@ +import torch +from optimum.pipelines import pipeline # type: ignore +from optimum.onnxruntime import ORTModelForSequenceClassification +from infinity_emb.args import EngineArgs +from infinity_emb.transformer.classifier.optimum import OptimumClassifier + + +def test_classifier(model_name: str = "SamLowe/roberta-base-go_emotions-onnx"): + model = OptimumClassifier( + engine_args=EngineArgs( + model_name_or_path=model_name, + device="cuda" if torch.cuda.is_available() else "cpu", + ) # type: ignore + ) + + pipe = pipeline( + task="text-classification", + model=ORTModelForSequenceClassification.from_pretrained( + model_name, file_name="onnx/model_quantized.onnx" + ), + top_k=None, + ) + + sentences = ["This is awesome.", "I am depressed."] + + encode_pre = model.encode_pre(sentences) + encode_core 
= model.encode_core(encode_pre) + preds = model.encode_post(encode_core) + + assert len(preds) == len(sentences) + assert isinstance(preds, list) + assert isinstance(preds[0], list) + assert isinstance(preds[0][0], dict) + assert isinstance(preds[0][0]["label"], str) + assert isinstance(preds[0][0]["score"], float) + assert preds[0][0]["label"] == "admiration" + assert 0.98 > preds[0][0]["score"] > 0.93 + + preds_orig = pipe(sentences, top_k=None, truncation=True) + + assert len(preds_orig) == len(preds) + + for pred_orig, pred in zip(preds_orig, preds): + assert len(pred_orig) == len(pred) + for pred_orig_i, pred_i in zip(pred_orig[:5], pred[:5]): + assert abs(pred_orig_i["score"] - pred_i["score"]) < 0.05 + + if pred_orig_i["score"] > 0.005: + assert pred_orig_i["label"] == pred_i["label"] From edd9107971c0f390833d3b3228bd92f7fd54efe5 Mon Sep 17 00:00:00 2001 From: michaelfeil <63565275+michaelfeil@users.noreply.github.com> Date: Mon, 9 Dec 2024 23:15:22 -0800 Subject: [PATCH 08/10] format --- libs/infinity_emb/infinity_emb/infinity_server.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 582d7fa3..36f7c6a4 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -354,21 +354,27 @@ def url_to_base64(url, modality = "image"): "[📝] Received request with %s input texts ", len(input_), # type: ignore ) - embedding, usage = await engine.embed(sentences=input_,matryoshka_dim=data_root.dimensions) + embedding, usage = await engine.embed( + sentences=input_, matryoshka_dim=data_root.dimensions + ) elif modality == Modality.audio: urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore logger.debug( "[📝] Received request with %s input audios ", len(urls_or_bytes), # type: ignore ) - embedding, usage = await engine.audio_embed(audios=urls_or_bytes,matryoshka_dim=data_root.dimensions) + embedding, usage = await engine.audio_embed( + audios=urls_or_bytes, matryoshka_dim=data_root.dimensions + ) elif modality == Modality.image: urls_or_bytes = _resolve_mixed_input(data_root.input) # type: ignore logger.debug( "[📝] Received request with %s input images ", len(urls_or_bytes), # type: ignore ) - embedding, usage = await engine.image_embed(images=urls_or_bytes,matryoshka_dim=data_root.dimensions) + embedding, usage = await engine.image_embed( + images=urls_or_bytes, matryoshka_dim=data_root.dimensions + ) duration = (time.perf_counter() - start) * 1000 logger.debug("[✅] Done in %s ms", duration) From d614094c473145cab55759eb27199358641e8f75 Mon Sep 17 00:00:00 2001 From: Michael Feil <63565275+michaelfeil@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:22:38 -0800 Subject: [PATCH 09/10] new-release? (#494) * new-release? 
* rm conint * fix: optimum classifier * lint --- docs/assets/openapi.json | 2 +- .../models/open_ai_embedding_input_audio.py | 9 +++++++ .../models/open_ai_embedding_input_image.py | 9 +++++++ .../models/open_ai_embedding_input_text.py | 9 +++++++ .../infinity_client/pyproject.toml | 2 +- .../infinity_emb/fastapi_schemas/pymodels.py | 2 +- .../infinity_emb/inference/batch_handler.py | 25 ++++++++++++------- .../infinity_emb/infinity_server.py | 18 ++++++------- libs/infinity_emb/infinity_emb/primitives.py | 4 +++ .../infinity_emb/transformer/audio/utils.py | 10 +++----- .../transformer/classifier/optimum.py | 7 +++--- .../infinity_emb/transformer/vision/utils.py | 8 ++---- libs/infinity_emb/pyproject.toml | 2 +- .../classifier/test_optimum_classifier.py | 10 +++----- 14 files changed, 69 insertions(+), 48 deletions(-) diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json index 4689745a..fe6a9faf 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.70"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. 
Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. 
Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus 
metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audi
o"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.73"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. 
Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. 
Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus 
metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type"
:"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py index 0369c880..f35557c4 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_audio.py @@ -18,6 +18,7 @@ class OpenAIEmbeddingInputAudio: model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + dimensions (Union[Unset, int]): Default: 0. modality (Union[Unset, OpenAIEmbeddingInputAudioModality]): Default: OpenAIEmbeddingInputAudioModality.AUDIO. 
""" @@ -25,6 +26,7 @@ class OpenAIEmbeddingInputAudio: model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + dimensions: Union[Unset, int] = 0 modality: Union[Unset, OpenAIEmbeddingInputAudioModality] = OpenAIEmbeddingInputAudioModality.AUDIO additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) @@ -52,6 +54,8 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + dimensions = self.dimensions + modality: Union[Unset, str] = UNSET if not isinstance(self.modality, Unset): modality = self.modality.value @@ -69,6 +73,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if dimensions is not UNSET: + field_dict["dimensions"] = dimensions if modality is not UNSET: field_dict["modality"] = modality @@ -118,6 +124,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) + dimensions = d.pop("dimensions", UNSET) + _modality = d.pop("modality", UNSET) modality: Union[Unset, OpenAIEmbeddingInputAudioModality] if isinstance(_modality, Unset): @@ -130,6 +138,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: model=model, encoding_format=encoding_format, user=user, + dimensions=dimensions, modality=modality, ) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py index b036ad0e..7ceb6d96 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_image.py @@ -18,6 +18,7 @@ class OpenAIEmbeddingInputImage: model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + dimensions (Union[Unset, int]): Default: 0. modality (Union[Unset, OpenAIEmbeddingInputImageModality]): Default: OpenAIEmbeddingInputImageModality.IMAGE. 
""" @@ -25,6 +26,7 @@ class OpenAIEmbeddingInputImage: model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + dimensions: Union[Unset, int] = 0 modality: Union[Unset, OpenAIEmbeddingInputImageModality] = OpenAIEmbeddingInputImageModality.IMAGE additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) @@ -52,6 +54,8 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + dimensions = self.dimensions + modality: Union[Unset, str] = UNSET if not isinstance(self.modality, Unset): modality = self.modality.value @@ -69,6 +73,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if dimensions is not UNSET: + field_dict["dimensions"] = dimensions if modality is not UNSET: field_dict["modality"] = modality @@ -118,6 +124,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) + dimensions = d.pop("dimensions", UNSET) + _modality = d.pop("modality", UNSET) modality: Union[Unset, OpenAIEmbeddingInputImageModality] if isinstance(_modality, Unset): @@ -130,6 +138,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: model=model, encoding_format=encoding_format, user=user, + dimensions=dimensions, modality=modality, ) diff --git a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py index c68d1456..296de221 100644 --- a/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py +++ b/libs/client_infinity/infinity_client/infinity_client/models/open_ai_embedding_input_text.py @@ -19,6 +19,7 @@ class OpenAIEmbeddingInputText: model (Union[Unset, str]): Default: 'default/not-specified'. encoding_format (Union[Unset, EmbeddingEncodingFormat]): user (Union[None, Unset, str]): + dimensions (Union[Unset, int]): Default: 0. modality (Union[Unset, OpenAIEmbeddingInputTextModality]): Default: OpenAIEmbeddingInputTextModality.TEXT. 
""" @@ -26,6 +27,7 @@ class OpenAIEmbeddingInputText: model: Union[Unset, str] = "default/not-specified" encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET user: Union[None, Unset, str] = UNSET + dimensions: Union[Unset, int] = 0 modality: Union[Unset, OpenAIEmbeddingInputTextModality] = OpenAIEmbeddingInputTextModality.TEXT additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) @@ -49,6 +51,8 @@ def to_dict(self) -> Dict[str, Any]: else: user = self.user + dimensions = self.dimensions + modality: Union[Unset, str] = UNSET if not isinstance(self.modality, Unset): modality = self.modality.value @@ -66,6 +70,8 @@ def to_dict(self) -> Dict[str, Any]: field_dict["encoding_format"] = encoding_format if user is not UNSET: field_dict["user"] = user + if dimensions is not UNSET: + field_dict["dimensions"] = dimensions if modality is not UNSET: field_dict["modality"] = modality @@ -106,6 +112,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: user = _parse_user(d.pop("user", UNSET)) + dimensions = d.pop("dimensions", UNSET) + _modality = d.pop("modality", UNSET) modality: Union[Unset, OpenAIEmbeddingInputTextModality] if isinstance(_modality, Unset): @@ -118,6 +126,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: model=model, encoding_format=encoding_format, user=user, + dimensions=dimensions, modality=modality, ) diff --git a/libs/client_infinity/infinity_client/pyproject.toml b/libs/client_infinity/infinity_client/pyproject.toml index cd3f6861..b17ebe12 100644 --- a/libs/client_infinity/infinity_client/pyproject.toml +++ b/libs/client_infinity/infinity_client/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_client" -version = "0.0.72" +version = "0.0.73" description = "A client library for accessing ♾️ Infinity - Embedding Inference Server" authors = [] readme = "README.md" diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index d7c392e9..94c3c3d6 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -54,7 +54,7 @@ class _OpenAIEmbeddingInput(BaseModel): model: str = "default/not-specified" encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float user: Optional[str] = None - dimensions: Optional[Annotated[int, Field(strict=True, gt=0, lt=8193)]] = None + dimensions: int = 0 class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 1edda315..1bcf14eb 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -26,6 +26,7 @@ ImageClassType, ModelCapabilites, ModelNotDeployedError, + MatryoshkaDimError, OverloadStatus, PredictSingle, PrioritizedQueueItem, @@ -61,6 +62,18 @@ def submit(self, *args, **kwargs): return self._tp.submit(*args, **kwargs) +def matryososka_slice( + embeddings: list[np.ndarray], matryoshka_dim: Optional[int] +) -> list[np.ndarray]: + if matryoshka_dim: + if 1 > matryoshka_dim or matryoshka_dim > len(embeddings[0]): + raise MatryoshkaDimError( + f"matryoshka_dim={matryoshka_dim} is not in a valid range. Select between 1 and {len(embeddings[0])}." 
+ ) + return [e[:matryoshka_dim] for e in embeddings] + return embeddings + + class BatchHandler: def __init__( self, @@ -159,9 +172,7 @@ async def embed( input_sentences = [EmbeddingSingle(sentence=s) for s in sentences] embeddings, usage = await self._schedule(input_sentences) - if matryoshka_dim: - embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] - return embeddings, usage + return matryososka_slice(embeddings, matryoshka_dim), usage async def rerank( self, @@ -267,9 +278,7 @@ async def image_embed( items = await resolve_images(images) embeddings, usage = await self._schedule(items) - if matryoshka_dim: - embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] - return embeddings, usage + return matryososka_slice(embeddings, matryoshka_dim), usage async def audio_embed( self, *, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None @@ -299,9 +308,7 @@ async def audio_embed( getattr(self.model_worker[0]._model, "sampling_rate", -42), ) embeddings, usage = await self._schedule(items) - if matryoshka_dim: - embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] - return embeddings, usage + return matryososka_slice(embeddings, matryoshka_dim), usage async def _schedule(self, list_queueitem: Sequence[AbstractSingle]) -> tuple[list[Any], int]: """adds list of items to the queue and awaits until these are completed.""" diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 36f7c6a4..a835d3e3 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -29,6 +29,7 @@ InferenceEngine, Modality, ModelCapabilites, + MatryoshkaDimError, ModelNotDeployedError, PoolingMethod, ) @@ -390,14 +391,9 @@ def url_to_base64(url, modality = "image"): f"ModelNotDeployedError: model=`{data_root.model}` does not support `embed` for modality `{modality.value}`. 
Reason: {ex}", code=status.HTTP_400_BAD_REQUEST, ) - except (ImageCorruption, AudioCorruption) as ex: - # get urls_or_bytes if not defined - try: - urls_or_bytes = urls_or_bytes - except NameError: - urls_or_bytes = [] + except (ImageCorruption, AudioCorruption, MatryoshkaDimError) as ex: raise errors.OpenAIException( - f"{modality.value}Corruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", + f"{ex.__class__} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except Exception as ex: @@ -545,9 +541,9 @@ async def _embeddings_image(data: ImageEmbeddingInput): encoding_format=data.encoding_format, usage=usage, ) - except ImageCorruption as ex: + except (ImageCorruption, MatryoshkaDimError) as ex: raise errors.OpenAIException( - f"ImageCorruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", + f"{ex.__class__} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except ModelNotDeployedError as ex: @@ -604,9 +600,9 @@ async def _embeddings_audio(data: AudioEmbeddingInput): encoding_format=data.encoding_format, usage=usage, ) - except AudioCorruption as ex: + except (AudioCorruption, MatryoshkaDimError) as ex: raise errors.OpenAIException( - f"AudioCorruption, could not open {[b if isinstance(b, str) else 'bytes' for b in urls_or_bytes]} -> {ex}", + f"{ex.__class__} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except ModelNotDeployedError as ex: diff --git a/libs/infinity_emb/infinity_emb/primitives.py b/libs/infinity_emb/infinity_emb/primitives.py index 7ff8d404..34677069 100644 --- a/libs/infinity_emb/infinity_emb/primitives.py +++ b/libs/infinity_emb/infinity_emb/primitives.py @@ -436,6 +436,10 @@ class ModelNotDeployedError(Exception): pass +class MatryoshkaDimError(Exception): + pass + + class ImageCorruption(Exception): pass diff --git a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py index 4893df2a..b31a0a81 100644 --- a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py @@ -51,13 +51,9 @@ async def resolve_audios( CHECK_AIOHTTP.mark_required() CHECK_SOUNDFILE.mark_required() - resolved_audios: list[AudioSingle] = [] async with aiohttp.ClientSession(trust_env=True) as session: - try: - resolved_audios = await asyncio.gather( - *[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls] - ) - except Exception as e: - raise AudioCorruption(f"Failed to resolve audio: {e}") + resolved_audios = await asyncio.gather( + *[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls] + ) return resolved_audios diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py index 51edef0f..ae995b13 100644 --- a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py +++ b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py @@ -39,7 +39,7 @@ def __init__(self, *, engine_args: EngineArgs): prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()), ) - self.model = optimize_model( + model = optimize_model( model_name_or_path=engine_args.model_name_or_path, model_class=ORTModelForSequenceClassification, revision=engine_args.revision, @@ -48,7 +48,7 @@ def __init__(self, *, engine_args: EngineArgs): file_name=onnx_file.as_posix(), optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), ) - 
self.model.use_io_binding = False + model.use_io_binding = False self.tokenizer = AutoTokenizer.from_pretrained( engine_args.model_name_or_path, @@ -60,12 +60,11 @@ def __init__(self, *, engine_args: EngineArgs): self._pipe = pipeline( task="text-classification", - model=self.model, + model=model, trust_remote_code=engine_args.trust_remote_code, top_k=None, revision=engine_args.revision, tokenizer=self.tokenizer, - device=engine_args.device, ) def encode_pre(self, sentences: list[str]): diff --git a/libs/infinity_emb/infinity_emb/transformer/vision/utils.py b/libs/infinity_emb/infinity_emb/transformer/vision/utils.py index 16b80c37..f0378fde 100644 --- a/libs/infinity_emb/infinity_emb/transformer/vision/utils.py +++ b/libs/infinity_emb/infinity_emb/transformer/vision/utils.py @@ -78,11 +78,7 @@ async def resolve_images( CHECK_PIL.mark_required() resolved_imgs = [] - - try: - async with aiohttp.ClientSession(trust_env=True) as session: - resolved_imgs = await asyncio.gather(*[resolve_image(img, session) for img in images]) - except Exception as e: - raise ImageCorruption(f"Failed to resolve image: {images}.\nError msg: {str(e)}") + async with aiohttp.ClientSession(trust_env=True) as session: + resolved_imgs = await asyncio.gather(*[resolve_image(img, session) for img in images]) return resolved_imgs diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 654cdf13..1bd621d1 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "infinity_emb" -version = "0.0.72" +version = "0.0.73" description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip." authors = ["michaelfeil "] license = "MIT" diff --git a/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py b/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py index 386c3061..95b4104d 100644 --- a/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py +++ b/libs/infinity_emb/tests/unit_test/transformer/classifier/test_optimum_classifier.py @@ -1,7 +1,6 @@ -import torch -from optimum.pipelines import pipeline # type: ignore -from optimum.onnxruntime import ORTModelForSequenceClassification +from transformers.pipelines import pipeline # type: ignore from infinity_emb.args import EngineArgs + from infinity_emb.transformer.classifier.optimum import OptimumClassifier @@ -9,15 +8,12 @@ def test_classifier(model_name: str = "SamLowe/roberta-base-go_emotions-onnx"): model = OptimumClassifier( engine_args=EngineArgs( model_name_or_path=model_name, - device="cuda" if torch.cuda.is_available() else "cpu", ) # type: ignore ) pipe = pipeline( task="text-classification", - model=ORTModelForSequenceClassification.from_pretrained( - model_name, file_name="onnx/model_quantized.onnx" - ), + model="SamLowe/roberta-base-go_emotions", # hoping that this is the same model as model_name top_k=None, ) From 944643bf3c83654d831ef5223e925eda4f1f0c6d Mon Sep 17 00:00:00 2001 From: michaelfeil <63565275+michaelfeil@users.noreply.github.com> Date: Tue, 10 Dec 2024 00:43:41 -0800 Subject: [PATCH 10/10] add all authors > 10 commits, 1000LOC excluding poetry.lock to contributor list --- docs/assets/openapi.json | 2 +- libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py | 4 +++- libs/infinity_emb/infinity_emb/infinity_server.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/assets/openapi.json 
b/docs/assets/openapi.json index fe6a9faf..497a0bc5 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.73"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb 
v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n 
\"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type"
:"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil, Raphael Wirth"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.72"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. 
Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format=\"float\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format=\"base64\", # base64: optional high performance setting\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. 
Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus 
metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of 
classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type"
:"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"},{"items":{"items":{"type":"number"},"type":"array"},"type":"array"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"dimensions":{"type":"integer","title":"Dimensions","default":0},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py index 349580e2..9595fe0b 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py @@ -17,7 +17,9 @@ def startup_message(host: str, port: int, prefix: str) -> str: return f""" ♾️ Infinity - Embedding Inference Server -MIT License; Copyright (c) 2023-now Michael Feil +MIT License; Copyright (c) 2023-now +Infinity OSS-Project: github.com/michaelfeil.infinity +Maintained by @michaelfeil @wirthual Version {__version__} Open the Docs via Swagger UI: diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index a835d3e3..74e085ca 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -130,7 +130,7 @@ async def kill_later(seconds: int): summary=docs.FASTAPI_SUMMARY, description=docs.FASTAPI_DESCRIPTION, version=infinity_emb.__version__, - contact=dict(name="Michael Feil"), + contact=dict(name="Michael Feil, Raphael Wirth"), # codespell:ignore docs_url=f"{url_prefix}/docs", openapi_url=f"{url_prefix}/openapi.json", license_info={