diff --git a/libs/infinity_emb/infinity_emb/engine.py b/libs/infinity_emb/infinity_emb/engine.py index 0db14ea5..d23a1bf4 100644 --- a/libs/infinity_emb/infinity_emb/engine.py +++ b/libs/infinity_emb/infinity_emb/engine.py @@ -336,7 +336,7 @@ async def classify( return await self[model].classify(sentences=sentences, raw_scores=raw_scores) async def image_embed( - self, model: str, images: list[str] + self, *, model: str, images: list[str] ) -> tuple[list[EmbeddingReturnType], int]: """embed multiple images diff --git a/libs/infinity_emb/infinity_emb/sync_engine.py b/libs/infinity_emb/infinity_emb/sync_engine.py index 7a9774ab..cf49405d 100644 --- a/libs/infinity_emb/infinity_emb/sync_engine.py +++ b/libs/infinity_emb/infinity_emb/sync_engine.py @@ -99,14 +99,14 @@ def async_run( @add_start_docstrings(AsyncEngineArray.__doc__) class SyncEngineArray(AsyncLifeMixin): - def __init__(self, engine_args: list[EngineArgs]): + def __init__(self, _engine_args_array: list[EngineArgs]): super().__init__() - self.async_engine_array = AsyncEngineArray.from_args(engine_args) + self.async_engine_array = AsyncEngineArray.from_args(_engine_args_array) self.async_run(self.async_engine_array.astart).result() @classmethod - def from_args(cls, engine_args: list[EngineArgs]) -> "SyncEngineArray": - return cls(engine_args) + def from_args(cls, engine_args_array: list[EngineArgs]) -> "SyncEngineArray": + return cls(_engine_args_array=engine_args_array) @property def is_running(self): @@ -128,17 +128,26 @@ def embed(self, *, model: str, sentences: list[str]): ) @add_start_docstrings(AsyncEngineArray.rerank.__doc__) - def rerank(self, *, model: str, query: str, docs: list[str]): + def rerank( + self, *, model: str, query: str, docs: list[str], raw_scores: bool = False + ): """sync interface of AsyncEngineArray""" return self.async_run( - self.async_engine_array.rerank, model=model, query=query, docs=docs + self.async_engine_array.rerank, + model=model, + query=query, + docs=docs, + raw_scores=raw_scores, ) @add_start_docstrings(AsyncEngineArray.classify.__doc__) - def classify(self, *, model: str, sentences: str): + def classify(self, *, model: str, sentences: list[str], raw_scores: bool = False): """sync interface of AsyncEngineArray""" return self.async_run( - self.async_engine_array.classify, model=model, sentences=sentences + self.async_engine_array.classify, + model=model, + sentences=sentences, + raw_scores=raw_scores, ) @add_start_docstrings(AsyncEngineArray.image_embed.__doc__) diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index a6778e79..ccccc6e8 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_emb" -version = "0.0.48" +version = "0.0.49" description = "Infinity is a high-throughput, low-latency REST API for serving vector embeddings, supporting a wide range of sentence-transformer models and frameworks." authors = ["michaelfeil "] license = "MIT" diff --git a/libs/infinity_emb/tests/conftest.py b/libs/infinity_emb/tests/conftest.py index f46fecc6..4fec5bd5 100644 --- a/libs/infinity_emb/tests/conftest.py +++ b/libs/infinity_emb/tests/conftest.py @@ -9,6 +9,8 @@ pytest.DEFAULT_RERANKER_MODEL = "mixedbread-ai/mxbai-rerank-xsmall-v1" pytest.DEFAULT_CLASSIFIER_MODEL = "SamLowe/roberta-base-go_emotions" +pytest.ENGINE_METHODS = ["embed", "image_embed", "classify", "rerank"] + @pytest.fixture def anyio_backend(): diff --git a/libs/infinity_emb/tests/unit_test/test_engine.py b/libs/infinity_emb/tests/unit_test/test_engine.py index d833f443..89b1a3df 100644 --- a/libs/infinity_emb/tests/unit_test/test_engine.py +++ b/libs/infinity_emb/tests/unit_test/test_engine.py @@ -1,4 +1,5 @@ import asyncio +import inspect import sys import numpy as np @@ -6,7 +7,7 @@ import torch from sentence_transformers import CrossEncoder # type: ignore[import-untyped] -from infinity_emb import AsyncEmbeddingEngine, EngineArgs +from infinity_emb import AsyncEmbeddingEngine, AsyncEngineArray, EngineArgs from infinity_emb.primitives import ( Device, EmbeddingDtype, @@ -251,3 +252,14 @@ async def test_async_api_failing_revision(): revision="a32952c6d05d45f64f9f709a092c00839bcfe70a", ) ) + + +@pytest.mark.parametrize("method_name", list(pytest.ENGINE_METHODS)) # type: ignore +def test_args_between_array_and_engine_same(method_name: str): + array_method = inspect.getfullargspec(getattr(AsyncEngineArray, method_name)) + engine_method = inspect.getfullargspec(getattr(AsyncEmbeddingEngine, method_name)) + + assert "model" in array_method.kwonlyargs + assert sorted(array_method.args + array_method.kwonlyargs) == sorted( + engine_method.args + engine_method.kwonlyargs + ["model"] + ) diff --git a/libs/infinity_emb/tests/unit_test/test_sync_engine.py b/libs/infinity_emb/tests/unit_test/test_sync_engine.py index 6b8f1053..6e238012 100644 --- a/libs/infinity_emb/tests/unit_test/test_sync_engine.py +++ b/libs/infinity_emb/tests/unit_test/test_sync_engine.py @@ -1,8 +1,9 @@ +import inspect from uuid import uuid4 import pytest -from infinity_emb import EngineArgs, SyncEngineArray +from infinity_emb import AsyncEngineArray, EngineArgs, SyncEngineArray def test_sync_engine(): @@ -75,5 +76,16 @@ def test_sync_engine_on_model(model_id, method: str, payload: dict): s_eng_array.stop() +@pytest.mark.parametrize("method_name", list(pytest.ENGINE_METHODS) + ["from_args"]) # type: ignore +def test_args_between_sync_and_async_same(method_name: str): + sync_method = inspect.getfullargspec(getattr(SyncEngineArray, method_name)) + async_method = inspect.getfullargspec(getattr(AsyncEngineArray, method_name)) + if method_name in list(pytest.ENGINE_METHODS): # type: ignore + assert "model" in sync_method.kwonlyargs + assert "model" in async_method.kwonlyargs + assert sync_method.args == async_method.args + assert sync_method.kwonlyargs == async_method.kwonlyargs + + if __name__ == "__main__": test_sync_engine() diff --git a/libs/simpleinference/poetry.lock b/libs/simpleinference/poetry.lock index 8212f7e7..4be8c0e4 100644 --- a/libs/simpleinference/poetry.lock +++ b/libs/simpleinference/poetry.lock @@ -933,22 +933,20 @@ files = [ [[package]] name = "infinity-emb" -version = "0.0.47" +version = "0.0.49" description = "Infinity is a high-throughput, low-latency REST API for serving vector embeddings, supporting a wide range of sentence-transformer models and frameworks." optional = false -python-versions = "<4,>=3.9" -files = [ - {file = "infinity_emb-0.0.47-py3-none-any.whl", hash = "sha256:53790664be0c677d72009869d73cf9e03cc107694f2c128ed782ef24138b8ed2"}, - {file = "infinity_emb-0.0.47.tar.gz", hash = "sha256:e9835f37301a16f73ea282cf511917d9a65547f9acb408a8d4245ca7de9d7cb7"}, -] +python-versions = ">=3.9,<4" +files = [] +develop = false [package.dependencies] hf_transfer = ">=0.1.5" huggingface_hub = "*" numpy = ">=1.20.0,<2" -optimum = {version = ">=1.16.2", extras = ["onnxruntime"], optional = true, markers = "extra == \"optimum\" or extra == \"all\""} -pillow = {version = "*", optional = true, markers = "extra == \"vision\" or extra == \"all\""} -timm = {version = "*", optional = true, markers = "extra == \"vision\" or extra == \"all\""} +optimum = {version = ">=1.16.2", extras = ["onnxruntime"], optional = true} +pillow = {version = "*", optional = true} +timm = {version = "*", optional = true} [package.extras] all = ["ctranslate2 (>=4.0.0,<5.0.0)", "diskcache", "einops", "fastapi (>=0.103.2)", "optimum[onnxruntime] (>=1.16.2)", "orjson (>=3.9.8,!=3.10.0)", "pillow", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "sentence-transformers (>=3.0.1,<4.0.0)", "timm", "torch (>=2.2.1)", "typer[all] (>=0.9.0,<0.10.0)", "uvicorn[standard] (>=0.23.2,<0.24.0)"] @@ -963,6 +961,10 @@ tensorrt = ["tensorrt (>=8.6.1,<9.0.0)"] torch = ["sentence-transformers (>=3.0.1,<4.0.0)", "torch (>=2.2.1)"] vision = ["pillow", "timm"] +[package.source] +type = "directory" +url = "../infinity_emb" + [[package]] name = "iniconfig" version = "2.0.0" @@ -3125,4 +3127,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "9f8306b6bb33ee205dafb73816204996153c371d643786bbd1f7e516b34aa988" +content-hash = "090cebaf1aba5d7cfb553d30fe32f524cc2ca990d9352ea066deca74fe510884" diff --git a/libs/simpleinference/pyproject.toml b/libs/simpleinference/pyproject.toml index 644e9d4b..1ab7fc1e 100644 --- a/libs/simpleinference/pyproject.toml +++ b/libs/simpleinference/pyproject.toml @@ -12,7 +12,7 @@ priority = "explicit" [tool.poetry.dependencies] python = ">=3.9,<4" -infinity_emb = {version = "0.0.47", extras = ["optimum","vision"]} +infinity_emb = {path = "../infinity_emb", extras = ["optimum","vision"]} [tool.poetry.group.test.dependencies] pytest = "^7.0.0" diff --git a/libs/simpleinference/simpleinference/infer.py b/libs/simpleinference/simpleinference/infer.py index dcb7d172..051d58ed 100644 --- a/libs/simpleinference/simpleinference/infer.py +++ b/libs/simpleinference/simpleinference/infer.py @@ -48,7 +48,9 @@ def __init__( ) for m, e, d, edt in zip(model_id, engine, device, embedding_dtype) ] - self._engine_array = SyncEngineArray.from_args(engine_args=self._engine_args) + self._engine_array = SyncEngineArray.from_args( + engine_args_array=self._engine_args + ) def stop(self): self._engine_array.stop()