From 5dde578e189e55d7c65a8231f32ee9f1ae2a8e28 Mon Sep 17 00:00:00 2001 From: michaelfeil Date: Thu, 12 Oct 2023 02:35:58 +0200 Subject: [PATCH 1/4] add unit tests passing. --- .github/workflows/test.yaml | 4 +- README.md | 4 +- .../infinity_emb/fastapi_schemas/convert.py | 21 ++----- .../infinity_emb/fastapi_schemas/docs.py | 16 +++++ .../infinity_emb/inference/batch_handler.py | 14 +++-- .../infinity_emb/inference/models.py | 25 +++----- .../infinity_emb/infinity_server.py | 57 +++++++++-------- libs/infinity_emb/poetry.lock | 61 ++++++++++++++++++- libs/infinity_emb/pyproject.toml | 1 + .../tests/end_to_end/test_ct2_sentence.py | 4 +- libs/infinity_emb/tests/script_live.py | 51 ++++++++++++++++ .../unit_test/inference/test_batch_handler.py | 4 +- 12 files changed, 189 insertions(+), 73 deletions(-) create mode 100644 libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py create mode 100644 libs/infinity_emb/tests/script_live.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 486fb67e..0583588e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -49,4 +49,6 @@ jobs: - name: Upload coverage Report to Codecov for python 3.10 if: ${{ matrix.python-version == '3.10' && inputs.upload_coverage == true }} - uses: codecov/codecov-action@v2 \ No newline at end of file + uses: codecov/codecov-action@v3 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 1842ef62..bb1f306f 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ Embedding Inference Server - finding TGI for embeddings ## Why Infinity: Infinity provides the following features: - **Fast inference**: The inference server is built on top of [torch](https:) and [ctranslate2](https://github.com/OpenNMT/CTranslate2) under the hood, getting most out of your **CUDA** or **CPU** hardware. -- **Continous batching**: All new embedding requests are queued while GPU is busy with the previous ones. 
New requests are served as soon as GPU is ready. Adds only ~2% overhead for large datasets, over static batching. +- **Dynamic, optimal batching**: New embedding requests are queued while GPU is busy with the previous ones. New requests are squeezed intro your GPU/CPU as soon as ready. - **Correct and tested implementation**: Unit and end-to-end tested. API embeddings are identical to [sentence-transformers](https://github.com/UKPLab/sentence-transformers/) (up to numerical precision). Lets API users create embeddings till infinity and beyond. -- **Easy to use**: The API is built on top of [FastAPI](https://fastapi.tiangolo.com/) and [Swagger](https://swagger.io/) and is fully documented. See below on how to get started. +- **Easy to use**: The API is built on top of [FastAPI](https://fastapi.tiangolo.com/), [Swagger](https://swagger.io/) makes it fully documented. API specs are aligned to OpenAI. See below on how to get started. # Demo: A quick demo of launching: [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) with batch-size=2 and sending 3 requests via cURL. 
diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py index 457553e7..eb8c1854 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py @@ -1,32 +1,19 @@ from ..inference.primitives import NpEmbeddingType -from .pymodels import OpenAIEmbeddingResult, _EmbeddingObject, _Usage +from .pymodels import OpenAIEmbeddingResult def list_embeddings_to_response( embeddings: NpEmbeddingType, model: str, usage: int ) -> OpenAIEmbeddingResult: - return OpenAIEmbeddingResult( + return dict( model=model, data=[ - _EmbeddingObject( + dict( object="embedding", embedding=emb, index=count, ) for count, emb in enumerate(embeddings) ], - usage=_Usage(prompt_tokens=usage, total_tokens=usage), + usage=dict(prompt_tokens=usage, total_tokens=usage), ) - - # return { - # "model": model, - # "data": [ - # dict( - # object="embedding", - # embedding=emb, - # index=count, - # ) - # for count, emb in enumerate(embeddings) - # ], - # "usage": {"prompt_tokens": usage, "total_tokens": usage}, - # } diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py new file mode 100644 index 00000000..fa251908 --- /dev/null +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/docs.py @@ -0,0 +1,16 @@ +FASTAPI_TITLE = "♾️ Infinity - Embedding Inference Server" +FASTAPI_SUMMARY = "Embedding Inference Server - finding TGI for embeddings" + + +def startup_message(host: str, port: str, prefix: str) -> str: + return f""" + +♾️ Infinity - Embedding Inference Server +MIT License; Copyright (c) 2023 Michael Feil + +Open the Docs via Swagger UI: +http://{host}:{port}/docs + +Access model via 'GET': +curl http://{host}:{port}{prefix}/models +""" diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 5d5029c9..c0316f81 
100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -8,7 +8,7 @@ from typing import Dict, List, Union from ..log_handler import logger -from .models import BaseTransformer +from .models import BaseTransformer, get_lengths_with_tokenize from .primitives import ( EmbeddingResult, NpEmbeddingType, @@ -128,9 +128,7 @@ def __init__( def shutdown(self): self._shutdown.set() - async def schedule( - self, sentences: List[str], prios: List[int] - ) -> NpEmbeddingType | None: + async def schedule(self, sentences: List[str]) -> tuple[List[NpEmbeddingType], int]: """Schedule a sentence to be embedded. Awaits until embedded. Args: @@ -143,6 +141,9 @@ async def schedule( # add an unique identifier uuid_event = [] prioqueue = [] + + prios, usage = get_lengths_with_tokenize(sentences, self.model.tokenize_lengths) + for s, p in zip(sentences, prios): inner = EmbeddingResult(sentence=s, event=EventTS(self._threadpool)) item = PrioritizedQueueItem(item=inner, priority=p) @@ -154,7 +155,8 @@ async def schedule( self._result_store.wait_for_response(uuid, event) for uuid, event in uuid_event ] - return await asyncio.gather(*gather_results) + embeddings = await asyncio.gather(*gather_results) + return embeddings, usage def is_overloaded(self) -> bool: # start consuming @@ -176,7 +178,7 @@ def overload_status(self) -> OverloadStatus: def _preprocess_batch(self): """loops and checks if the _core_batch has worked on all items""" self._ready = True - logger.info("ready to receive requests.") + logger.info("ready to batch requests.") try: while not self._shutdown.is_set(): # patience: diff --git a/libs/infinity_emb/infinity_emb/inference/models.py b/libs/infinity_emb/infinity_emb/inference/models.py index 2bdba0a6..5c93b86f 100644 --- a/libs/infinity_emb/infinity_emb/inference/models.py +++ b/libs/infinity_emb/infinity_emb/inference/models.py @@ -1,3 +1,4 @@ +import copy import os from abc import ABC, 
abstractmethod from enum import Enum @@ -70,9 +71,12 @@ def __init__(self, *args, **kwargs): device = self._target_device self.eval() self.to(device) + # make a copy of the tokenizer, + # to be able to could the tokens in another thread + # without corrupting the original. + self._infinity_tokenizer = copy.deepcopy(self._first_module().tokenizer) def encode_pre(self, sentences) -> Dict[str, Tensor]: - # features = self._tokenize_actual(sentences) features = self.tokenize(sentences) return features @@ -81,7 +85,6 @@ def encode_core(self, features: Dict[str, Tensor]) -> Tensor: """ Computes sentence embeddings """ - # features = self._tokenize_actual(features) device = self._target_device features = util.batch_to_device(features, device) # move forward @@ -103,29 +106,17 @@ def encode_post( return embeddings_out def tokenize_lengths(self, sentences: List[str]) -> List[int]: - fm = self._first_module() - tks = fm.tokenizer.batch_encode_plus( + tks = self._infinity_tokenizer.batch_encode_plus( sentences, add_special_tokens=False, return_token_type_ids=False, return_attention_mask=False, return_length=False, + max_length=self._infinity_tokenizer.model_max_length, + truncation="longest_first", ).encodings return [len(t.tokens) for t in tks] - def _tokenize_actual(self, sentences: List[str]): - fm = self._first_module() - output = fm.tokenizer( - sentences, - padding=True, - truncation="longest_first", - return_tensors="pt", - max_length=fm.tokenizer.model_max_length, - # pad_to_multiple_of=16, - ) - - return dict(**output) - class CT2SentenceTransformer(SentenceTransformerPatched): """ diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 5ae10a61..2870c9ec 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -3,15 +3,14 @@ import typer import uvicorn -from fastapi import FastAPI, status +from fastapi import FastAPI, responses, status 
from prometheus_fastapi_instrumentator import Instrumentator # prometheus import infinity_emb -from infinity_emb.fastapi_schemas import errors +from infinity_emb.fastapi_schemas import docs, errors from infinity_emb.fastapi_schemas.convert import list_embeddings_to_response from infinity_emb.fastapi_schemas.pymodels import ( - ModelInfo, OpenAIEmbeddingInput, OpenAIEmbeddingResult, OpenAIModelInfo, @@ -26,10 +25,11 @@ def create_server( batch_size: int = 64, engine: models.InferenceEngine = models.InferenceEngine.torch, verbose: bool = False, + doc_extra: dict = {}, ): app = FastAPI( - title="♾️ Infinity - Embedding Inference Server", - summary="Embedding Inference Server - finding TGI for embeddings", + title=docs.FASTAPI_TITLE, + summary=docs.FASTAPI_SUMMARY, version=infinity_emb.__version__, contact=dict(name="Michael Feil"), docs_url="/docs", @@ -53,10 +53,17 @@ async def _startup(): app.batch_handler = BatchHandler( max_batch_size=batch_size, model=model, threadpool=app.tp, verbose=verbose ) - app.tokenize_len = model.tokenize_lengths # start in a threadpool await app.batch_handler.spawn() + logger.info( + docs.startup_message( + host=doc_extra.pop("host", "localhost"), + port=doc_extra.pop("port", "PORT"), + prefix=url_prefix, + ) + ) + @app.on_event("shutdown") async def _shutdown(): app.batch_handler.shutdown() @@ -71,23 +78,32 @@ async def _ready() -> float: "model not ready", code=status.HTTP_503_SERVICE_UNAVAILABLE ) - @app.get(f"{url_prefix}/models") - async def _models() -> OpenAIModelInfo: + @app.get( + f"{url_prefix}/models", + response_model=OpenAIModelInfo, + response_class=responses.ORJSONResponse, + ) + async def _models(): """get models endpoint""" s = app.batch_handler.overload_status() # type: ignore - return OpenAIModelInfo( - data=ModelInfo( + return dict( + data=dict( id=model_name_or_path, stats=dict( queue_fraction=s.queue_fraction, queue_absolute=s.queue_absolute, results_pending=s.results_absolute, + batch_size=batch_size, ), ) ) - 
@app.post(f"{url_prefix}/embeddings") - async def _embeddings(data: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult: + @app.post( + f"{url_prefix}/embeddings", + response_model=OpenAIEmbeddingResult, + response_class=responses.ORJSONResponse, + ) + async def _embeddings(data: OpenAIEmbeddingInput): """Encode Embeddings ```python @@ -102,25 +118,16 @@ async def _embeddings(data: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult: ) try: + logger.debug("[📝] Received request with %s inputs ", len(data.input)) start = time.perf_counter() - # lengths, usage = await to_thread( - # models.get_lengths_with_tokenize, app.tp, data.input, app.tokenize_len) - lengths, usage = models.get_lengths_with_tokenize( - data.input # , app.tokenize_len - ) - logger.debug("[📝] Received request with %s inputs ", len(lengths)) - - # emb = await asyncio.gather( - # *[(bh.schedule(s, prio=prio)) for s, prio in zip(data.input, lengths)] - # ) - emb = await bh.schedule(data.input, prios=lengths) + embedding, usage = await bh.schedule(data.input) duration = (time.perf_counter() - start) * 1000 logger.debug("[✅] Done in %s ms", duration) res = list_embeddings_to_response( - embeddings=emb, model=data.model, usage=usage + embeddings=embedding, model=data.model, usage=usage ) return res @@ -165,6 +172,7 @@ def start_uvicorn( batch_size=batch_size, engine=engine_load, verbose=log_level.to_int() <= 10, + doc_extra=dict(host=host, port=port), ) uvicorn.run(app, host=host, port=port, log_level=log_level.name) @@ -174,6 +182,7 @@ def cli(): typer.run(start_uvicorn) +# app = create_server() if __name__ == "__main__": # for debugging cli() diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock index c0d16eb8..299fedc6 100644 --- a/libs/infinity_emb/poetry.lock +++ b/libs/infinity_emb/poetry.lock @@ -1166,6 +1166,65 @@ files = [ setuptools = "*" wheel = "*" +[[package]] +name = "orjson" +version = "3.9.8" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, 
and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.9.8-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:823525bfb27b804b492acc59a45dc0973ea629d97557eac81dde7b34b5267611"}, + {file = "orjson-3.9.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be6f2634fe6c88a0e1e785fc0b6845ad75bef6e20f1ee3d62fd81b17e7505cbf"}, + {file = "orjson-3.9.8-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c56dd62754e2ee5b7f64d37f3e85685d3bd5bcaa448076e9113be9069078dfc"}, + {file = "orjson-3.9.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c863c7805a7961428a40431a8f47c3f71c74e6c5ddf1ab023e6e79bc5806e6d5"}, + {file = "orjson-3.9.8-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d30621cf18a0e16a16fbcf2fa536d800f78514a46f5321130f1b54e88994267"}, + {file = "orjson-3.9.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5311ce1457a29084146d2599588dc8ad96256feb921af8e365444fa8ad67afac"}, + {file = "orjson-3.9.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f9b070c895fc81c362b1b41dc6d0c81a84ee4abb1193804de15683549aeeb0ee"}, + {file = "orjson-3.9.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:24915b65ac19731a57a5ab7dbf463f91555e10d4ad833513e7d8cc6848487c24"}, + {file = "orjson-3.9.8-cp310-none-win32.whl", hash = "sha256:2bcc9dc53f9e1d679515349bf299ed5e75310146c755d2ba227a7e37851ab3fb"}, + {file = "orjson-3.9.8-cp310-none-win_amd64.whl", hash = "sha256:423774c85e73054acfef10fc3328f35c8d3e0193a7247d47308ebfccde70695d"}, + {file = "orjson-3.9.8-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8a1c92f467f5fd0f8fb79273006b563364b1e45667b3760423498348dc2e22fa"}, + {file = "orjson-3.9.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:742d4d16d66579ffff4b2048a8de4a0b03d731847233e92c4edd418a9c582d0f"}, + {file = "orjson-3.9.8-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d1aab08b373232f568ea9ae048f9f77e09f389068afee6dd44bb6140e2c3ea3"}, + {file = "orjson-3.9.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:68ed63273ec4ecdd7865e9d984d65a749c0d780882cf9dde6ab2bc6323f6471a"}, + {file = "orjson-3.9.8-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d23edcb32383f3d86b2f4914f9825ce2d67625abd34be6e5ed1f59ec30127b7a"}, + {file = "orjson-3.9.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9bcd3a48b260d3dfe68b8ce93d11f99a70bd4c908efe22d195a1b1dcfb15ac2"}, + {file = "orjson-3.9.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9ce982f3c1df83f7dc74f3b2690605470ff4790d12558e44359f01e822c5cb08"}, + {file = "orjson-3.9.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4433dd903d5b022a64e9dd1dca94f08ab04d5d928a0ecd33dd46110468960879"}, + {file = "orjson-3.9.8-cp311-none-win32.whl", hash = "sha256:a119c73520192c2882d0549151b9cdd65e0bb5396bedf8951ba5f70d6a873879"}, + {file = "orjson-3.9.8-cp311-none-win_amd64.whl", hash = "sha256:764306f6370e6c76cbbf3139dd9b05be9c4481ee0b15966bd1907827a5777216"}, + {file = "orjson-3.9.8-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:af8e6185516ce0c93d6ce1f4105918504da629c631fd969686f32a1be3ed3c9b"}, + {file = "orjson-3.9.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e8f5ac250184dcb6b00543f0f82853d7e840e476d0135733e459aee058695e5"}, + {file = "orjson-3.9.8-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:edafb45fc5b2063abd8a0baf6be21c38497df2d9e0b75cdb053eb0ff100fa26c"}, + {file = "orjson-3.9.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc449bff1d4152438615f4a6a003577942908c4e166d64dc46d1f3f0cde72ecd"}, 
+ {file = "orjson-3.9.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee887aeb8ab0c1d25e9f2b540f9a34b4cbfe8894f95b63a5984441a9f337d2ff"}, + {file = "orjson-3.9.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:235b4aa46c58ded90c8b368722c1eb941613fe5a6b18bc14cfaae929f0be902e"}, + {file = "orjson-3.9.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ff2e6e429416b6287006ba0556083f62396199299ab85afd3ba1e83be14677e2"}, + {file = "orjson-3.9.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ab9c234bfe89aeba825feb897718c65a80851f367a4a8308d6b5074a80fce6e5"}, + {file = "orjson-3.9.8-cp312-none-win_amd64.whl", hash = "sha256:5c818f19315251d68954c529f5d8322053f1c35b500b47d008e968bf2d32ed97"}, + {file = "orjson-3.9.8-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e6a267c0fc64fc4d0b8fb146e1a060a40f570441a9390ec4bc6de0b5fda148cd"}, + {file = "orjson-3.9.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3c7c4d60e21b0f10c8214d7ca9f2243019dd1bf9d2750b3b4a9250935977a24"}, + {file = "orjson-3.9.8-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3be3da93c4d044d2f60de816320087a8494c3e75cdf3369655e014240b1a229d"}, + {file = "orjson-3.9.8-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0619df2454b87d883f7f9ea95d79fc21fec0b8a4d600b549a1e91f59a3493d6b"}, + {file = "orjson-3.9.8-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:119a6edcecef4e37d30d6998e9cedd9e0ecdc894fa07216221dc8dd2eb24dd9d"}, + {file = "orjson-3.9.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e32ac29f9c30cc152e7432a26c665232a382678f2402bf782f73fbc985cfb37e"}, + {file = "orjson-3.9.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:002f7ca314cc8fbed5f00990bf48eda098ba1bba1e0c23be4bb024381e7889d1"}, + {file = "orjson-3.9.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:e538974e2ed20504f3dad0bcdab41cd5e4fa086dabea852a150e4cc98293183d"}, + {file = "orjson-3.9.8-cp38-none-win32.whl", hash = "sha256:9df23493a72f073b2ab1005e628a963248dc577a2816e9c82caf09ff74908414"}, + {file = "orjson-3.9.8-cp38-none-win_amd64.whl", hash = "sha256:34eec476141a043d478651d1efbf218162cdd57add24dfa659ac89e1a001477a"}, + {file = "orjson-3.9.8-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c9ae634b8a55539c3d5a53813552325733ab3da3601feef8e99f91cef634f3c4"}, + {file = "orjson-3.9.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ad73fde11117b6b103c1d4071168b0e2875d890556fa8597663a5eca81bb812"}, + {file = "orjson-3.9.8-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:154f048e4da06275c1f173445dfbd88f038d29f7529a0dae6157293241b7f5bd"}, + {file = "orjson-3.9.8-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:428fec9497d17ebb5936495bbeaf12b5952bff5f6fde8a0e64030887b8d8cf94"}, + {file = "orjson-3.9.8-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55ae6509f078eb90d157da7717f2826e55ef08756bc4f5b89448c6b56be4ff2c"}, + {file = "orjson-3.9.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e26836a11b88f839b6902f92e8dd997c32f49486119a1aa67d714bc288aae172"}, + {file = "orjson-3.9.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0a27e5161b1f23fd1b5e549b38018bbc7a0f0bd3699d3dec04e2e62d271480d3"}, + {file = "orjson-3.9.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4c836845177d6ee92682d0d9b61346a06b140b5666319905a5b423ebb0ecc5d3"}, + {file = "orjson-3.9.8-cp39-none-win32.whl", hash = "sha256:ca4f3e15517bdcdb573dfe6c97d4171247ce50ec82e3a7b708941b53d5f4bc29"}, + {file = "orjson-3.9.8-cp39-none-win_amd64.whl", hash = "sha256:52c0480d5be12697b10b4d748b86acd4999f47e1d8e44e49486d0a550f30fcba"}, + {file = "orjson-3.9.8.tar.gz", hash = 
"sha256:ed1adc6db9841974170a5195b827ee4e392b1e8ca385b19fcdc3248489844059"}, +] + [[package]] name = "outcome" version = "1.2.0" @@ -2673,4 +2732,4 @@ ct2 = ["ctranslate2"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "238a1a2818488a318c73089d4a0b1281fab8fac29b22230b288736af40b0c462" +content-hash = "015ca70062293ec1c0e0c60d9d409e32b8965dcda93052f29a2cb567b047c075" diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 1594f4ec..32491860 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -19,6 +19,7 @@ rich = "^13.6.0" numpy = "^1" ctranslate2 = {version = "^3.20.0", optional=true} typer = {extras = ["all"], version = "^0.9.0"} +orjson = "^3.9.8" [tool.poetry.scripts] infinity_emb = "infinity_emb.infinity_server:cli" diff --git a/libs/infinity_emb/tests/end_to_end/test_ct2_sentence.py b/libs/infinity_emb/tests/end_to_end/test_ct2_sentence.py index fb4c9860..6180ca3c 100644 --- a/libs/infinity_emb/tests/end_to_end/test_ct2_sentence.py +++ b/libs/infinity_emb/tests/end_to_end/test_ct2_sentence.py @@ -97,9 +97,9 @@ async def _post_batch(inputs): f"{PREFIX}/embeddings", json=dict(input=inputs, model=MODEL) ) - await _post_batch(inputs=dummy_sentences) + response = await _post_batch(inputs=dummy_sentences) - _request_size = batch_size // 2 + _request_size = int(batch_size * 1.5) tasks = [ _post_batch(inputs=sentences[sl : sl + _request_size]) for sl in range(0, len(sentences), _request_size) diff --git a/libs/infinity_emb/tests/script_live.py b/libs/infinity_emb/tests/script_live.py new file mode 100644 index 00000000..a59c16cb --- /dev/null +++ b/libs/infinity_emb/tests/script_live.py @@ -0,0 +1,51 @@ +import json +import timeit + +import numpy as np +import requests +from sentence_transformers import SentenceTransformer + +LIVE_URL = "http://localhost:8001/v1" + + +def embedding_live_performance(): + sample = ["This is a test sentence" * 128] * 2048 + json_d = 
json.dumps({"input": sample, "model": "model"}) + session = requests.Session() + req = session.get(f"{LIVE_URL}/models") + assert req.status_code == 200 + + batch_size = req.json()["data"]["stats"]["batch_size"] + print(f"batch_size is {batch_size}") + model = SentenceTransformer( + model_name_or_path="sentence-transformers/all-MiniLM-L6-v2" + ) + + def local(data: str): + enc = model.encode(data, batch_size=batch_size) + assert len(enc) == len(data) + return enc + + def remote(json_data: bytes): + req = session.post(f"{LIVE_URL}/embeddings", data=json_data) + assert req.status_code == 200 + return req + + local_resp = local(sample) + remote_resp = [d["embedding"] for d in remote(json_d).json()["data"]] + np.testing.assert_almost_equal(local_resp, remote_resp, 6) + + print("Measuring latency via SentenceTransformers") + latency_st = timeit.timeit("local(sample)", number=10, globals=locals()) + print("SentenceTransformers latency: ", latency_st) + model = None + + print("Measuring latency via requests") + latency_request = timeit.timeit("remote(json_d)", number=10, globals=locals()) + print(f"Request latency: {latency_request}") + + assert latency_st * 1.1 > latency_request + + +if __name__ == "__main__": + embedding_live_performance() diff --git a/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py b/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py index 9fabf5cd..f21aeacc 100644 --- a/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py +++ b/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py @@ -16,7 +16,7 @@ BATCH_SIZE = 32 N_TIMINGS = 3 -LIMIT_SLOWDOWN = 1.15 if torch.cuda.is_available() else 1.3 +LIMIT_SLOWDOWN = 1.20 if torch.cuda.is_available() else 1.3 @pytest.fixture @@ -53,12 +53,10 @@ async def test_batch_performance_raw(get_sts_bechmark_dataset, load_patched_bh): async def method_batch_handler(_sentences): _sentences = copy.deepcopy(_sentences) start = time.perf_counter() - lengths, _ = 
get_lengths_with_tokenize(_sentences, model.tokenize_lengths) _request_size = BATCH_SIZE * 4 tasks = [ bh.schedule( _sentences[sl : sl + _request_size], - prios=lengths[sl : sl + _request_size], ) for sl in range(0, len(_sentences), _request_size) ] From d4c42514a384d1c9c4f053d0da2239a696a84b71 Mon Sep 17 00:00:00 2001 From: michaelfeil Date: Thu, 12 Oct 2023 02:44:39 +0200 Subject: [PATCH 2/4] improve tokenization --- libs/infinity_emb/infinity_emb/inference/batch_handler.py | 2 +- libs/infinity_emb/infinity_emb/inference/models.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index c0316f81..626876f1 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -142,7 +142,7 @@ async def schedule(self, sentences: List[str]) -> tuple[List[NpEmbeddingType], i uuid_event = [] prioqueue = [] - prios, usage = get_lengths_with_tokenize(sentences, self.model.tokenize_lengths) + prios, usage = get_lengths_with_tokenize(sentences) #, self.model.tokenize_lengths) for s, p in zip(sentences, prios): inner = EmbeddingResult(sentence=s, event=EventTS(self._threadpool)) diff --git a/libs/infinity_emb/infinity_emb/inference/models.py b/libs/infinity_emb/infinity_emb/inference/models.py index 5c93b86f..87225a4b 100644 --- a/libs/infinity_emb/infinity_emb/inference/models.py +++ b/libs/infinity_emb/infinity_emb/inference/models.py @@ -112,8 +112,8 @@ def tokenize_lengths(self, sentences: List[str]) -> List[int]: return_token_type_ids=False, return_attention_mask=False, return_length=False, - max_length=self._infinity_tokenizer.model_max_length, - truncation="longest_first", + # max_length=self._infinity_tokenizer.model_max_length, + # truncation="longest_first", ).encodings return [len(t.tokens) for t in tks] From 53a58c92256088bc1766cf8c5b08eec781deb786 Mon Sep 17 
00:00:00 2001 From: michaelfeil Date: Thu, 12 Oct 2023 03:24:33 +0200 Subject: [PATCH 3/4] update torch: move to torch only if needed. --- .../infinity_emb/inference/batch_handler.py | 4 ++-- .../infinity_emb/inference/models.py | 20 +++++++++---------- libs/infinity_emb/tests/script_live.py | 10 +++++----- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 626876f1..b639bbac 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -117,7 +117,7 @@ def __init__( self._queue_prio = CustomPrioQueue() self._result_store = ResultKVStore() self._feature_queue: queue.Queue = queue.Queue(4) - self._postprocess_queue: queue.Queue = queue.Queue(5) + self._postprocess_queue: queue.Queue = queue.Queue(4) self.max_batch_size = max_batch_size self.model = model self.max_queue_wait = max_queue_wait @@ -266,7 +266,7 @@ async def _postprocess_batch(self): except queue.Empty: # 7 ms, assuming this is below # 3-50ms for inference on avg. 
- await asyncio.sleep(7e-3) + await asyncio.sleep(5e-3) continue embed, batch = post_batch embeddings = self.model.encode_post(embed).tolist() diff --git a/libs/infinity_emb/infinity_emb/inference/models.py b/libs/infinity_emb/infinity_emb/inference/models.py index 87225a4b..9c869ebc 100644 --- a/libs/infinity_emb/infinity_emb/inference/models.py +++ b/libs/infinity_emb/infinity_emb/inference/models.py @@ -77,6 +77,7 @@ def __init__(self, *args, **kwargs): self._infinity_tokenizer = copy.deepcopy(self._first_module().tokenizer) def encode_pre(self, sentences) -> Dict[str, Tensor]: + features = self.tokenize(sentences) return features @@ -85,23 +86,22 @@ def encode_core(self, features: Dict[str, Tensor]) -> Tensor: """ Computes sentence embeddings """ - device = self._target_device - features = util.batch_to_device(features, device) - # move forward - - with torch.no_grad(): - out_features = self.forward(features) + + with torch.inference_mode(): + device = self._target_device + features = util.batch_to_device(features, device) + out_features = self.forward(features)["sentence_embedding"] - return out_features["sentence_embedding"].detach().cpu() + return out_features def encode_post( self, out_features: Tensor, normalize_embeddings: bool = True ) -> NpEmbeddingType: - with torch.no_grad(): - embeddings = out_features + with torch.inference_mode(): + embeddings = out_features.detach().cpu() if normalize_embeddings: embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1) - embeddings_out: np.ndarray = embeddings.cpu().numpy() + embeddings_out: np.ndarray = embeddings.numpy() return embeddings_out diff --git a/libs/infinity_emb/tests/script_live.py b/libs/infinity_emb/tests/script_live.py index a59c16cb..dbb99448 100644 --- a/libs/infinity_emb/tests/script_live.py +++ b/libs/infinity_emb/tests/script_live.py @@ -35,16 +35,16 @@ def remote(json_data: bytes): remote_resp = [d["embedding"] for d in remote(json_d).json()["data"]] 
np.testing.assert_almost_equal(local_resp, remote_resp, 6) - print("Measuring latency via SentenceTransformers") - latency_st = timeit.timeit("local(sample)", number=10, globals=locals()) - print("SentenceTransformers latency: ", latency_st) - model = None + # print("Measuring latency via SentenceTransformers") + # latency_st = timeit.timeit("local(sample)", number=10, globals=locals()) + # print("SentenceTransformers latency: ", latency_st) + # model = None print("Measuring latency via requests") latency_request = timeit.timeit("remote(json_d)", number=10, globals=locals()) print(f"Request latency: {latency_request}") - assert latency_st * 1.1 > latency_request + # assert latency_st * 1.1 > latency_request if __name__ == "__main__": From a7358ee1a3d48b2688d1224ed4e606fda0c26ead Mon Sep 17 00:00:00 2001 From: michaelfeil Date: Thu, 12 Oct 2023 03:36:13 +0200 Subject: [PATCH 4/4] format and add uvicorn --- README.md | 2 +- .../infinity_emb/inference/batch_handler.py | 4 +- .../infinity_emb/inference/models.py | 5 +- libs/infinity_emb/poetry.lock | 227 +++++++++++++++++- libs/infinity_emb/pyproject.toml | 2 +- .../unit_test/inference/test_batch_handler.py | 1 - 6 files changed, 233 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bb1f306f..0f9f01a6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Embedding Inference Server - finding TGI for embeddings ## Why Infinity: Infinity provides the following features: - **Fast inference**: The inference server is built on top of [torch](https:) and [ctranslate2](https://github.com/OpenNMT/CTranslate2) under the hood, getting most out of your **CUDA** or **CPU** hardware. -- **Dynamic, optimal batching**: New embedding requests are queued while GPU is busy with the previous ones. New requests are squeezed intro your GPU/CPU as soon as ready. +- **Dynamic batching**: New embedding requests are queued while GPU is busy with the previous ones. 
New requests are squeezed into your GPU/CPU as soon as ready. - **Correct and tested implementation**: Unit and end-to-end tested. API embeddings are identical to [sentence-transformers](https://github.com/UKPLab/sentence-transformers/) (up to numerical precision). Lets API users create embeddings till infinity and beyond. - **Easy to use**: The API is built on top of [FastAPI](https://fastapi.tiangolo.com/), [Swagger](https://swagger.io/) makes it fully documented. API specs are aligned to OpenAI. See below on how to get started. diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index b639bbac..bf576810 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -142,7 +142,9 @@ async def schedule(self, sentences: List[str]) -> tuple[List[NpEmbeddingType], i uuid_event = [] prioqueue = [] - prios, usage = get_lengths_with_tokenize(sentences) #, self.model.tokenize_lengths) + prios, usage = get_lengths_with_tokenize( + sentences + ) # , self.model.tokenize_lengths) for s, p in zip(sentences, prios): inner = EmbeddingResult(sentence=s, event=EventTS(self._threadpool)) diff --git a/libs/infinity_emb/infinity_emb/inference/models.py b/libs/infinity_emb/infinity_emb/inference/models.py index 9c869ebc..02b9791d 100644 --- a/libs/infinity_emb/infinity_emb/inference/models.py +++ b/libs/infinity_emb/infinity_emb/inference/models.py @@ -77,7 +77,6 @@ def __init__(self, *args, **kwargs): self._infinity_tokenizer = copy.deepcopy(self._first_module().tokenizer) def encode_pre(self, sentences) -> Dict[str, Tensor]: - features = self.tokenize(sentences) return features @@ -86,10 +85,10 @@ def encode_core(self, features: Dict[str, Tensor]) -> Tensor: """ Computes sentence embeddings """ - + with torch.inference_mode(): device = self._target_device - features = util.batch_to_device(features, device) + features = 
util.batch_to_device(features, device) out_features = self.forward(features)["sentence_embedding"] return out_features diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock index 299fedc6..18d4f5f6 100644 --- a/libs/infinity_emb/poetry.lock +++ b/libs/infinity_emb/poetry.lock @@ -622,6 +622,53 @@ sniffio = "==1.*" http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "httptools" +version = "0.6.0" +description = "A collection of framework independent HTTP protocol utils." +optional = false +python-versions = ">=3.5.0" +files = [ + {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:818325afee467d483bfab1647a72054246d29f9053fd17cc4b86cda09cc60339"}, + {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72205730bf1be875003692ca54a4a7c35fac77b4746008966061d9d41a61b0f5"}, + {file = "httptools-0.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33eb1d4e609c835966e969a31b1dedf5ba16b38cab356c2ce4f3e33ffa94cad3"}, + {file = "httptools-0.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdc6675ec6cb79d27e0575750ac6e2b47032742e24eed011b8db73f2da9ed40"}, + {file = "httptools-0.6.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:463c3bc5ef64b9cf091be9ac0e0556199503f6e80456b790a917774a616aff6e"}, + {file = "httptools-0.6.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:82f228b88b0e8c6099a9c4757ce9fdbb8b45548074f8d0b1f0fc071e35655d1c"}, + {file = "httptools-0.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:0781fedc610293a2716bc7fa142d4c85e6776bc59d617a807ff91246a95dea35"}, + {file = "httptools-0.6.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:721e503245d591527cddd0f6fd771d156c509e831caa7a57929b55ac91ee2b51"}, + {file = "httptools-0.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:274bf20eeb41b0956e34f6a81f84d26ed57c84dd9253f13dcb7174b27ccd8aaf"}, + {file 
= "httptools-0.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:259920bbae18740a40236807915def554132ad70af5067e562f4660b62c59b90"}, + {file = "httptools-0.6.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03bfd2ae8a2d532952ac54445a2fb2504c804135ed28b53fefaf03d3a93eb1fd"}, + {file = "httptools-0.6.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f959e4770b3fc8ee4dbc3578fd910fab9003e093f20ac8c621452c4d62e517cb"}, + {file = "httptools-0.6.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e22896b42b95b3237eccc42278cd72c0df6f23247d886b7ded3163452481e38"}, + {file = "httptools-0.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:38f3cafedd6aa20ae05f81f2e616ea6f92116c8a0f8dcb79dc798df3356836e2"}, + {file = "httptools-0.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:47043a6e0ea753f006a9d0dd076a8f8c99bc0ecae86a0888448eb3076c43d717"}, + {file = "httptools-0.6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35a541579bed0270d1ac10245a3e71e5beeb1903b5fbbc8d8b4d4e728d48ff1d"}, + {file = "httptools-0.6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65d802e7b2538a9756df5acc062300c160907b02e15ed15ba035b02bce43e89c"}, + {file = "httptools-0.6.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:26326e0a8fe56829f3af483200d914a7cd16d8d398d14e36888b56de30bec81a"}, + {file = "httptools-0.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e41ccac9e77cd045f3e4ee0fc62cbf3d54d7d4b375431eb855561f26ee7a9ec4"}, + {file = "httptools-0.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4e748fc0d5c4a629988ef50ac1aef99dfb5e8996583a73a717fc2cac4ab89932"}, + {file = "httptools-0.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:cf8169e839a0d740f3d3c9c4fa630ac1a5aaf81641a34575ca6773ed7ce041a1"}, + {file = "httptools-0.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:5dcc14c090ab57b35908d4a4585ec5c0715439df07be2913405991dbb37e049d"}, + {file = "httptools-0.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d0b0571806a5168013b8c3d180d9f9d6997365a4212cb18ea20df18b938aa0b"}, + {file = "httptools-0.6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fb4a608c631f7dcbdf986f40af7a030521a10ba6bc3d36b28c1dc9e9035a3c0"}, + {file = "httptools-0.6.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:93f89975465133619aea8b1952bc6fa0e6bad22a447c6d982fc338fbb4c89649"}, + {file = "httptools-0.6.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:73e9d66a5a28b2d5d9fbd9e197a31edd02be310186db423b28e6052472dc8201"}, + {file = "httptools-0.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:22c01fcd53648162730a71c42842f73b50f989daae36534c818b3f5050b54589"}, + {file = "httptools-0.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f96d2a351b5625a9fd9133c95744e8ca06f7a4f8f0b8231e4bbaae2c485046a"}, + {file = "httptools-0.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72ec7c70bd9f95ef1083d14a755f321d181f046ca685b6358676737a5fecd26a"}, + {file = "httptools-0.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b703d15dbe082cc23266bf5d9448e764c7cb3fcfe7cb358d79d3fd8248673ef9"}, + {file = "httptools-0.6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82c723ed5982f8ead00f8e7605c53e55ffe47c47465d878305ebe0082b6a1755"}, + {file = "httptools-0.6.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b0a816bb425c116a160fbc6f34cece097fd22ece15059d68932af686520966bd"}, + {file = "httptools-0.6.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:dea66d94e5a3f68c5e9d86e0894653b87d952e624845e0b0e3ad1c733c6cc75d"}, + {file = "httptools-0.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:23b09537086a5a611fad5696fc8963d67c7e7f98cb329d38ee114d588b0b74cd"}, + {file = 
"httptools-0.6.0.tar.gz", hash = "sha256:9fc6e409ad38cbd68b177cd5158fc4042c796b82ca88d99ec78f07bed6c6b796"}, +] + +[package.extras] +test = ["Cython (>=0.29.24,<0.30.0)"] + [[package]] name = "httpx" version = "0.25.0" @@ -1610,6 +1657,20 @@ pytest = ">=5.0" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, + {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.1" @@ -2705,12 +2766,176 @@ files = [ [package.dependencies] click = ">=7.0" +colorama = {version = ">=0.4", optional = true, markers = "sys_platform == \"win32\" and extra == \"standard\""} h11 = ">=0.8" +httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} +python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles 
(>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "uvloop" +version = "0.17.0" +description = "Fast implementation of asyncio event loop on top of libuv" +optional = false +python-versions = ">=3.7" +files = [ + {file = "uvloop-0.17.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ce9f61938d7155f79d3cb2ffa663147d4a76d16e08f65e2c66b77bd41b356718"}, + {file = "uvloop-0.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:68532f4349fd3900b839f588972b3392ee56042e440dd5873dfbbcd2cc67617c"}, + {file = "uvloop-0.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0949caf774b9fcefc7c5756bacbbbd3fc4c05a6b7eebc7c7ad6f825b23998d6d"}, + {file = "uvloop-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff3d00b70ce95adce264462c930fbaecb29718ba6563db354608f37e49e09024"}, + {file = "uvloop-0.17.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a5abddb3558d3f0a78949c750644a67be31e47936042d4f6c888dd6f3c95f4aa"}, + {file = "uvloop-0.17.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8efcadc5a0003d3a6e887ccc1fb44dec25594f117a94e3127954c05cf144d811"}, + {file = "uvloop-0.17.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3378eb62c63bf336ae2070599e49089005771cc651c8769aaad72d1bd9385a7c"}, + {file = "uvloop-0.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6aafa5a78b9e62493539456f8b646f85abc7093dd997f4976bb105537cf2635e"}, + {file = "uvloop-0.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c686a47d57ca910a2572fddfe9912819880b8765e2f01dc0dd12a9bf8573e539"}, + {file = "uvloop-0.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:864e1197139d651a76c81757db5eb199db8866e13acb0dfe96e6fc5d1cf45fc4"}, + {file = "uvloop-0.17.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2a6149e1defac0faf505406259561bc14b034cdf1d4711a3ddcdfbaa8d825a05"}, + {file = "uvloop-0.17.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:6708f30db9117f115eadc4f125c2a10c1a50d711461699a0cbfaa45b9a78e376"}, + {file = "uvloop-0.17.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:23609ca361a7fc587031429fa25ad2ed7242941adec948f9d10c045bfecab06b"}, + {file = "uvloop-0.17.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2deae0b0fb00a6af41fe60a675cec079615b01d68beb4cc7b722424406b126a8"}, + {file = "uvloop-0.17.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45cea33b208971e87a31c17622e4b440cac231766ec11e5d22c76fab3bf9df62"}, + {file = "uvloop-0.17.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9b09e0f0ac29eee0451d71798878eae5a4e6a91aa275e114037b27f7db72702d"}, + {file = "uvloop-0.17.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:dbbaf9da2ee98ee2531e0c780455f2841e4675ff580ecf93fe5c48fe733b5667"}, + {file = "uvloop-0.17.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a4aee22ece20958888eedbad20e4dbb03c37533e010fb824161b4f05e641f738"}, + {file = "uvloop-0.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:307958f9fc5c8bb01fad752d1345168c0abc5d62c1b72a4a8c6c06f042b45b20"}, + {file = "uvloop-0.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ebeeec6a6641d0adb2ea71dcfb76017602ee2bfd8213e3fcc18d8f699c5104f"}, + {file = "uvloop-0.17.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1436c8673c1563422213ac6907789ecb2b070f5939b9cbff9ef7113f2b531595"}, + {file = "uvloop-0.17.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8887d675a64cfc59f4ecd34382e5b4f0ef4ae1da37ed665adba0c2badf0d6578"}, + {file = "uvloop-0.17.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3db8de10ed684995a7f34a001f15b374c230f7655ae840964d51496e2f8a8474"}, + {file = "uvloop-0.17.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7d37dccc7ae63e61f7b96ee2e19c40f153ba6ce730d8ba4d3b4e9738c1dccc1b"}, + {file = "uvloop-0.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:cbbe908fda687e39afd6ea2a2f14c2c3e43f2ca88e3a11964b297822358d0e6c"}, + {file = "uvloop-0.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d97672dc709fa4447ab83276f344a165075fd9f366a97b712bdd3fee05efae8"}, + {file = "uvloop-0.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1e507c9ee39c61bfddd79714e4f85900656db1aec4d40c6de55648e85c2799c"}, + {file = "uvloop-0.17.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c092a2c1e736086d59ac8e41f9c98f26bbf9b9222a76f21af9dfe949b99b2eb9"}, + {file = "uvloop-0.17.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:30babd84706115626ea78ea5dbc7dd8d0d01a2e9f9b306d24ca4ed5796c66ded"}, + {file = "uvloop-0.17.0.tar.gz", hash = "sha256:0ddf6baf9cf11a1a22c71487f39f15b2cf78eb5bde7e5b45fbb99e8a9d91b9e1"}, +] + +[package.extras] +dev = ["Cython (>=0.29.32,<0.30.0)", "Sphinx (>=4.1.2,<4.2.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=22.0.0,<22.1.0)", "pycodestyle (>=2.7.0,<2.8.0)", "pytest (>=3.6.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=22.0.0,<22.1.0)", "pycodestyle (>=2.7.0,<2.8.0)"] + +[[package]] +name = "watchfiles" +version = "0.20.0" +description = "Simple, modern and high performance file watching and code reload in python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "watchfiles-0.20.0-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:3796312bd3587e14926013612b23066912cf45a14af71cf2b20db1c12dadf4e9"}, + {file = "watchfiles-0.20.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:d0002d81c89a662b595645fb684a371b98ff90a9c7d8f8630c82f0fde8310458"}, + {file = "watchfiles-0.20.0-cp37-abi3-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:570848706440373b4cd8017f3e850ae17f76dbdf1e9045fc79023b11e1afe490"}, + {file = "watchfiles-0.20.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a0351d20d03c6f7ad6b2e8a226a5efafb924c7755ee1e34f04c77c3682417fa"}, + {file = "watchfiles-0.20.0-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:007dcc4a401093010b389c044e81172c8a2520dba257c88f8828b3d460c6bb38"}, + {file = "watchfiles-0.20.0-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d82dbc1832da83e441d112069833eedd4cf583d983fb8dd666fbefbea9d99c0"}, + {file = "watchfiles-0.20.0-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99f4c65fd2fce61a571b2a6fcf747d6868db0bef8a934e8ca235cc8533944d95"}, + {file = "watchfiles-0.20.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5392dd327a05f538c56edb1c6ebba6af91afc81b40822452342f6da54907bbdf"}, + {file = "watchfiles-0.20.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:08dc702529bb06a2b23859110c214db245455532da5eaea602921687cfcd23db"}, + {file = "watchfiles-0.20.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7d4e66a857621584869cfbad87039e65dadd7119f0d9bb9dbc957e089e32c164"}, + {file = "watchfiles-0.20.0-cp37-abi3-win32.whl", hash = "sha256:a03d1e6feb7966b417f43c3e3783188167fd69c2063e86bad31e62c4ea794cc5"}, + {file = "watchfiles-0.20.0-cp37-abi3-win_amd64.whl", hash = "sha256:eccc8942bcdc7d638a01435d915b913255bbd66f018f1af051cd8afddb339ea3"}, + {file = "watchfiles-0.20.0-cp37-abi3-win_arm64.whl", 
hash = "sha256:b17d4176c49d207865630da5b59a91779468dd3e08692fe943064da260de2c7c"}, + {file = "watchfiles-0.20.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d97db179f7566dcf145c5179ddb2ae2a4450e3a634eb864b09ea04e68c252e8e"}, + {file = "watchfiles-0.20.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:835df2da7a5df5464c4a23b2d963e1a9d35afa422c83bf4ff4380b3114603644"}, + {file = "watchfiles-0.20.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:608cd94a8767f49521901aff9ae0c92cc8f5a24d528db7d6b0295290f9d41193"}, + {file = "watchfiles-0.20.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89d1de8218874925bce7bb2ae9657efc504411528930d7a83f98b1749864f2ef"}, + {file = "watchfiles-0.20.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:13f995d5152a8ba4ed7c2bbbaeee4e11a5944defc7cacd0ccb4dcbdcfd78029a"}, + {file = "watchfiles-0.20.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9b5c8d3be7b502f8c43a33c63166ada8828dbb0c6d49c8f9ce990a96de2f5a49"}, + {file = "watchfiles-0.20.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e43af4464daa08723c04b43cf978ab86cc55c684c16172622bdac64b34e36af0"}, + {file = "watchfiles-0.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87d9e1f75c4f86c93d73b5bd1ebe667558357548f11b4f8af4e0e272f79413ce"}, + {file = "watchfiles-0.20.0.tar.gz", hash = "sha256:728575b6b94c90dd531514677201e8851708e6e4b5fe7028ac506a200b622019"}, +] + +[package.dependencies] +anyio = ">=3.0.0" + +[[package]] +name = "websockets" +version = "11.0.3" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"}, + {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"}, + {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"}, + {file = "websockets-11.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffd7dcaf744f25f82190856bc26ed81721508fc5cbf2a330751e135ff1283564"}, + {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7622a89d696fc87af8e8d280d9b421db5133ef5b29d3f7a1ce9f1a7bf7fcfa11"}, + {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bceab846bac555aff6427d060f2fcfff71042dba6f5fca7dc4f75cac815e57ca"}, + {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54c6e5b3d3a8936a4ab6870d46bdd6ec500ad62bde9e44462c32d18f1e9a8e54"}, + {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:41f696ba95cd92dc047e46b41b26dd24518384749ed0d99bea0a941ca87404c4"}, + {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86d2a77fd490ae3ff6fae1c6ceaecad063d3cc2320b44377efdde79880e11526"}, + {file = "websockets-11.0.3-cp310-cp310-win32.whl", hash = "sha256:2d903ad4419f5b472de90cd2d40384573b25da71e33519a67797de17ef849b69"}, + {file = "websockets-11.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:1d2256283fa4b7f4c7d7d3e84dc2ece74d341bce57d5b9bf385df109c2a1a82f"}, + {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e848f46a58b9fcf3d06061d17be388caf70ea5b8cc3466251963c8345e13f7eb"}, + {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa5003845cdd21ac0dc6c9bf661c5beddd01116f6eb9eb3c8e272353d45b3288"}, + {file = "websockets-11.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b58cbf0697721120866820b89f93659abc31c1e876bf20d0b3d03cef14faf84d"}, + 
{file = "websockets-11.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:660e2d9068d2bedc0912af508f30bbeb505bbbf9774d98def45f68278cea20d3"}, + {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1f0524f203e3bd35149f12157438f406eff2e4fb30f71221c8a5eceb3617b6b"}, + {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:def07915168ac8f7853812cc593c71185a16216e9e4fa886358a17ed0fd9fcf6"}, + {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b30c6590146e53149f04e85a6e4fcae068df4289e31e4aee1fdf56a0dead8f97"}, + {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:619d9f06372b3a42bc29d0cd0354c9bb9fb39c2cbc1a9c5025b4538738dbffaf"}, + {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01f5567d9cf6f502d655151645d4e8b72b453413d3819d2b6f1185abc23e82dd"}, + {file = "websockets-11.0.3-cp311-cp311-win32.whl", hash = "sha256:e1459677e5d12be8bbc7584c35b992eea142911a6236a3278b9b5ce3326f282c"}, + {file = "websockets-11.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:e7837cb169eca3b3ae94cc5787c4fed99eef74c0ab9506756eea335e0d6f3ed8"}, + {file = "websockets-11.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f59a3c656fef341a99e3d63189852be7084c0e54b75734cde571182c087b152"}, + {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2529338a6ff0eb0b50c7be33dc3d0e456381157a31eefc561771ee431134a97f"}, + {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34fd59a4ac42dff6d4681d8843217137f6bc85ed29722f2f7222bd619d15e95b"}, + {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:332d126167ddddec94597c2365537baf9ff62dfcc9db4266f263d455f2f031cb"}, + {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6505c1b31274723ccaf5f515c1824a4ad2f0d191cec942666b3d0f3aa4cb4007"}, + {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f467ba0050b7de85016b43f5a22b46383ef004c4f672148a8abf32bc999a87f0"}, + {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9d9acd80072abcc98bd2c86c3c9cd4ac2347b5a5a0cae7ed5c0ee5675f86d9af"}, + {file = "websockets-11.0.3-cp37-cp37m-win32.whl", hash = "sha256:e590228200fcfc7e9109509e4d9125eace2042fd52b595dd22bbc34bb282307f"}, + {file = "websockets-11.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b16fff62b45eccb9c7abb18e60e7e446998093cdcb50fed33134b9b6878836de"}, + {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fb06eea71a00a7af0ae6aefbb932fb8a7df3cb390cc217d51a9ad7343de1b8d0"}, + {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a34e13a62a59c871064dfd8ffb150867e54291e46d4a7cf11d02c94a5275bae"}, + {file = "websockets-11.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4841ed00f1026dfbced6fca7d963c4e7043aa832648671b5138008dc5a8f6d99"}, + {file = "websockets-11.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a073fc9ab1c8aff37c99f11f1641e16da517770e31a37265d2755282a5d28aa"}, + {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68b977f21ce443d6d378dbd5ca38621755f2063d6fdb3335bda981d552cfff86"}, + {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a99a7a71631f0efe727c10edfba09ea6bee4166a6f9c19aafb6c0b5917d09c"}, + {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bee9fcb41db2a23bed96c6b6ead6489702c12334ea20a297aa095ce6d31370d0"}, + {file = 
"websockets-11.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4b253869ea05a5a073ebfdcb5cb3b0266a57c3764cf6fe114e4cd90f4bfa5f5e"}, + {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1553cb82942b2a74dd9b15a018dce645d4e68674de2ca31ff13ebc2d9f283788"}, + {file = "websockets-11.0.3-cp38-cp38-win32.whl", hash = "sha256:f61bdb1df43dc9c131791fbc2355535f9024b9a04398d3bd0684fc16ab07df74"}, + {file = "websockets-11.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:03aae4edc0b1c68498f41a6772d80ac7c1e33c06c6ffa2ac1c27a07653e79d6f"}, + {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:777354ee16f02f643a4c7f2b3eff8027a33c9861edc691a2003531f5da4f6bc8"}, + {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c82f11964f010053e13daafdc7154ce7385ecc538989a354ccc7067fd7028fd"}, + {file = "websockets-11.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3580dd9c1ad0701169e4d6fc41e878ffe05e6bdcaf3c412f9d559389d0c9e016"}, + {file = "websockets-11.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1a3f10f836fab6ca6efa97bb952300b20ae56b409414ca85bff2ad241d2a61"}, + {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df41b9bc27c2c25b486bae7cf42fccdc52ff181c8c387bfd026624a491c2671b"}, + {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279e5de4671e79a9ac877427f4ac4ce93751b8823f276b681d04b2156713b9dd"}, + {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1fdf26fa8a6a592f8f9235285b8affa72748dc12e964a5518c6c5e8f916716f7"}, + {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:69269f3a0b472e91125b503d3c0b3566bda26da0a3261c49f0027eb6075086d1"}, + {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:97b52894d948d2f6ea480171a27122d77af14ced35f62e5c892ca2fae9344311"}, + {file = "websockets-11.0.3-cp39-cp39-win32.whl", hash = "sha256:c7f3cb904cce8e1be667c7e6fef4516b98d1a6a0635a58a57528d577ac18a128"}, + {file = "websockets-11.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c792ea4eabc0159535608fc5658a74d1a81020eb35195dd63214dcf07556f67e"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e58f2c36cc52d41f2659e4c0cbf7353e28c8c9e63e30d8c6d3494dc9fdedcf"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de36fe9c02995c7e6ae6efe2e205816f5f00c22fd1fbf343d4d18c3d5ceac2f5"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ac56b661e60edd453585f4bd68eb6a29ae25b5184fd5ba51e97652580458998"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e052b8467dd07d4943936009f46ae5ce7b908ddcac3fda581656b1b19c083d9b"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:42cc5452a54a8e46a032521d7365da775823e21bfba2895fb7b77633cce031bb"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e6316827e3e79b7b8e7d8e3b08f4e331af91a48e794d5d8b099928b6f0b85f20"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8531fdcad636d82c517b26a448dcfe62f720e1922b33c81ce695d0edb91eb931"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c114e8da9b475739dde229fd3bc6b05a6537a88a578358bc8eb29b4030fac9c9"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e063b1865974611313a3849d43f2c3f5368093691349cf3c7c8f8f75ad7cb280"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:92b2065d642bf8c0a82d59e59053dd2fdde64d4ed44efe4870fa816c1232647b"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0ee68fe502f9031f19d495dae2c268830df2760c0524cbac5d759921ba8c8e82"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcacf2c7a6c3a84e720d1bb2b543c675bf6c40e460300b628bab1b1efc7c034c"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b67c6f5e5a401fc56394f191f00f9b3811fe843ee93f4a70df3c389d1adf857d"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5023a4b6a5b183dc838808087033ec5df77580485fc533e7dab2567851b0a4"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed058398f55163a79bb9f06a90ef9ccc063b204bb346c4de78efc5d15abfe602"}, + {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"}, + {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"}, +] + [[package]] name = "wheel" version = "0.41.2" @@ -2732,4 +2957,4 @@ ct2 = ["ctranslate2"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "015ca70062293ec1c0e0c60d9d409e32b8965dcda93052f29a2cb567b047c075" +content-hash = "2db5df6bd4130cfad8cf4dc719e09414150b877301d86751c8206aead10f9869" diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 32491860..969b7821 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -14,7 +14,7 @@ pydantic = ">=2.4.2,<3" torch = ">=2.0.0, !=2.0.1, !=2.1.0" sentence-transformers = 
"^2.2.2" prometheus-fastapi-instrumentator = "^6.1.0" -uvicorn = "^0.23.2" +uvicorn = {extras = ["standard"], version = "^0.23.2"} rich = "^13.6.0" numpy = "^1" ctranslate2 = {version = "^3.20.0", optional=true} diff --git a/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py b/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py index f21aeacc..8aecafc2 100644 --- a/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py +++ b/libs/infinity_emb/tests/unit_test/inference/test_batch_handler.py @@ -11,7 +11,6 @@ from infinity_emb.inference import BatchHandler from infinity_emb.inference.models import ( SentenceTransformerPatched, - get_lengths_with_tokenize, ) BATCH_SIZE = 32