Skip to content

Commit

Permalink
Colipali - ready for review. (#423)
Browse files Browse the repository at this point in the history
* add colpali

* add colpali 2

* add colipali

* add colpali test v3

* update autogenerated files

* update torch model

* fix: test with torch vision model

* lint

* fix: openapi

* remove: int8 test

* improve ports on server hooks and feedback

* resort: import lines

* rename: torch image and audio classes
  • Loading branch information
michaelfeil authored Oct 14, 2024
1 parent 0f1b786 commit faf5918
Show file tree
Hide file tree
Showing 22 changed files with 1,376 additions and 921 deletions.
6 changes: 3 additions & 3 deletions docs/assets/create_openapi_with_server_hook.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ cleanup() {
trap cleanup EXIT

# Start infinity_emb in the background
infinity_emb v2 --log-level error --engine debugengine &
infinity_emb v2 --log-level error --engine debugengine --port 7996 &
INFINITY_PID=$!
echo "infinity_emb started with PID $INFINITY_PID"

# Wait for infinity_emb to be ready
for i in {1..10}; do
if wget -q --spider http://0.0.0.0:7997/openapi.json; then
if wget -q --spider http://0.0.0.0:7996/openapi.json; then
echo "infinity_emb is ready."
break
else
Expand All @@ -32,4 +32,4 @@ for i in {1..10}; do
done

# Download the openapi.json
wget http://0.0.0.0:7997/openapi.json -O "$SCRIPT_DIR/openapi.json"
wget http://0.0.0.0:7996/openapi.json -O "$SCRIPT_DIR/openapi.json"
2 changes: 1 addition & 1 deletion docs/assets/openapi.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions libs/client_infinity/run_generate_with_hook.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ cleanup() {
trap cleanup EXIT

# Start infinity_emb in the background
infinity_emb v2 --log-level error --engine debugengine &
infinity_emb v2 --log-level error --engine debugengine --no-model-warmup --port 7994 &
INFINITY_PID=$!
echo "infinity_emb started with PID $INFINITY_PID"

# Wait for infinity_emb to be ready
for i in {1..10}; do
if wget -q --spider http://0.0.0.0:7997/openapi.json; then
if wget -q --spider http://0.0.0.0:7994/openapi.json; then
echo "infinity_emb is ready."
break
else
Expand Down
6 changes: 3 additions & 3 deletions libs/client_infinity/run_tests_with_hook.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ cleanup() {
trap cleanup EXIT

# Start infinity_emb in the background
infinity_emb v2 --log-level error --engine debugengine &
infinity_emb v2 --log-level error --engine debugengine --port 7993 &
INFINITY_PID=$!
echo "infinity_emb started with PID $INFINITY_PID"

# Wait for infinity_emb to be ready
for i in {1..10}; do
if wget -q --spider http://0.0.0.0:7997/openapi.json; then
if wget -q --spider http://0.0.0.0:7993/openapi.json; then
echo "infinity_emb is ready."
break
else
Expand All @@ -32,7 +32,7 @@ done
# Run the tests
pip install openapi-python-client==0.21.1
openapi-python-client generate \
--url http://0.0.0.0:7997/openapi.json \
--url http://0.0.0.0:7993/openapi.json \
--config client_config.yaml \
--overwrite \
--custom-template-path=./template
Expand Down
2 changes: 1 addition & 1 deletion libs/client_infinity/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import time
import pytest

pytest.URL = "http://0.0.0.0:7997"
pytest.URL = "http://0.0.0.0:7994"


@pytest.fixture
Expand Down
8 changes: 6 additions & 2 deletions libs/infinity_emb/infinity_emb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@
import importlib.metadata
import os

import huggingface_hub.constants # type: ignore

### Check if HF_HUB_ENABLE_HF_TRANSFER is set, if not try to enable it
if "HF_HUB_ENABLE_HF_TRANSFER" not in os.environ:
try:
# enable hf hub transfer if available
import hf_transfer # type: ignore # noqa

# Needs to be set at the top of the file, before any other imports read these variables
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
import huggingface_hub.constants # type: ignore

huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True
except ImportError:
pass
import huggingface_hub.constants # type: ignore

huggingface_hub.constants.HF_HUB_DISABLE_PROGRESS_BARS = True


Expand Down
16 changes: 8 additions & 8 deletions libs/infinity_emb/infinity_emb/_optional_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,20 @@ def _raise_error(self) -> None:
raise ImportError(msg)


CHECK_DISKCACHE = OptionalImports("diskcache", "cache")
CHECK_AIOHTTP = OptionalImports("aiohttp", "server")
CHECK_COLPALI_ENGINE = OptionalImports("colpali_engine", "vision")
CHECK_CTRANSLATE2 = OptionalImports("ctranslate2", "ctranslate2")
CHECK_DISKCACHE = OptionalImports("diskcache", "cache")
CHECK_FASTAPI = OptionalImports("fastapi", "server")
CHECK_ONNXRUNTIME = OptionalImports("optimum.onnxruntime", "optimum")
CHECK_OPTIMUM = OptionalImports("optimum", "optimum")
CHECK_OPTIMUM_NEURON = OptionalImports("optimum.neuron", "neuronx")
CHECK_SENTENCE_TRANSFORMERS = OptionalImports("sentence_transformers", "torch")
CHECK_TRANSFORMERS = OptionalImports("transformers", "torch")
CHECK_TORCH = OptionalImports("torch.nn", "torch")
# CHECK_REQUESTS = OptionalImports("requests", "server")
CHECK_POSTHOG = OptionalImports("posthog", "server")
CHECK_AIOHTTP = OptionalImports("aiohttp", "server")
CHECK_PIL = OptionalImports("PIL", "vision")
CHECK_SOUNDFILE = OptionalImports("soundfile", "audio")
CHECK_POSTHOG = OptionalImports("posthog", "server")
CHECK_PYDANTIC = OptionalImports("pydantic", "server")
CHECK_SENTENCE_TRANSFORMERS = OptionalImports("sentence_transformers", "torch")
CHECK_SOUNDFILE = OptionalImports("soundfile", "audio")
CHECK_TORCH = OptionalImports("torch.nn", "torch")
CHECK_TRANSFORMERS = OptionalImports("transformers", "torch")
CHECK_TYPER = OptionalImports("typer", "server")
CHECK_UVICORN = OptionalImports("uvicorn", "server")
9 changes: 6 additions & 3 deletions libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ class AudioEmbeddingInput(ImageEmbeddingInput):

class _EmbeddingObject(BaseModel):
object: Literal["embedding"] = "embedding"
embedding: Union[list[float], bytes]
embedding: Union[list[float], bytes, list[list[float]]]
index: int


Expand All @@ -187,7 +187,7 @@ class OpenAIEmbeddingResult(BaseModel):

@staticmethod
def to_embeddings_response(
embeddings: Iterable["EmbeddingReturnType"],
embeddings: Union[Iterable["EmbeddingReturnType"], np.ndarray],
engine_args: "EngineArgs",
usage: int,
encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float,
Expand All @@ -198,7 +198,10 @@ def to_embeddings_response(
f"model {engine_args.served_model_name} does not support base64 encoding, as it uses uint8-bitpacking with {engine_args.embedding_dtype}"
)
embeddings = [base64.b64encode(np.frombuffer(emb.astype(np.float32), dtype=np.float32)) for emb in embeddings] # type: ignore

elif isinstance(embeddings, np.ndarray):
embeddings = embeddings.tolist()
else:
embeddings = [e.tolist() for e in embeddings]
return dict(
model=engine_args.served_model_name,
data=[
Expand Down
14 changes: 8 additions & 6 deletions libs/infinity_emb/infinity_emb/inference/select_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from infinity_emb.log_handler import logger
from infinity_emb.transformer.abstract import BaseCrossEncoder, BaseEmbedder
from infinity_emb.transformer.utils import (
ClapLikeEngine,
ClipLikeEngine,
AudioEmbedEngine,
EmbedderEngine,
ImageEmbedEngine,
InferenceEngine,
PredictEngine,
RerankEngine,
Expand All @@ -22,7 +22,9 @@

def get_engine_type_from_config(
engine_args: EngineArgs,
) -> Union[EmbedderEngine, RerankEngine, PredictEngine, ClipLikeEngine, ClapLikeEngine]:
) -> Union[
EmbedderEngine, RerankEngine, PredictEngine, ImageEmbedEngine, AudioEmbedEngine
]:
"""resolved the class of inference engine path from config.json of the repo."""
if engine_args.engine in [InferenceEngine.debugengine]:
return EmbedderEngine.from_inference_engine(engine_args.engine)
Expand Down Expand Up @@ -50,10 +52,10 @@ def get_engine_type_from_config(
return RerankEngine.from_inference_engine(engine_args.engine)
else:
return PredictEngine.from_inference_engine(engine_args.engine)
if config.get("vision_config") and "clip" in config.get("model_type", "").lower():
return ClipLikeEngine.from_inference_engine(engine_args.engine)
if config.get("vision_config"):
return ImageEmbedEngine.from_inference_engine(engine_args.engine)
if config.get("audio_config") and "clap" in config.get("model_type", "").lower():
return ClapLikeEngine.from_inference_engine(engine_args.engine)
return AudioEmbedEngine.from_inference_engine(engine_args.engine)

else:
return EmbedderEngine.from_inference_engine(engine_args.engine)
Expand Down
6 changes: 3 additions & 3 deletions libs/infinity_emb/infinity_emb/infinity_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ async def _embeddings(data: MultiModalOpenAIEmbedding):
"model": "openai/clip-vit-base-patch32",
"encoding_format": "base64",
"input": [
http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg",
# can also be base64 encoded
],
# set extra modality to image to process as image
Expand Down Expand Up @@ -310,7 +310,7 @@ def url_to_base64(url, modality = "image"):
client.embeddings.create(
model="laion/larger_clap_general",
input=[url_to_base64(url, "audio")],
encoding_format= "base64",
encoding_format="float",
extra_body={
"modality": "audio"
}
Expand All @@ -319,7 +319,7 @@ def url_to_base64(url, modality = "image"):
client.embeddings.create(
model="laion/larger_clap_general",
input=["the sound of a beep", "the sound of a cat"],
encoding_format= "base64",
encoding_format="base64", # base64: optional high performance setting
extra_body={
"modality": "text"
}
Expand Down
7 changes: 7 additions & 0 deletions libs/infinity_emb/infinity_emb/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def default_value():
return Device.auto.value

def resolve(self) -> Optional[str]:
    """Return the torch device string, or ``None`` when the device is ``auto``."""
    return None if self == Device.auto else self.value
Expand All @@ -134,6 +135,12 @@ class Dtype(EnumType):
def default_value():
return Dtype.auto.value

def resolve(self) -> Optional[str]:
    """Return the torch dtype string, or ``None`` when the dtype is ``auto``."""
    return None if self == Dtype.auto else self.value


class EmbeddingDtype(EnumType):
float32: str = "float32"
Expand Down
4 changes: 2 additions & 2 deletions libs/infinity_emb/infinity_emb/transformer/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def warmup(self, *, batch_size: int = 64, n_tokens=1) -> tuple[float, float, str
return run_warmup(self, inp)


class BaseClipVisionModel(BaseEmbedder): # Inherit from ABC(Abstract base class)
class BaseTIMM(BaseEmbedder): # Inherit from ABC(Abstract base class)
capabilities = {"embed", "image_embed"}

@property
Expand Down Expand Up @@ -136,7 +136,7 @@ def warmup(self, *, batch_size: int = 64, n_tokens=1) -> tuple[float, float, str
return run_warmup(self, inp)


class BaseClapAudioModel(BaseEmbedder): # Inherit from ABC(Abstract base class)
class BaseAudioEmbedModel(BaseEmbedder): # Inherit from ABC(Abstract base class)
capabilities = {"embed", "audio_embed"}

@property
Expand Down
4 changes: 2 additions & 2 deletions libs/infinity_emb/infinity_emb/transformer/audio/torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from infinity_emb._optional_imports import CHECK_TORCH, CHECK_TRANSFORMERS
from infinity_emb.args import EngineArgs
from infinity_emb.primitives import AudioInputType
from infinity_emb.transformer.abstract import BaseClapAudioModel
from infinity_emb.transformer.abstract import BaseAudioEmbedModel
from infinity_emb.transformer.quantization.interface import quant_embedding_decorator

if TYPE_CHECKING:
Expand All @@ -20,7 +20,7 @@
from transformers import AutoModel, AutoProcessor # type: ignore


class ClapLikeModel(BaseClapAudioModel):
class TorchAudioModel(BaseAudioEmbedModel):
"""Audio model for CLAP models"""

def __init__(self, *, engine_args: EngineArgs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def encode_core(self, features: np.ndarray) -> EmbeddingReturnType:

@quant_embedding_decorator()
def encode_post(self, embedding: EmbeddingReturnType):
return embedding
return [e for e in embedding]

def tokenize_lengths(self, sentences: list[str]) -> list[int]:
    """Return the character length of each sentence, in input order."""
    return list(map(len, sentences))
20 changes: 10 additions & 10 deletions libs/infinity_emb/infinity_emb/transformer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Callable

from infinity_emb.primitives import InferenceEngine
from infinity_emb.transformer.audio.torch import ClapLikeModel
from infinity_emb.transformer.audio.torch import TorchAudioModel
from infinity_emb.transformer.classifier.torch import SentenceClassifier
from infinity_emb.transformer.crossencoder.optimum import OptimumCrossEncoder
from infinity_emb.transformer.crossencoder.torch import (
Expand All @@ -18,7 +18,7 @@
from infinity_emb.transformer.embedder.sentence_transformer import (
SentenceTransformerPatched,
)
from infinity_emb.transformer.vision.torch_vision import ClipLikeModel
from infinity_emb.transformer.vision.torch_vision import TIMM

__all__ = [
"length_tokenizer",
Expand Down Expand Up @@ -63,26 +63,26 @@ def from_inference_engine(engine: InferenceEngine):
raise NotImplementedError(f"RerankEngine for {engine} not implemented")


class ClipLikeEngine(Enum):
torch = ClipLikeModel
class ImageEmbedEngine(Enum):
torch = TIMM

@staticmethod
def from_inference_engine(engine: InferenceEngine):
if engine == InferenceEngine.torch:
return ClipLikeEngine.torch
return ImageEmbedEngine.torch
else:
raise NotImplementedError(f"ClipLikeEngine for {engine} not implemented")
raise NotImplementedError(f"ImageEmbedEngine for {engine} not implemented")


class ClapLikeEngine(Enum):
torch = ClapLikeModel
class AudioEmbedEngine(Enum):
torch = TorchAudioModel

@staticmethod
def from_inference_engine(engine: InferenceEngine):
if engine == InferenceEngine.torch:
return ClapLikeEngine.torch
return AudioEmbedEngine.torch
else:
raise NotImplementedError(f"ClapLikeEngine for {engine} not implemented")
raise NotImplementedError(f"AudioEmbedEngine for {engine} not implemented")


class PredictEngine(Enum):
Expand Down
8 changes: 8 additions & 0 deletions libs/infinity_emb/infinity_emb/transformer/vision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2023-now michaelfeil

# Names of the "Col"-prefixed image models.
# NOTE(review): presumably matched against model/architecture names elsewhere -- confirm at the call site.
IMAGE_COL_MODELS = ["ColPali", "ColQwen2", "ColIdefics2"]

# Every image model name: "Clip" first, followed by all entries of IMAGE_COL_MODELS.
IMAGE_MODELS = ["Clip", *IMAGE_COL_MODELS]
Loading

0 comments on commit faf5918

Please sign in to comment.