Merge pull request #227 from michaelfeil/multi-cli-launch
add v2 to CLI
michaelfeil authored May 19, 2024
2 parents 8d78747 + 4d37317 commit 7013d3d
Showing 13 changed files with 343 additions and 134 deletions.
8 changes: 7 additions & 1 deletion docs/docs/index.md
@@ -33,12 +33,18 @@ pip install infinity-emb[all]
</details>

### Launch the CLI using a pre-built docker container (recommended)

```bash
port=7997
model=BAAI/bge-small-en-v1.5
volume=$PWD/data

docker run -it --gpus all -v $volume:/app/.cache -p $port:$port michaelf34/infinity:latest --model-name-or-path $model --port $port
docker run -it --gpus all \
-v $volume:/app/.cache \
-p $port:$port \
michaelf34/infinity:latest \
--model-name-or-path $model \
--port $port
```
The cache path inside the docker container is set by the environment variable `HF_HOME`.

30 changes: 21 additions & 9 deletions libs/infinity_emb/infinity_emb/args.py
@@ -55,32 +55,44 @@ class EngineArgs:
pooling_method: PoolingMethod = PoolingMethod.auto
lengths_via_tokenize: bool = False
embedding_dtype: EmbeddingDtype = EmbeddingDtype.float32
served_model_name: str = None # type: ignore
permissive_cors: bool = False
served_model_name: str = ""

def __post_init__(self):
# convert the following strings to enums
# so they don't need to be exported to the external interface
if isinstance(self.engine, str):
if not isinstance(self.engine, InferenceEngine):
object.__setattr__(self, "engine", InferenceEngine[self.engine])
if isinstance(self.device, str):
object.__setattr__(self, "device", Device[self.device])
if isinstance(self.dtype, str):
if not isinstance(self.device, Device):
if self.device is None:
object.__setattr__(self, "device", Device.auto)
else:
object.__setattr__(self, "device", Device[self.device])
if not isinstance(self.dtype, Dtype):
object.__setattr__(self, "dtype", Dtype[self.dtype])
if isinstance(self.pooling_method, str):
if not isinstance(self.pooling_method, PoolingMethod):
object.__setattr__(
self, "pooling_method", PoolingMethod[self.pooling_method]
)
if isinstance(self.embedding_dtype, str):
if not isinstance(self.embedding_dtype, EmbeddingDtype):
object.__setattr__(
self, "embedding_dtype", EmbeddingDtype[self.embedding_dtype]
)
if self.served_model_name is None:
if not self.served_model_name:
object.__setattr__(
self,
"served_model_name",
"/".join(self.model_name_or_path.split("/")[-2:]),
)
if self.revision is not None and self.revision == "":
object.__setattr__(self, "revision", None)
if isinstance(self.vector_disk_cache_path, bool):
object.__setattr__(
self,
"vector_disk_cache_path",
f"{self.engine}_{self.model_name_or_path.replace('/','_')}"
if self.vector_disk_cache_path
else "",
)

# after all done -> check if the dataclass is valid
if CHECK_PYDANTIC.is_available:
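The refactored `__post_init__` replaces the `isinstance(x, str)` checks with `not isinstance(x, <Enum>)` checks, so values that are already enums pass through unchanged, `device=None` falls back to `Device.auto`, and an empty `served_model_name` is treated like a missing one. Below is a minimal, self-contained sketch of that coercion pattern; `Device` and `Args` are simplified stand-ins for illustration, not the library's actual definitions.

```python
import dataclasses
from enum import Enum
from typing import Union


class Device(Enum):
    auto = "auto"
    cpu = "cpu"
    cuda = "cuda"


@dataclasses.dataclass(frozen=True)
class Args:
    model_name_or_path: str
    device: Union[Device, str, None] = None
    served_model_name: str = ""

    def __post_init__(self):
        # `not isinstance(..., Device)` also covers None and already-converted
        # values, unlike a plain `isinstance(..., str)` check.
        if not isinstance(self.device, Device):
            if self.device is None:
                object.__setattr__(self, "device", Device.auto)
            else:
                object.__setattr__(self, "device", Device[self.device])
        # An empty served_model_name (like the old None default) falls back
        # to the last two path segments of the model id.
        if not self.served_model_name:
            object.__setattr__(
                self,
                "served_model_name",
                "/".join(self.model_name_or_path.split("/")[-2:]),
            )


args = Args(model_name_or_path="BAAI/bge-small-en-v1.5")
assert args.device is Device.auto
assert args.served_model_name == "BAAI/bge-small-en-v1.5"

args = Args(model_name_or_path="BAAI/bge-small-en-v1.5", device="cuda")
assert args.device is Device.cuda
```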
2 changes: 1 addition & 1 deletion libs/infinity_emb/infinity_emb/engine.py
@@ -255,5 +255,5 @@ def __getitem__(self, index_or_name: Union[str, int]) -> "AsyncEmbeddingEngine":
return self.engines_dict[index_or_name]
raise IndexError(
f"Engine for model name {index_or_name} not found. "
"Available model names are {list(self.engines_dict.keys())}"
f"Available model names are {list(self.engines_dict.keys())}"
)
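The single change in `engine.py` fixes a missing `f` prefix: without it, the second string literal in the error message printed `{list(self.engines_dict.keys())}` verbatim instead of interpolating the available model names. A tiny illustration of the difference, using a simplified `names` list rather than the real `engines_dict`:

```python
# Adjacent string literals are concatenated, but only literals marked
# with `f` interpolate the expressions inside braces.
names = ["model-a", "model-b"]
broken = "Available model names are {names}"  # braces printed literally
fixed = f"Available model names are {names}"  # expression interpolated

print(broken)  # Available model names are {names}
print(fixed)   # Available model names are ['model-a', 'model-b']
```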
2 changes: 1 addition & 1 deletion libs/infinity_emb/infinity_emb/inference/select_model.py
@@ -54,7 +54,7 @@ def select_model(
logger.info(
f"model=`{engine_args.model_name_or_path}` selected, "
f"using engine=`{engine_args.engine.value}`"
f" and device=`{engine_args.device.value}`"
f" and device=`{engine_args.device.resolve()}`"
)
# TODO: add EncoderEngine
unloaded_engine = get_engine_type_from_config(engine_args)
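The log line in `select_model.py` now reports `engine_args.device.resolve()` instead of `engine_args.device.value`, presumably so that a device of `auto` is logged as the concrete device that was actually picked rather than the literal string `auto`. The sketch below shows what such a `resolve()` helper could look like on a device enum; it is a hypothetical illustration, since the real implementation is not part of this diff.

```python
from enum import Enum


class Device(Enum):
    auto = "auto"
    cpu = "cpu"
    cuda = "cuda"

    def resolve(self) -> str:
        """Map `auto` to a concrete backend; return other members as-is."""
        if self is not Device.auto:
            return self.value
        try:
            import torch  # optional in this sketch; used only to probe for a GPU
        except ImportError:
            return "cpu"
        return "cuda" if torch.cuda.is_available() else "cpu"


print(Device.auto.resolve())  # "cuda" on a GPU machine, otherwise "cpu"
print(Device.cpu.resolve())   # always "cpu"
```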
(The remaining changed files in this commit are not shown here.)
