Merge pull request #180 from bufferoverflow/feat/add-served_model_name-arg

feat: add served_model_name argument for the infinity_server
michaelfeil authored Mar 29, 2024
2 parents 1d27dde + 0fc7a2e commit 93189e6
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 2 additions & 0 deletions libs/infinity_emb/infinity_emb/args.py
@@ -21,6 +21,7 @@ class EngineArgs:
Args:
model_name_or_path, str: Defaults to "michaelfeil/bge-small-en-v1.5".
served_model_name, str: Defaults to "bge-small-en-v1.5".
batch_size, int: Defaults to 32.
revision, str: Defaults to None.
trust_remote_code, bool: Defaults to True.
@@ -38,6 +39,7 @@ class EngineArgs:
"""

model_name_or_path: str = "michaelfeil/bge-small-en-v1.5"
served_model_name: Optional[str] = None
batch_size: int = 32
revision: Optional[str] = None
trust_remote_code: bool = True
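For orientation, a minimal sketch of how the new field can be set when constructing EngineArgs, based only on the fields visible in this hunk (illustrative, not part of the commit):

    from infinity_emb.args import EngineArgs

    # Illustrative only: give the served model a short alias, distinct from the HF path.
    args = EngineArgs(
        model_name_or_path="michaelfeil/bge-small-en-v1.5",
        served_model_name="bge-small-en-v1.5",  # None keeps the previous naming behavior
        batch_size=32,
    )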
7 changes: 6 additions & 1 deletion libs/infinity_emb/infinity_emb/infinity_server.py
@@ -53,7 +53,9 @@ def create_server(
instrumentator = Instrumentator().instrument(app)
app.add_exception_handler(errors.OpenAIException, errors.openai_exception_handler)

MODEL_RESPONSE_NAME = "/".join(engine_args.model_name_or_path.split("/")[-2:])
MODEL_RESPONSE_NAME = engine_args.served_model_name or "/".join(
engine_args.model_name_or_path.split("/")[-2:]
)

@app.on_event("startup")
async def _startup():
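The or-fallback above preserves the old behavior: when served_model_name is None, the response name is still derived from the last two path segments of model_name_or_path. A standalone illustration of that expression (not part of the diff):

    # served_model_name unset -> name derived from the model path
    None or "/".join("michaelfeil/bge-small-en-v1.5".split("/")[-2:])
    # -> "michaelfeil/bge-small-en-v1.5"

    # served_model_name set -> used verbatim
    "bge-small-en-v1.5" or "/".join("michaelfeil/bge-small-en-v1.5".split("/")[-2:])
    # -> "bge-small-en-v1.5"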
@@ -216,6 +218,7 @@ async def _rerank(data: RerankInput):

def _start_uvicorn(
model_name_or_path: str = "michaelfeil/bge-small-en-v1.5",
served_model_name: Optional[str] = None,
batch_size: int = 32,
revision: Optional[str] = None,
trust_remote_code: bool = True,
@@ -240,6 +243,7 @@ def _start_uvicorn(
Args:
model_name_or_path, str: Huggingface model, e.g.
"michaelfeil/bge-small-en-v1.5".
served_model_name, str: name to serve the model under, e.g. "bge-small-en-v1.5". Defaults to None.
batch_size, int: batch size for forward pass.
revision: str: revision of the model.
trust_remote_code, bool: trust remote code.
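Based solely on the signature shown above, a hedged example of passing the new keyword through _start_uvicorn directly (host, port, and the remaining options left at their defaults; the call is illustrative, not taken from the commit):

    from infinity_emb.infinity_server import _start_uvicorn

    # Serve the model under a short alias instead of the full repository path.
    _start_uvicorn(
        model_name_or_path="michaelfeil/bge-small-en-v1.5",
        served_model_name="bge-small-en-v1.5",
        batch_size=32,
    )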
@@ -273,6 +277,7 @@ def _start_uvicorn(

engine_args = EngineArgs(
model_name_or_path=model_name_or_path,
served_model_name=served_model_name,
batch_size=batch_size,
revision=revision,
trust_remote_code=trust_remote_code,
