Skip to content

Commit

Permalink
update autotokenizer
Browse files (browse the repository at this point in the history)
  • Loading branch information
michaelfeil committed May 18, 2023
1 parent 7374fd8 commit 372b610
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions hf_hub_ctranslate2/translate.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
import ctranslate2
from transformers import AutoTokenizer
try:
from transformers import AutoTokenizer
autotokenizer_ok = True
except ImportError:
AutoTokenizer = object
autotokenizer_ok = False

try:
from typing import Literal
[Expand Down] [Expand Up]
@@ -44,9 +49,12 @@ def __init__(
self.tokenizer = tokenizer
else:
if "tokenizer.json" in os.listdir(model_path):
if not autotokenizer_ok:
raise ValueError("`pip install transformers` missing to load AutoTokenizer.")
self.tokenizer = AutoTokenizer.from_pretrained(model_path, fast=True)
if "tokenizer.json" in os.listdir(model_path):
self.tokenizer = AutoTokenizer.from_pretrained(model_path, fast=True)
else:
raise ValueError("no suitable Tokenizer found. "
"Please set one via tokenizer=AutoTokenizer.from_pretrained(..) arg.")


def _forward(self, *args: Any, **kwds: Any) -> Any:
Expand Down

0 comments on commit 372b610

Please sign in to comment.