From 3fce78994ebbe320e56c9244ef9f67a9f10bf87e Mon Sep 17 00:00:00 2001 From: Teruaki Ishizaki Date: Tue, 28 Jan 2025 04:54:54 +0900 Subject: [PATCH] community: Fixed the procedure of initializing pad_token_id (#29434) - **Description:** Check the model config's pad_token_id and eos_token_id before assigning the tokenizer's pad_token_id. It seems that this is the same bug as the HuggingFace TGI bug. In addition, the source code of libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py also requires similar changes. - **Issue:** #29431 - **Dependencies:** none - **Twitter handle:** tell14 --- .../langchain_community/llms/huggingface_pipeline.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/llms/huggingface_pipeline.py b/libs/community/langchain_community/llms/huggingface_pipeline.py index 44b4558d2dfbc..185405645eb52 100644 --- a/libs/community/langchain_community/llms/huggingface_pipeline.py +++ b/libs/community/langchain_community/llms/huggingface_pipeline.py @@ -169,7 +169,16 @@ def from_model_id( ) from e if tokenizer.pad_token is None: - tokenizer.pad_token_id = model.config.eos_token_id + if model.config.pad_token_id is not None: + tokenizer.pad_token_id = model.config.pad_token_id + elif model.config.eos_token_id is not None and isinstance( + model.config.eos_token_id, int + ): + tokenizer.pad_token_id = model.config.eos_token_id + elif tokenizer.eos_token_id is not None: + tokenizer.pad_token_id = tokenizer.eos_token_id + else: + tokenizer.add_special_tokens({"pad_token": "[PAD]"}) if ( (