diff --git a/gptqmodel/models/loader.py b/gptqmodel/models/loader.py
index ec532e54..5c9d1b2d 100644
--- a/gptqmodel/models/loader.py
+++ b/gptqmodel/models/loader.py
@@ -98,6 +98,8 @@ def get_model_local_path(pretrained_model_id_or_path, **kwargs):
     if is_local:
         return pretrained_model_id_or_path
     else:
+        # hf_transfer does not accept max_memory arg
+        kwargs.pop('max_memory', None)
         return snapshot_download(pretrained_model_id_or_path, **kwargs)
 
 def get_tokenizer(model_id_or_path, config, trust_remote_code: bool = False):
diff --git a/requirements.txt b/requirements.txt
index ea05a0a3..c09dc8bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ device-smi==0.3.3
 sentencepiece>=0.2.0
 protobuf>=5.29.1
 pillow>=10.4.0
+hf_transfer>=0.1.9
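
For context, a minimal sketch (not part of the patch) of the behavior this change works around: loader-style kwargs such as `max_memory` are meant for model loading, and `huggingface_hub.snapshot_download()` rejects unknown keyword arguments, so they must be stripped before the download call. The repo id, the `max_memory` value, and the `HF_HUB_ENABLE_HF_TRANSFER` opt-in shown here are illustrative assumptions, not values from the PR.

```python
import os
from huggingface_hub import snapshot_download

# Opt into the hf_transfer download backend added to requirements.txt
# (illustrative; the PR itself does not set this variable).
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

# Hypothetical loader kwargs mixing download and model-loading options.
kwargs = {"revision": "main", "max_memory": {0: "24GiB"}}

# snapshot_download() would raise TypeError on max_memory, so drop it first,
# mirroring the kwargs.pop() added in get_model_local_path().
kwargs.pop("max_memory", None)

local_path = snapshot_download("facebook/opt-125m", **kwargs)  # assumed repo id
print(local_path)
```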