diff --git a/README.md b/README.md
index 5fa1e67..29ce670 100644
--- a/README.md
+++ b/README.md
@@ -84,28 +84,24 @@ from xllm import Config
 from xllm.datasets import GeneralDataset
 from xllm.experiments import Experiment
 
-# 1. Init Config which controls the internal logic of xllm
-config = Config(model_name_or_path="facebook/opt-350m")
+# Init Config which controls the internal logic of xllm
+config = Config(model_name_or_path="HuggingFaceH4/zephyr-7b-beta")
 
-# 2. Prepare the data
+# Prepare the data
 train_data = ["Hello!"] * 100
-
-# 3. Load the data
 train_dataset = GeneralDataset.from_list(data=train_data)
 
-# 4. Init Experiment
+# Build Experiment from Config: init tokenizer and model, apply LoRA and so on
 experiment = Experiment(config=config, train_dataset=train_dataset)
-
-# 5. Build Experiment from Config: init tokenizer and model, apply LoRA and so on
 experiment.build()
 
-# 6. Run Experiment (training)
+# Run Experiment (training)
 experiment.run()
 
-# 7. [Optional] Fuse LoRA layers
+# [Optional] Fuse LoRA layers
 experiment.fuse_lora()
 
-# 8. [Optional] Push fused model (or just LoRA weight) to the HuggingFace Hub
+# [Optional] Push fused model (or just LoRA weight) to the HuggingFace Hub
 experiment.push_to_hub(repo_id="YOUR_NAME/MODEL_NAME")
 ```
 
@@ -116,7 +112,7 @@ experiment.push_to_hub(repo_id="YOUR_NAME/MODEL_NAME")
 
 ```python
 config = Config(
-    model_name_or_path="HuggingFaceH4/zephyr-7b-beta",
+    model_name_or_path="openchat/openchat_3.5",
     apply_lora=True,
 )
 ```
@@ -125,7 +121,7 @@ config = Config(
 
 ```python
 config = Config(
-    model_name_or_path="HuggingFaceH4/zephyr-7b-beta",
+    model_name_or_path="openchat/openchat_3.5",
     apply_lora=True,
     lora_rank=8,
     lora_alpha=32,
@@ -144,7 +140,7 @@ config = Config(
 
 ```python
 config = Config(
-    model_name_or_path="HuggingFaceH4/zephyr-7b-beta",
+    model_name_or_path="01-ai/Yi-34B",
     apply_lora=True,
     load_in_4bit=True,
     prepare_model_for_kbit_training=True,
@@ -155,7 +151,7 @@ config = Config(
 
 ```python
 config = Config(
-    model_name_or_path="HuggingFaceH4/zephyr-7b-beta",
+    model_name_or_path="01-ai/Yi-34B",
     stabilize=True,
     apply_lora=True,
     load_in_4bit=True,
@@ -303,7 +299,7 @@ You can explicitly specify to fuse the model after training.
 config = Config(
     model_name_or_path="HuggingFaceH4/zephyr-7b-beta",
     apply_lora=True,
-    fuse_after_train=True,
+    fuse_after_training=True,
 )
 ```
 
@@ -315,7 +311,7 @@ config = Config(
     apply_lora=True,
     load_in_4bit=True,
     prepare_model_for_kbit_training=True,
-    fuse_after_train=True,
+    fuse_after_training=True,
 )
 ```
 
@@ -335,17 +331,29 @@ experiment.fuse_lora()
 `train.py`
 ```python
 from xllm.core.config import Config
+from xllm.datasets import GeneralDataset
 from xllm.cli.train import cli_run_train
 
 if __name__ == '__main__':
-    cli_run_train(config_cls=Config)
+    train_data = ["Hello!"] * 100
+    train_dataset = GeneralDataset.from_list(data=train_data)
+    cli_run_train(config_cls=Config, train_dataset=train_dataset)
 ```
 
-Run train
+Run train (set the `num_gpus` parameter to the number of GPUs you have)
 
 ```bash
 deepspeed --num_gpus=8 train.py --deepspeed_stage 2
 ```
 
+You can also pass other parameters
+```bash
+deepspeed --num_gpus=8 train.py \
+    --deepspeed_stage 2 \
+    --apply_lora True \
+    --stabilize True \
+    --use_gradient_checkpointing True
+```
+
 ### Colab notebooks
 
@@ -437,7 +445,7 @@ set it up this way for demo purposes, but we're planning to add more datasets so
 download and handle your dataset. Simply put, you take care of your data, and X—LLM handles the rest.
 We've done it this way on purpose, to give you plenty of room to get creative and customize to your heart's content.
 
-## Build your own project
+### Build your own project
 
 To set up your own project using X—LLM, you need to do two things:
 
diff --git a/src/xllm/cli/train.py b/src/xllm/cli/train.py
index 9aa9d0c..1f107f6 100644
--- a/src/xllm/cli/train.py
+++ b/src/xllm/cli/train.py
@@ -12,13 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Type
+from typing import Optional, Type
 
 from transformers import (
     HfArgumentParser,
 )
 
 from ..core.config import Config
+from ..datasets.general import GeneralDataset
 from ..experiments.base import Experiment
 from ..run.train import train
 from ..utils.cli import setup_cli
@@ -26,11 +27,13 @@
 
 def cli_run_train(
     config_cls: Type[Config] = Config,
+    train_dataset: Optional[GeneralDataset] = None,
+    eval_dataset: Optional[GeneralDataset] = None,
 ) -> Experiment:
     parser = HfArgumentParser(config_cls)
     config = parser.parse_args_into_dataclasses()[0]
     setup_cli(config=config, logger_path="./xllm_train.log")
-    experiment = train(config=config)
+    experiment = train(config=config, train_dataset=train_dataset, eval_dataset=eval_dataset)
     return experiment
 
 
diff --git a/src/xllm/core/config.py b/src/xllm/core/config.py
index 3727602..5ff3203 100644
--- a/src/xllm/core/config.py
+++ b/src/xllm/core/config.py
@@ -152,7 +152,7 @@ class Config:
             "help": "Local path to fused model. Useful if you want to quantize model after fusing on the same machine",
         },
     )
-    fuse_after_train: bool = field(
+    fuse_after_training: bool = field(
         default=False,
         metadata={
             "help": "Fuse or not model after training",
diff --git a/src/xllm/experiments/base.py b/src/xllm/experiments/base.py
index 99eec06..61e3aea 100644
--- a/src/xllm/experiments/base.py
+++ b/src/xllm/experiments/base.py
@@ -425,7 +425,7 @@ def run(self):
 
         self.after_train()
 
-        if self.config.fuse_after_train:
+        if self.config.fuse_after_training:
             self.fuse_lora()
 
         if is_distributed_training():
diff --git a/src/xllm/run/train.py b/src/xllm/run/train.py
index 8feaba2..3712b8f 100644
--- a/src/xllm/run/train.py
+++ b/src/xllm/run/train.py
@@ -11,21 +11,33 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 
 from ..core.config import Config
+from ..datasets.general import GeneralDataset
 from ..experiments.base import Experiment
 from ..experiments.registry import experiments_registry
 
 
 def train(
     config: Config,
+    train_dataset: Optional[GeneralDataset] = None,
+    eval_dataset: Optional[GeneralDataset] = None,
 ) -> Experiment:
     experiment_cls = experiments_registry.get(config.experiment_key)
 
     if experiment_cls is None:
         raise ValueError(f"Experiment class {config.experiment_key} not found")
 
-    experiment: Experiment = experiment_cls(config=config)
+    additional_kwargs = {}
+
+    if train_dataset is not None:
+        additional_kwargs["train_dataset"] = train_dataset
+
+    if eval_dataset is not None:
+        additional_kwargs["eval_dataset"] = eval_dataset
+
+    experiment: Experiment = experiment_cls(config=config, **additional_kwargs)
     experiment.build()
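
A quick, hypothetical sanity-check sketch of the new optional dataset arguments introduced above (not part of the diff itself): it mirrors the README's `train.py` example and additionally exercises the `eval_dataset` path, assuming `GeneralDataset.from_list` accepts any list of strings. The dummy data is illustrative only.

```python
# Hypothetical usage of the new cli_run_train signature (not part of this diff).
from xllm.core.config import Config
from xllm.datasets import GeneralDataset
from xllm.cli.train import cli_run_train

if __name__ == "__main__":
    # Dummy in-memory datasets, mirroring the README example; replace with real samples.
    train_dataset = GeneralDataset.from_list(data=["Hello!"] * 100)
    eval_dataset = GeneralDataset.from_list(data=["Hello!"] * 10)

    # Both arguments are optional; when passed, they are forwarded through train()
    # into the Experiment via additional_kwargs.
    cli_run_train(
        config_cls=Config,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
```

Because both parameters default to `None`, existing `deepspeed ... train.py` invocations that rely purely on config-driven data loading keep working unchanged.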