Skip to content

Commit

Permalink
[fix] tokenization mapping bug fix
Browse files: browse the repository at this point in the history
  • Loading branch information
wheresmyhair committed Jan 31, 2025
1 parent 93e9df4 commit d628561
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 6 deletions.
3 changes: 0 additions & 3 deletions src/lmflow/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,9 +631,6 @@ def __post_init__(self):
if self.validation_file is not None:
extension = self.validation_file.split(".")[-1]
assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file."

if self.skip_dataset_check:
logger.warning("Skip dataset check is enabled. Make sure the datasets are in the correct format.")


@dataclass
Expand Down
1 change: 0 additions & 1 deletion src/lmflow/models/hf_decoder_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ def tokenize(
"load_from_cache_file": not data_args.overwrite_cache,
"desc": "Running tokenizer on dataset",
"new_fingerprint": fingerprint,
"max_length": data_args.block_size,
}

if data_args.block_size < self.tokenizer.model_max_length:
Expand Down
11 changes: 9 additions & 2 deletions src/lmflow/pipeline/finetuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import numpy as np

import lmflow.optim.optimizers as optim
from lmflow.args import OptimizerNames
from lmflow.args import OptimizerNames, DatasetArguments, ModelArguments, FinetunerArguments
from lmflow.datasets.dataset import Dataset
from lmflow.pipeline.base_tuner import BaseTuner
from lmflow.pipeline.utils.peft_trainer import PeftTrainer, PeftSavingCallback
Expand Down Expand Up @@ -64,7 +64,14 @@ class Finetuner(BaseTuner):
Keyword arguments.
"""
def __init__(self, model_args, data_args, finetuner_args, *args, **kwargs):
def __init__(
self,
model_args: ModelArguments,
data_args: DatasetArguments,
finetuner_args: FinetunerArguments,
*args,
**kwargs
):

self.model_args = model_args
self.data_args = data_args
Expand Down

0 comments on commit d628561

Please sign in to comment.