Skip to content

Commit

Permalink
Merge pull request #383 from MervinPraison/develop
Browse files Browse the repository at this point in the history
Enhance Vision Model Training with Robust Dataset Conversion and Memo…
  • Loading branch information
MervinPraison authored Feb 9, 2025
2 parents de38f2f + 308c69a commit 95140d1
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 38 deletions.
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM python:3.11-slim
WORKDIR /app
COPY . .
RUN pip install flask praisonai==2.0.75 gunicorn markdown
RUN pip install flask praisonai==2.0.76 gunicorn markdown
EXPOSE 8080
CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]
2 changes: 1 addition & 1 deletion docs/api/praisonai/deploy.html
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ <h2 id="raises">Raises</h2>
file.write("FROM python:3.11-slim\n")
file.write("WORKDIR /app\n")
file.write("COPY . .\n")
file.write("RUN pip install flask praisonai==2.0.75 gunicorn markdown\n")
file.write("RUN pip install flask praisonai==2.0.76 gunicorn markdown\n")
file.write("EXPOSE 8080\n")
file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')

Expand Down
2 changes: 1 addition & 1 deletion praisonai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ class Praisonai < Formula

desc "AI tools for various AI applications"
homepage "https://github.com/MervinPraison/PraisonAI"
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/2.0.75.tar.gz"
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/2.0.76.tar.gz"
sha256 "1828fb9227d10f991522c3f24f061943a254b667196b40b1a3e4a54a8d30ce32" # Replace with actual SHA256 checksum
license "MIT"

Expand Down
2 changes: 1 addition & 1 deletion praisonai/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def create_dockerfile(self):
file.write("FROM python:3.11-slim\n")
file.write("WORKDIR /app\n")
file.write("COPY . .\n")
file.write("RUN pip install flask praisonai==2.0.75 gunicorn markdown\n")
file.write("RUN pip install flask praisonai==2.0.76 gunicorn markdown\n")
file.write("EXPOSE 8080\n")
file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')

Expand Down
102 changes: 71 additions & 31 deletions praisonai/train_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
import torch
import shutil
import subprocess
import gc # For garbage collection

from datasets import load_dataset, concatenate_datasets
from datasets import load_dataset, concatenate_datasets, Dataset
from unsloth import FastVisionModel, is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments
from trl import SFTTrainer
from tqdm import tqdm # Add progress bar


class TrainVisionModel:
Expand Down Expand Up @@ -62,11 +65,21 @@ def prepare_model(self):
use_gradient_checkpointing="unsloth"
)
print("DEBUG: Vision model and original tokenizer loaded.")
if original_tokenizer.pad_token is None:
original_tokenizer.pad_token = original_tokenizer.eos_token
original_tokenizer.model_max_length = self.config.get("max_seq_length", 2048)

# Use the full processor that supports image inputs.
self.hf_tokenizer = original_tokenizer

# Set pad token if needed
if not hasattr(self.hf_tokenizer, 'pad_token') or self.hf_tokenizer.pad_token is None:
if hasattr(self.hf_tokenizer, 'eos_token'):
self.hf_tokenizer.pad_token = self.hf_tokenizer.eos_token
elif hasattr(self.hf_tokenizer, 'bos_token'):
self.hf_tokenizer.pad_token = self.hf_tokenizer.bos_token

# Set max length
if hasattr(self.hf_tokenizer, 'model_max_length'):
self.hf_tokenizer.model_max_length = self.config.get("max_seq_length", 2048)

# Add vision-specific LoRA adapters
self.model = FastVisionModel.get_peft_model(
self.model,
Expand All @@ -85,38 +98,62 @@ def prepare_model(self):
print("DEBUG: Vision LoRA adapters added.")

def convert_sample(self, sample):
# Use a default instruction or one from config
instr = self.config.get("vision_instruction", "You are an expert radiographer. Describe accurately what you see in this image.")

instruction = self.config.get(
"vision_instruction",
"You are an expert radiographer. Describe accurately what you see in this image."
)
conversation = [
{"role": "user", "content": [
{"type": "text", "text": instr},
{"type": "image", "image": sample["image"]}
]},
{"role": "assistant", "content": [
{"type": "text", "text": sample["caption"]}
]}
{
"role": "user",
"content": [
{"type": "text", "text": instruction},
{"type": "image", "image": sample["image"]}
]
},
{
"role": "assistant",
"content": [
{"type": "text", "text": sample["caption"]}
]
},
]

return {"messages": conversation}

def load_datasets(self):
datasets = []
all_converted = []
for dataset_info in self.config["dataset"]:
print("DEBUG: Loading vision dataset:", dataset_info)
ds = load_dataset(dataset_info["name"], split=dataset_info.get("split_type", "train"))
print("DEBUG: Converting dataset to vision conversation format...")
ds = ds.map(self.convert_sample)
datasets.append(ds)
combined = concatenate_datasets(datasets)
print("DEBUG: Combined vision dataset has", len(combined), "examples.")
return combined
print("\nDEBUG: Loading vision dataset:", dataset_info)
ds = load_dataset(
dataset_info["name"],
split=dataset_info.get("split_type", "train")
)
print("DEBUG: Dataset size:", len(ds))
print("DEBUG: First raw sample:", ds[0])
print("DEBUG: Dataset features:", ds.features)

print("\nDEBUG: Converting dataset to vision conversation format...")
converted_ds = [self.convert_sample(sample) for sample in ds]

# Debug first converted sample
print("\nDEBUG: First converted sample structure:")
first = converted_ds[0]
print("DEBUG: Message keys:", first["messages"][0]["content"][1].keys())
print("DEBUG: Image type in converted:", type(first["messages"][0]["content"][1].get("image")))

all_converted.extend(converted_ds)

print("\nDEBUG: Combined vision dataset has", len(all_converted), "examples.")
return all_converted

def train_model(self):
print("DEBUG: Starting vision training...")
raw_dataset = self.load_datasets()

# Build training arguments using SFTConfig for vision tasks
sft_config = SFTConfig(
per_device_train_batch_size=self.config.get("per_device_train_batch_size", 2),
# Build training arguments using TrainingArguments
training_args = TrainingArguments(
per_device_train_batch_size=self.config.get("per_device_train_batch_size", 1),
gradient_accumulation_steps=self.config.get("gradient_accumulation_steps", 4),
warmup_steps=self.config.get("warmup_steps", 5),
max_steps=self.config.get("max_steps", 30),
Expand All @@ -131,18 +168,21 @@ def train_model(self):
output_dir=self.config.get("output_dir", "outputs"),
report_to="none" if not os.getenv("PRAISON_WANDB") else "wandb",
remove_unused_columns=False,
dataset_text_field="",
dataset_kwargs={"skip_prepare_dataset": True},
dataset_num_proc=self.config.get("dataset_num_proc", 4),
max_seq_length=self.config.get("max_seq_length", 2048)
# Add memory optimization settings
gradient_checkpointing=True,
max_grad_norm=1.0,
)

trainer = SFTTrainer(
model=self.model,
tokenizer=self.hf_tokenizer,
data_collator=UnslothVisionDataCollator(self.model, self.hf_tokenizer),
train_dataset=raw_dataset,
args=sft_config
args=training_args,
max_seq_length=self.config.get("max_seq_length", 2048),
dataset_text_field="", # Required for vision training
dataset_kwargs={"skip_prepare_dataset": True}, # Required for vision training
packing=False # Explicitly set packing to False
)
print("DEBUG: Beginning vision trainer.train() ...")
trainer.train()
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "PraisonAI"
version = "2.0.75"
version = "2.0.76"
description = "PraisonAI is an AI Agents Framework with Self Reflection. PraisonAI application combines PraisonAI Agents, AutoGen, and CrewAI into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customisation, and efficient human-agent collaboration."
readme = "README.md"
license = ""
Expand Down Expand Up @@ -84,7 +84,7 @@ autogen = ["pyautogen>=0.2.19", "praisonai-tools>=0.0.7", "crewai"]

[tool.poetry]
name = "PraisonAI"
version = "2.0.75"
version = "2.0.76"
description = "PraisonAI is an AI Agents Framework with Self Reflection. PraisonAI application combines PraisonAI Agents, AutoGen, and CrewAI into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customisation, and efficient human–agent collaboration."
authors = ["Mervin Praison"]
license = ""
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 95140d1

Please sign in to comment.