Skip to content

Commit

Permalink
fix qwen2vl video infer processor (#1000)
Browse files Browse the repository at this point in the history
  • Loading branch information
nemonameless authored Jan 16, 2025
1 parent dd6c92e commit bf59927
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 16 deletions.
9 changes: 5 additions & 4 deletions deploy/qwen2_vl/video_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

from paddlemix.models.qwen2_vl import MIXQwen2Tokenizer
from paddlemix.models.qwen2_vl.modeling_qwen2_vl import (
Qwen2VLRotaryEmbedding,
Qwen2VLForConditionalGeneration,
Qwen2VLRotaryEmbedding,
)
from paddlemix.processors.qwen2_vl_processing import (
Qwen2VLImageProcessor,
Expand All @@ -43,10 +43,11 @@

image_processor = Qwen2VLImageProcessor()
tokenizer = MIXQwen2Tokenizer.from_pretrained(MODEL_NAME)
processor = Qwen2VLProcessor(image_processor, tokenizer)

min_pixels = 256 * 28 * 28 # 200704
max_pixels = 1280 * 28 * 28 # 1003520
processor = Qwen2VLProcessor(image_processor, tokenizer, min_pixels=min_pixels, max_pixels=max_pixels)
# min_pixels = 256 * 28 * 28 # 200704
# max_pixels = 1280 * 28 * 28 # 1003520
# processor = Qwen2VLProcessor(image_processor, tokenizer, min_pixels=min_pixels, max_pixels=max_pixels)

# Messages containing a video and a text query
messages = [
Expand Down
16 changes: 11 additions & 5 deletions paddlemix/examples/qwen2_vl/single_image_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@

def main(args):
paddle.seed(seed=0)
compute_dtype = "float16" if args.fp16 else "bfloat16"
compute_dtype = args.dtype
if "npu" in paddle.get_device():
is_bfloat16_supported = True
else:
is_bfloat16_supported = paddle.amp.is_bfloat16_supported()
if compute_dtype == "bfloat16" and not is_bfloat16_supported:
logger.warning("bfloat16 is not supported on your device,change to float32")
compute_dtype = "float32"
print("compute_dtype", compute_dtype)

model = Qwen2VLForConditionalGeneration.from_pretrained(args.model_path, dtype=compute_dtype)

Expand Down Expand Up @@ -86,7 +87,7 @@ def main(args):
start = time.time()
with paddle.no_grad():
generated_ids = model.generate(
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature, top_p=args.top_p
) # no prompt trimming needed: in Paddle the generate() output already excludes the input tokens
output_text = processor.batch_decode(
generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
Expand All @@ -103,11 +104,15 @@ def main(args):
else:
# Inference: Generation of the output
generated_ids = model.generate(
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature, top_p=args.top_p
) # no prompt trimming needed: in Paddle the generate() output already excludes the input tokens
output_text = processor.batch_decode(
generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(f"GPU memory_allocated: {paddle.device.cuda.memory_allocated() / 1024 ** 3:.2f} GB")
print(f"GPU max_memory_allocated: {paddle.device.cuda.max_memory_allocated() / 1024 ** 3:.2f} GB")
print(f"GPU memory_reserved: {paddle.device.cuda.memory_reserved() / 1024 ** 3:.2f} GB")
print(f"GPU max_memory_reserved: {paddle.device.cuda.max_memory_reserved() / 1024 ** 3:.2f} GB")
print("output_text:\n", output_text)


Expand All @@ -116,9 +121,10 @@ def main(args):
parser.add_argument("--model_path", type=str, default="Qwen/Qwen2-VL-2B-Instruct")
parser.add_argument("--question", type=str, default="Describe this image.")
parser.add_argument("--image_file", type=str, default="paddlemix/demo_images/examples_image1.jpg")
parser.add_argument("--top_p", type=float, default=0.01)
parser.add_argument("--temperature", type=float, default=0.01)
parser.add_argument("--max_new_tokens", type=int, default=128)
parser.add_argument("--fp16", action="store_true")
parser.add_argument("--max_new_tokens", type=int, default=512)
parser.add_argument("--dtype", type=str, default="bfloat16")
parser.add_argument("--benchmark", action="store_true")
args = parser.parse_args()
main(args)
22 changes: 15 additions & 7 deletions paddlemix/examples/qwen2_vl/video_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

def main(args):
paddle.seed(seed=0)
compute_dtype = "float16" if args.fp16 else "bfloat16"
compute_dtype = args.dtype
if "npu" in paddle.get_device():
is_bfloat16_supported = True
else:
Expand All @@ -41,9 +41,11 @@ def main(args):

image_processor = Qwen2VLImageProcessor()
tokenizer = MIXQwen2Tokenizer.from_pretrained(args.model_path)
min_pixels = 256 * 28 * 28 # 200704
max_pixels = 1280 * 28 * 28 # 1003520
processor = Qwen2VLProcessor(image_processor, tokenizer, min_pixels=min_pixels, max_pixels=max_pixels)
processor = Qwen2VLProcessor(image_processor, tokenizer)

# min_pixels = 256*28*28 # 200704
# max_pixels = 1280*28*28 # 1003520
# processor = Qwen2VLProcessor(image_processor, tokenizer, min_pixels=min_pixels, max_pixels=max_pixels)

# Messages containing a video and a text query
messages = [
Expand Down Expand Up @@ -87,7 +89,7 @@ def main(args):
start = time.time()
with paddle.no_grad():
generated_ids = model.generate(
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature, top_p=args.top_p
) # no prompt trimming needed: in Paddle the generate() output already excludes the input tokens
output_text = processor.batch_decode(
generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
Expand All @@ -104,11 +106,15 @@ def main(args):
else:
# Inference: Generation of the output
generated_ids = model.generate(
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature
**inputs, max_new_tokens=args.max_new_tokens, temperature=args.temperature, top_p=args.top_p
) # no prompt trimming needed: in Paddle the generate() output already excludes the input tokens
output_text = processor.batch_decode(
generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(f"GPU memory_allocated: {paddle.device.cuda.memory_allocated() / 1024 ** 3:.2f} GB")
print(f"GPU max_memory_allocated: {paddle.device.cuda.max_memory_allocated() / 1024 ** 3:.2f} GB")
print(f"GPU memory_reserved: {paddle.device.cuda.memory_reserved() / 1024 ** 3:.2f} GB")
print(f"GPU max_memory_reserved: {paddle.device.cuda.max_memory_reserved() / 1024 ** 3:.2f} GB")
print("output_text:\n", output_text)


Expand All @@ -117,8 +123,10 @@ def main(args):
parser.add_argument("--model_path", type=str, default="Qwen/Qwen2-VL-2B-Instruct")
parser.add_argument("--question", type=str, default="Describe this video.")
parser.add_argument("--video_file", type=str, default="paddlemix/demo_images/red-panda.mp4")
parser.add_argument("--top_p", type=float, default=0.01)
parser.add_argument("--temperature", type=float, default=0.01)
parser.add_argument("--max_new_tokens", type=int, default=128)
parser.add_argument("--max_new_tokens", type=int, default=512)
parser.add_argument("--dtype", type=str, default="bfloat16")
parser.add_argument("--fp16", action="store_true")
parser.add_argument("--benchmark", action="store_true")
args = parser.parse_args()
Expand Down

0 comments on commit bf59927

Please sign in to comment.