diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
index 28bce6f..c35fadf 100644
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -11,193 +11,73 @@ env:
jobs:
- build-on-linux:
- name: build / linux / ffmpeg ${{ matrix.ffmpeg_version }}
- runs-on: ubuntu-latest
- container: jrottenberg/ffmpeg:${{ matrix.ffmpeg_version }}-ubuntu
-
+ check:
+ name: Check
+ runs-on: ${{ matrix.os }}
strategy:
matrix:
- ffmpeg_version: ["4.3", "4.4", "5.0", "5.1", "6.0", "6.1", "7.0"]
- fail-fast: false
-
+ os: [ubuntu-latest, macOS-latest, windows-latest]
+ rust: [stable]
steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- apt update
- apt install -y --no-install-recommends clang curl pkg-config
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
with:
- toolchain: stable
-
- - name: Build
- run: cargo build
-
- build-on-macos:
- name: build / macos / ffmpeg latest
- runs-on: macos-latest
-
- steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- brew install ffmpeg pkg-config
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
+ profile: minimal
+ toolchain: ${{ matrix.rust }}
+ override: true
+ - uses: actions-rs/cargo@v1
with:
- toolchain: stable
-
- - name: Build
- run: cargo build
-
-
- build-on-windows:
- name: build / windows / ffmpeg latest
- runs-on: windows-latest
-
- env:
- FFMPEG_DOWNLOAD_URL: https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-full-shared.7z
-
- steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- $VCINSTALLDIR = $(& "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath)
- Add-Content $env:GITHUB_ENV "LIBCLANG_PATH=${VCINSTALLDIR}\VC\Tools\LLVM\x64\bin`n"
- Invoke-WebRequest "${env:FFMPEG_DOWNLOAD_URL}" -OutFile ffmpeg-release-full-shared.7z
- 7z x ffmpeg-release-full-shared.7z
- mkdir ffmpeg
- mv ffmpeg-*/* ffmpeg/
- Add-Content $env:GITHUB_ENV "FFMPEG_DIR=${pwd}\ffmpeg`n"
- Add-Content $env:GITHUB_PATH "${pwd}\ffmpeg\bin`n"
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
- with:
- toolchain: stable
-
- - name: Build
- run: cargo build
-
-
- test-on-linux:
- name: test / linux / ffmpeg ${{ matrix.ffmpeg_version }}
- runs-on: ubuntu-latest
- container: jrottenberg/ffmpeg:${{ matrix.ffmpeg_version }}-ubuntu
+ command: check
+ args: --all
+ test:
+ name: Test
+ runs-on: ${{ matrix.os }}
strategy:
matrix:
- ffmpeg_version: ["4.3", "4.4", "5.0", "5.1", "6.0", "6.1", "7.0"]
- fail-fast: false
-
+ os: [ubuntu-latest, macOS-latest, windows-latest]
+ rust: [stable]
steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- apt update
- apt install -y --no-install-recommends clang curl pkg-config
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
with:
- toolchain: stable
-
- - name: Run Tests with All Features
- run: cargo test --all-features
-
- - name: Run Tests in Release Mode
- run: cargo test --release
-
- test-on-macos:
- name: test / macos / ffmpeg latest
- runs-on: macos-latest
-
- steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- brew install ffmpeg pkg-config
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
+ profile: minimal
+ toolchain: ${{ matrix.rust }}
+ override: true
+ - uses: actions-rs/cargo@v1
with:
- toolchain: stable
-
- - name: Run Tests with All Features
- run: cargo test --all-features
-
- - name: Run Tests in Release Mode
- run: cargo test --release
-
- test-on-windows:
- name: test / windows / ffmpeg latest
- runs-on: windows-latest
-
- env:
- FFMPEG_DOWNLOAD_URL: https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-full-shared.7z
+ command: test
+ args: --all
+ fmt:
+ name: Rustfmt
+ runs-on: ubuntu-latest
steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- $VCINSTALLDIR = $(& "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath)
- Add-Content $env:GITHUB_ENV "LIBCLANG_PATH=${VCINSTALLDIR}\VC\Tools\LLVM\x64\bin`n"
- Invoke-WebRequest "${env:FFMPEG_DOWNLOAD_URL}" -OutFile ffmpeg-release-full-shared.7z
- 7z x ffmpeg-release-full-shared.7z
- mkdir ffmpeg
- mv ffmpeg-*/* ffmpeg/
- Add-Content $env:GITHUB_ENV "FFMPEG_DIR=${pwd}\ffmpeg`n"
- Add-Content $env:GITHUB_PATH "${pwd}\ffmpeg\bin`n"
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
with:
+ profile: minimal
toolchain: stable
-
- - name: Run Tests with All Features
- run: cargo test --all-features
-
- - name: Run Tests in Release Mode
- run: cargo test --release
-
+ override: true
+ - run: rustup component add rustfmt
+ - uses: actions-rs/cargo@v1
+ with:
+ command: fmt
+ args: --all -- --check
- lints:
+ clippy:
+ name: Clippy
runs-on: ubuntu-latest
- container: jrottenberg/ffmpeg:6-ubuntu
-
steps:
- - name: Checkout
- uses: actions/checkout@v3
-
- - name: Install dependencies
- run: |
- apt update
- apt install -y --no-install-recommends clang curl pkg-config
-
- - name: Setup Rust
- uses: dtolnay/rust-toolchain@v1
+ - uses: actions/checkout@v2
+ - uses: actions-rs/toolchain@v1
with:
+ profile: minimal
toolchain: stable
- components: rustfmt, clippy
-
- - name: Rustfmt
- run: cargo fmt --all -- --check
+ override: true
+ - run: rustup component add clippy
+ - uses: actions-rs/cargo@v1
+ with:
+ command: clippy
+ args: --all --all-targets -- -D warnings
- - name: Clippy
- run: cargo clippy --all --all-targets --all-features -- -D warnings
diff --git a/.gitignore b/.gitignore
index b99985e..e1a526e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@
debug/
target/
+**/*.DS_Store
+
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
@@ -13,7 +15,6 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
-
.debug
.vscode
runs/
diff --git a/Cargo.toml b/Cargo.toml
index c6c15d7..9e9179b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,64 +1,66 @@
[package]
name = "usls"
-version = "0.0.20"
+version = "0.1.0"
+rust-version = "1.79"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
authors = ["Jamjamjon "]
license = "MIT"
readme = "README.md"
-exclude = ["assets/*", "examples/*", "scripts/*", "runs/*"]
+exclude = ["assets/*", "examples/*", "runs/*", "benches/*"]
[dependencies]
-clap = { version = "4.2.4", features = ["derive"] }
+aksr = { version = "0.0.2" }
+image = { version = "0.25.2" }
+imageproc = { version = "0.24" }
ndarray = { version = "0.16.1", features = ["rayon"] }
-ort = { version = "2.0.0-rc.9", default-features = false }
+rayon = { version = "1.10.0" }
anyhow = { version = "1.0.75" }
regex = { version = "1.5.4" }
rand = { version = "0.8.5" }
chrono = { version = "0.4.30" }
-half = { version = "2.3.1" }
-dirs = { version = "5.0.1" }
-ureq = { version = "2.9.1", default-features = true, features = [
- "socks-proxy",
-] }
tokenizers = { version = "0.15.2" }
-rayon = "1.10.0"
+log = { version = "0.4.22" }
indicatif = "0.17.8"
-image = "0.25.2"
-imageproc = { version = "0.24" }
-ab_glyph = "0.2.23"
-geo = "0.28.0"
-prost = "0.12.4"
-fast_image_resize = { version = "4.2.1", features = ["image"] }
-serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
+serde = { version = "1.0", features = ["derive"] }
+ort = { version = "2.0.0-rc.9", default-features = false}
+prost = "0.12.4"
+ab_glyph = "0.2.23"
+dirs = { version = "5.0.1" }
tempfile = "3.12.0"
-video-rs = { version = "0.9.0", features = ["ndarray"] }
+geo = "0.28.0"
+half = { version = "2.3.1" }
+ureq = { version = "2.12.1", default-features = false, features = [ "tls" ] }
+fast_image_resize = { version = "4.2.1", features = ["image"]}
natord = "1.0.9"
-tracing = "0.1.40"
-tracing-subscriber = "0.3.18"
-minifb = "0.27.0"
+video-rs = { version = "0.10.0", features = ["ndarray"], optional = true }
+minifb = { version = "0.27.0", optional = true }
+sha2 = "0.10.8"
+[dev-dependencies]
+argh = "0.1.13"
+tracing-subscriber = { version = "0.3.18", features = ["env-filter", "chrono"] }
+
+[[example]]
+name = "viewer"
+required-features = ["ffmpeg"]
[features]
default = [
- "ort/load-dynamic",
- "ort/copy-dylibs",
- "ort/half",
- "ort/ndarray",
- "ort/cuda",
- "ort/tensorrt",
- "ort/coreml",
+ "ort/ndarray",
+ "ort/copy-dylibs",
+ "ort/load-dynamic",
+ "ort/half",
]
auto = ["ort/download-binaries"]
+ffmpeg = ["dep:video-rs", "dep:minifb"]
+cuda = [ "ort/cuda" ]
+trt = [ "ort/tensorrt" ]
+mps = [ "ort/coreml" ]
-[dev-dependencies]
-criterion = "0.5.1"
-
-[[bench]]
-name = "yolo"
-harness = false
-
-[lib]
-bench = false
+[profile.release]
+# lto = true
+strip = true
+panic = "abort"
diff --git a/README.md b/README.md
index 7211724..fb953e7 100644
--- a/README.md
+++ b/README.md
@@ -1,221 +1,161 @@
-
-
usls
-
+usls
- Documentation
-
-
+
+
+
+
+
+
+
+
+
-
+
-
+
-
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
-**`usls`** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including:
+**usls** is a Rust library integrated with **ONNXRuntime**, offering a suite of advanced models for **Computer Vision** and **Vision-Language** tasks, including:
-- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLOv11](https://github.com/ultralytics/ultralytics)
+- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLO11](https://github.com/ultralytics/ultralytics)
- **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
-- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro)
-- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242)
+- **Vision Models**: [RT-DETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro), [FastViT](https://github.com/apple/ml-fastvit), [BEiT](https://github.com/microsoft/unilm/tree/master/beit), [MobileOne](https://github.com/apple/ml-mobileone)
+- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242)
+- **OCR Models**: [FAST](https://github.com/czczup/FAST), [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947), [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159), [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html), [TrOCR](https://huggingface.co/microsoft/trocr-base-printed), [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
-Click to expand Supported Models
-
-## Supported Models
-
-| Model | Task / Type | Example | CUDA f32 | CUDA f16 | TensorRT f32 | TensorRT f16 |
-|---------------------------------------------------------------------|----------------------------------------------------------------------------------------------|----------------------------|----------|----------|--------------|--------------|
-| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv11](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
-| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
-| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
-| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ |
-| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ |
-| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ |
-| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
-| [Depth-Anything v1 & v2](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
-| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ |
-| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | |
-| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Body Part Segmentation | [demo](examples/sapiens) | ✅ | ✅ | | |
-| [Florence2](https://arxiv.org/abs/2311.06242) | a Variety of Vision Tasks | [demo](examples/florence2) | ✅ | ✅ | | |
-| [DepthPro](https://github.com/apple/ml-depth-pro) | Monocular Depth Estimation | [demo](examples/depth-pro) | ✅ | ✅ | | |
+👉 More Supported Models
+
+| Model | Task / Description | Example | CoreML | CUDA<br />FP32 | CUDA<br />FP16 | TensorRT<br />FP32 | TensorRT<br />FP16 |
+| -------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------- | ------ | -------------- | -------------- | ------------------ | ------------------ |
+| [BEiT](https://github.com/microsoft/unilm/tree/master/beit) | Image Classification | [demo](examples/beit) | ✅ | ✅ | ✅ | | |
+| [ConvNeXt](https://github.com/facebookresearch/ConvNeXt) | Image Classification | [demo](examples/convnext) | ✅ | ✅ | ✅ | | |
+| [FastViT](https://github.com/apple/ml-fastvit) | Image Classification | [demo](examples/fastvit) | ✅ | ✅ | ✅ | | |
+| [MobileOne](https://github.com/apple/ml-mobileone) | Image Classification | [demo](examples/mobileone) | ✅ | ✅ | ✅ | | |
+| [DeiT](https://github.com/facebookresearch/deit) | Image Classification | [demo](examples/deit) | ✅ | ✅ | ✅ | | |
+| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision Embedding | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv5](https://github.com/ultralytics/yolov5) | Image Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv8<br />YOLO11](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Image Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [RT-DETR](https://github.com/lyuwenyu/RT-DETR) | Object Detection | [demo](examples/rtdetr) | ✅ | ✅ | ✅ | | |
+| [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.8/configs/picodet) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | ✅ | | |
+| [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | ✅ | | |
+| [D-FINE](https://github.com/manhbd-22022602/D-FINE) | Object Detection | [demo](examples/d-fine) | ✅ | ✅ | ✅ | | |
+| [DEIM](https://github.com/ShihuaHuang95/DEIM) | Object Detection | [demo](examples/deim) | ✅ | ✅ | ✅ | | |
+| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | | |
+| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | | |
+| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | | |
+| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | | |
+| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | | |
+| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Open-Set Detection With Language | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | ✅ | | |
+| [CLIP](https://github.com/openai/CLIP) | Vision-Language Embedding | [demo](examples/clip) | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1) | Vision-Language Embedding | [demo](examples/clip) | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [BLIP](https://github.com/salesforce/BLIP) | Image Captioning | [demo](examples/blip) | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [FAST](https://github.com/czczup/FAST) | Text Detection | [demo](examples/fast) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [LinkNet](https://arxiv.org/abs/1707.03718) | Text Detection | [demo](examples/linknet) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html) | Table Recognition | [demo](examples/slanet) | ✅ | ✅ | ✅ | | |
+| [TrOCR](https://huggingface.co/microsoft/trocr-base-printed) | Text Recognition | [demo](examples/trocr) | ✅ | ✅ | ✅ | | |
+| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [DepthAnything v1<br />DepthAnything v2](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [DepthPro](https://github.com/apple/ml-depth-pro) | Monocular Depth Estimation | [demo](examples/depth-pro) | ✅ | ✅ | ✅ | | |
+| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Foundation for Human Vision Models | [demo](examples/sapiens) | ✅ | ✅ | ✅ | | |
+| [Florence2](https://arxiv.org/abs/2311.06242) | a Variety of Vision Tasks | [demo](examples/florence2) | ✅ | ✅ | ✅ | | |
+
+
+## ⛳️ Cargo Features
+By default, **none of the following features are enabled**. You can enable them as needed:
-
+- **`auto`**: Automatically downloads prebuilt ONNXRuntime binaries from Pyke’s CDN for supported platforms.
+ - If disabled, you'll need to [compile `ONNXRuntime` from source](https://github.com/microsoft/onnxruntime) or [download a precompiled package](https://github.com/microsoft/onnxruntime/releases), and then [link it manually](https://ort.pyke.io/setup/linking).
-## ⛳️ ONNXRuntime Linking
+
+ 👉 For Linux or macOS Users
+
+ - Download from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
+ - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
+ ```shell
+ export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.20.1
+ ```
+
+
+- **`ffmpeg`**: Adds support for video streams, real-time frame visualization, and video export.
+
+  - Powered by [video-rs](https://github.com/oddity-ai/video-rs) and [minifb](https://github.com/emoon/rust_minifb). For any issue related to the `ffmpeg` feature, please check the issue trackers of those two crates first. A typical invocation of the feature-gated `viewer` example is sketched after this list.
+- **`cuda`**: Enables the NVIDIA CUDA provider.
+- **`trt`**: Enables the NVIDIA TensorRT provider.
+- **`mps`**: Enables the Apple CoreML provider.
+
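+As a sketch of how the `ffmpeg` feature is used in practice: the `viewer` example declared in `Cargo.toml` (`[[example]] name = "viewer"`, `required-features = ["ffmpeg"]`) is gated on it, so an invocation would look something like this (exact flags may vary with your setup):
+
+```shell
+cargo run -r -F ffmpeg --example viewer
+```
+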
+## 🎈 Example
+
+* **Using `CUDA`**
-
-You have two options to link the ONNXRuntime library
-
-- ### Option 1: Manual Linking
-
- - #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking).
-
- - #### For Linux or macOS Users:
- - Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
- - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
- ```shell
- export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0
- ```
-
-- ### Option 2: Automatic Download
- Just use `--features auto`
- ```shell
- cargo run -r --example yolo --features auto
```
+ cargo run -r -F cuda --example yolo -- --device cuda:0
+ ```
+* **Using Apple `CoreML`**
-
+ ```
+ cargo run -r -F mps --example yolo -- --device mps
+ ```
+* **Using `TensorRT`**
+
+ ```
+ cargo run -r -F trt --example yolo -- --device trt
+ ```
+* **Using `CPU`**
+
+ ```
+ cargo run -r --example yolo
+ ```
-## 🎈 Demo
+All examples are located in the [examples](./examples/) directory.
+
+## 🥂 Integrate Into Your Own Project
+
+Add `usls` as a dependency to your project's `Cargo.toml`
```Shell
-cargo run -r --example yolo # blip, clip, yolop, svtr, db, ...
+cargo add usls -F cuda
```
-## 🥂 Integrate Into Your Own Project
+Or use a specific commit:
-- #### Add `usls` as a dependency to your project's `Cargo.toml`
- ```Shell
- cargo add usls
- ```
-
- Or use a specific commit:
- ```Toml
- [dependencies]
- usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
- ```
-
-- #### Follow the pipeline
- - Build model with the provided `models` and `Options`
- - Load images, video and stream with `DataLoader`
- - Do inference
-  - Retrieve inference results from `Vec<Y>`
- - Annotate inference results with `Annotator`
- - Display images and write them to video with `Viewer`
-
-
-
- example code
-
- ```rust
- use usls::{models::YOLO, Annotator, DataLoader, Nms, Options, Vision, YOLOTask, YOLOVersion};
-
- fn main() -> anyhow::Result<()> {
- // Build model with Options
- let options = Options::new()
- .with_trt(0)
- .with_model("yolo/v8-m-dyn.onnx")?
- .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
- .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
- .with_ixx(0, 0, (1, 2, 4).into())
- .with_ixx(0, 2, (0, 640, 640).into())
- .with_ixx(0, 3, (0, 640, 640).into())
- .with_confs(&[0.2]);
- let mut model = YOLO::new(options)?;
-
- // Build DataLoader to load image(s), video, stream
- let dl = DataLoader::new(
- // "./assets/bus.jpg", // local image
- // "images/bus.jpg", // remote image
- // "../images-folder", // local images (from folder)
- // "../demo.mp4", // local video
- // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // online video
- "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
- )?
- .with_batch(2) // iterate with batch_size = 2
- .build()?;
-
- // Build annotator
- let annotator = Annotator::new()
- .with_bboxes_thickness(4)
- .with_saveout("YOLO-DataLoader");
-
- // Build viewer
- let mut viewer = Viewer::new().with_delay(10).with_scale(1.).resizable(true);
-
- // Run and annotate results
- for (xs, _) in dl {
- let ys = model.forward(&xs, false)?;
- // annotator.annotate(&xs, &ys);
- let images_plotted = annotator.plot(&xs, &ys, false)?;
-
- // show image
- viewer.imshow(&images_plotted)?;
-
- // check out window and key event
- if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) {
- break;
- }
-
- // write video
- viewer.write_batch(&images_plotted)?;
-
- // Retrieve inference results
- for y in ys {
- // bboxes
- if let Some(bboxes) = y.bboxes() {
- for bbox in bboxes {
- println!(
- "Bbox: {}, {}, {}, {}, {}, {}",
- bbox.xmin(),
- bbox.ymin(),
- bbox.xmax(),
- bbox.ymax(),
- bbox.confidence(),
- bbox.id(),
- );
- }
- }
- }
- }
-
- // finish video write
- viewer.finish_write()?;
-
- Ok(())
- }
- ```
-
-
-
+```Toml
+[dependencies]
+usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
+```
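+
+For a minimal end-to-end sketch in code, the pattern below is adapted from the `d-fine` example added in this PR (the preset name, model struct, and annotator calls are copied from that example; other models follow the same shape with their own `Options` presets):
+
+```rust
+use usls::{models::RTDETR, Annotator, DataLoader, Options};
+
+fn main() -> anyhow::Result<()> {
+    // build the model from a preset (D-FINE nano, COCO)
+    let options = Options::d_fine_n_coco().commit()?;
+    let mut model = RTDETR::new(options)?;
+
+    // load an image and run inference
+    let xs = [DataLoader::try_read("./assets/bus.jpg")?];
+    let ys = model.forward(&xs)?;
+
+    // draw and save the results
+    let annotator = Annotator::default()
+        .with_bboxes_thickness(3)
+        .with_saveout(model.spec());
+    annotator.annotate(&xs, &ys);
+
+    Ok(())
+}
+```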
+
+## 🥳 If you find this helpful, please give it a star ⭐
## 📌 License
+
This project is licensed under [LICENSE](LICENSE).
diff --git a/benches/yolo.rs b/benches/yolo.rs
deleted file mode 100644
index 9ee3196..0000000
--- a/benches/yolo.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-use anyhow::Result;
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-
-use usls::{models::YOLO, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
-
-enum Stage {
- Pre,
- Run,
- Post,
- Pipeline,
-}
-
-fn yolo_stage_bench(
- model: &mut YOLO,
- x: &[image::DynamicImage],
- stage: Stage,
- n: u64,
-) -> std::time::Duration {
- let mut t_pre = std::time::Duration::new(0, 0);
- let mut t_run = std::time::Duration::new(0, 0);
- let mut t_post = std::time::Duration::new(0, 0);
- let mut t_pipeline = std::time::Duration::new(0, 0);
- for _ in 0..n {
- let t0 = std::time::Instant::now();
- let xs = model.preprocess(x).unwrap();
- t_pre += t0.elapsed();
-
- let t = std::time::Instant::now();
- let xs = model.inference(xs).unwrap();
- t_run += t.elapsed();
-
- let t = std::time::Instant::now();
- let _ys = black_box(model.postprocess(xs, x).unwrap());
- t_post += t.elapsed();
- t_pipeline += t0.elapsed();
- }
- match stage {
- Stage::Pre => t_pre,
- Stage::Run => t_run,
- Stage::Post => t_post,
- Stage::Pipeline => t_pipeline,
- }
-}
-
-pub fn benchmark_cuda(c: &mut Criterion, h: isize, w: isize) -> Result<()> {
- let mut group = c.benchmark_group(format!("YOLO ({}-{})", w, h));
- group
- .significance_level(0.05)
- .sample_size(80)
- .measurement_time(std::time::Duration::new(20, 0));
-
- let options = Options::default()
- .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
- .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
- .with_model("yolo/v8-m-dyn.onnx")?
- .with_cuda(0)
- // .with_cpu()
- .with_dry_run(0)
- .with_ixx(0, 2, (320, h, 1280).into())
- .with_ixx(0, 3, (320, w, 1280).into())
- .with_confs(&[0.2, 0.15]);
- let mut model = YOLO::new(options)?;
-
- let xs = [DataLoader::try_read("./assets/bus.jpg")?];
-
- group.bench_function("pre-process", |b| {
- b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Pre, n))
- });
-
- group.bench_function("run", |b| {
- b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Run, n))
- });
-
- group.bench_function("post-process", |b| {
- b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Post, n))
- });
-
- group.bench_function("pipeline", |b| {
- b.iter_custom(|n| yolo_stage_bench(&mut model, &xs, Stage::Pipeline, n))
- });
-
- group.finish();
- Ok(())
-}
-
-pub fn criterion_benchmark(c: &mut Criterion) {
- // benchmark_cuda(c, 416, 416).unwrap();
- benchmark_cuda(c, 640, 640).unwrap();
- benchmark_cuda(c, 448, 768).unwrap();
- // benchmark_cuda(c, 800, 800).unwrap();
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
diff --git a/examples/beit/README.md b/examples/beit/README.md
new file mode 100644
index 0000000..d9eddd8
--- /dev/null
+++ b/examples/beit/README.md
@@ -0,0 +1,6 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example beit -- --device cuda --dtype fp16
+```
+
diff --git a/examples/beit/main.rs b/examples/beit/main.rs
new file mode 100644
index 0000000..aad67bd
--- /dev/null
+++ b/examples/beit/main.rs
@@ -0,0 +1,52 @@
+use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(
+ option,
+ default = "vec![
+ String::from(\"images/dog.jpg\"),
+ String::from(\"images/siamese.png\"),
+ String::from(\"images/ailurus-fulgens.jpg\"),
+ ]"
+ )]
+    source: Vec<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options = Options::beit_base()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = ImageClassifier::try_from(options)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // annotate
+ let annotator = Annotator::default().with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/blip/README.md b/examples/blip/README.md
index e0dfe3e..6121661 100644
--- a/examples/blip/README.md
+++ b/examples/blip/README.md
@@ -3,20 +3,12 @@ This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do condit
## Quick Start
```shell
-cargo run -r --example blip
+cargo run -r -F cuda --example blip -- --device cuda:0 --source images/dog.jpg --source ./assets/bus.jpg --source images/green-car.jpg
```
## Results
```shell
-[Unconditional]: a group of people walking around a bus
-[Conditional]: three man walking in front of a bus
-Some(["three man walking in front of a bus"])
+Unconditional: Ys([Y { Texts: [Text("a dog running through a field of grass")] }, Y { Texts: [Text("a group of people walking around a bus")] }, Y { Texts: [Text("a green volkswagen beetle parked in front of a yellow building")] }])
+Conditional: Ys([Y { Texts: [Text("this image depicting a dog running in a field")] }, Y { Texts: [Text("this image depict a bus in barcelona")] }, Y { Texts: [Text("this image depict a blue volkswagen beetle parked in a street in havana, cuba")] }])
```
-
-## TODO
-
-* [ ] Multi-batch inference for image caption
-* [ ] VQA
-* [ ] Retrival
-* [ ] TensorRT support for textual model
diff --git a/examples/blip/main.rs b/examples/blip/main.rs
index da7fc89..d5e3e21 100644
--- a/examples/blip/main.rs
+++ b/examples/blip/main.rs
@@ -1,28 +1,44 @@
-use usls::{models::Blip, DataLoader, Options};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- // visual
- let options_visual = Options::default()
- .with_model("blip/visual-base.onnx")?
- // .with_ixx(0, 2, 384.into())
- // .with_ixx(0, 3, 384.into())
- .with_profile(false);
-
- // textual
- let options_textual = Options::default()
- .with_model("blip/textual-base.onnx")?
- .with_tokenizer("blip/tokenizer.json")?
- .with_profile(false);
-
- // build model
- let mut model = Blip::new(options_visual, options_textual)?;
-
- // image caption (this demo use batch_size=1)
- let xs = [DataLoader::try_read("images/bus.jpg")?];
- let image_embeddings = model.encode_images(&xs)?;
- let _y = model.caption(&image_embeddings, None, true)?; // unconditional
- let y = model.caption(&image_embeddings, Some("three man"), true)?; // conditional
- println!("{:?}", y[0].texts());
-
- Ok(())
-}
+use usls::{models::Blip, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// BLIP Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(option, default = "vec![String::from(\"./assets/bus.jpg\")]")]
+    source: Vec<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options_visual = Options::blip_v1_base_caption_visual()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let options_textual = Options::blip_v1_base_caption_textual()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = Blip::new(options_visual, options_textual)?;
+
+ // image caption
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // unconditional caption
+ let ys = model.forward(&xs, None)?;
+ println!("Unconditional: {:?}", ys);
+
+ // conditional caption
+ let ys = model.forward(&xs, Some("this image depict"))?;
+ println!("Conditional: {:?}", ys);
+
+ Ok(())
+}
diff --git a/examples/clip/README.md b/examples/clip/README.md
index d85a682..71fe94e 100644
--- a/examples/clip/README.md
+++ b/examples/clip/README.md
@@ -3,18 +3,13 @@ This demo showcases how to use [CLIP](https://github.com/openai/CLIP) to compute
## Quick Start
```shell
-cargo run -r --example clip
+cargo run -r -F cuda --example clip -- --device cuda:0
```
## Results
```shell
-(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜
-[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
-
-(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant
-[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
-
-(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
-[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
-```
\ No newline at end of file
+(99.9675%) ./examples/clip/images/carrot.jpg => Some carrots
+(99.93718%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
+(100.0%) ./examples/clip/images/drink.jpg => Some people holding wine glasses in a restaurant
+```
diff --git a/examples/clip/images/peoples.jpg b/examples/clip/images/drink.jpg
similarity index 100%
rename from examples/clip/images/peoples.jpg
rename to examples/clip/images/drink.jpg
diff --git a/examples/clip/main.rs b/examples/clip/main.rs
index 0fd03ce..e213c31 100644
--- a/examples/clip/main.rs
+++ b/examples/clip/main.rs
@@ -1,43 +1,54 @@
-use usls::{models::Clip, DataLoader, Options};
+use anyhow::Result;
+use usls::{models::Clip, DataLoader, Ops, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- // visual
- let options_visual = Options::default().with_model("clip/visual-base-dyn.onnx")?;
+#[derive(argh::FromArgs)]
+/// CLIP Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
- // textual
- let options_textual = Options::default()
- .with_model("clip/textual-base-dyn.onnx")?
- .with_tokenizer("clip/tokenizer.json")?;
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+ let args: Args = argh::from_env();
// build model
+ let options_visual = Options::jina_clip_v1_visual()
+ // clip_vit_b32_visual()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let options_textual = Options::jina_clip_v1_textual()
+ // clip_vit_b32_textual()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
let mut model = Clip::new(options_visual, options_textual)?;
// texts
let texts = vec![
- "A photo of a dinosaur ".to_string(),
- "A photo of a cat".to_string(),
- "A photo of a dog".to_string(),
- "几个胡萝卜".to_string(),
- "There are some playing cards on a striped table cloth".to_string(),
- "There is a doll with red hair and a clock on a table".to_string(),
- "Some people holding wine glasses in a restaurant".to_string(),
+ "A photo of a dinosaur",
+ "A photo of a cat",
+ "A photo of a dog",
+ "Some carrots",
+ "There are some playing cards on a striped table cloth",
+ "There is a doll with red hair and a clock on a table",
+ "Some people holding wine glasses in a restaurant",
];
let feats_text = model.encode_texts(&texts)?; // [n, ndim]
- // load image
+ // load images
let dl = DataLoader::new("./examples/clip/images")?.build()?;
- // loop
+ // run
for (images, paths) in dl {
- let feats_image = model.encode_images(&images).unwrap();
+ let feats_image = model.encode_images(&images)?;
// use image to query texts
- let matrix = match feats_image.embedding() {
- Some(x) => x.dot2(feats_text.embedding().unwrap())?,
- None => continue,
- };
+ let matrix = Ops::dot2(&feats_image, &feats_text)?;
- // summary
for i in 0..paths.len() {
let probs = &matrix[i];
let (id, &score) = probs
@@ -52,7 +63,6 @@ fn main() -> Result<(), Box> {
paths[i].display(),
&texts[id]
);
- println!("{:?}\n", probs);
}
}
diff --git a/examples/convnext/README.md b/examples/convnext/README.md
new file mode 100644
index 0000000..fe6d945
--- /dev/null
+++ b/examples/convnext/README.md
@@ -0,0 +1,6 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example convnext -- --device cuda --dtype fp16
+```
+
diff --git a/examples/convnext/main.rs b/examples/convnext/main.rs
new file mode 100644
index 0000000..6480a07
--- /dev/null
+++ b/examples/convnext/main.rs
@@ -0,0 +1,52 @@
+use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(
+ option,
+ default = "vec![
+ String::from(\"images/dog.jpg\"),
+ String::from(\"images/siamese.png\"),
+ String::from(\"images/ailurus-fulgens.jpg\"),
+ ]"
+ )]
+    source: Vec<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options = Options::convnext_v2_atto()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = ImageClassifier::try_from(options)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // annotate
+ let annotator = Annotator::default().with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/d-fine/README.md b/examples/d-fine/README.md
new file mode 100644
index 0000000..61eb5ba
--- /dev/null
+++ b/examples/d-fine/README.md
@@ -0,0 +1,5 @@
+## Quick Start
+
+```shell
+cargo run -r --example d-fine
+```
diff --git a/examples/d-fine/main.rs b/examples/d-fine/main.rs
new file mode 100644
index 0000000..2726232
--- /dev/null
+++ b/examples/d-fine/main.rs
@@ -0,0 +1,28 @@
+use anyhow::Result;
+use usls::{models::RTDETR, Annotator, DataLoader, Options};
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ // options
+ let options = Options::d_fine_n_coco().commit()?;
+ let mut model = RTDETR::new(options)?;
+
+ // load
+ let x = [DataLoader::try_read("./assets/bus.jpg")?];
+
+ // run
+ let y = model.forward(&x)?;
+ println!("{:?}", y);
+
+ // annotate
+ let annotator = Annotator::default()
+ .with_bboxes_thickness(3)
+ .with_saveout(model.spec());
+ annotator.annotate(&x, &y);
+
+ Ok(())
+}
diff --git a/examples/dataloader/README.md b/examples/dataloader/README.md
new file mode 100644
index 0000000..29d81b9
--- /dev/null
+++ b/examples/dataloader/README.md
@@ -0,0 +1,5 @@
+## Quick Start
+
+```shell
+cargo run -r --example dataloader
+```
diff --git a/examples/dataloader/main.rs b/examples/dataloader/main.rs
index dbc27fe..eacb3b4 100644
--- a/examples/dataloader/main.rs
+++ b/examples/dataloader/main.rs
@@ -1,66 +1,45 @@
-use usls::{
- models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOTask, YOLOVersion,
-};
+use usls::DataLoader;
fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
- .with_max_level(tracing::Level::ERROR)
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
.init();
- let options = Options::new()
- .with_device(Device::Cuda(0))
- .with_model("yolo/v8-m-dyn.onnx")?
- .with_yolo_version(YOLOVersion::V8)
- .with_yolo_task(YOLOTask::Detect)
- .with_batch(2)
- .with_ixx(0, 2, (416, 640, 800).into())
- .with_ixx(0, 3, (416, 640, 800).into())
- .with_confs(&[0.2]);
- let mut model = YOLO::new(options)?;
-
- // build annotator
- let annotator = Annotator::new()
- .with_bboxes_thickness(4)
- .with_saveout("YOLO-DataLoader");
-
- // build dataloader
- let dl = DataLoader::new(
+ // 1. iterator
+ let dl = DataLoader::try_from(
// "images/bus.jpg", // remote image
// "../images", // image folder
// "../demo.mp4", // local video
// "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
// "rtsp://admin:xyz@192.168.2.217:554/h265/ch1/", // rtsp h264 stream
- // "./assets/bus.jpg", // local image
- "../7.mp4",
+ "./assets/bus.jpg", // local image
)?
.with_batch(1)
+ .with_progress_bar(true)
.build()?;
- let mut viewer = Viewer::new().with_delay(10).with_scale(1.).resizable(true);
-
- // iteration
- for (xs, _) in dl {
- // inference & annotate
- let ys = model.run(&xs)?;
- let images_plotted = annotator.plot(&xs, &ys, false)?;
-
- // show image
- viewer.imshow(&images_plotted)?;
-
- // check out window and key event
- if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) {
- break;
- }
-
- // write video
- viewer.write_batch(&images_plotted)?;
+ for (_xs, _paths) in dl {
+ println!("Paths: {:?}", _paths);
}
- // finish video write
- viewer.finish_write()?;
-
- // images -> video
- // DataLoader::is2v("runs/YOLO-DataLoader", &["runs", "is2v"], 24)?;
+ // 2. read one image
+ let image = DataLoader::try_read("./assets/bus.jpg")?;
+ println!(
+ "Read one image. Height: {}, Width: {}",
+ image.height(),
+ image.width()
+ );
+
+ // 3. read several images
+ let images = DataLoader::try_read_batch(&[
+ "./assets/bus.jpg",
+ "./assets/bus.jpg",
+ "./assets/bus.jpg",
+ "./assets/bus.jpg",
+ "./assets/bus.jpg",
+ ])?;
+ println!("Read {} images.", images.len());
Ok(())
}
diff --git a/examples/db/README.md b/examples/db/README.md
index 6da1cfc..9e19375 100644
--- a/examples/db/README.md
+++ b/examples/db/README.md
@@ -4,15 +4,6 @@
cargo run -r --example db
```
-### Speed test
-
-| Model | Image size | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
-| --------------- | ---------- | ---------------------------------------- | ---------------------------------------- | ------------------------------------ |
-| ppocr-v3-db-dyn | 640x640 | 1.8585 | 2.5739 | 4.3314 |
-| ppocr-v4-db-dyn | 640x640 | 2.0507 | 2.8264 | 6.6064 |
-
-***Test on RTX3060***
-
## Results
![](https://github.com/jamjamjon/assets/releases/download/db/demo-paper.png)
diff --git a/examples/db/main.rs b/examples/db/main.rs
index b133216..13bdb87 100644
--- a/examples/db/main.rs
+++ b/examples/db/main.rs
@@ -1,35 +1,48 @@
+use anyhow::Result;
use usls::{models::DB, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- // build model
- let options = Options::default()
- .with_ixx(0, 0, (1, 4, 8).into())
- .with_ixx(0, 2, (608, 960, 1280).into())
- .with_ixx(0, 3, (608, 960, 1280).into())
- // .with_trt(0)
- .with_confs(&[0.4])
- .with_min_width(5.0)
- .with_min_height(12.0)
- .with_model("db/ppocr-v4-db-dyn.onnx")?;
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+ // build model
+ let options = Options::ppocr_det_v4_server_ch()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
let mut model = DB::new(options)?;
// load image
- let x = [
- DataLoader::try_read("images/db.png")?,
- DataLoader::try_read("images/street.jpg")?,
- ];
+ let x = DataLoader::try_read_batch(&[
+ "images/table.png",
+ "images/table1.jpg",
+ "images/table2.png",
+ "images/table-ch.jpg",
+ "images/db.png",
+ "images/street.jpg",
+ ])?;
// run
- let y = model.run(&x)?;
+ let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.without_bboxes(true)
+ .without_mbrs(true)
.with_polygons_alpha(60)
.with_contours_color([255, 105, 180, 255])
- .without_mbrs(true)
- .with_saveout("DB");
+ .with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
diff --git a/examples/deim/README.md b/examples/deim/README.md
new file mode 100644
index 0000000..08e833c
--- /dev/null
+++ b/examples/deim/README.md
@@ -0,0 +1,7 @@
+## Quick Start
+
+```shell
+cargo run -r --example deim
+```
+
+
diff --git a/examples/deim/main.rs b/examples/deim/main.rs
new file mode 100644
index 0000000..cf8d4e5
--- /dev/null
+++ b/examples/deim/main.rs
@@ -0,0 +1,28 @@
+use anyhow::Result;
+use usls::{models::RTDETR, Annotator, DataLoader, Options};
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ // options
+ let options = Options::deim_dfine_s_coco().commit()?;
+ let mut model = RTDETR::new(options)?;
+
+ // load
+ let x = [DataLoader::try_read("./assets/bus.jpg")?];
+
+ // run
+ let y = model.forward(&x)?;
+ println!("{:?}", y);
+
+ // annotate
+ let annotator = Annotator::default()
+ .with_bboxes_thickness(3)
+ .with_saveout(model.spec());
+ annotator.annotate(&x, &y);
+
+ Ok(())
+}
diff --git a/examples/deit/README.md b/examples/deit/README.md
new file mode 100644
index 0000000..962781f
--- /dev/null
+++ b/examples/deit/README.md
@@ -0,0 +1,7 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example deit -- --device cuda --dtype fp16
+```
+
+
diff --git a/examples/deit/main.rs b/examples/deit/main.rs
new file mode 100644
index 0000000..98d7c12
--- /dev/null
+++ b/examples/deit/main.rs
@@ -0,0 +1,52 @@
+use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(
+ option,
+ default = "vec![
+ String::from(\"images/dog.jpg\"),
+ String::from(\"images/siamese.png\"),
+ String::from(\"images/ailurus-fulgens.jpg\"),
+ ]"
+ )]
+    source: Vec<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options = Options::deit_tiny_distill()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = ImageClassifier::try_from(options)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // annotate
+ let annotator = Annotator::default().with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/depth-anything/main.rs b/examples/depth-anything/main.rs
index d339ff3..f1deeea 100644
--- a/examples/depth-anything/main.rs
+++ b/examples/depth-anything/main.rs
@@ -1,24 +1,26 @@
+use anyhow::Result;
use usls::{models::DepthAnything, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- // options
- let options = Options::default()
- // .with_model("depth-anything/v1-s-dyn.onnx")?
- .with_model("depth-anything/v2-s.onnx")?
- .with_ixx(0, 2, (384, 512, 1024).into())
- .with_ixx(0, 3, (384, 512, 1024).into());
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ // build model
+ let options = Options::depth_anything_v2_small().commit()?;
let mut model = DepthAnything::new(options)?;
// load
let x = [DataLoader::try_read("images/street.jpg")?];
// run
- let y = model.run(&x)?;
+ let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.with_colormap("Turbo")
- .with_saveout("Depth-Anything");
+ .with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
diff --git a/examples/depth-pro/README.md b/examples/depth-pro/README.md
new file mode 100644
index 0000000..52c1418
--- /dev/null
+++ b/examples/depth-pro/README.md
@@ -0,0 +1,10 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example depth-pro -- --device cuda
+```
+
+
+## Results
+
+![](https://github.com/jamjamjon/assets/releases/download/depth-pro/demo-depth-pro.png)
diff --git a/examples/depth-pro/main.rs b/examples/depth-pro/main.rs
index eb72a9a..8919f93 100644
--- a/examples/depth-pro/main.rs
+++ b/examples/depth-pro/main.rs
@@ -1,25 +1,47 @@
+use anyhow::Result;
use usls::{models::DepthPro, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- // options
- let options = Options::default()
- .with_model("depth-pro/q4f16.onnx")? // bnb4, f16
- .with_ixx(0, 0, 1.into()) // batch. Note: now only support batch_size = 1
- .with_ixx(0, 1, 3.into()) // channel
- .with_ixx(0, 2, 1536.into()) // height
- .with_ixx(0, 3, 1536.into()); // width
+#[derive(argh::FromArgs)]
+/// BLIP Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// dtype
+ #[argh(option, default = "String::from(\"q4f16\")")]
+ dtype: String,
+
+ /// source image
+ #[argh(option, default = "String::from(\"images/street.jpg\")")]
+ source: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // model
+ let options = Options::depth_pro()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
let mut model = DepthPro::new(options)?;
// load
- let x = [DataLoader::try_read("images/street.jpg")?];
+ let x = [DataLoader::try_read(&args.source)?];
// run
- let y = model.run(&x)?;
+ let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.with_colormap("Turbo")
- .with_saveout("Depth-Pro");
+ .with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
diff --git a/examples/dinov2/main.rs b/examples/dinov2/main.rs
index 4cc7732..5168785 100644
--- a/examples/dinov2/main.rs
+++ b/examples/dinov2/main.rs
@@ -1,40 +1,25 @@
-use usls::{models::Dinov2, DataLoader, Options};
+use anyhow::Result;
+use usls::{models::DINOv2, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- // build model
- let options = Options::default()
- .with_model("dinov2/s-dyn.onnx")?
- .with_ixx(0, 2, 224.into())
- .with_ixx(0, 3, 224.into());
- let mut model = Dinov2::new(options)?;
- let x = [DataLoader::try_read("images/bus.jpg")?];
- let y = model.run(&x)?;
- println!("{y:?}");
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
- // TODO:
- // query from vector
- // let ys = model.query_from_vec(
- // "./assets/bus.jpg",
- // &[
- // "./examples/dinov2/images/bus.jpg",
- // "./examples/dinov2/images/1.jpg",
- // "./examples/dinov2/images/2.jpg",
- // ],
- // Metric::L2,
- // )?;
+ // images
+ let xs = [
+ DataLoader::try_read("./assets/bus.jpg")?,
+ DataLoader::try_read("./assets/bus.jpg")?,
+ ];
- // or query from folder
- // let ys = model.query_from_folder("./assets/bus.jpg", "./examples/dinov2/images", Metric::IP)?;
+ // model
+ let options = Options::dinov2_small().with_batch_size(xs.len()).commit()?;
+ let mut model = DINOv2::new(options)?;
- // results
- // for (i, y) in ys.iter().enumerate() {
- // println!(
- // "Top-{:<3}{:.7} {}",
- // i + 1,
- // y.1,
- // y.2.canonicalize()?.display()
- // );
- // }
+ // encode images
+ let y = model.encode_images(&xs)?;
+ println!("Feat shape: {:?}", y.shape());
Ok(())
}
diff --git a/examples/doclayout-yolo/README.md b/examples/doclayout-yolo/README.md
new file mode 100644
index 0000000..b9b233f
--- /dev/null
+++ b/examples/doclayout-yolo/README.md
@@ -0,0 +1,10 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example doclayout-yolo -- --device cuda
+```
+
+
+## Results
+
+![](https://github.com/jamjamjon/assets/releases/download/yolo/demo-doclayout-yolo.png)
diff --git a/examples/doclayout-yolo/main.rs b/examples/doclayout-yolo/main.rs
new file mode 100644
index 0000000..99a945b
--- /dev/null
+++ b/examples/doclayout-yolo/main.rs
@@ -0,0 +1,42 @@
+use anyhow::Result;
+use usls::{models::YOLO, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let config = Options::doclayout_yolo_docstructbench()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = YOLO::new(config)?;
+
+ // load images
+ let xs = [DataLoader::try_read("images/academic.jpg")?];
+
+ // run
+ let ys = model.forward(&xs)?;
+ // println!("{:?}", ys);
+
+ // annotate
+ let annotator = Annotator::default()
+ .with_bboxes_thickness(3)
+ .with_saveout("doclayout-yolo");
+ annotator.annotate(&xs, &ys);
+
+ model.summary();
+
+ Ok(())
+}
diff --git a/examples/fast/README.md b/examples/fast/README.md
new file mode 100644
index 0000000..89227df
--- /dev/null
+++ b/examples/fast/README.md
@@ -0,0 +1,6 @@
+## Quick Start
+
+```shell
+cargo run -r --example fast
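+
+# optional: choose device and scale (flags defined in main.rs; cuda feature as in the other examples)
+cargo run -r -F cuda --example fast -- --device cuda --scale b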
+```
+
diff --git a/examples/fast/main.rs b/examples/fast/main.rs
new file mode 100644
index 0000000..84872d1
--- /dev/null
+++ b/examples/fast/main.rs
@@ -0,0 +1,65 @@
+use anyhow::Result;
+use usls::{models::DB, Annotator, DataLoader, Options, Scale};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// scale
+ #[argh(option, default = "String::from(\"t\")")]
+ scale: String,
+
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
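+ // t / s / b select the fast_tiny / fast_small / fast_base presets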
+ let options = match args.scale.as_str().try_into()? {
+ Scale::T => Options::fast_tiny(),
+ Scale::S => Options::fast_small(),
+ Scale::B => Options::fast_base(),
+ _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
+ };
+ let mut model = DB::new(
+ options
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?,
+ )?;
+
+ // load image
+ let x = DataLoader::try_read_batch(&[
+ "images/table.png",
+ "images/table1.jpg",
+ "images/table2.png",
+ "images/table-ch.jpg",
+ "images/db.png",
+ "images/street.jpg",
+ ])?;
+
+ // run
+ let y = model.forward(&x)?;
+
+ // annotate
+ let annotator = Annotator::default()
+ .without_bboxes(true)
+ .without_mbrs(true)
+ .with_polygons_alpha(60)
+ .with_contours_color([255, 105, 180, 255])
+ .with_saveout(model.spec());
+ annotator.annotate(&x, &y);
+
+ Ok(())
+}
diff --git a/examples/fastsam/README.md b/examples/fastsam/README.md
new file mode 100644
index 0000000..b2984e1
--- /dev/null
+++ b/examples/fastsam/README.md
@@ -0,0 +1,5 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example fastsam -- --device cuda
+```
diff --git a/examples/fastsam/main.rs b/examples/fastsam/main.rs
new file mode 100644
index 0000000..0050fda
--- /dev/null
+++ b/examples/fastsam/main.rs
@@ -0,0 +1,45 @@
+use anyhow::Result;
+use usls::{models::YOLO, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"fp16\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let config = Options::fastsam_s()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = YOLO::new(config)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // annotate
+ let annotator = Annotator::default()
+ .without_masks(true)
+ .with_bboxes_thickness(3)
+ .with_saveout("fastsam");
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/fastvit/README.md b/examples/fastvit/README.md
new file mode 100644
index 0000000..ca00fdf
--- /dev/null
+++ b/examples/fastvit/README.md
@@ -0,0 +1,13 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example fastvit -- --device cuda --dtype fp16
+```
+
+
+```shell
+0: Y { Probs: { Top5: [(263, 0.6109131, Some("Pembroke, Pembroke Welsh corgi")), (264, 0.2062352, Some("Cardigan, Cardigan Welsh corgi")), (231, 0.028572788, Some("collie")), (273, 0.015174894, Some("dingo, warrigal, warragal, Canis dingo")), (248, 0.014367299, Some("Eskimo dog, husky"))] } }
+1: Y { Probs: { Top5: [(284, 0.9907692, Some("siamese cat, Siamese")), (285, 0.0015794479, Some("Egyptian cat")), (174, 0.0015189401, Some("Norwegian elkhound, elkhound")), (225, 0.00031838714, Some("malinois")), (17, 0.00027021166, Some("jay"))] } }
+2: Y { Probs: { Top5: [(387, 0.94238573, Some("lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens")), (368, 0.0029994072, Some("gibbon, Hylobates lar")), (277, 0.0016564301, Some("red fox, Vulpes vulpes")), (356, 0.0015081967, Some("weasel")), (295, 0.001427932, Some("American black bear, black bear, Ursus americanus, Euarctos americanus"))] } }
+
+```
diff --git a/examples/fastvit/main.rs b/examples/fastvit/main.rs
new file mode 100644
index 0000000..cb93886
--- /dev/null
+++ b/examples/fastvit/main.rs
@@ -0,0 +1,57 @@
+use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(
+ option,
+ default = "vec![
+ String::from(\"images/dog.jpg\"),
+ String::from(\"images/siamese.png\"),
+ String::from(\"images/ailurus-fulgens.jpg\"),
+ ]"
+ )]
+ source: Vec<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options = Options::fastvit_t8_distill()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = ImageClassifier::try_from(options)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // results
+ for (i, y) in ys.iter().enumerate() {
+ println!("{}: {:?}", i, y);
+ }
+
+ // annotate
+ let annotator = Annotator::default().with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/florence2/README.md b/examples/florence2/README.md
new file mode 100644
index 0000000..6078515
--- /dev/null
+++ b/examples/florence2/README.md
@@ -0,0 +1,30 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example florence2 -- --device cuda --scale base --dtype fp16
+```
+
+
+```Shell
+Task: Caption(0)
+Ys([Y { Texts: [Text("A green car parked in front of a yellow building.")] }, Y { Texts: [Text("A group of people walking down a street next to a bus.")] }])
+
+Task: Caption(1)
+Ys([Y { Texts: [Text("The image shows a green car parked in front of a yellow building with two brown doors. The car is on the road, and the building has a wall and a tree in the background.")] }, Y { Texts: [Text("The image shows a group of people walking down a street next to a bus, with a building in the background. The bus is likely part of the World Electric Emission Bus, which is a new bus that will be launched in Madrid. The people are walking on the road, and there are trees and a sign board to the left of the bus.")] }])
+
+Task: Caption(2)
+Ys([Y { Texts: [Text("The image shows a vintage Volkswagen Beetle car parked on a cobblestone street in front of a yellow building with two wooden doors. The car is a light blue color with silver rims and appears to be in good condition. The building has a sloping roof and is painted in a bright yellow color. The sky is blue and there are trees in the background. The overall mood of the image is peaceful and serene.")] }, Y { Texts: [Text("The image shows a blue and white bus with the logo of the Brazilian football club, Cero Emisiones, on the side. The bus is parked on a street with a building in the background. There are several people walking on the sidewalk in front of the bus, some of them are carrying bags and one person is holding a camera. The sky is blue and there are trees and a traffic light visible in the top right corner of the image. The image appears to be taken during the day.")] }])
+```
+
+## Results
+
+| Task | Demo |
+| -----| ------|
+| Caption-To-Phrase-Grounding | |
+| Ocr-With-Region | |
+| Dense-Region-Caption | |
+| Object-Detection | |
+| Region-Proposal | |
+| Referring-Expression-Segmentation | |
+
+
diff --git a/examples/florence2/main.rs b/examples/florence2/main.rs
index 07cc7d1..7248faf 100644
--- a/examples/florence2/main.rs
+++ b/examples/florence2/main.rs
@@ -1,157 +1,176 @@
-use usls::{models::Florence2, Annotator, DataLoader, Options, Task};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- let batch_size = 3;
-
- // vision encoder
- let options_vision_encoder = Options::default()
- .with_model("florence2/base-vision-encoder-f16.onnx")?
- .with_ixx(0, 2, (512, 768, 800).into())
- .with_ixx(0, 3, 768.into())
- .with_ixx(0, 0, (1, batch_size as _, 8).into());
-
- // text embed
- let options_text_embed = Options::default()
- .with_model("florence2/base-embed-tokens-f16.onnx")?
- .with_tokenizer("florence2/tokenizer.json")?
- .with_batch(batch_size);
-
- // transformer encoder
- let options_encoder = Options::default()
- .with_model("florence2/base-encoder-f16.onnx")?
- .with_batch(batch_size);
-
- // transformer decoder
- let options_decoder = Options::default()
- .with_model("florence2/base-decoder-f16.onnx")?
- .with_batch(batch_size);
-
- // transformer decoder merged
- let options_decoder_merged = Options::default()
- .with_model("florence2/base-decoder-merged-f16.onnx")?
- .with_batch(batch_size);
-
- // build model
- let mut model = Florence2::new(
- options_vision_encoder,
- options_text_embed,
- options_encoder,
- options_decoder,
- options_decoder_merged,
- )?;
-
- // load images
- let xs = [
- // DataLoader::try_read("florence2/car.jpg")?, // for testing region-related tasks
- DataLoader::try_read("florence2/car.jpg")?,
- // DataLoader::try_read("images/db.png")?,
- DataLoader::try_read("assets/bus.jpg")?,
- ];
-
- // region-related tasks
- let quantizer = usls::Quantizer::default();
- // let coords = [449., 270., 556., 372.]; // wheel
- let coords = [31., 156., 581., 373.]; // car
- let (width_car, height_car) = (xs[0].width(), xs[0].height());
- let quantized_coords = quantizer.quantize(&coords, (width_car as _, height_car as _));
-
- // run with tasks
- let ys = model.run_with_tasks(
- &xs,
- &[
- // w/ inputs
- Task::Caption(0),
- Task::Caption(1),
- Task::Caption(2),
- Task::Ocr,
- Task::OcrWithRegion,
- Task::RegionProposal,
- Task::ObjectDetection,
- Task::DenseRegionCaption,
- // w/o inputs
- Task::OpenSetDetection("a vehicle".into()),
- Task::CaptionToPhraseGrounding(
- "A vehicle with two wheels parked in front of a building.".into(),
- ),
- Task::ReferringExpressionSegmentation("a vehicle".into()),
- Task::RegionToSegmentation(
- quantized_coords[0],
- quantized_coords[1],
- quantized_coords[2],
- quantized_coords[3],
- ),
- Task::RegionToCategory(
- quantized_coords[0],
- quantized_coords[1],
- quantized_coords[2],
- quantized_coords[3],
- ),
- Task::RegionToDescription(
- quantized_coords[0],
- quantized_coords[1],
- quantized_coords[2],
- quantized_coords[3],
- ),
- ],
- )?;
-
- // annotator
- let annotator = Annotator::new()
- .without_bboxes_conf(true)
- .with_bboxes_thickness(3)
- .with_saveout_subs(&["Florence2"]);
- for (task, ys_) in ys.iter() {
- match task {
- Task::Caption(_)
- | Task::Ocr
- | Task::RegionToCategory(..)
- | Task::RegionToDescription(..) => {
- println!("Task: {:?}\n{:?}\n", task, ys_)
- }
- Task::DenseRegionCaption => {
- let annotator = annotator.clone().with_saveout("Dense-Region-Caption");
- annotator.annotate(&xs, ys_);
- }
- Task::RegionProposal => {
- let annotator = annotator
- .clone()
- .without_bboxes_name(false)
- .with_saveout("Region-Proposal");
-
- annotator.annotate(&xs, ys_);
- }
- Task::ObjectDetection => {
- let annotator = annotator.clone().with_saveout("Object-Detection");
- annotator.annotate(&xs, ys_);
- }
- Task::OpenSetDetection(_) => {
- let annotator = annotator.clone().with_saveout("Open-Set-Detection");
- annotator.annotate(&xs, ys_);
- }
- Task::CaptionToPhraseGrounding(_) => {
- let annotator = annotator
- .clone()
- .with_saveout("Caption-To-Phrase-Grounding");
- annotator.annotate(&xs, ys_);
- }
- Task::ReferringExpressionSegmentation(_) => {
- let annotator = annotator
- .clone()
- .with_saveout("Referring-Expression-Segmentation");
- annotator.annotate(&xs, ys_);
- }
- Task::RegionToSegmentation(..) => {
- let annotator = annotator.clone().with_saveout("Region-To-Segmentation");
- annotator.annotate(&xs, ys_);
- }
- Task::OcrWithRegion => {
- let annotator = annotator.clone().with_saveout("Ocr-With-Region");
- annotator.annotate(&xs, ys_);
- }
-
- _ => (),
- }
- }
-
- Ok(())
-}
+use anyhow::Result;
+use usls::{models::Florence2, Annotator, DataLoader, Options, Scale, Task};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// scale
+ #[argh(option, default = "String::from(\"base\")")]
+ scale: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // load images
+ let xs = [
+ DataLoader::try_read("images/green-car.jpg")?,
+ DataLoader::try_read("assets/bus.jpg")?,
+ ];
+
+ // build model
+ let (
+ options_vision_encoder,
+ options_text_embed,
+ options_encoder,
+ options_decoder,
+ options_decoder_merged,
+ ) = match args.scale.as_str().try_into()? {
+ Scale::B => (
+ Options::florence2_visual_encoder_base(),
+ Options::florence2_textual_embed_base(),
+ Options::florence2_texual_encoder_base(),
+ Options::florence2_texual_decoder_base(),
+ Options::florence2_texual_decoder_merged_base(),
+ ),
+ Scale::L => todo!(),
+ _ => anyhow::bail!("Unsupported Florence2 scale."),
+ };
+
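+ // all five sub-models are committed with the same dtype, device and batch size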
+ let mut model = Florence2::new(
+ options_vision_encoder
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ options_text_embed
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ options_encoder
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ options_decoder
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ options_decoder_merged
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ )?;
+
+ // tasks
+ let tasks = [
+ // w inputs
+ Task::Caption(0),
+ Task::Caption(1),
+ Task::Caption(2),
+ Task::Ocr,
+ // Task::OcrWithRegion,
+ Task::RegionProposal,
+ Task::ObjectDetection,
+ Task::DenseRegionCaption,
+ // w/o inputs
+ Task::OpenSetDetection("a vehicle"),
+ Task::CaptionToPhraseGrounding("A vehicle with two wheels parked in front of a building."),
+ Task::ReferringExpressionSegmentation("a vehicle"),
+ Task::RegionToSegmentation(
+ // 31, 156, 581, 373, // car
+ 449, 270, 556, 372, // wheel
+ ),
+ Task::RegionToCategory(
+ // 31, 156, 581, 373,
+ 449, 270, 556, 372,
+ ),
+ Task::RegionToDescription(
+ // 31, 156, 581, 373,
+ 449, 270, 556, 372,
+ ),
+ ];
+
+ // annotator
+ let annotator = Annotator::new()
+ .without_bboxes_conf(true)
+ .with_bboxes_thickness(3)
+ .with_saveout_subs(&["Florence2"]);
+
+ // inference
+ for task in tasks.iter() {
+ let ys = model.forward(&xs, task)?;
+
+ // annotate
+ match task {
+ Task::Caption(_)
+ | Task::Ocr
+ | Task::RegionToCategory(..)
+ | Task::RegionToDescription(..) => {
+ println!("Task: {:?}\n{:?}\n", task, &ys)
+ }
+ Task::DenseRegionCaption => {
+ let annotator = annotator.clone().with_saveout("Dense-Region-Caption");
+ annotator.annotate(&xs, &ys);
+ }
+ Task::RegionProposal => {
+ let annotator = annotator
+ .clone()
+ .without_bboxes_name(false)
+ .with_saveout("Region-Proposal");
+
+ annotator.annotate(&xs, &ys);
+ }
+ Task::ObjectDetection => {
+ let annotator = annotator.clone().with_saveout("Object-Detection");
+ annotator.annotate(&xs, &ys);
+ }
+ Task::OpenSetDetection(_) => {
+ let annotator = annotator.clone().with_saveout("Open-Set-Detection");
+ annotator.annotate(&xs, &ys);
+ }
+ Task::CaptionToPhraseGrounding(_) => {
+ let annotator = annotator
+ .clone()
+ .with_saveout("Caption-To-Phrase-Grounding");
+ annotator.annotate(&xs, &ys);
+ }
+ Task::ReferringExpressionSegmentation(_) => {
+ let annotator = annotator
+ .clone()
+ .with_saveout("Referring-Expression-Segmentation");
+ annotator.annotate(&xs, &ys);
+ }
+ Task::RegionToSegmentation(..) => {
+ let annotator = annotator.clone().with_saveout("Region-To-Segmentation");
+ annotator.annotate(&xs, &ys);
+ }
+ Task::OcrWithRegion => {
+ let annotator = annotator.clone().with_saveout("Ocr-With-Region");
+ annotator.annotate(&xs, &ys);
+ }
+
+ _ => (),
+ }
+ }
+
+ model.summary();
+
+ Ok(())
+}
diff --git a/examples/grounding-dino/README.md b/examples/grounding-dino/README.md
index a94cb0b..f97321f 100644
--- a/examples/grounding-dino/README.md
+++ b/examples/grounding-dino/README.md
@@ -1,7 +1,7 @@
## Quick Start
```shell
-cargo run -r --example grounding-dino
+cargo run -r -F cuda --example grounding-dino -- --device cuda --dtype fp16
```
diff --git a/examples/grounding-dino/main.rs b/examples/grounding-dino/main.rs
index 2ceb61c..78c6493 100644
--- a/examples/grounding-dino/main.rs
+++ b/examples/grounding-dino/main.rs
@@ -1,41 +1,72 @@
+use anyhow::Result;
use usls::{models::GroundingDINO, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- let opts = Options::default()
- .with_ixx(0, 0, (1, 1, 4).into())
- .with_ixx(0, 2, (640, 800, 1200).into())
- .with_ixx(0, 3, (640, 1200, 1200).into())
- // .with_i10((1, 1, 4).into())
- // .with_i11((256, 256, 512).into())
- // .with_i20((1, 1, 4).into())
- // .with_i21((256, 256, 512).into())
- // .with_i30((1, 1, 4).into())
- // .with_i31((256, 256, 512).into())
- // .with_i40((1, 1, 4).into())
- // .with_i41((256, 256, 512).into())
- // .with_i50((1, 1, 4).into())
- // .with_i51((256, 256, 512).into())
- // .with_i52((256, 256, 512).into())
- .with_model("grounding-dino/swint-ogc-dyn-u8.onnx")? // TODO: current onnx model does not support bs > 1
- // .with_model("grounding-dino/swint-ogc-dyn-f32.onnx")?
- .with_tokenizer("grounding-dino/tokenizer.json")?
- .with_confs(&[0.2])
- .with_profile(false);
- let mut model = GroundingDINO::new(opts)?;
-
- // Load images and set class names
- let x = [DataLoader::try_read("images/bus.jpg")?];
- let texts = [
- "person", "hand", "shoes", "bus", "dog", "cat", "sign", "tie", "monitor", "window",
- "glasses", "tree", "head",
- ];
-
- // Run and annotate
- let y = model.run(&x, &texts)?;
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(option, default = "vec![String::from(\"./assets/bus.jpg\")]")]
+ source: Vec<String>,
+
+ /// open class names
+ #[argh(
+ option,
+ default = "vec![
+ String::from(\"person\"),
+ String::from(\"hand\"),
+ String::from(\"shoes\"),
+ String::from(\"bus\"),
+ String::from(\"dog\"),
+ String::from(\"cat\"),
+ String::from(\"sign\"),
+ String::from(\"tie\"),
+ String::from(\"monitor\"),
+ String::from(\"glasses\"),
+ String::from(\"tree\"),
+ String::from(\"head\"),
+ ]"
+ )]
+ labels: Vec<String>,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
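+ // the label list is attached to the options and acts as the open-set vocabulary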
+ let options = Options::grounding_dino_tiny()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_text_names(&args.labels.iter().map(|x| x.as_str()).collect::<Vec<_>>())
+ .commit()?;
+
+ let mut model = GroundingDINO::new(options)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // annotate
let annotator = Annotator::default()
.with_bboxes_thickness(4)
- .with_saveout("GroundingDINO");
- annotator.annotate(&x, &y);
+ .with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ // summary
+ model.summary();
Ok(())
}
diff --git a/examples/hub/README.md b/examples/hub/README.md
new file mode 100644
index 0000000..7cddfbc
--- /dev/null
+++ b/examples/hub/README.md
@@ -0,0 +1,5 @@
+## Quick Start
+
+```shell
+RUST_LOG=usls=info cargo run -r --example hub
+```
diff --git a/examples/hub/main.rs b/examples/hub/main.rs
new file mode 100644
index 0000000..45cc7b2
--- /dev/null
+++ b/examples/hub/main.rs
@@ -0,0 +1,26 @@
+use usls::Hub;
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ // 1. Download from default github release
+ let path = Hub::default().try_fetch("images/bus.jpg")?;
+ println!("Fetch one image: {:?}", path);
+
+ // 2. Download from specific github release url
+ let path = Hub::default()
+ .try_fetch("https://github.com/jamjamjon/assets/releases/download/images/bus.jpg")?;
+ println!("Fetch one file: {:?}", path);
+
+ // 3. Fetch tags and files
+ let hub = Hub::default().with_owner("jamjamjon").with_repo("usls");
+ for tag in hub.tags().iter() {
+ let files = hub.files(tag);
+ println!("{} => {:?}", tag, files); // Should be empty
+ }
+
+ Ok(())
+}
diff --git a/examples/linknet/README.md b/examples/linknet/README.md
new file mode 100644
index 0000000..89227df
--- /dev/null
+++ b/examples/linknet/README.md
@@ -0,0 +1,6 @@
+## Quick Start
+
+```shell
+cargo run -r --example linknet
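+
+# optional: choose device and scale (flags defined in main.rs)
+cargo run -r -F cuda --example linknet -- --device cuda --scale b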
+```
+
diff --git a/examples/linknet/main.rs b/examples/linknet/main.rs
new file mode 100644
index 0000000..4fc3841
--- /dev/null
+++ b/examples/linknet/main.rs
@@ -0,0 +1,65 @@
+use anyhow::Result;
+use usls::{models::DB, Annotator, DataLoader, Options, Scale};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// scale
+ #[argh(option, default = "String::from(\"t\")")]
+ scale: String,
+
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
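+ // t / s / b select the linknet_r18 / linknet_r34 / linknet_r50 presets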
+ let options = match args.scale.as_str().try_into()? {
+ Scale::T => Options::linknet_r18(),
+ Scale::S => Options::linknet_r34(),
+ Scale::B => Options::linknet_r50(),
+ _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
+ };
+ let mut model = DB::new(
+ options
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?,
+ )?;
+
+ // load image
+ let x = DataLoader::try_read_batch(&[
+ "images/table.png",
+ "images/table1.jpg",
+ "images/table2.png",
+ "images/table-ch.jpg",
+ "images/db.png",
+ "images/street.jpg",
+ ])?;
+
+ // run
+ let y = model.forward(&x)?;
+
+ // annotate
+ let annotator = Annotator::default()
+ .without_bboxes(true)
+ .without_mbrs(true)
+ .with_polygons_alpha(60)
+ .with_contours_color([255, 105, 180, 255])
+ .with_saveout(model.spec());
+ annotator.annotate(&x, &y);
+
+ Ok(())
+}
diff --git a/examples/mobileone/README.md b/examples/mobileone/README.md
new file mode 100644
index 0000000..ca00fdf
--- /dev/null
+++ b/examples/mobileone/README.md
@@ -0,0 +1,13 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example mobileone -- --device cuda --dtype fp16
+```
+
+
+```shell
+0: Y { Probs: { Top5: [(263, 0.6109131, Some("Pembroke, Pembroke Welsh corgi")), (264, 0.2062352, Some("Cardigan, Cardigan Welsh corgi")), (231, 0.028572788, Some("collie")), (273, 0.015174894, Some("dingo, warrigal, warragal, Canis dingo")), (248, 0.014367299, Some("Eskimo dog, husky"))] } }
+1: Y { Probs: { Top5: [(284, 0.9907692, Some("siamese cat, Siamese")), (285, 0.0015794479, Some("Egyptian cat")), (174, 0.0015189401, Some("Norwegian elkhound, elkhound")), (225, 0.00031838714, Some("malinois")), (17, 0.00027021166, Some("jay"))] } }
+2: Y { Probs: { Top5: [(387, 0.94238573, Some("lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens")), (368, 0.0029994072, Some("gibbon, Hylobates lar")), (277, 0.0016564301, Some("red fox, Vulpes vulpes")), (356, 0.0015081967, Some("weasel")), (295, 0.001427932, Some("American black bear, black bear, Ursus americanus, Euarctos americanus"))] } }
+
+```
diff --git a/examples/mobileone/main.rs b/examples/mobileone/main.rs
new file mode 100644
index 0000000..36238c2
--- /dev/null
+++ b/examples/mobileone/main.rs
@@ -0,0 +1,57 @@
+use usls::{models::ImageClassifier, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// source image
+ #[argh(
+ option,
+ default = "vec![
+ String::from(\"images/dog.jpg\"),
+ String::from(\"images/siamese.png\"),
+ String::from(\"images/ailurus-fulgens.jpg\"),
+ ]"
+ )]
+ source: Vec<String>,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options = Options::mobileone_s0()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = ImageClassifier::try_from(options)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&args.source)?;
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // results
+ for (i, y) in ys.iter().enumerate() {
+ println!("{}: {:?}", i, y);
+ }
+
+ // annotate
+ let annotator = Annotator::default().with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/modnet/main.rs b/examples/modnet/main.rs
index 660ded5..39691b3 100644
--- a/examples/modnet/main.rs
+++ b/examples/modnet/main.rs
@@ -1,22 +1,24 @@
use usls::{models::MODNet, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
// build model
- let options = Options::default()
- .with_model("modnet/dyn-f32.onnx")?
- .with_ixx(0, 2, (416, 512, 800).into())
- .with_ixx(0, 3, (416, 512, 800).into());
+ let options = Options::modnet_photographic().commit()?;
let mut model = MODNet::new(options)?;
// load image
- let x = [DataLoader::try_read("images/liuyifei.png")?];
+ let xs = [DataLoader::try_read("images/liuyifei.png")?];
// run
- let y = model.run(&x)?;
+ let ys = model.forward(&xs)?;
// annotate
- let annotator = Annotator::default().with_saveout("MODNet");
- annotator.annotate(&x, &y);
+ let annotator = Annotator::default().with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
Ok(())
}
diff --git a/examples/picodet-layout/README.md b/examples/picodet-layout/README.md
new file mode 100644
index 0000000..8e29d70
--- /dev/null
+++ b/examples/picodet-layout/README.md
@@ -0,0 +1,10 @@
+## Quick Start
+
+```shell
+cargo run -r --example picodet-layout
+```
+
+
+## Results
+
+![](https://github.com/jamjamjon/assets/releases/download/picodet/demo-layout-1x.png)
diff --git a/examples/picodet-layout/main.rs b/examples/picodet-layout/main.rs
new file mode 100644
index 0000000..fca0bcb
--- /dev/null
+++ b/examples/picodet-layout/main.rs
@@ -0,0 +1,31 @@
+use anyhow::Result;
+use usls::{models::PicoDet, Annotator, DataLoader, Options};
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ // options
+ let options = Options::picodet_layout_1x()
+ // picodet_l_layout_3cls()
+ // picodet_l_layout_17cls()
+ .commit()?;
+ let mut model = PicoDet::new(options)?;
+
+ // load
+ let xs = [DataLoader::try_read("images/academic.jpg")?];
+
+ // annotator
+ let annotator = Annotator::default()
+ .with_bboxes_thickness(3)
+ .with_saveout(model.spec());
+
+ // run
+ let ys = model.forward(&xs)?;
+ println!("{:?}", ys);
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/rtdetr/README.md b/examples/rtdetr/README.md
new file mode 100644
index 0000000..711c097
--- /dev/null
+++ b/examples/rtdetr/README.md
@@ -0,0 +1,17 @@
+## Quick Start
+
+```shell
+cargo run -r --example rtdetr
+```
+
+## Results
+
+```
+[Bboxes]: Found 5 objects
+0: Bbox { xyxy: [47.969677, 397.81808, 246.22426, 904.8823], class_id: 0, name: Some("person"), confidence: 0.94432133 }
+1: Bbox { xyxy: [668.0796, 399.28854, 810.3779, 880.7412], class_id: 0, name: Some("person"), confidence: 0.93386495 }
+2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 }
+3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 }
+4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 }
+
+```
diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs
new file mode 100644
index 0000000..590b218
--- /dev/null
+++ b/examples/rtdetr/main.rs
@@ -0,0 +1,43 @@
+use anyhow::Result;
+use usls::{models::RTDETR, Annotator, DataLoader, Options};
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ // options
+ let options = Options::rtdetr_v2_s_coco()
+ // rtdetr_v1_r18vd_coco()
+ // rtdetr_v2_ms_coco()
+ // rtdetr_v2_m_coco()
+ // rtdetr_v2_l_coco()
+ // rtdetr_v2_x_coco()
+ .commit()?;
+ let mut model = RTDETR::new(options)?;
+
+ // load
+ let xs = [DataLoader::try_read("./assets/bus.jpg")?];
+
+ // run
+ let ys = model.forward(&xs)?;
+
+ // extract bboxes
+ for y in ys.iter() {
+ if let Some(bboxes) = y.bboxes() {
+ println!("[Bboxes]: Found {} objects", bboxes.len());
+ for (i, bbox) in bboxes.iter().enumerate() {
+ println!("{}: {:?}", i, bbox)
+ }
+ }
+ }
+
+ // annotate
+ let annotator = Annotator::default()
+ .with_bboxes_thickness(3)
+ .with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/examples/rtmo/main.rs b/examples/rtmo/main.rs
index aae1706..efe198a 100644
--- a/examples/rtmo/main.rs
+++ b/examples/rtmo/main.rs
@@ -1,25 +1,26 @@
+use anyhow::Result;
use usls::{models::RTMO, Annotator, DataLoader, Options, COCO_SKELETONS_16};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
// build model
- let options = Options::default()
- .with_model("rtmo/s-dyn.onnx")?
- .with_nk(17)
- .with_confs(&[0.3])
- .with_kconfs(&[0.5]);
- let mut model = RTMO::new(options)?;
+ let mut model = RTMO::new(Options::rtmo_s().commit()?)?;
// load image
- let x = [DataLoader::try_read("images/bus.jpg")?];
+ let xs = [DataLoader::try_read("images/bus.jpg")?];
// run
- let y = model.run(&x)?;
+ let ys = model.forward(&xs)?;
// annotate
let annotator = Annotator::default()
- .with_saveout("RTMO")
+ .with_saveout(model.spec())
.with_skeletons(&COCO_SKELETONS_16);
- annotator.annotate(&x, &y);
+ annotator.annotate(&xs, &ys);
Ok(())
}
diff --git a/examples/sam/README.md b/examples/sam/README.md
index 92af792..34db1e3 100644
--- a/examples/sam/README.md
+++ b/examples/sam/README.md
@@ -3,19 +3,18 @@
```Shell
# SAM
-cargo run -r --example sam
+cargo run -r -F cuda --example sam -- --device cuda --kind sam
# MobileSAM
-cargo run -r --example sam -- --kind mobile-sam
+cargo run -r -F cuda --example sam -- --device cuda --kind mobile-sam
# EdgeSAM
-cargo run -r --example sam -- --kind edge-sam
+cargo run -r -F cuda --example sam -- --device cuda --kind edge-sam
# SAM-HQ
-cargo run -r --example sam -- --kind sam-hq
+cargo run -r -F cuda --example sam -- --device cuda --kind sam-hq
```
-
## Results
![](https://github.com/jamjamjon/assets/releases/download/sam/demo-car.png)
diff --git a/examples/sam/main.rs b/examples/sam/main.rs
index 72ed218..ca009c7 100644
--- a/examples/sam/main.rs
+++ b/examples/sam/main.rs
@@ -1,97 +1,73 @@
-use clap::Parser;
-
+use anyhow::Result;
use usls::{
models::{SamKind, SamPrompt, SAM},
- Annotator, DataLoader, Options,
+ Annotator, DataLoader, Options, Scale,
};
-#[derive(Parser)]
-#[command(author, version, about, long_about = None)]
-pub struct Args {
- #[arg(long, value_enum, default_value_t = SamKind::Sam)]
- pub kind: SamKind,
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
- #[arg(long, default_value_t = 0)]
- pub device_id: usize,
+ /// scale
+ #[argh(option, default = "String::from(\"t\")")]
+ scale: String,
- #[arg(long)]
- pub use_low_res_mask: bool,
+ /// SAM kind
+ #[argh(option, default = "String::from(\"sam\")")]
+ kind: String,
}
-fn main() -> Result<(), Box<dyn std::error::Error>> {
- let args = Args::parse();
-
- // Options
- let (options_encoder, options_decoder, saveout) = match args.kind {
- SamKind::Sam => {
- let options_encoder = Options::default()
- // .with_model("sam/sam-vit-b-encoder.onnx")?;
- .with_model("sam/sam-vit-b-encoder-u8.onnx")?;
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
- let options_decoder = Options::default()
- .with_sam_kind(SamKind::Sam)
- // .with_model("sam/sam-vit-b-decoder.onnx")?;
- // .with_model("sam/sam-vit-b-decoder-singlemask.onnx")?;
- .with_model("sam/sam-vit-b-decoder-u8.onnx")?;
- (options_encoder, options_decoder, "SAM")
- }
- SamKind::Sam2 => {
- let options_encoder = Options::default()
- // .with_model("sam/sam2-hiera-tiny-encoder.onnx")?;
- // .with_model("sam/sam2-hiera-small-encoder.onnx")?;
- .with_model("sam/sam2-hiera-base-plus-encoder.onnx")?;
- let options_decoder = Options::default()
- .with_sam_kind(SamKind::Sam2)
- // .with_model("sam/sam2-hiera-tiny-decoder.onnx")?;
- // .with_model("sam/sam2-hiera-small-decoder.onnx")?;
- .with_model("sam/sam2-hiera-base-plus-decoder.onnx")?;
- (options_encoder, options_decoder, "SAM2")
- }
- SamKind::MobileSam => {
- let options_encoder =
- Options::default().with_model("sam/mobile-sam-vit-t-encoder.onnx")?;
-
- let options_decoder = Options::default()
- .with_sam_kind(SamKind::MobileSam)
- .with_model("sam/mobile-sam-vit-t-decoder.onnx")?;
- (options_encoder, options_decoder, "Mobile-SAM")
- }
- SamKind::SamHq => {
- let options_encoder = Options::default().with_model("sam/sam-hq-vit-t-encoder.onnx")?;
+ let args: Args = argh::from_env();
+ // Build model
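+ // note: `--scale` is only consulted for the SAM2 kind; the other kinds use a single preset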
+ let (options_encoder, options_decoder) = match args.kind.as_str().try_into()? {
+ SamKind::Sam => (
+ Options::sam_v1_base_encoder(),
+ Options::sam_v1_base_decoder(),
+ ),
+ SamKind::Sam2 => match args.scale.as_str().try_into()? {
+ Scale::T => (Options::sam2_tiny_encoder(), Options::sam2_tiny_decoder()),
+ Scale::S => (Options::sam2_small_encoder(), Options::sam2_small_decoder()),
+ Scale::B => (
+ Options::sam2_base_plus_encoder(),
+ Options::sam2_base_plus_decoder(),
+ ),
+ _ => unimplemented!("Unsupported model scale: {:?}. Try b, s, t.", args.scale),
+ },
- let options_decoder = Options::default()
- .with_sam_kind(SamKind::SamHq)
- .with_model("sam/sam-hq-vit-t-decoder.onnx")?;
- (options_encoder, options_decoder, "SAM-HQ")
- }
- SamKind::EdgeSam => {
- let options_encoder = Options::default().with_model("sam/edge-sam-3x-encoder.onnx")?;
- let options_decoder = Options::default()
- .with_sam_kind(SamKind::EdgeSam)
- .with_model("sam/edge-sam-3x-decoder.onnx")?;
- (options_encoder, options_decoder, "Edge-SAM")
- }
+ SamKind::MobileSam => (
+ Options::mobile_sam_tiny_encoder(),
+ Options::mobile_sam_tiny_decoder(),
+ ),
+ SamKind::SamHq => (
+ Options::sam_hq_tiny_encoder(),
+ Options::sam_hq_tiny_decoder(),
+ ),
+ SamKind::EdgeSam => (
+ Options::edge_sam_3x_encoder(),
+ Options::edge_sam_3x_decoder(),
+ ),
};
- let options_encoder = options_encoder
- .with_cuda(args.device_id)
- .with_ixx(0, 2, (800, 1024, 1024).into())
- .with_ixx(0, 3, (800, 1024, 1024).into());
- let options_decoder = options_decoder
- .with_cuda(args.device_id)
- .use_low_res_mask(args.use_low_res_mask)
- .with_find_contours(true);
- // Build model
+ let options_encoder = options_encoder
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let options_decoder = options_decoder.commit()?;
let mut model = SAM::new(options_encoder, options_decoder)?;
// Load image
- let xs = [
- DataLoader::try_read("images/truck.jpg")?,
- // DataLoader::try_read("images/dog.jpg")?,
- ];
+ let xs = [DataLoader::try_read("images/truck.jpg")?];
// Build annotator
- let annotator = Annotator::default().with_saveout(saveout);
+ let annotator = Annotator::default().with_saveout(model.spec());
// Prompt
let prompts = vec![
@@ -102,7 +78,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
];
// Run & Annotate
- let ys = model.run(&xs, &prompts)?;
+ let ys = model.forward(&xs, &prompts)?;
annotator.annotate(&xs, &ys);
Ok(())
diff --git a/examples/sapiens/README.md b/examples/sapiens/README.md
index 6bf5cfe..7699915 100644
--- a/examples/sapiens/README.md
+++ b/examples/sapiens/README.md
@@ -1,10 +1,9 @@
## Quick Start
```shell
-cargo run -r --example sapiens
+cargo run -r -F cuda --example sapiens -- --device cuda
```
-
## Results
![](https://github.com/jamjamjon/assets/releases/download/sapiens/demo.png)
diff --git a/examples/sapiens/main.rs b/examples/sapiens/main.rs
index 111d90f..08d3167 100644
--- a/examples/sapiens/main.rs
+++ b/examples/sapiens/main.rs
@@ -1,27 +1,38 @@
-use usls::{
- models::{Sapiens, SapiensTask},
- Annotator, DataLoader, Options, BODY_PARTS_28,
-};
+use anyhow::Result;
+use usls::{models::Sapiens, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
// build
- let options = Options::default()
- .with_model("sapiens/seg-0.3b-dyn.onnx")?
- .with_sapiens_task(SapiensTask::Seg)
- .with_names(&BODY_PARTS_28);
+ let options = Options::sapiens_seg_0_3b()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
let mut model = Sapiens::new(options)?;
// load
let x = [DataLoader::try_read("images/paul-george.jpg")?];
// run
- let y = model.run(&x)?;
+ let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.without_masks(true)
- .with_polygons_name(false)
- .with_saveout("Sapiens");
+ .with_polygons_name(true)
+ .with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
diff --git a/examples/slanet/README.md b/examples/slanet/README.md
new file mode 100644
index 0000000..9ee499a
--- /dev/null
+++ b/examples/slanet/README.md
@@ -0,0 +1,9 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example slanet -- --device cuda
+```
+
+## Results
+
+![](https://github.com/jamjamjon/assets/releases/download/slanet/demo.png)
diff --git a/examples/slanet/main.rs b/examples/slanet/main.rs
new file mode 100644
index 0000000..9707c53
--- /dev/null
+++ b/examples/slanet/main.rs
@@ -0,0 +1,48 @@
+use anyhow::Result;
+use usls::{models::SLANet, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// source
+ #[argh(option, default = "String::from(\"images/table.png\")")]
+ source: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let options = Options::slanet_lcnet_v2_mobile_ch()
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = SLANet::new(options)?;
+
+ // load
+ let xs = DataLoader::try_read_batch(&[args.source])?;
+
+ // run
+ let ys = model.forward(&xs)?;
+ println!("{:?}", ys);
+
+ // annotate
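+ // the skeleton 0-1-2-3-0 simply closes each group of four predicted keypoints into a quadrilateral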
+ let annotator = Annotator::default()
+ .with_keypoints_radius(2)
+ .with_skeletons(&[(0, 1), (1, 2), (2, 3), (3, 0)])
+ .with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ // summary
+ model.summary();
+
+ Ok(())
+}
diff --git a/examples/svtr/README.md b/examples/svtr/README.md
index cc192bc..82c10c5 100644
--- a/examples/svtr/README.md
+++ b/examples/svtr/README.md
@@ -1,29 +1,21 @@
## Quick Start
```shell
-cargo run -r --example svtr
+cargo run -r -F cuda --example svtr -- --device cuda
```
-### Speed test
-
-| Model | Width | TensorRT f16 batch=1 (ms) | TensorRT f32 batch=1 (ms) | CUDA f32 batch=1 (ms) |
-| --------------------------- | :---: | :--------------------------------------: | :--------------------------------------: | :----------------------------------: |
-| ppocr-v4-server-svtr-ch-dyn | 1500 | 4.2116 | 13.0013 | 20.8673 |
-| ppocr-v4-svtr-ch-dyn | 1500 | 2.0435 | 3.1959 | 10.1750 |
-| ppocr-v3-svtr-ch-dyn | 1500 | 1.8596 | 2.9401 | 6.8210 |
-
-***Test on RTX3060***
-
## Results
```shell
-["./examples/svtr/images/5.png"]: Some(["are closely jointed. Some examples are illustrated in Fig.7."])
-["./examples/svtr/images/6.png"]: Some(["小菊儿胡同71号"])
-["./examples/svtr/images/4.png"]: Some(["我在南锣鼓捣猫呢"])
-["./examples/svtr/images/1.png"]: Some(["你有这么高速运转的机械进入中国,记住我给出的原理"])
-["./examples/svtr/images/2.png"]: Some(["冀B6G000"])
-["./examples/svtr/images/9.png"]: Some(["from the background, but also separate text instances which"])
-["./examples/svtr/images/8.png"]: Some(["110022345"])
-["./examples/svtr/images/3.png"]: Some(["粤A·68688"])
-["./examples/svtr/images/7.png"]: Some(["Please lower your volume"])
+["./examples/svtr/images/license-ch-2.png"]: Ys([Y { Texts: [Text("粤A·68688")] }])
+["./examples/svtr/images/license-ch.png"]: Ys([Y { Texts: [Text("冀B6G000")] }])
+["./examples/svtr/images/sign-ch-2.png"]: Ys([Y { Texts: [Text("我在南锣鼓捣猫呢")] }])
+["./examples/svtr/images/sign-ch.png"]: Ys([Y { Texts: [Text("小菊儿胡同71号")] }])
+["./examples/svtr/images/text-110022345.png"]: Ys([Y { Texts: [Text("110022345")] }])
+["./examples/svtr/images/text-ch.png"]: Ys([Y { Texts: [Text("你有这么高速运转的机械进入中国,记住我给出的原理")] }])
+["./examples/svtr/images/text-en-2.png"]: Ys([Y { Texts: [Text("from the background, but also separate text instances which")] }])
+["./examples/svtr/images/text-en-dark.png"]: Ys([Y { Texts: [Text("Please lower your volume")] }])
+["./examples/svtr/images/text-en.png"]: Ys([Y { Texts: [Text("are closely jointed. Some examples are illustrated in Fig.7.")] }])
+["./examples/svtr/images/text-hello-rust-handwritten.png"]: Ys([Y { Texts: [Text("HeloRuSt")] }])
+
```
\ No newline at end of file
diff --git a/examples/svtr/images/3.png b/examples/svtr/images/license-ch-2.png
similarity index 100%
rename from examples/svtr/images/3.png
rename to examples/svtr/images/license-ch-2.png
diff --git a/examples/svtr/images/2.png b/examples/svtr/images/license-ch.png
similarity index 100%
rename from examples/svtr/images/2.png
rename to examples/svtr/images/license-ch.png
diff --git a/examples/svtr/images/4.png b/examples/svtr/images/sign-ch-2.png
similarity index 100%
rename from examples/svtr/images/4.png
rename to examples/svtr/images/sign-ch-2.png
diff --git a/examples/svtr/images/6.png b/examples/svtr/images/sign-ch.png
similarity index 100%
rename from examples/svtr/images/6.png
rename to examples/svtr/images/sign-ch.png
diff --git a/examples/svtr/images/8.png b/examples/svtr/images/text-110022345.png
similarity index 100%
rename from examples/svtr/images/8.png
rename to examples/svtr/images/text-110022345.png
diff --git a/examples/svtr/images/1.png b/examples/svtr/images/text-ch.png
similarity index 100%
rename from examples/svtr/images/1.png
rename to examples/svtr/images/text-ch.png
diff --git a/examples/svtr/images/9.png b/examples/svtr/images/text-en-2.png
similarity index 100%
rename from examples/svtr/images/9.png
rename to examples/svtr/images/text-en-2.png
diff --git a/examples/svtr/images/7.png b/examples/svtr/images/text-en-dark.png
similarity index 100%
rename from examples/svtr/images/7.png
rename to examples/svtr/images/text-en-dark.png
diff --git a/examples/svtr/images/5.png b/examples/svtr/images/text-en.png
similarity index 100%
rename from examples/svtr/images/5.png
rename to examples/svtr/images/text-en.png
diff --git a/examples/svtr/images/text-hello-rust-handwritten.png b/examples/svtr/images/text-hello-rust-handwritten.png
new file mode 100644
index 0000000..750c634
Binary files /dev/null and b/examples/svtr/images/text-hello-rust-handwritten.png differ
diff --git a/examples/svtr/main.rs b/examples/svtr/main.rs
index 43562c1..18704f8 100644
--- a/examples/svtr/main.rs
+++ b/examples/svtr/main.rs
@@ -1,24 +1,44 @@
+use anyhow::Result;
use usls::{models::SVTR, DataLoader, Options};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
// build model
- let options = Options::default()
- .with_ixx(0, 0, (1, 2, 8).into())
- .with_ixx(0, 2, (320, 960, 1600).into())
- .with_ixx(0, 3, (320, 960, 1600).into())
- .with_confs(&[0.2])
- .with_vocab("svtr/ppocr_rec_vocab.txt")?
- .with_model("svtr/ppocr-v4-svtr-ch-dyn.onnx")?;
+ let options = Options::ppocr_rec_v4_ch()
+ // svtr_v2_teacher_ch()
+ // .with_batch_size(2)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
let mut model = SVTR::new(options)?;
// load images
- let dl = DataLoader::new("./examples/svtr/images")?.build()?;
+ let dl = DataLoader::new("./examples/svtr/images")?
+ .with_batch(model.batch() as _)
+ .with_progress_bar(false)
+ .build()?;
// run
for (xs, paths) in dl {
- let ys = model.run(&xs)?;
- println!("{paths:?}: {:?}", ys[0].texts())
+ let ys = model.forward(&xs)?;
+ println!("{paths:?}: {:?}", ys)
}
+ // summary
+ model.summary();
+
Ok(())
}
diff --git a/examples/trocr/README.md b/examples/trocr/README.md
new file mode 100644
index 0000000..dba262c
--- /dev/null
+++ b/examples/trocr/README.md
@@ -0,0 +1,13 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example trocr -- --device cuda --dtype fp16 --scale s --kind printed
+
+cargo run -r -F cuda --example trocr -- --device cuda --dtype fp16 --scale s --kind hand-written
+
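+# the base-scale variants are wired up in main.rs as well
+cargo run -r -F cuda --example trocr -- --device cuda --dtype fp16 --scale b --kind printed
+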
+```
+
+
+```shell
+Ys([Y { Texts: [Text("PLEASE LOWER YOUR VOLUME")] }, Y { Texts: [Text("HELLO RUST")] }])
+```
\ No newline at end of file
diff --git a/examples/trocr/main.rs b/examples/trocr/main.rs
new file mode 100644
index 0000000..3b7d8ea
--- /dev/null
+++ b/examples/trocr/main.rs
@@ -0,0 +1,96 @@
+use usls::{
+ models::{TrOCR, TrOCRKind},
+ DataLoader, Options, Scale,
+};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+
+ /// scale
+ #[argh(option, default = "String::from(\"s\")")]
+ scale: String,
+
+ /// kind
+ #[argh(option, default = "String::from(\"printed\")")]
+ kind: String,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // load images
+ let xs = DataLoader::try_read_batch(&[
+ "images/text-en-dark.png",
+ "images/text-hello-rust-handwritten.png",
+ ])?;
+
+ // build model
+ let (options_encoder, options_decoder, options_decoder_merged) =
+ match args.scale.as_str().try_into()? {
+ Scale::S => match args.kind.as_str().try_into()? {
+ TrOCRKind::Printed => (
+ Options::trocr_encoder_small_printed(),
+ Options::trocr_decoder_small_printed(),
+ Options::trocr_decoder_merged_small_printed(),
+ ),
+ TrOCRKind::HandWritten => (
+ Options::trocr_encoder_small_handwritten(),
+ Options::trocr_decoder_small_handwritten(),
+ Options::trocr_decoder_merged_small_handwritten(),
+ ),
+ },
+ Scale::B => match args.kind.as_str().try_into()? {
+ TrOCRKind::Printed => (
+ Options::trocr_encoder_base_printed(),
+ Options::trocr_decoder_base_printed(),
+ Options::trocr_decoder_merged_base_printed(),
+ ),
+ TrOCRKind::HandWritten => (
+ Options::trocr_encoder_base_handwritten(),
+ Options::trocr_decoder_base_handwritten(),
+ Options::trocr_decoder_merged_base_handwritten(),
+ ),
+ },
+ x => anyhow::bail!("Unsupported TrOCR scale: {:?}", x),
+ };
+
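+ // encoder, decoder and merged decoder share the same device, dtype and batch size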
+ let mut model = TrOCR::new(
+ options_encoder
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ options_decoder
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ options_decoder_merged
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_batch_size(xs.len())
+ .commit()?,
+ )?;
+
+ // inference
+ let ys = model.forward(&xs)?;
+ println!("{:?}", ys);
+
+ // summary
+ model.summary();
+
+ Ok(())
+}
diff --git a/examples/viewer/README.md b/examples/viewer/README.md
new file mode 100644
index 0000000..0cfe0e0
--- /dev/null
+++ b/examples/viewer/README.md
@@ -0,0 +1,5 @@
+## Quick Start
+
+```shell
+RUST_LOG=usls=info cargo run -F ffmpeg -r --example viewer
+```
diff --git a/examples/viewer/main.rs b/examples/viewer/main.rs
new file mode 100644
index 0000000..8279204
--- /dev/null
+++ b/examples/viewer/main.rs
@@ -0,0 +1,43 @@
+use usls::{DataLoader, Key, Viewer};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// source
+ #[argh(
+ option,
+ default = "String::from(\"http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4\")"
+ )]
+ source: String,
+}
+
+fn main() -> anyhow::Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
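+ // batch size 1: decode and display one frame at a time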
+ let dl = DataLoader::new(&args.source)?.with_batch(1).build()?;
+
+ let mut viewer = Viewer::new().with_delay(5).with_scale(1.).resizable(true);
+
+ // run & annotate
+ for (xs, _paths) in dl {
+ // show image
+ viewer.imshow(&xs)?;
+
+ // check out window and key event
+ if !viewer.is_open() || viewer.is_key_pressed(Key::Escape) {
+ break;
+ }
+
+ // write video
+ viewer.write_batch(&xs)?
+ }
+
+ // finish video write
+ viewer.finish_write()?;
+
+ Ok(())
+}
diff --git a/examples/yolo-sam/README.md b/examples/yolo-sam/README.md
index 1dfab0c..84dfb0f 100644
--- a/examples/yolo-sam/README.md
+++ b/examples/yolo-sam/README.md
@@ -1,7 +1,7 @@
## Quick Start
```shell
-cargo run -r --example yolo-sam
+cargo run -r -F cuda --example yolo-sam -- --device cuda
```
## Results
diff --git a/examples/yolo-sam/main.rs b/examples/yolo-sam/main.rs
index 3b51ace..b66fb63 100644
--- a/examples/yolo-sam/main.rs
+++ b/examples/yolo-sam/main.rs
@@ -1,31 +1,42 @@
+use anyhow::Result;
use usls::{
- models::{SamKind, SamPrompt, YOLOTask, YOLOVersion, SAM, YOLO},
- Annotator, DataLoader, Options, Vision,
+ models::{SamPrompt, SAM, YOLO},
+ Annotator, DataLoader, Options, Scale,
};
-fn main() -> Result<(), Box<dyn std::error::Error>> {
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
// build SAM
- let options_encoder = Options::default().with_model("sam/mobile-sam-vit-t-encoder.onnx")?;
- let options_decoder = Options::default()
- .with_find_contours(true)
- .with_sam_kind(SamKind::Sam)
- .with_model("sam/mobile-sam-vit-t-decoder.onnx")?;
+ let (options_encoder, options_decoder) = (
+ Options::mobile_sam_tiny_encoder().commit()?,
+ Options::mobile_sam_tiny_decoder().commit()?,
+ );
let mut sam = SAM::new(options_encoder, options_decoder)?;
- // build YOLOv8-Det
- let options_yolo = Options::default()
- .with_yolo_version(YOLOVersion::V8)
- .with_yolo_task(YOLOTask::Detect)
- .with_model("yolo/v8-m-dyn.onnx")?
- .with_cuda(0)
- .with_ixx(0, 2, (416, 640, 800).into())
- .with_ixx(0, 3, (416, 640, 800).into())
- .with_find_contours(false)
- .with_confs(&[0.45]);
+ // build YOLOv8
+ let options_yolo = Options::yolo_detect()
+ .with_model_scale(Scale::N)
+ .with_model_version(8.0.into())
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
let mut yolo = YOLO::new(options_yolo)?;
// load one image
- let xs = [DataLoader::try_read("images/dog.jpg")?];
+ let xs = DataLoader::try_read_batch(&["images/dog.jpg"])?;
// build annotator
let annotator = Annotator::default()
@@ -36,11 +47,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_saveout("YOLO-SAM");
// run & annotate
- let ys_det = yolo.run(&xs)?;
- for y_det in ys_det {
+ let ys_det = yolo.forward(&xs)?;
+ for y_det in ys_det.iter() {
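+ // each detected bounding box is handed to SAM as a box prompt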
if let Some(bboxes) = y_det.bboxes() {
for bbox in bboxes {
- let ys_sam = sam.run(
+ let ys_sam = sam.forward(
&xs,
&[SamPrompt::default().with_bbox(
bbox.xmin(),
diff --git a/examples/yolo/README.md b/examples/yolo/README.md
index d443f43..5151aa6 100644
--- a/examples/yolo/README.md
+++ b/examples/yolo/README.md
@@ -1,175 +1,65 @@
YOLO-Series
+| Detection | Instance Segmentation | Pose |
+| :----------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------: |
+| | | |
+
-| Detection | Instance Segmentation | Pose |
-| :---------------: | :------------------------: |:---------------: |
-| | | |
-
-| Classification | Obb |
-| :------------------------: |:------------------------: |
-| |
-
-| Head Detection | Fall Detection | Trash Detection |
-| :------------------------: |:------------------------: |:------------------------: |
-| ||
-
-| YOLO-World | Face Parsing | FastSAM |
-| :------------------------: |:------------------------: |:------------------------: |
-| ||
-
-
+| Classification | Obb |
+| :----------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
+| `` | `` |
+| Head Detection | Fall Detection | Trash Detection |
+| :-----------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
+| `` | `` | `` |
+| YOLO-World | Face Parsing | FastSAM |
+| :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: |
+| `` | `` | `` |
## Quick Start
+
```Shell
-# customized
-cargo run -r --example yolo -- --task detect --ver v8 --nc 6 --model xxx.onnx # YOLOv8
+# Your customized YOLOv8 model
+cargo run -r --example yolo -- --task detect --ver 8 --num-classes 6 --model xxx.onnx # YOLOv8
# Classify
-cargo run -r --example yolo -- --task classify --ver v5 --scale s --width 224 --height 224 --nc 1000 # YOLOv5
-cargo run -r --example yolo -- --task classify --ver v8 --scale n --width 224 --height 224 --nc 1000 # YOLOv8
-cargo run -r --example yolo -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLOv11
+cargo run -r --example yolo -- --task classify --ver 5 --scale s --image-width 224 --image-height 224 --num-classes 1000 --use-imagenet-1k-classes # YOLOv5
+cargo run -r --example yolo -- --task classify --ver 8 --scale n --image-width 224 --image-height 224 # YOLOv8
+cargo run -r --example yolo -- --task classify --ver 11 --scale n --image-width 224 --image-height 224 # YOLOv11
# Detect
-cargo run -r --example yolo -- --task detect --ver v5 --scale n # YOLOv5
-cargo run -r --example yolo -- --task detect --ver v6 --scale n # YOLOv6
-cargo run -r --example yolo -- --task detect --ver v7 --scale t # YOLOv7
-cargo run -r --example yolo -- --task detect --ver v8 --scale n # YOLOv8
-cargo run -r --example yolo -- --task detect --ver v9 --scale t # YOLOv9
-cargo run -r --example yolo -- --task detect --ver v10 --scale n # YOLOv10
-cargo run -r --example yolo -- --task detect --ver v11 --scale n # YOLOv11
-cargo run -r --example yolo -- --task detect --ver rtdetr --scale l # RTDETR
-cargo run -r --example yolo -- --task detect --ver v8 --model yolo/v8-s-world-v2-shoes.onnx # YOLOv8-world
+cargo run -r --example yolo -- --task detect --ver 5 --scale n --use-coco-80-classes # YOLOv5
+cargo run -r --example yolo -- --task detect --ver 6 --scale n --use-coco-80-classes # YOLOv6
+cargo run -r --example yolo -- --task detect --ver 7 --scale t --use-coco-80-classes # YOLOv7
+cargo run -r --example yolo -- --task detect --ver 8 --scale n --use-coco-80-classes # YOLOv8
+cargo run -r --example yolo -- --task detect --ver 9 --scale t --use-coco-80-classes # YOLOv9
+cargo run -r --example yolo -- --task detect --ver 10 --scale n --use-coco-80-classes # YOLOv10
+cargo run -r --example yolo -- --task detect --ver 11 --scale n --use-coco-80-classes # YOLOv11
+cargo run -r --example yolo -- --task detect --ver 8 --model v8-s-world-v2-shoes.onnx # YOLOv8-world
# Pose
-cargo run -r --example yolo -- --task pose --ver v8 --scale n # YOLOv8-Pose
-cargo run -r --example yolo -- --task pose --ver v11 --scale n # YOLOv11-Pose
+cargo run -r --example yolo -- --task pose --ver 8 --scale n # YOLOv8-Pose
+cargo run -r --example yolo -- --task pose --ver 11 --scale n # YOLOv11-Pose
# Segment
-cargo run -r --example yolo -- --task segment --ver v5 --scale n # YOLOv5-Segment
-cargo run -r --example yolo -- --task segment --ver v8 --scale n # YOLOv8-Segment
-cargo run -r --example yolo -- --task segment --ver v11 --scale n # YOLOv8-Segment
-cargo run -r --example yolo -- --task segment --ver v8 --model yolo/FastSAM-s-dyn-f16.onnx # FastSAM
+cargo run -r --example yolo -- --task segment --ver 5 --scale n # YOLOv5-Segment
+cargo run -r --example yolo -- --task segment --ver 8 --scale n # YOLOv8-Segment
+cargo run -r --example yolo -- --task segment --ver 11 --scale n # YOLOv11-Segment
# Obb
-cargo run -r --example yolo -- --ver v8 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv8-Obb
-cargo run -r --example yolo -- --ver v11 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv11-Obb
+cargo run -r --example yolo -- --ver 8 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv8-Obb
+cargo run -r --example yolo -- --ver 11 --task obb --scale n --image-width 1024 --image-height 1024 --source images/dota.png # YOLOv11-Obb
```
**`cargo run -r --example yolo -- --help` for more options**
-
-## YOLOs configs with `Options`
-
-
-Use official YOLO Models
-
-```Rust
-let options = Options::default()
- .with_yolo_version(YOLOVersion::V5) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
- .with_yolo_task(YOLOTask::Classify) // YOLOTask: Classify, Detect, Pose, Segment, Obb
- .with_model("xxxx.onnx")?;
-
-```
-
-
-
-Cutomized your own YOLO model
-
-```Rust
-// This config is for YOLOv8-Segment
-use usls::{AnchorsPosition, BoxType, ClssType, YOLOPreds};
-
-let options = Options::default()
- .with_yolo_preds(
- YOLOPreds {
- bbox: Some(BoxType::Cxcywh),
- clss: ClssType::Clss,
- coefs: Some(true),
- anchors: Some(AnchorsPosition::After),
- ..Default::default()
- }
- )
- // .with_nc(80)
- // .with_names(&COCO_CLASS_NAMES_80)
- .with_model("xxxx.onnx")?;
-```
-
-
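+
+## Configure YOLO in code
+
+A minimal sketch of the builder-style `Options` API, mirroring the calls used by the examples in this repository (`Options::yolo_detect()`, `with_model_version`, `with_model_scale`, `with_model_device`, `commit`); treat it as a starting point rather than the full option surface:
+
+```Rust
+use anyhow::Result;
+use usls::{models::YOLO, DataLoader, Options, Scale};
+
+fn main() -> Result<()> {
+    // A YOLOv8-n detector on CPU; `commit()` finalizes the options.
+    let options = Options::yolo_detect()
+        .with_model_version(8.0.into())
+        .with_model_scale(Scale::N)
+        .with_model_device("cpu:0".try_into()?)
+        .commit()?;
+    let mut model = YOLO::new(options)?;
+
+    // Read one image, run the model, and print the detections.
+    let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?;
+    let ys = model.forward(&xs)?;
+    println!("{:?}", ys);
+
+    Ok(())
+}
+```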
## Other YOLOv8 Solution Models
-| Model | Weights | Datasets|
-|:---------------------: | :--------------------------: | :-------------------------------: |
-| Face-Landmark Detection | [yolov8-face-dyn-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-n-face-dyn-f16.onnx) | |
-| Head Detection | [yolov8-head-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-head-f16.onnx) | |
-| Fall Detection | [yolov8-falldown-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-falldown-f16.onnx) | |
-| Trash Detection | [yolov8-plastic-bag-f16](https://github.com/jamjamjon/assets/releases/download/yolo/v8-plastic-bag-f16.onnx) | |
-| FaceParsing | [yolov8-face-parsing-dyn](https://github.com/jamjamjon/assets/releases/download/yolo/v8-face-parsing-dyn.onnx) | [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ/tree/master/face_parsing) [[Processed YOLO labels]](https://github.com/jamjamjon/assets/releases/download/yolo/CelebAMask-HQ-YOLO-Labels.zip) [[Python Script]](../../scripts/CelebAMask-HQ-To-YOLO-Labels.py) |
-
-
-
-
-## Export ONNX Models
-
-
-
-YOLOv5
-
-[Here](https://docs.ultralytics.com/yolov5/tutorials/model_export/)
-
-
-
-
-
-YOLOv6
-
-[Here](https://github.com/meituan/YOLOv6/tree/main/deploy/ONNX)
-
-
-
-
-
-YOLOv7
-
-[Here](https://github.com/WongKinYiu/yolov7?tab=readme-ov-file#export)
-
-
-
-
-YOLOv8, YOLOv11
-
-```Shell
-pip install -U ultralytics
-
-# export onnx model with dynamic shapes
-yolo export model=yolov8m.pt format=onnx simplify dynamic
-yolo export model=yolov8m-cls.pt format=onnx simplify dynamic
-yolo export model=yolov8m-pose.pt format=onnx simplify dynamic
-yolo export model=yolov8m-seg.pt format=onnx simplify dynamic
-yolo export model=yolov8m-obb.pt format=onnx simplify dynamic
-
-# export onnx model with fixed shapes
-yolo export model=yolov8m.pt format=onnx simplify
-yolo export model=yolov8m-cls.pt format=onnx simplify
-yolo export model=yolov8m-pose.pt format=onnx simplify
-yolo export model=yolov8m-seg.pt format=onnx simplify
-yolo export model=yolov8m-obb.pt format=onnx simplify
-```
-
-
-
-
-YOLOv9
-
-[Here](https://github.com/WongKinYiu/yolov9/blob/main/export.py)
-
-
-
-
-YOLOv10
-
-[Here](https://github.com/THU-MIG/yolov10#export)
-
-
+| Model | Weights |
+| :---------------------: | :------------------------------------------------------: |
+| Face-Landmark Detection | [yolov8-n-face](https://github.com/jamjamjon/assets/releases/download/yolo/v8-n-face-fp16.onnx) |
+| Head Detection | [yolov8-head](https://github.com/jamjamjon/assets/releases/download/yolo/v8-head-fp16.onnx) |
+| Fall Detection | [yolov8-falldown](https://github.com/jamjamjon/assets/releases/download/yolo/v8-falldown-fp16.onnx) |
+| Trash Detection | [yolov8-plastic-bag](https://github.com/jamjamjon/assets/releases/download/yolo/v8-plastic-bag-fp16.onnx) |
+| FaceParsing | [yolov8-face-parsing-seg](https://github.com/jamjamjon/assets/releases/download/yolo/v8-face-parsing.onnx) |
diff --git a/examples/yolo/main.rs b/examples/yolo/main.rs
index 96c51c0..71ec5fb 100644
--- a/examples/yolo/main.rs
+++ b/examples/yolo/main.rs
@@ -1,171 +1,213 @@
use anyhow::Result;
-use clap::Parser;
-
use usls::{
- models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOScale, YOLOTask,
- YOLOVersion, COCO_SKELETONS_16,
+ models::YOLO, Annotator, DataLoader, Options, COCO_CLASS_NAMES_80, COCO_SKELETONS_16,
+ IMAGENET_NAMES_1K,
};
-#[derive(Parser, Clone)]
-#[command(author, version, about, long_about = None)]
-pub struct Args {
- /// Path to the model
- #[arg(long)]
- pub model: Option,
+#[derive(argh::FromArgs, Debug)]
+/// Example
+struct Args {
+ /// model file
+ #[argh(option)]
+ model: Option<String>,
+
+ /// source
+ #[argh(option, default = "String::from(\"./assets/bus.jpg\")")]
+ source: String,
+
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
- /// Input source path
- #[arg(long, default_value_t = String::from("./assets/bus.jpg"))]
- pub source: String,
+ /// task
+ #[argh(option, default = "String::from(\"det\")")]
+ task: String,
- /// YOLO Task
- #[arg(long, value_enum, default_value_t = YOLOTask::Detect)]
- pub task: YOLOTask,
+ /// version
+ #[argh(option, default = "8.0")]
+ ver: f32,
- /// YOLO Version
- #[arg(long, value_enum, default_value_t = YOLOVersion::V8)]
- pub ver: YOLOVersion,
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
- /// YOLO Scale
- #[arg(long, value_enum, default_value_t = YOLOScale::N)]
- pub scale: YOLOScale,
+ /// scale
+ #[argh(option, default = "String::from(\"n\")")]
+ scale: String,
- /// Batch size
- #[arg(long, default_value_t = 1)]
- pub batch_size: usize,
+ /// trt_fp16
+ #[argh(option, default = "true")]
+ trt_fp16: bool,
- /// Minimum input width
- #[arg(long, default_value_t = 224)]
- pub width_min: isize,
+ /// find_contours
+ #[argh(option, default = "true")]
+ find_contours: bool,
- /// Input width
- #[arg(long, default_value_t = 640)]
- pub width: isize,
+ /// batch_size
+ #[argh(option, default = "1")]
+ batch_size: usize,
- /// Maximum input width
- #[arg(long, default_value_t = 1024)]
- pub width_max: isize,
+ /// min_batch_size
+ #[argh(option, default = "1")]
+ min_batch_size: usize,
- /// Minimum input height
- #[arg(long, default_value_t = 224)]
- pub height_min: isize,
+ /// max_batch_size
+ #[argh(option, default = "4")]
+ max_batch_size: usize,
- /// Input height
- #[arg(long, default_value_t = 640)]
- pub height: isize,
+ /// min_image_width
+ #[argh(option, default = "224")]
+ min_image_width: isize,
- /// Maximum input height
- #[arg(long, default_value_t = 1024)]
- pub height_max: isize,
+ /// image_width
+ #[argh(option, default = "640")]
+ image_width: isize,
- /// Number of classes
- #[arg(long, default_value_t = 80)]
- pub nc: usize,
+ /// max_image_width
+ #[argh(option, default = "1280")]
+ max_image_width: isize,
- /// Class confidence
- #[arg(long)]
- pub confs: Vec,
+ /// min_image_height
+ #[argh(option, default = "224")]
+ min_image_height: isize,
- /// Enable TensorRT support
- #[arg(long)]
- pub trt: bool,
+ /// image_height
+ #[argh(option, default = "640")]
+ image_height: isize,
- /// Enable CUDA support
- #[arg(long)]
- pub cuda: bool,
+ /// max_image_height
+ #[argh(option, default = "1280")]
+ max_image_height: isize,
- /// Enable CoreML support
- #[arg(long)]
- pub coreml: bool,
+ /// num_classes
+ #[argh(option)]
+ num_classes: Option<usize>,
- /// Use TensorRT half precision
- #[arg(long)]
- pub half: bool,
+ /// num_keypoints
+ #[argh(option)]
+ num_keypoints: Option<usize>,
- /// Device ID to use
- #[arg(long, default_value_t = 0)]
- pub device_id: usize,
+ /// use_coco_80_classes
+ #[argh(switch)]
+ use_coco_80_classes: bool,
- /// Enable performance profiling
- #[arg(long)]
- pub profile: bool,
+ /// use_imagenet_1k_classes
+ #[argh(switch)]
+ use_imagenet_1k_classes: bool,
- /// Disable contour drawing
- #[arg(long)]
- pub no_contours: bool,
+ /// confs
+ #[argh(option)]
+ confs: Vec<f32>,
- /// Show result
- #[arg(long)]
- pub view: bool,
+ /// keypoint_confs
+ #[argh(option)]
+ keypoint_confs: Vec<f32>,
- /// Do not save output
- #[arg(long)]
- pub nosave: bool,
+ /// exclude_classes
+ #[argh(option)]
+ exclude_classes: Vec<usize>,
+
+ /// retain_classes
+ #[argh(option)]
+ retain_classes: Vec<usize>,
+
+ /// class_names
+ #[argh(option)]
+ class_names: Vec<String>,
+
+ /// keypoint_names
+ #[argh(option)]
+ keypoint_names: Vec<String>,
}
fn main() -> Result<()> {
- let args = Args::parse();
-
- // model path
- let path = match &args.model {
- None => format!(
- "yolo/{}-{}-{}.onnx",
- args.ver.name(),
- args.scale.name(),
- args.task.name()
- ),
- Some(x) => x.to_string(),
- };
-
- // saveout
- let saveout = match &args.model {
- None => format!(
- "{}-{}-{}",
- args.ver.name(),
- args.scale.name(),
- args.task.name()
- ),
- Some(x) => {
- let p = std::path::PathBuf::from(&x);
- p.file_stem().unwrap().to_str().unwrap().to_string()
- }
- };
-
- // device
- let device = if args.cuda {
- Device::Cuda(args.device_id)
- } else if args.trt {
- Device::Trt(args.device_id)
- } else if args.coreml {
- Device::CoreML(args.device_id)
- } else {
- Device::Cpu(args.device_id)
- };
-
- // build options
- let options = Options::new()
- .with_model(&path)?
- .with_yolo_version(args.ver)
- .with_yolo_task(args.task)
- .with_device(device)
- .with_trt_fp16(args.half)
- .with_ixx(0, 0, (1, args.batch_size as _, 4).into())
- .with_ixx(0, 2, (args.height_min, args.height, args.height_max).into())
- .with_ixx(0, 3, (args.width_min, args.width, args.width_max).into())
- .with_confs(if args.confs.is_empty() {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ let mut options = Options::yolo()
+ .with_model_file(&args.model.unwrap_or_default())
+ .with_model_task(args.task.as_str().try_into()?)
+ .with_model_version(args.ver.into())
+ .with_model_scale(args.scale.as_str().try_into()?)
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .with_trt_fp16(args.trt_fp16)
+ .with_model_ixx(
+ 0,
+ 0,
+ (args.min_batch_size, args.batch_size, args.max_batch_size).into(),
+ )
+ .with_model_ixx(
+ 0,
+ 2,
+ (
+ args.min_image_height,
+ args.image_height,
+ args.max_image_height,
+ )
+ .into(),
+ )
+ .with_model_ixx(
+ 0,
+ 3,
+ (args.min_image_width, args.image_width, args.max_image_width).into(),
+ )
+ .with_class_confs(if args.confs.is_empty() {
&[0.2, 0.15]
} else {
&args.confs
})
- .with_nc(args.nc)
- // .with_names(&COCO_CLASS_NAMES_80)
- // .with_names2(&COCO_KEYPOINTS_17)
- .with_find_contours(!args.no_contours) // find contours or not
- .exclude_classes(&[0])
- // .retain_classes(&[0, 5])
- .with_profile(args.profile);
+ .with_keypoint_confs(if args.keypoint_confs.is_empty() {
+ &[0.5]
+ } else {
+ &args.keypoint_confs
+ })
+ .with_find_contours(args.find_contours)
+ .retain_classes(&args.retain_classes)
+ .exclude_classes(&args.exclude_classes);
+
+ if args.use_coco_80_classes {
+ options = options.with_class_names(&COCO_CLASS_NAMES_80);
+ }
+
+ if args.use_imagenet_1k_classes {
+ options = options.with_class_names(&IMAGENET_NAMES_1K);
+ }
+
+ if let Some(nc) = args.num_classes {
+ options = options.with_nc(nc);
+ }
+
+ if let Some(nk) = args.num_keypoints {
+ options = options.with_nk(nk);
+ }
+
+ if !args.class_names.is_empty() {
+ options = options.with_class_names(
+ &args
+ .class_names
+ .iter()
+ .map(|x| x.as_str())
+ .collect::<Vec<_>>(),
+ );
+ }
+
+ if !args.keypoint_names.is_empty() {
+ options = options.with_keypoint_names(
+ &args
+ .keypoint_names
+ .iter()
+ .map(|x| x.as_str())
+ .collect::<Vec<_>>(),
+ );
+ }
// build model
- let mut model = YOLO::new(options)?;
+ let mut model = YOLO::try_from(options.commit()?)?;
// build dataloader
let dl = DataLoader::new(&args.source)?
@@ -175,56 +217,28 @@ fn main() -> Result<()> {
// build annotator
let annotator = Annotator::default()
.with_skeletons(&COCO_SKELETONS_16)
- .without_masks(true) // No masks plotting when doing segment task.
+ .without_masks(true)
.with_bboxes_thickness(3)
- .with_keypoints_name(false) // Enable keypoints names
- .with_saveout_subs(&["YOLO"])
- .with_saveout(&saveout);
-
- // build viewer
- let mut viewer = if args.view {
- Some(Viewer::new().with_delay(5).with_scale(1.).resizable(true))
- } else {
- None
- };
+ .with_saveout(model.spec());
// run & annotate
for (xs, _paths) in dl {
- // let ys = model.run(&xs)?; // way one
- let ys = model.forward(&xs, args.profile)?; // way two
- let images_plotted = annotator.plot(&xs, &ys, !args.nosave)?;
-
- // show image
- match &mut viewer {
- Some(viewer) => viewer.imshow(&images_plotted)?,
- None => continue,
- }
-
- // check out window and key event
- match &mut viewer {
- Some(viewer) => {
- if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) {
- break;
- }
- }
- None => continue,
- }
-
- // write video
- if !args.nosave {
- match &mut viewer {
- Some(viewer) => viewer.write_batch(&images_plotted)?,
- None => continue,
- }
- }
+ let ys = model.forward(&xs)?;
+ // extract bboxes
+ // for y in ys.iter() {
+ // if let Some(bboxes) = y.bboxes() {
+ // println!("[Bboxes]: Found {} objects", bboxes.len());
+ // for (i, bbox) in bboxes.iter().enumerate() {
+ // println!("{}: {:?}", i, bbox)
+ // }
+ // }
+ // }
+
+ // plot
+ annotator.annotate(&xs, &ys);
}
- // finish video write
- if !args.nosave {
- if let Some(viewer) = &mut viewer {
- viewer.finish_write()?;
- }
- }
+ model.summary();
Ok(())
}
diff --git a/examples/yolop/main.rs b/examples/yolop/main.rs
index 2e338cc..ed8283d 100644
--- a/examples/yolop/main.rs
+++ b/examples/yolop/main.rs
@@ -1,22 +1,26 @@
+use anyhow::Result;
use usls::{models::YOLOPv2, Annotator, DataLoader, Options};
-fn main() -> Result<(), Box> {
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
// build model
- let options = Options::default()
- .with_model("yolop/v2-dyn-480x800.onnx")?
- .with_confs(&[0.3]);
+ let options = Options::yolop_v2_480x800().commit()?;
let mut model = YOLOPv2::new(options)?;
// load image
- let x = [DataLoader::try_read("images/car.jpg")?];
+ let x = [DataLoader::try_read("images/car-view.jpg")?];
// run
- let y = model.run(&x)?;
+ let y = model.forward(&x)?;
// annotate
let annotator = Annotator::default()
.with_polygons_name(true)
- .with_saveout("YOLOPv2");
+ .with_saveout(model.spec());
annotator.annotate(&x, &y);
Ok(())
diff --git a/examples/yolov8-rtdetr/README.md b/examples/yolov8-rtdetr/README.md
new file mode 100644
index 0000000..78eabd8
--- /dev/null
+++ b/examples/yolov8-rtdetr/README.md
@@ -0,0 +1,9 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example yolov8-rtdetr -- --device cuda
+```
+
+```shell
+Ys([Y { BBoxes: [Bbox { xyxy: [668.71356, 395.4159, 809.01587, 879.3043], class_id: 0, name: Some("person"), confidence: 0.950527 }, Bbox { xyxy: [48.866394, 399.50665, 248.22641, 904.7525], class_id: 0, name: Some("person"), confidence: 0.9504415 }, Bbox { xyxy: [20.197449, 230.00304, 805.026, 730.3445], class_id: 5, name: Some("bus"), confidence: 0.94705224 }, Bbox { xyxy: [221.3088, 405.65436, 345.44052, 860.2628], class_id: 0, name: Some("person"), confidence: 0.93062377 }, Bbox { xyxy: [0.34117508, 549.8391, 76.50758, 868.87646], class_id: 0, name: Some("person"), confidence: 0.71064234 }, Bbox { xyxy: [282.12543, 484.14166, 296.43207, 520.96246], class_id: 27, name: Some("tie"), confidence: 0.40305245 }] }])
+```
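+
+Without the `cuda` feature, the same example runs on CPU, since `--device` defaults to `cpu:0` in `main.rs`:
+
+```shell
+cargo run -r --example yolov8-rtdetr
+```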
diff --git a/examples/yolov8-rtdetr/main.rs b/examples/yolov8-rtdetr/main.rs
new file mode 100644
index 0000000..87f611f
--- /dev/null
+++ b/examples/yolov8-rtdetr/main.rs
@@ -0,0 +1,45 @@
+use anyhow::Result;
+use usls::{models::YOLO, Annotator, DataLoader, Options};
+
+#[derive(argh::FromArgs)]
+/// Example
+struct Args {
+ /// dtype
+ #[argh(option, default = "String::from(\"auto\")")]
+ dtype: String,
+
+ /// device
+ #[argh(option, default = "String::from(\"cpu:0\")")]
+ device: String,
+}
+
+fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
+ .with_timer(tracing_subscriber::fmt::time::ChronoLocal::rfc_3339())
+ .init();
+
+ let args: Args = argh::from_env();
+
+ // build model
+ let config = Options::yolo_v8_rtdetr_l()
+ .with_model_dtype(args.dtype.as_str().try_into()?)
+ .with_model_device(args.device.as_str().try_into()?)
+ .commit()?;
+ let mut model = YOLO::new(config)?;
+
+ // load images
+ let xs = DataLoader::try_read_batch(&["./assets/bus.jpg"])?;
+
+ // run
+ let ys = model.forward(&xs)?;
+ println!("{:?}", ys);
+
+ // annotate
+ let annotator = Annotator::default()
+ .with_bboxes_thickness(3)
+ .with_saveout(model.spec());
+ annotator.annotate(&xs, &ys);
+
+ Ok(())
+}
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
deleted file mode 100644
index c6e4d7d..0000000
--- a/rust-toolchain.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[toolchain]
-channel = "1.79"
diff --git a/scripts/CelebAMask-HQ-To-YOLO-Labels.py b/scripts/CelebAMask-HQ-To-YOLO-Labels.py
deleted file mode 100644
index 95babb6..0000000
--- a/scripts/CelebAMask-HQ-To-YOLO-Labels.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import cv2
-import numpy as np
-from pathlib import Path
-from tqdm import tqdm
-
-
-mapping = {
- 'background': 0,
- 'skin': 1,
- 'nose': 2,
- 'eye_g': 3,
- 'l_eye': 4,
- 'r_eye': 5,
- 'l_brow': 6,
- 'r_brow': 7,
- 'l_ear': 8,
- 'r_ear': 9,
- 'mouth': 10,
- 'u_lip': 11,
- 'l_lip': 12,
- 'hair': 13,
- 'hat': 14,
- 'ear_r': 15,
- 'neck_l': 16,
- 'neck': 17,
- 'cloth': 18
-}
-
-
-
-def main():
- saveout_dir = Path("labels")
- if not saveout_dir.exists():
- saveout_dir.mkdir()
- else:
- import shutil
- shutil.rmtree(saveout_dir)
- saveout_dir.mkdir()
-
-
- image_list = [x for x in Path("CelebAMask-HQ-mask-anno/").rglob("*.png")]
- for image_path in tqdm(image_list, total=len(image_list)):
- image_gray = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
- stem = image_path.stem
- name, cls_ = stem.split("_", 1)
- segments = cv2.findContours(image_gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
-
- saveout = saveout_dir / f"{int(name)}.txt"
- with open(saveout, 'a+') as f:
- for segment in segments:
- line = f"{mapping[cls_]}"
- segment = segment / 512
- for seg in segment:
- xn, yn = seg[0]
- line += f" {xn} {yn}"
- f.write(line + "\n")
-
-
-
-
-if __name__ == "__main__":
- main()
-
diff --git a/scripts/convert2f16.py b/scripts/convert2f16.py
deleted file mode 100644
index 6b9eec3..0000000
--- a/scripts/convert2f16.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import onnx
-from pathlib import Path
-from onnxconverter_common import float16
-
-model_f32 = "onnx_model.onnx"
-model_f16 = float16.convert_float_to_float16(onnx.load(model_f32))
-saveout = Path(model_f32).with_name(Path(model_f32).stem + "-f16.onnx")
-onnx.save(model_f16, saveout)
diff --git a/src/core/device.rs b/src/core/device.rs
deleted file mode 100644
index 583df16..0000000
--- a/src/core/device.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub enum Device {
- Auto(usize),
- Cpu(usize),
- Cuda(usize),
- Trt(usize),
- CoreML(usize),
- // Cann(usize),
- // Acl(usize),
- // Rocm(usize),
- // Rknpu(usize),
- // Openvino(usize),
- // Onednn(usize),
-}
diff --git a/src/core/hub.rs b/src/core/hub.rs
deleted file mode 100644
index c3f3e90..0000000
--- a/src/core/hub.rs
+++ /dev/null
@@ -1,426 +0,0 @@
-use anyhow::{Context, Result};
-use indicatif::{ProgressBar, ProgressStyle};
-use serde::{Deserialize, Serialize};
-use std::io::{Read, Write};
-use std::path::{Path, PathBuf};
-
-use crate::Dir;
-
-/// Represents a downloadable asset in a release
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct Asset {
- pub name: String,
- pub browser_download_url: String,
- pub size: u64,
-}
-
-/// Represents a GitHub release
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct Release {
- pub tag_name: String,
- pub assets: Vec,
-}
-
-/// Manages interactions with a GitHub repository's releases
-pub struct Hub {
- /// github api
- _gh_api_release: String,
-
- /// GitHub repository owner
- owner: String,
-
- /// GitHub repository name
- repo: String,
-
- /// Optional list of releases fetched from GitHub
- releases: Option>,
-
- /// Path to cache file
- cache: PathBuf,
-
- /// Optional release tag to be used
- tag: Option,
-
- /// Filename for the asset, used in cache management
- file_name: Option,
- file_size: Option,
-
- /// Full URL constructed for downloading the asset
- url: Option,
-
- /// Local path where the asset will be stored
- path: PathBuf,
-
- /// Directory to store the downloaded file
- to: Dir,
-
- /// Download timeout in seconds
- timeout: u64,
-
- /// Time to live (cache duration)
- ttl: std::time::Duration,
-
- /// Maximum attempts for downloading
- max_attempts: u32,
-}
-
-impl std::fmt::Debug for Hub {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- f.debug_struct("Hub")
- .field("owner", &self.owner)
- .field("repo", &self.repo)
- .field("cache", &self.cache)
- .field("path", &self.path)
- .field("releases", &self.releases.as_ref().map(|x| x.len()))
- .field("ttl", &self.ttl)
- .field("max_attempts", &self.max_attempts)
- .finish()
- }
-}
-
-impl Default for Hub {
- fn default() -> Self {
- let owner = "jamjamjon".to_string();
- let repo = "assets".to_string();
- let _gh_api_release = format!("https://api.github.com/repos/{}/{}/releases", owner, repo);
-
- Self {
- owner,
- repo,
- _gh_api_release,
- url: None,
- path: PathBuf::new(),
- to: Dir::Cache,
- tag: None,
- file_name: None,
- file_size: None,
- releases: None,
- cache: PathBuf::new(),
- timeout: 3000,
- max_attempts: 3,
- ttl: std::time::Duration::from_secs(10 * 60),
- }
- }
-}
-
-impl Hub {
- pub fn new() -> Result {
- let mut to = Dir::Cache;
- let cache = to
- .path()
- .or_else(|_| {
- to = Dir::Home;
- to.path()
- })?
- .join("cache_releases");
-
- Ok(Self {
- to,
- cache,
- ..Default::default()
- })
- }
-
- pub fn with_owner(mut self, owner: &str) -> Self {
- self.owner = owner.to_string();
- self
- }
-
- pub fn with_repo(mut self, repo: &str) -> Self {
- self.repo = repo.to_string();
- self
- }
-
- pub fn with_ttl(mut self, x: u64) -> Self {
- self.ttl = std::time::Duration::from_secs(x);
- self
- }
-
- pub fn with_timeout(mut self, x: u64) -> Self {
- self.timeout = x;
- self
- }
-
- pub fn with_max_attempts(mut self, x: u32) -> Self {
- self.max_attempts = x;
- self
- }
-
- pub fn fetch(mut self, s: &str) -> Result {
- // try to fetch from hub or local cache
- let p = PathBuf::from(s);
- match p.exists() {
- true => self.path = p,
- false => {
- // check remote
- match s.split_once('/') {
- Some((tag, file_name)) => {
- // Extract tag and file from input string
- self.tag = Some(tag.to_string());
- self.file_name = Some(file_name.to_string());
-
- // Check if releases are already loaded in memory
- if self.releases.is_none() {
- self.releases = Some(self.connect_remote()?);
- }
-
- if let Some(releases) = &self.releases {
- // Validate the tag
- let tags: Vec<&str> =
- releases.iter().map(|x| x.tag_name.as_str()).collect();
- if !tags.contains(&tag) {
- anyhow::bail!(
- "Hub tag '{}' not found in releases. Available tags: {:?}",
- tag,
- tags
- );
- }
-
- // Validate the file
- if let Some(release) = releases.iter().find(|r| r.tag_name == tag) {
- let files: Vec<&str> =
- release.assets.iter().map(|x| x.name.as_str()).collect();
- if !files.contains(&file_name) {
- anyhow::bail!(
- "Hub file '{}' not found in tag '{}'. Available files: {:?}",
- file_name,
- tag,
- files
- );
- } else {
- for f_ in release.assets.iter() {
- if f_.name.as_str() == file_name {
- self.url = Some(f_.browser_download_url.clone());
- self.file_size = Some(f_.size);
-
- break;
- }
- }
- }
- }
- self.path = self.to.path_with_subs(&[tag])?.join(file_name);
- }
- }
- _ => anyhow::bail!(
- "Download failed due to invalid format. Expected: /, got: {}",
- s
- ),
- }
- }
- }
-
- Ok(self)
- }
-
- /// Fetch releases from GitHub and cache them
- fn fetch_and_cache_releases(url: &str, cache_path: &Path) -> Result {
- let response = ureq::get(url)
- .set("User-Agent", "my-app")
- .call()
- .context("Failed to fetch releases from remote")?;
-
- if response.status() != 200 {
- anyhow::bail!(
- "Failed to fetch releases from remote ({}): status {} - {}",
- url,
- response.status(),
- response.status_text()
- );
- }
-
- let body = response
- .into_string()
- .context("Failed to read response body")?;
-
- // Ensure cache directory exists
- let parent_dir = cache_path
- .parent()
- .context("Invalid cache path; no parent directory found")?;
- std::fs::create_dir_all(parent_dir)
- .with_context(|| format!("Failed to create cache directory: {:?}", parent_dir))?;
-
- // Create temporary file
- let mut temp_file = tempfile::NamedTempFile::new_in(parent_dir)
- .context("Failed to create temporary cache file")?;
-
- // Write data to temporary file
- temp_file
- .write_all(body.as_bytes())
- .context("Failed to write to temporary cache file")?;
-
- // Persist temporary file as the cache
- temp_file.persist(cache_path).with_context(|| {
- format!("Failed to persist temporary cache file to {:?}", cache_path)
- })?;
-
- Ok(body)
- }
-
- pub fn tags(&mut self) -> Option> {
- if self.releases.is_none() {
- self.releases = self.connect_remote().ok();
- }
-
- self.releases
- .as_ref()
- .map(|releases| releases.iter().map(|x| x.tag_name.as_str()).collect())
- }
-
- pub fn files(&mut self, tag: &str) -> Option> {
- if self.releases.is_none() {
- self.releases = self.connect_remote().ok();
- }
-
- self.releases.as_ref().map(|releases| {
- releases
- .iter()
- .find(|r| r.tag_name == tag)
- .map(|a| a.assets.iter().map(|x| x.name.as_str()).collect())
- })?
- }
-
- pub fn connect_remote(&mut self) -> Result> {
- let span = tracing::span!(tracing::Level::INFO, "Hub-connect_remote");
- let _guard = span.enter();
-
- let should_download = if !self.cache.exists() {
- tracing::info!("No cache found, fetching data from GitHub");
- true
- } else {
- match std::fs::metadata(&self.cache)?.modified() {
- Err(_) => {
- tracing::info!("Cannot get file modified time, fetching new data from GitHub");
- true
- }
- Ok(modified_time) => {
- if std::time::SystemTime::now().duration_since(modified_time)? < self.ttl {
- tracing::info!("Using cached data");
- false
- } else {
- tracing::info!("Cache expired, fetching new data from GitHub");
- true
- }
- }
- }
- };
-
- let body = if should_download {
- Self::fetch_and_cache_releases(&self._gh_api_release, &self.cache)?
- } else {
- std::fs::read_to_string(&self.cache)?
- };
- let releases: Vec = serde_json::from_str(&body)?;
- Ok(releases)
- }
-
- /// Commit the downloaded file, downloading if necessary
- pub fn commit(&self) -> Result {
- if let Some(url) = &self.url {
- // Download if the file does not exist or if the size of file does not match
- if !self.path.is_file()
- || self.path.is_file()
- && Some(std::fs::metadata(&self.path)?.len()) != self.file_size
- {
- let name = format!(
- "{}/{}",
- self.tag.as_ref().unwrap(),
- self.file_name.as_ref().unwrap()
- );
- Self::download(
- url.as_str(),
- &self.path,
- Some(&name),
- Some(self.timeout),
- Some(self.max_attempts),
- )?;
- }
- }
- self.path
- .to_str()
- .map(|s| s.to_string())
- .with_context(|| format!("Failed to convert PathBuf: {:?} to String", self.path))
- }
-
- /// Download a file from a github release to a specified path with a progress bar
- pub fn download + std::fmt::Debug>(
- src: &str,
- dst: P,
- prompt: Option<&str>,
- timeout: Option,
- max_attempts: Option,
- ) -> Result<()> {
- // TODO: other url, not just github release page
-
- let max_attempts = max_attempts.unwrap_or(2);
- let timeout_duration = std::time::Duration::from_secs(timeout.unwrap_or(2000));
- let agent = ureq::AgentBuilder::new().try_proxy_from_env(true).build();
-
- for i_try in 0..max_attempts {
- let resp = agent
- .get(src)
- .timeout(timeout_duration)
- .call()
- .with_context(|| {
- format!(
- "Failed to download file from {}, timeout: {:?}",
- src, timeout_duration
- )
- })?;
- let ntotal = resp
- .header("Content-Length")
- .and_then(|s| s.parse::().ok())
- .context("Content-Length header is missing or invalid")?;
-
- let pb = ProgressBar::new(ntotal);
- pb.set_style(
- ProgressStyle::with_template(
- "{prefix:.cyan.bold} {msg} |{bar}| ({percent_precise}%, {binary_bytes}/{binary_total_bytes}, {binary_bytes_per_sec})",
- )?
- .progress_chars("██ "),
- );
- pb.set_prefix(if i_try == 0 {
- " Fetching"
- } else {
- " Re-Fetching"
- });
- pb.set_message(prompt.unwrap_or_default().to_string());
-
- let mut reader = resp.into_reader();
- let mut buffer = [0; 256];
- let mut downloaded_bytes = 0usize;
- let mut file = std::fs::File::create(&dst)
- .with_context(|| format!("Failed to create destination file: {:?}", dst))?;
-
- loop {
- let bytes_read = reader.read(&mut buffer)?;
- if bytes_read == 0 {
- break;
- }
- file.write_all(&buffer[..bytes_read])
- .context("Failed to write to file")?;
- downloaded_bytes += bytes_read;
- pb.inc(bytes_read as u64);
- }
-
- // check size
- if downloaded_bytes as u64 != ntotal {
- continue;
- }
-
- // update
- pb.set_prefix(" Downloaded");
- pb.set_style(ProgressStyle::with_template(
- crate::PROGRESS_BAR_STYLE_FINISH_3,
- )?);
- pb.finish();
-
- if i_try != max_attempts {
- break;
- } else {
- anyhow::bail!("Exceeded the maximum number of download attempts");
- }
- }
-
- Ok(())
- }
-}
diff --git a/src/core/metric.rs b/src/core/metric.rs
deleted file mode 100644
index af0a5ed..0000000
--- a/src/core/metric.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-#[derive(Debug)]
-pub enum Metric {
- IP,
- Cos,
- L2,
-}
diff --git a/src/core/mod.rs b/src/core/mod.rs
deleted file mode 100644
index 0b0c2f1..0000000
--- a/src/core/mod.rs
+++ /dev/null
@@ -1,45 +0,0 @@
-mod annotator;
-mod dataloader;
-mod device;
-mod dir;
-mod dynconf;
-mod hub;
-mod logits_sampler;
-mod media;
-mod metric;
-mod min_opt_max;
-pub mod onnx;
-pub mod ops;
-mod options;
-mod ort_engine;
-mod task;
-mod tokenizer_stream;
-mod ts;
-mod viewer;
-mod vision;
-mod x;
-mod xs;
-
-pub use annotator::Annotator;
-pub use dataloader::DataLoader;
-pub use device::Device;
-pub use dir::Dir;
-pub use dynconf::DynConf;
-pub use hub::Hub;
-pub use logits_sampler::LogitsSampler;
-pub use media::*;
-pub use metric::Metric;
-pub use min_opt_max::MinOptMax;
-pub use ops::Ops;
-pub use options::Options;
-pub use ort_engine::*;
-pub use task::Task;
-pub use tokenizer_stream::TokenizerStream;
-pub use ts::Ts;
-pub use viewer::Viewer;
-pub use vision::Vision;
-pub use x::X;
-pub use xs::Xs;
-
-// re-export
-pub use minifb::Key;
diff --git a/src/core/options.rs b/src/core/options.rs
deleted file mode 100644
index 4e906b5..0000000
--- a/src/core/options.rs
+++ /dev/null
@@ -1,295 +0,0 @@
-//! Options for build models.
-
-use anyhow::Result;
-
-use crate::{
- models::{SamKind, SapiensTask, YOLOPreds, YOLOTask, YOLOVersion},
- Device, Hub, Iiix, MinOptMax, Task,
-};
-
-/// Options for building models
-#[derive(Debug, Clone)]
-pub struct Options {
- pub onnx_path: String,
- pub task: Task,
- pub device: Device,
- pub batch_size: usize,
- pub iiixs: Vec,
- pub profile: bool,
- pub num_dry_run: usize,
-
- // trt related
- pub trt_engine_cache_enable: bool,
- pub trt_int8_enable: bool,
- pub trt_fp16_enable: bool,
-
- // options for Vision and Language models
- pub nc: Option,
- pub nk: Option,
- pub nm: Option,
- pub confs: Vec,
- pub confs2: Vec,
- pub confs3: Vec,
- pub kconfs: Vec,
- pub iou: Option,
- pub tokenizer: Option,
- pub vocab: Option,
- pub context_length: Option,
- pub names: Option>, // names
- pub names2: Option>, // names2
- pub names3: Option>, // names3
- pub min_width: Option,
- pub min_height: Option,
- pub unclip_ratio: f32, // DB
- pub yolo_task: Option,
- pub yolo_version: Option,
- pub yolo_preds: Option,
- pub find_contours: bool,
- pub sam_kind: Option,
- pub use_low_res_mask: Option,
- pub sapiens_task: Option,
- pub classes_excluded: Vec,
- pub classes_retained: Vec,
-}
-
-impl Default for Options {
- fn default() -> Self {
- Self {
- onnx_path: String::new(),
- device: Device::Cuda(0),
- profile: false,
- batch_size: 1,
- iiixs: vec![],
- num_dry_run: 3,
-
- trt_engine_cache_enable: true,
- trt_int8_enable: false,
- trt_fp16_enable: false,
- nc: None,
- nk: None,
- nm: None,
- confs: vec![0.3f32],
- confs2: vec![0.3f32],
- confs3: vec![0.3f32],
- kconfs: vec![0.5f32],
- iou: None,
- tokenizer: None,
- vocab: None,
- context_length: None,
- names: None,
- names2: None,
- names3: None,
- min_width: None,
- min_height: None,
- unclip_ratio: 1.5,
- yolo_task: None,
- yolo_version: None,
- yolo_preds: None,
- find_contours: false,
- sam_kind: None,
- use_low_res_mask: None,
- sapiens_task: None,
- task: Task::Untitled,
- classes_excluded: vec![],
- classes_retained: vec![],
- }
- }
-}
-
-impl Options {
- pub fn new() -> Self {
- Default::default()
- }
-
- pub fn with_task(mut self, task: Task) -> Self {
- self.task = task;
- self
- }
-
- pub fn with_model(mut self, onnx_path: &str) -> Result {
- self.onnx_path = Hub::new()?.fetch(onnx_path)?.commit()?;
- Ok(self)
- }
-
- pub fn with_batch_size(mut self, n: usize) -> Self {
- self.batch_size = n;
- self
- }
-
- pub fn with_batch(mut self, n: usize) -> Self {
- self.batch_size = n;
- self
- }
-
- pub fn with_dry_run(mut self, n: usize) -> Self {
- self.num_dry_run = n;
- self
- }
-
- pub fn with_device(mut self, device: Device) -> Self {
- self.device = device;
- self
- }
-
- pub fn with_cuda(mut self, id: usize) -> Self {
- self.device = Device::Cuda(id);
- self
- }
-
- pub fn with_trt(mut self, id: usize) -> Self {
- self.device = Device::Trt(id);
- self
- }
-
- pub fn with_cpu(mut self) -> Self {
- self.device = Device::Cpu(0);
- self
- }
-
- pub fn with_coreml(mut self, id: usize) -> Self {
- self.device = Device::CoreML(id);
- self
- }
-
- pub fn with_trt_fp16(mut self, x: bool) -> Self {
- self.trt_fp16_enable = x;
- self
- }
-
- pub fn with_yolo_task(mut self, x: YOLOTask) -> Self {
- self.yolo_task = Some(x);
- self
- }
-
- pub fn with_sapiens_task(mut self, x: SapiensTask) -> Self {
- self.sapiens_task = Some(x);
- self
- }
-
- pub fn with_yolo_version(mut self, x: YOLOVersion) -> Self {
- self.yolo_version = Some(x);
- self
- }
-
- pub fn with_profile(mut self, profile: bool) -> Self {
- self.profile = profile;
- self
- }
-
- pub fn with_find_contours(mut self, x: bool) -> Self {
- self.find_contours = x;
- self
- }
-
- pub fn with_sam_kind(mut self, x: SamKind) -> Self {
- self.sam_kind = Some(x);
- self
- }
-
- pub fn use_low_res_mask(mut self, x: bool) -> Self {
- self.use_low_res_mask = Some(x);
- self
- }
-
- pub fn with_names(mut self, names: &[&str]) -> Self {
- self.names = Some(names.iter().map(|x| x.to_string()).collect::>());
- self
- }
-
- pub fn with_names2(mut self, names: &[&str]) -> Self {
- self.names2 = Some(names.iter().map(|x| x.to_string()).collect::>());
- self
- }
-
- pub fn with_names3(mut self, names: &[&str]) -> Self {
- self.names3 = Some(names.iter().map(|x| x.to_string()).collect::>());
- self
- }
-
- pub fn with_vocab(mut self, vocab: &str) -> Result {
- self.vocab = Some(Hub::new()?.fetch(vocab)?.commit()?);
- Ok(self)
- }
-
- pub fn with_context_length(mut self, n: usize) -> Self {
- self.context_length = Some(n);
- self
- }
-
- pub fn with_tokenizer(mut self, tokenizer: &str) -> Result {
- self.tokenizer = Some(Hub::new()?.fetch(tokenizer)?.commit()?);
- Ok(self)
- }
-
- pub fn with_unclip_ratio(mut self, x: f32) -> Self {
- self.unclip_ratio = x;
- self
- }
-
- pub fn with_min_width(mut self, x: f32) -> Self {
- self.min_width = Some(x);
- self
- }
-
- pub fn with_min_height(mut self, x: f32) -> Self {
- self.min_height = Some(x);
- self
- }
-
- pub fn with_yolo_preds(mut self, x: YOLOPreds) -> Self {
- self.yolo_preds = Some(x);
- self
- }
-
- pub fn with_nc(mut self, nc: usize) -> Self {
- self.nc = Some(nc);
- self
- }
-
- pub fn with_nk(mut self, nk: usize) -> Self {
- self.nk = Some(nk);
- self
- }
-
- pub fn with_iou(mut self, x: f32) -> Self {
- self.iou = Some(x);
- self
- }
-
- pub fn with_confs(mut self, x: &[f32]) -> Self {
- self.confs = x.to_vec();
- self
- }
-
- pub fn with_confs2(mut self, x: &[f32]) -> Self {
- self.confs2 = x.to_vec();
- self
- }
-
- pub fn with_confs3(mut self, x: &[f32]) -> Self {
- self.confs3 = x.to_vec();
- self
- }
-
- pub fn with_kconfs(mut self, kconfs: &[f32]) -> Self {
- self.kconfs = kconfs.to_vec();
- self
- }
-
- pub fn with_ixx(mut self, i: usize, ii: usize, x: MinOptMax) -> Self {
- self.iiixs.push(Iiix::from((i, ii, x)));
- self
- }
-
- pub fn exclude_classes(mut self, xs: &[isize]) -> Self {
- self.classes_retained.clear();
- self.classes_excluded.extend_from_slice(xs);
- self
- }
-
- pub fn retain_classes(mut self, xs: &[isize]) -> Self {
- self.classes_excluded.clear();
- self.classes_retained.extend_from_slice(xs);
- self
- }
-}
diff --git a/src/core/ort_engine.rs b/src/core/ort_engine.rs
deleted file mode 100644
index d0b915d..0000000
--- a/src/core/ort_engine.rs
+++ /dev/null
@@ -1,669 +0,0 @@
-use anyhow::Result;
-use half::f16;
-use ndarray::{Array, IxDyn};
-use ort::{
- execution_providers::{ExecutionProvider, TensorRTExecutionProvider},
- session::{builder::SessionBuilder, Session},
- tensor::TensorElementType,
-};
-use prost::Message;
-use std::collections::HashSet;
-
-use crate::{
- build_progress_bar, human_bytes, onnx, Device, Dir, MinOptMax, Ops, Options, Ts, Xs,
- CHECK_MARK, CROSS_MARK, X,
-};
-
-/// A struct for input composed of the i-th input, the ii-th dimension, and the value.
-#[derive(Clone, Debug, Default)]
-pub struct Iiix {
- pub i: usize,
- pub ii: usize,
- pub x: MinOptMax,
-}
-
-impl From<(usize, usize, MinOptMax)> for Iiix {
- fn from((i, ii, x): (usize, usize, MinOptMax)) -> Self {
- Self { i, ii, x }
- }
-}
-
-/// A struct for tensor attrs composed of the names, the dtypes, and the dimensions.
-#[derive(Debug)]
-pub struct OrtTensorAttr {
- pub names: Vec,
- pub dtypes: Vec,
- pub dimss: Vec>,
-}
-
-/// ONNXRuntime Backend
-#[derive(Debug)]
-pub struct OrtEngine {
- name: String,
- session: Session,
- device: Device,
- inputs_minoptmax: Vec>,
- inputs_attrs: OrtTensorAttr,
- outputs_attrs: OrtTensorAttr,
- profile: bool,
- num_dry_run: usize,
- model_proto: onnx::ModelProto,
- params: usize,
- wbmems: usize,
- ts: Ts,
-}
-
-impl OrtEngine {
- pub fn new(config: &Options) -> Result {
- let span = tracing::span!(tracing::Level::INFO, "OrtEngine-new");
- let _guard = span.enter();
-
- // onnx graph
- let model_proto = Self::load_onnx(&config.onnx_path)?;
- let graph = match &model_proto.graph {
- Some(graph) => graph,
- None => anyhow::bail!("No graph found in this proto. Failed to parse ONNX model."),
- };
-
- // model params & mems
- let byte_alignment = 16; // 16 for simd; 8 for most
- let mut params: usize = 0;
- let mut wbmems: usize = 0;
- let mut initializer_names: HashSet<&str> = HashSet::new();
- for tensor_proto in graph.initializer.iter() {
- initializer_names.insert(&tensor_proto.name);
- let param = tensor_proto.dims.iter().product::() as usize;
- params += param;
-
- // mems
- let param = Ops::make_divisible(param, byte_alignment);
- let n = Self::nbytes_from_onnx_dtype_id(tensor_proto.data_type as usize);
- let wbmem = param * n;
- wbmems += wbmem;
- }
-
- // inputs & outputs
- let inputs_attrs = Self::io_from_onnx_value_info(&initializer_names, &graph.input)?;
- let outputs_attrs = Self::io_from_onnx_value_info(&initializer_names, &graph.output)?;
- let inputs_minoptmax =
- Self::build_inputs_minoptmax(&inputs_attrs, &config.iiixs, config.batch_size)?;
-
- // build
- ort::init().commit()?;
- let mut builder = Session::builder()?;
- let mut device = config.device.to_owned();
- match device {
- Device::Trt(device_id) => {
- Self::build_trt(
- &inputs_attrs.names,
- &inputs_minoptmax,
- &mut builder,
- device_id,
- config.trt_int8_enable,
- config.trt_fp16_enable,
- config.trt_engine_cache_enable,
- )?;
- }
- Device::Cuda(device_id) => {
- Self::build_cuda(&mut builder, device_id).unwrap_or_else(|err| {
- tracing::warn!("{err}, Using cpu");
- device = Device::Cpu(0);
- })
- }
- Device::CoreML(_) => Self::build_coreml(&mut builder).unwrap_or_else(|err| {
- tracing::warn!("{err}, Using cpu");
- device = Device::Cpu(0);
- }),
- Device::Cpu(_) => {
- Self::build_cpu(&mut builder)?;
- }
- _ => todo!(),
- }
-
- let session = builder
- .with_optimization_level(ort::session::builder::GraphOptimizationLevel::Level3)?
- .commit_from_file(&config.onnx_path)?;
-
- // summary
- tracing::info!(
- "{CHECK_MARK} Backend: ONNXRuntime | Opset: {} | Device: {:?} | Params: {}",
- model_proto.opset_import[0].version,
- device,
- human_bytes(params as f64),
- );
-
- Ok(Self {
- name: config.onnx_path.to_owned(),
- session,
- device,
- inputs_minoptmax,
- inputs_attrs,
- outputs_attrs,
- profile: config.profile,
- num_dry_run: config.num_dry_run,
- model_proto,
- params,
- wbmems,
- ts: Ts::default(),
- })
- }
-
- fn build_trt(
- names: &[String],
- inputs_minoptmax: &[Vec],
- builder: &mut SessionBuilder,
- device_id: usize,
- int8_enable: bool,
- fp16_enable: bool,
- engine_cache_enable: bool,
- ) -> Result<()> {
- let span = tracing::span!(tracing::Level::INFO, "OrtEngine-build_trt");
- let _guard = span.enter();
-
- // auto generate shapes
- let mut spec_min = String::new();
- let mut spec_opt = String::new();
- let mut spec_max = String::new();
- for (i, name) in names.iter().enumerate() {
- if i != 0 {
- spec_min.push(',');
- spec_opt.push(',');
- spec_max.push(',');
- }
- let mut s_min = format!("{}:", name);
- let mut s_opt = format!("{}:", name);
- let mut s_max = format!("{}:", name);
- for d in inputs_minoptmax[i].iter() {
- let min_ = &format!("{}x", d.min());
- let opt_ = &format!("{}x", d.opt());
- let max_ = &format!("{}x", d.max());
- s_min += min_;
- s_opt += opt_;
- s_max += max_;
- }
- s_min.pop();
- s_opt.pop();
- s_max.pop();
- spec_min += &s_min;
- spec_opt += &s_opt;
- spec_max += &s_max;
- }
- let p = Dir::Cache.path_with_subs(&["trt-cache"])?;
- let trt = TensorRTExecutionProvider::default()
- .with_device_id(device_id as i32)
- .with_int8(int8_enable)
- .with_fp16(fp16_enable)
- .with_engine_cache(engine_cache_enable)
- .with_engine_cache_path(p.to_str().unwrap())
- .with_timing_cache(false)
- .with_profile_min_shapes(spec_min)
- .with_profile_opt_shapes(spec_opt)
- .with_profile_max_shapes(spec_max);
- if trt.is_available()? && trt.register(builder).is_ok() {
- tracing::info!("🐢 Initial model serialization with TensorRT may require a wait...\n");
- Ok(())
- } else {
- anyhow::bail!("{CROSS_MARK} TensorRT initialization failed")
- }
- }
-
- fn build_cuda(builder: &mut SessionBuilder, device_id: usize) -> Result<()> {
- let ep = ort::execution_providers::CUDAExecutionProvider::default()
- .with_device_id(device_id as i32);
- if ep.is_available()? && ep.register(builder).is_ok() {
- Ok(())
- } else {
- anyhow::bail!("{CROSS_MARK} CUDA initialization failed")
- }
- }
-
- fn build_coreml(builder: &mut SessionBuilder) -> Result<()> {
- let ep = ort::execution_providers::CoreMLExecutionProvider::default().with_subgraphs(); //.with_ane_only();
- if ep.is_available()? && ep.register(builder).is_ok() {
- Ok(())
- } else {
- anyhow::bail!("{CROSS_MARK} CoreML initialization failed")
- }
- }
-
- fn build_cpu(builder: &mut SessionBuilder) -> Result<()> {
- let ep = ort::execution_providers::CPUExecutionProvider::default();
- if ep.is_available()? && ep.register(builder).is_ok() {
- Ok(())
- } else {
- anyhow::bail!("{CROSS_MARK} CPU initialization failed")
- }
- }
-
- pub fn dry_run(&mut self) -> Result<()> {
- if self.num_dry_run > 0 {
- // pb
- let name = std::path::Path::new(&self.name);
- let pb = build_progress_bar(
- self.num_dry_run as u64,
- " DryRun",
- Some(
- name.file_name()
- .and_then(|x| x.to_str())
- .unwrap_or_default(),
- ),
- crate::PROGRESS_BAR_STYLE_CYAN_2,
- )?;
-
- // dummy inputs
- let mut xs = Vec::new();
- for i in self.inputs_minoptmax.iter() {
- let mut x: Vec = Vec::new();
- for i_ in i.iter() {
- x.push(i_.opt());
- }
- let x: Array = Array::ones(x).into_dyn();
- xs.push(X::from(x));
- }
- let xs = Xs::from(xs);
-
- // run
- for _ in 0..self.num_dry_run {
- pb.inc(1);
- self.run(xs.clone())?;
- }
- self.ts.clear();
-
- // update
- let name = std::path::Path::new(&self.name);
- pb.set_message(format!(
- "{} on {:?}",
- name.file_name()
- .and_then(|x| x.to_str())
- .unwrap_or_default(),
- self.device,
- ));
- pb.set_style(indicatif::ProgressStyle::with_template(
- crate::PROGRESS_BAR_STYLE_FINISH,
- )?);
- pb.finish();
- }
- Ok(())
- }
-
- pub fn run(&mut self, xs: Xs) -> Result {
- let span = tracing::span!(tracing::Level::INFO, "OrtEngine-run");
- let _guard = span.enter();
-
- // inputs dtype alignment
- let mut xs_ = Vec::new();
- let t_pre = std::time::Instant::now();
- for (idtype, x) in self.inputs_attrs.dtypes.iter().zip(xs.into_iter()) {
- let x_ = match &idtype {
- TensorElementType::Float32 => ort::value::Value::from_array(x.view())?.into_dyn(),
- TensorElementType::Float16 => {
- ort::value::Value::from_array(x.mapv(f16::from_f32).view())?.into_dyn()
- }
- TensorElementType::Int32 => {
- ort::value::Value::from_array(x.mapv(|x_| x_ as i32).view())?.into_dyn()
- }
- TensorElementType::Int64 => {
- ort::value::Value::from_array(x.mapv(|x_| x_ as i64).view())?.into_dyn()
- }
- TensorElementType::Uint8 => {
- ort::value::Value::from_array(x.mapv(|x_| x_ as u8).view())?.into_dyn()
- }
- TensorElementType::Int8 => {
- ort::value::Value::from_array(x.mapv(|x_| x_ as i8).view())?.into_dyn()
- }
- TensorElementType::Bool => {
- ort::value::Value::from_array(x.mapv(|x_| x_ != 0.).view())?.into_dyn()
- }
- _ => todo!(),
- };
- xs_.push(Into::>::into(x_));
- }
- let t_pre = t_pre.elapsed();
- self.ts.add_or_push(0, t_pre);
-
- // inference
- let t_run = std::time::Instant::now();
- let outputs = self.session.run(&xs_[..])?;
-
- let t_run = t_run.elapsed();
- self.ts.add_or_push(1, t_run);
-
- // oputput
- let mut ys = Xs::new();
- let t_post = std::time::Instant::now();
- for (dtype, name) in self
- .outputs_attrs
- .dtypes
- .iter()
- .zip(self.outputs_attrs.names.iter())
- {
- let y = &outputs[name.as_str()];
-
- let y_ = match &dtype {
- TensorElementType::Float32 => match y.try_extract_tensor::() {
- Err(err) => {
- tracing::error!("Error: {:?}. Output name: {:?}", err, name);
- Array::zeros(0).into_dyn()
- }
- Ok(x) => x.view().into_owned(),
- },
- TensorElementType::Float16 => match y.try_extract_tensor::() {
- Err(err) => {
- tracing::error!("Error: {:?}. Output name: {:?}", err, name);
- Array::zeros(0).into_dyn()
- }
- Ok(x) => x.view().mapv(f16::to_f32).into_owned(),
- },
- TensorElementType::Int64 => match y.try_extract_tensor::() {
- Err(err) => {
- tracing::error!("Error: {:?}. Output name: {:?}", err, name);
- Array::zeros(0).into_dyn()
- }
- Ok(x) => x.view().to_owned().mapv(|x| x as f32).into_owned(),
- },
- _ => todo!(),
- };
-
- ys.push_kv(name.as_str(), X::from(y_))?;
- }
- let t_post = t_post.elapsed();
- self.ts.add_or_push(2, t_post);
-
- if self.profile {
- let len = 10usize;
- let n = 4usize;
- tracing::info!(
- "[Profile] {:>len$.n$?} ({:>len$.n$?} avg) [alignment: {:>len$.n$?} ({:>len$.n$?} avg) | inference: {:>len$.n$?} ({:>len$.n$?} avg) | to_f32: {:>len$.n$?} ({:>len$.n$?} avg)]",
- t_pre + t_run + t_post,
- self.ts.avg(),
- t_pre,
- self.ts.avgi(0),
- t_run,
- self.ts.avgi(1),
- t_post,
- self.ts.avgi(2),
- );
- }
- Ok(ys)
- }
-
- fn build_inputs_minoptmax(
- inputs_attrs: &OrtTensorAttr,
- iiixs: &[Iiix],
- batch_size: usize,
- ) -> Result>> {
- let span = tracing::span!(tracing::Level::INFO, "OrtEngine-build_inputs_minoptmax");
- let _guard = span.enter();
-
- // init
- let mut ys: Vec> = inputs_attrs
- .dimss
- .iter()
- .map(|dims| dims.iter().map(|&x| MinOptMax::from(x)).collect())
- .collect();
-
- // update from customized
- for iiix in iiixs.iter() {
- if let Some(x) = inputs_attrs
- .dimss
- .get(iiix.i)
- .and_then(|dims| dims.get(iiix.ii))
- {
- // dynamic
- if *x == 0 {
- ys[iiix.i][iiix.ii] = iiix.x.clone();
- }
- } else {
- anyhow::bail!(
- "Cannot retrieve the {}-th dimension of the {}-th input.",
- iiix.ii,
- iiix.i,
- );
- }
- }
-
- // deal with the dynamic axis
- ys.iter_mut().enumerate().for_each(|(i, xs)| {
- xs.iter_mut().enumerate().for_each(|(ii, x)| {
- if x.is_dyn() {
- let n = if ii == 0 { batch_size } else { 1 };
- let y = MinOptMax::from(n);
- tracing::warn!(
- "Using dynamic shapes in inputs without specifying it: the {}-th input, the {}-th dimension. \
- Using {:?} by default. You should make it clear when using TensorRT.",
- i + 1, ii + 1, y
- );
- *x = y;
- }
- });
- });
-
- Ok(ys)
- }
-
- #[allow(dead_code)]
- fn nbytes_from_onnx_dtype_id(x: usize) -> usize {
- match x {
- 7 | 11 | 13 => 8, // i64, f64, u64
- 1 | 6 | 12 => 4, // f32, i32, u32
- 10 | 16 | 5 | 4 => 2, // f16, bf16, i16, u16
- 2 | 3 | 9 => 1, // u8, i8, bool
- 8 => 4, // string(1~4)
- _ => todo!(),
- }
- }
-
- #[allow(dead_code)]
- fn nbytes_from_onnx_dtype(x: &ort::tensor::TensorElementType) -> usize {
- match x {
- ort::tensor::TensorElementType::Float64
- | ort::tensor::TensorElementType::Uint64
- | ort::tensor::TensorElementType::Int64 => 8, // i64, f64, u64
- ort::tensor::TensorElementType::Float32
- | ort::tensor::TensorElementType::Uint32
- | ort::tensor::TensorElementType::Int32
- | ort::tensor::TensorElementType::String => 4, // f32, i32, u32, string(1~4)
- ort::tensor::TensorElementType::Float16
- | ort::tensor::TensorElementType::Bfloat16
- | ort::tensor::TensorElementType::Int16
- | ort::tensor::TensorElementType::Uint16 => 2, // f16, bf16, i16, u16
- ort::tensor::TensorElementType::Uint8
- | ort::tensor::TensorElementType::Int8
- | ort::tensor::TensorElementType::Bool => 1, // u8, i8, bool
- }
- }
-
- #[allow(dead_code)]
- fn ort_dtype_from_onnx_dtype_id(value: i32) -> Option {
- match value {
- 0 => None,
- 1 => Some(ort::tensor::TensorElementType::Float32),
- 2 => Some(ort::tensor::TensorElementType::Uint8),
- 3 => Some(ort::tensor::TensorElementType::Int8),
- 4 => Some(ort::tensor::TensorElementType::Uint16),
- 5 => Some(ort::tensor::TensorElementType::Int16),
- 6 => Some(ort::tensor::TensorElementType::Int32),
- 7 => Some(ort::tensor::TensorElementType::Int64),
- 8 => Some(ort::tensor::TensorElementType::String),
- 9 => Some(ort::tensor::TensorElementType::Bool),
- 10 => Some(ort::tensor::TensorElementType::Float16),
- 11 => Some(ort::tensor::TensorElementType::Float64),
- 12 => Some(ort::tensor::TensorElementType::Uint32),
- 13 => Some(ort::tensor::TensorElementType::Uint64),
- 14 => None, // COMPLEX64
- 15 => None, // COMPLEX128
- 16 => Some(ort::tensor::TensorElementType::Bfloat16),
- _ => None,
- }
- }
-
- fn io_from_onnx_value_info(
- initializer_names: &HashSet<&str>,
- value_info: &[onnx::ValueInfoProto],
- ) -> Result {
- let mut dimss: Vec> = Vec::new();
- let mut dtypes: Vec = Vec::new();
- let mut names: Vec = Vec::new();
- for v in value_info.iter() {
- if initializer_names.contains(v.name.as_str()) {
- continue;
- }
- names.push(v.name.to_string());
- let dtype = match &v.r#type {
- Some(dtype) => dtype,
- None => continue,
- };
- let dtype = match &dtype.value {
- Some(dtype) => dtype,
- None => continue,
- };
- let tensor = match dtype {
- onnx::type_proto::Value::TensorType(tensor) => tensor,
- _ => continue,
- };
- let tensor_type = tensor.elem_type;
- let tensor_type = match Self::ort_dtype_from_onnx_dtype_id(tensor_type) {
- Some(dtype) => dtype,
- None => continue,
- };
- dtypes.push(tensor_type);
-
- let shapes = match &tensor.shape {
- Some(shapes) => shapes,
- None => continue,
- };
- let mut shape_: Vec<usize> = Vec::new();
- for shape in shapes.dim.iter() {
- match &shape.value {
- None => continue,
- Some(value) => match value {
- onnx::tensor_shape_proto::dimension::Value::DimValue(x) => {
- shape_.push(*x as _);
- }
- onnx::tensor_shape_proto::dimension::Value::DimParam(_) => {
- shape_.push(0);
- }
- },
- }
- }
- dimss.push(shape_);
- }
- Ok(OrtTensorAttr {
- dimss,
- dtypes,
- names,
- })
- }
-
- pub fn load_onnx<P: AsRef<std::path::Path>>(p: P) -> Result<onnx::ModelProto> {
- let f = std::fs::read(p)?;
- Ok(onnx::ModelProto::decode(f.as_slice())?)
- }
-
- pub fn oshapes(&self) -> &Vec<Vec<usize>> {
- &self.outputs_attrs.dimss
- }
-
- pub fn odimss(&self) -> &Vec<Vec<usize>> {
- &self.outputs_attrs.dimss
- }
-
- pub fn onames(&self) -> &Vec<String> {
- &self.outputs_attrs.names
- }
-
- pub fn odtypes(&self) -> &Vec<ort::tensor::TensorElementType> {
- &self.outputs_attrs.dtypes
- }
-
- pub fn ishapes(&self) -> &Vec<Vec<usize>> {
- &self.inputs_attrs.dimss
- }
-
- pub fn idimss(&self) -> &Vec<Vec<usize>> {
- &self.inputs_attrs.dimss
- }
-
- pub fn inames(&self) -> &Vec<String> {
- &self.inputs_attrs.names
- }
-
- pub fn idtypes(&self) -> &Vec<ort::tensor::TensorElementType> {
- &self.inputs_attrs.dtypes
- }
-
- pub fn device(&self) -> &Device {
- &self.device
- }
-
- pub fn inputs_minoptmax(&self) -> &Vec<Vec<MinOptMax>> {
- &self.inputs_minoptmax
- }
-
- pub fn batch(&self) -> &MinOptMax {
- &self.inputs_minoptmax[0][0]
- }
-
- pub fn try_height(&self) -> Option<&MinOptMax> {
- self.inputs_minoptmax.first().and_then(|x| x.get(2))
- }
-
- pub fn try_width(&self) -> Option<&MinOptMax> {
- self.inputs_minoptmax.first().and_then(|x| x.get(3))
- }
-
- pub fn height(&self) -> &MinOptMax {
- &self.inputs_minoptmax[0][2]
- }
-
- pub fn width(&self) -> &MinOptMax {
- &self.inputs_minoptmax[0][3]
- }
-
- pub fn is_batch_dyn(&self) -> bool {
- self.ishapes()[0][0] == 0
- }
-
- pub fn try_fetch(&self, key: &str) -> Option<String> {
- match self.session.metadata() {
- Err(_) => None,
- Ok(metadata) => metadata.custom(key).unwrap_or_default(),
- }
- }
-
- pub fn session(&self) -> &Session {
- &self.session
- }
-
- pub fn ir_version(&self) -> usize {
- self.model_proto.ir_version as usize
- }
-
- pub fn opset_version(&self) -> usize {
- self.model_proto.opset_import[0].version as usize
- }
-
- pub fn producer_name(&self) -> String {
- self.model_proto.producer_name.to_string()
- }
-
- pub fn producer_version(&self) -> String {
- self.model_proto.producer_version.to_string()
- }
-
- pub fn model_version(&self) -> usize {
- self.model_proto.model_version as usize
- }
-
- pub fn parameters(&self) -> usize {
- self.params
- }
-
- pub fn memory_weights(&self) -> usize {
- self.wbmems
- }
-
- pub fn ts(&self) -> &Ts {
- &self.ts
- }
-}
diff --git a/src/core/tokenizer_stream.rs b/src/core/tokenizer_stream.rs
deleted file mode 100644
index 495d69a..0000000
--- a/src/core/tokenizer_stream.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-// TODO: refactor
-use anyhow::Result;
-
-/// This is a wrapper around a tokenizer to ensure that tokens can be returned to the user in a
-/// streaming way rather than having to wait for the full decoding.
-#[derive(Debug)]
-pub struct TokenizerStream {
- tokenizer: tokenizers::Tokenizer,
- tokens: Vec<u32>,
- prev_index: usize,
- current_index: usize,
-}
-
-impl TokenizerStream {
- pub fn new(tokenizer: tokenizers::Tokenizer) -> Self {
- Self {
- tokenizer,
- tokens: Vec::new(),
- prev_index: 0,
- current_index: 0,
- }
- }
-
- pub fn into_inner(self) -> tokenizers::Tokenizer {
- self.tokenizer
- }
-
- fn decode(&self, tokens: &[u32]) -> Result<String> {
- match self.tokenizer.decode(tokens, true) {
- Ok(str) => Ok(str),
- Err(err) => anyhow::bail!("cannot decode: {err}"),
- }
- }
-
- pub fn next_token(&mut self, token: u32) -> Result<Option<String>>