Initial Development (#1)
* initial commit

* 0.0.1 binary data release
zachcoleman authored May 21, 2022
1 parent f055575 commit 83c0ae8
Showing 12 changed files with 574 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 88
per-file-ignores =
    **/__init__.py:F401
66 changes: 66 additions & 0 deletions .github/workflows/CI.yml
@@ -0,0 +1,66 @@
name: CI

on:
  push:
  pull_request:

jobs:
  linux:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: messense/maturin-action@v1
        with:
          manylinux: auto
          command: build
          args: --release -o dist
      - name: Upload wheels
        uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: dist

  windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v2
      - uses: messense/maturin-action@v1
        with:
          command: build
          args: --release --no-sdist -o dist
      - name: Upload wheels
        uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: dist

  macos:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v2
      - uses: messense/maturin-action@v1
        with:
          command: build
          args: --release --no-sdist -o dist --universal2
      - name: Upload wheels
        uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: dist

  release:
    name: Release
    runs-on: ubuntu-latest
    if: "startsWith(github.ref, 'refs/tags/')"
    needs: [ macos, windows, linux ]
    steps:
      - uses: actions/download-artifact@v2
        with:
          name: wheels
      - name: Publish to PyPI
        uses: messense/maturin-action@v1
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        with:
          command: upload
          args: --skip-existing *
16 changes: 16 additions & 0 deletions .gitignore
@@ -127,3 +127,19 @@ dmypy.json

# Pyre type checker
.pyre/

.venv*

# will have compiled files and executables
debug/
target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,20 @@
repos:
  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: isort (python)
      - id: isort
        name: isort (cython)
        types: [cython]
      - id: isort
        name: isort (pyi)
        types: [pyi]
14 changes: 14 additions & 0 deletions Cargo.toml
@@ -0,0 +1,14 @@
[package]
name = "fast-stats"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "fast_stats"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.16.3", features = ["extension-module"] }
numpy = "0.16.2"
ndarray = "0.15.4"
3 changes: 1 addition & 2 deletions README.md
@@ -1,2 +1 @@
-# fast-stats
-A fast simple library for calculating basic statistics
+# fast-stats
189 changes: 189 additions & 0 deletions benchmarks/timeit.ipynb
@@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import fast_stats\n",
"from sklearn.metrics import precision_score, recall_score, f1_score\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"pred = np.random.randint(0, 2, (10, 512, 512)).flatten()\n",
"actual = np.random.randint(0, 2, (10, 512, 512)).flatten()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"600 ms ± 3.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = precision_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"596 ms ± 1.44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = recall_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"603 ms ± 3.91 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = f1_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# don't actually need to flatten them for fast-stats\n",
"pred = np.random.randint(0, 2, (10, 512, 512))\n",
"actual = np.random.randint(0, 2, (10, 512, 512))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.1 ms ± 204 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = fast_stats.precision(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.3 ms ± 254 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = fast_stats.recall(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.2 ms ± 388 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = fast_stats.f1_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"assert np.allclose(\n",
" fast_stats.precision(actual.flatten(), pred.flatten()),\n",
" precision_score(actual.flatten(), pred.flatten())\n",
")\n",
"assert np.allclose(\n",
" fast_stats.recall(actual.flatten(), pred.flatten()),\n",
" recall_score(actual.flatten(), pred.flatten())\n",
")\n",
"assert np.allclose(\n",
" fast_stats.f1_score(actual.flatten(), pred.flatten()),\n",
" f1_score(actual.flatten(), pred.flatten())\n",
")"
]
}
],
"metadata": {
"interpreter": {
"hash": "a3a671d63c09fb4878d313d605bf6366336b9695c04e11736a5d015abf9b1e42"
},
"kernelspec": {
"display_name": "Python 3.9.11 ('.venv39': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
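For readers who want to reproduce the comparison outside Jupyter, here is a minimal standalone sketch of the same benchmark (it assumes this 0.0.1 wheel and scikit-learn are installed; timings will vary by machine):

import timeit

import numpy as np
from sklearn.metrics import precision_score

import fast_stats

pred = np.random.randint(0, 2, (10, 512, 512))
actual = np.random.randint(0, 2, (10, 512, 512))

# sklearn requires 1-D label arrays; fast_stats accepts any shape.
t_sk = timeit.timeit(
    lambda: precision_score(actual.flatten(), pred.flatten()), number=5
)
t_fs = timeit.timeit(lambda: fast_stats.precision(actual, pred), number=5)
print(f"sklearn:    {t_sk / 5:.3f} s per call")
print(f"fast_stats: {t_fs / 5:.3f} s per call")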
1 change: 1 addition & 0 deletions fast_stats/__init__.py
@@ -0,0 +1 @@
from .stats import f1_score, precision, recall
67 changes: 67 additions & 0 deletions fast_stats/stats.py
@@ -0,0 +1,67 @@
from typing import Union

import numpy as np

from .fast_stats import _tp_fp_fn_tn

Result = Union[None, float]


def _precision(tp: int, fp: int, zero_division: str = "none") -> Result:
    if tp + fp == 0:
        if zero_division == "none":
            return None
        elif zero_division == "zero":
            return 0.0
    return tp / (tp + fp)


def _recall(tp: int, fn: int, zero_division: str = "none") -> Result:
    if tp + fn == 0:
        if zero_division == "none":
            return None
        elif zero_division == "zero":
            return 0.0
    return tp / (tp + fn)


def precision(
    y_true: np.ndarray, y_pred: np.ndarray, zero_division: str = "none"
) -> Result:
    assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape"
    assert isinstance(y_pred, np.ndarray) and isinstance(
        y_true, np.ndarray
    ), "y_true and y_pred must be numpy arrays"

    tp, fp, _, _ = _tp_fp_fn_tn(y_true, y_pred)
    return _precision(tp, fp, zero_division)


def recall(
    y_true: np.ndarray, y_pred: np.ndarray, zero_division: str = "none"
) -> Result:
    assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape"
    assert isinstance(y_pred, np.ndarray) and isinstance(
        y_true, np.ndarray
    ), "y_true and y_pred must be numpy arrays"

    tp, _, fn, _ = _tp_fp_fn_tn(y_true, y_pred)
    return _recall(tp, fn, zero_division)


def f1_score(
    y_true: np.ndarray, y_pred: np.ndarray, zero_division: str = "none"
) -> Result:
    assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape"
    assert isinstance(y_pred, np.ndarray) and isinstance(
        y_true, np.ndarray
    ), "y_true and y_pred must be numpy arrays"

    tp, fp, fn, _ = _tp_fp_fn_tn(y_true, y_pred)
    # Must pass "zero", not "0": "0" matches neither branch in _precision/
    # _recall and would fall through to a ZeroDivisionError on empty inputs.
    p, r = _precision(tp, fp, "zero"), _recall(tp, fn, "zero")

    if p + r == 0:
        if zero_division == "none":
            return None
        elif zero_division == "zero":
            return 0.0

    return 2 * p * r / (p + r)
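To round out the diff, a short usage sketch of the API this file adds (array values are illustrative only):

import numpy as np

import fast_stats

y_true = np.random.randint(0, 2, (10, 512, 512))
y_pred = np.random.randint(0, 2, (10, 512, 512))

# Each metric returns a float, or None when its denominator is zero
# and zero_division is left at the default "none".
print(fast_stats.precision(y_true, y_pred))
print(fast_stats.recall(y_true, y_pred))
print(fast_stats.f1_score(y_true, y_pred))

# Request 0.0 instead of None for degenerate inputs.
empty = np.zeros((4, 4), dtype=np.int64)
print(fast_stats.precision(empty, empty, zero_division="zero"))  # 0.0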