Initial Development (#1)
* initial commit

* 0.0.1 binary data release
zachcoleman authored May 21, 2022
1 parent f055575 commit 83c0ae8
Showing 12 changed files with 574 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 88
per-file-ignores =
    **/__init__.py:F401
66 changes: 66 additions & 0 deletions .github/workflows/CI.yml
@@ -0,0 +1,66 @@
name: CI

on:
  push:
  pull_request:

jobs:
  linux:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: messense/maturin-action@v1
        with:
          manylinux: auto
          command: build
          args: --release -o dist
      - name: Upload wheels
        uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: dist

  windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v2
      - uses: messense/maturin-action@v1
        with:
          command: build
          args: --release --no-sdist -o dist
      - name: Upload wheels
        uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: dist

  macos:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v2
      - uses: messense/maturin-action@v1
        with:
          command: build
          args: --release --no-sdist -o dist --universal2
      - name: Upload wheels
        uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: dist

  release:
    name: Release
    runs-on: ubuntu-latest
    if: "startsWith(github.ref, 'refs/tags/')"
    needs: [ macos, windows, linux ]
    steps:
      - uses: actions/download-artifact@v2
        with:
          name: wheels
      - name: Publish to PyPI
        uses: messense/maturin-action@v1
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        with:
          command: upload
          args: --skip-existing *
16 changes: 16 additions & 0 deletions .gitignore
@@ -127,3 +127,19 @@ dmypy.json

# Pyre type checker
.pyre/

.venv*

# will have compiled files and executables
debug/
target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,20 @@
repos:
  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: isort (python)
      - id: isort
        name: isort (cython)
        types: [cython]
      - id: isort
        name: isort (pyi)
        types: [pyi]
14 changes: 14 additions & 0 deletions Cargo.toml
@@ -0,0 +1,14 @@
[package]
name = "fast-stats"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "fast_stats"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.16.3", features = ["extension-module"] }
numpy = "0.16.2"
ndarray = "0.15.4"
3 changes: 1 addition & 2 deletions README.md
@@ -1,2 +1 @@
-# fast-stats
-A fast simple library for calculating basic statistics
+# fast-stats
189 changes: 189 additions & 0 deletions benchmarks/timeit.ipynb
@@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import fast_stats\n",
"from sklearn.metrics import precision_score, recall_score, f1_score\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"pred = np.random.randint(0, 2, (10, 512, 512)).flatten()\n",
"actual = np.random.randint(0, 2, (10, 512, 512)).flatten()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"600 ms ± 3.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = precision_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"596 ms ± 1.44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = recall_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"603 ms ± 3.91 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = f1_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# don't actually need to flatten them for fast-stats\n",
"pred = np.random.randint(0, 2, (10, 512, 512))\n",
"actual = np.random.randint(0, 2, (10, 512, 512))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.1 ms ± 204 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = fast_stats.precision(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.3 ms ± 254 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = fast_stats.recall(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.2 ms ± 388 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"_ = fast_stats.f1_score(actual, pred)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"assert np.allclose(\n",
" fast_stats.precision(actual.flatten(), pred.flatten()),\n",
" precision_score(actual.flatten(), pred.flatten())\n",
")\n",
"assert np.allclose(\n",
" fast_stats.recall(actual.flatten(), pred.flatten()),\n",
" recall_score(actual.flatten(), pred.flatten())\n",
")\n",
"assert np.allclose(\n",
" fast_stats.f1_score(actual.flatten(), pred.flatten()),\n",
" f1_score(actual.flatten(), pred.flatten())\n",
")"
]
}
],
"metadata": {
"interpreter": {
"hash": "a3a671d63c09fb4878d313d605bf6366336b9695c04e11736a5d015abf9b1e42"
},
"kernelspec": {
"display_name": "Python 3.9.11 ('.venv39': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
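For readers who want to reproduce the comparison outside Jupyter, here is a minimal standalone sketch of the same benchmark (it assumes this 0.0.1 wheel and scikit-learn are installed; timings will vary by machine):

import timeit

import numpy as np
from sklearn.metrics import precision_score

import fast_stats

pred = np.random.randint(0, 2, (10, 512, 512))
actual = np.random.randint(0, 2, (10, 512, 512))

# sklearn requires 1-D label arrays; fast_stats accepts any shape.
t_sk = timeit.timeit(
    lambda: precision_score(actual.flatten(), pred.flatten()), number=5
)
t_fs = timeit.timeit(lambda: fast_stats.precision(actual, pred), number=5)
print(f"sklearn:    {t_sk / 5:.3f} s per call")
print(f"fast_stats: {t_fs / 5:.3f} s per call")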
1 change: 1 addition & 0 deletions fast_stats/__init__.py
@@ -0,0 +1 @@
from .stats import f1_score, precision, recall
67 changes: 67 additions & 0 deletions fast_stats/stats.py
@@ -0,0 +1,67 @@
from typing import Union

import numpy as np

from .fast_stats import _tp_fp_fn_tn

Result = Union[None, float]


def _precision(tp: int, fp: int, zero_division: str = "none") -> Result:
    if tp + fp == 0:
        if zero_division == "none":
            return None
        elif zero_division == "zero":
            return 0.0
    return tp / (tp + fp)


def _recall(tp: int, fn: int, zero_division: str = "none") -> Result:
    if tp + fn == 0:
        if zero_division == "none":
            return None
        elif zero_division == "zero":
            return 0.0
    return tp / (tp + fn)


def precision(
    y_true: np.ndarray, y_pred: np.ndarray, zero_division: str = "none"
) -> Result:
    assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape"
    assert isinstance(y_pred, np.ndarray) and isinstance(
        y_true, np.ndarray
    ), "y_true and y_pred must be numpy arrays"

    tp, fp, _, _ = _tp_fp_fn_tn(y_true, y_pred)
    return _precision(tp, fp, zero_division)


def recall(
    y_true: np.ndarray, y_pred: np.ndarray, zero_division: str = "none"
) -> Result:
    assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape"
    assert isinstance(y_pred, np.ndarray) and isinstance(
        y_true, np.ndarray
    ), "y_true and y_pred must be numpy arrays"

    tp, _, fn, _ = _tp_fp_fn_tn(y_true, y_pred)
    return _recall(tp, fn, zero_division)


def f1_score(
    y_true: np.ndarray, y_pred: np.ndarray, zero_division: str = "none"
) -> Result:
    assert y_true.shape == y_pred.shape, "y_true and y_pred must be same shape"
    assert isinstance(y_pred, np.ndarray) and isinstance(
        y_true, np.ndarray
    ), "y_true and y_pred must be numpy arrays"

    tp, fp, fn, _ = _tp_fp_fn_tn(y_true, y_pred)
    # Must pass "zero", not "0": "0" matches neither branch in _precision/
    # _recall and would fall through to a ZeroDivisionError on empty inputs.
    p, r = _precision(tp, fp, "zero"), _recall(tp, fn, "zero")

    if p + r == 0:
        if zero_division == "none":
            return None
        elif zero_division == "zero":
            return 0.0

    return 2 * p * r / (p + r)
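To round out the diff, a short usage sketch of the API this file adds (array values are illustrative only):

import numpy as np

import fast_stats

y_true = np.random.randint(0, 2, (10, 512, 512))
y_pred = np.random.randint(0, 2, (10, 512, 512))

# Each metric returns a float, or None when its denominator is zero
# and zero_division is left at the default "none".
print(fast_stats.precision(y_true, y_pred))
print(fast_stats.recall(y_true, y_pred))
print(fast_stats.f1_score(y_true, y_pred))

# Request 0.0 instead of None for degenerate inputs.
empty = np.zeros((4, 4), dtype=np.int64)
print(fast_stats.precision(empty, empty, zero_division="zero"))  # 0.0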