diff --git a/.github/workflows/ci-emscripten.yaml b/.github/workflows/ci-emscripten.yaml new file mode 100644 index 00000000..96545571 --- /dev/null +++ b/.github/workflows/ci-emscripten.yaml @@ -0,0 +1,100 @@ +# Attributed to NumPy https://github.com/numpy/numpy/pull/25894 +# https://github.com/numpy/numpy/blob/d2d2c25fa81b47810f5cbd85ea6485eb3a3ffec3/.github/workflows/emscripten.yml +# + +name: Pyodide CI + +on: + # TODO: refine after this is ready to merge + [push, pull_request, workflow_dispatch] + +env: + FORCE_COLOR: 3 + # Disable instructions: AVX2 and SSE2 because Emscripten-specific SIMD + # support has not been implemented yet + DISABLE_NUMCODECS_AVX2: 1 + DISABLE_NUMCODECS_SSE2: 1 + # Common environment variables for both build and test jobs. + # Version pins are quoted so YAML never re-types them as numbers + # (an unquoted 3.10 would be read as the float 3.1). + PYODIDE_VERSION: '0.27.3' + # PYTHON_VERSION and EMSCRIPTEN_VERSION are determined by PYODIDE_VERSION. + # The appropriate versions can be found in the Pyodide repodata.json + # "info" field, or in Makefile.envs: + # https://github.com/pyodide/pyodide/blob/main/Makefile.envs#L2 + PYTHON_VERSION: '3.12' # any 3.12.x version works + EMSCRIPTEN_VERSION: '3.1.58' + NODE_VERSION: '20' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + build-wasm-emscripten: + name: Build numcodecs Pyodide distribution + runs-on: ubuntu-22.04 + # To enable this workflow on a fork, comment out: + # FIXME: uncomment after this is ready to merge + # if: github.repository == 'zarr-developers/numcodecs' + steps: + - name: Checkout source + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + # https://github.com/actions/checkout/issues/2041 isn't released yet + # fetch-tags: true # required for version resolution + + - name: Set up Python ${{ env.PYTHON_VERSION }} + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Set up 
Emscripten toolchain + uses: mymindstorm/setup-emsdk@v14 + with: + version: ${{ env.EMSCRIPTEN_VERSION }} + actions-cache-folder: emsdk-cache + + - name: Apply necessary patch(es) + run: | + patch -p1 < tools/ci/patches/0001-disable-multiprocessing-and-pthreads.patch + patch -p1 < tools/ci/patches/0002-add-missing-unistd-headers.patch -d c-blosc/internal-complibs/zlib-*/ + + - name: Install pyodide-build + run: python -m pip install pyodide-build + + - name: Build numcodecs for Pyodide/WASM + run: pyodide build + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + + - name: Set up Pyodide virtual environment and test numcodecs for Pyodide + run: | + # Pin to a specific version of Pyodide to ensure reliability + pyodide xbuildenv install ${{ env.PYODIDE_VERSION }} + + # Set up Pyodide virtual environment and activate it + pyodide venv .venv-pyodide + source .venv-pyodide/bin/activate + + # For tests in test_zarr3.py + pip install zarr==3.0.0b1 + + # Install the built numcodecs WASM wheel and relevant dependencies + pip install $(ls dist/*.whl)"[msgpack,crc32c,pcodec,test,test_extras]" + # TODO: get zfpy built in Pyodide and install it here + + # Change into a different directory before running tests to avoid + # the test runner picking up the local numcodecs package + cd docs + + # Don't use the cache provider plugin, as it doesn't currently work + # with Pyodide: https://github.com/pypa/cibuildwheel/issues/1966 + python -m pytest -p no:cacheprovider -svra --pyargs numcodecs diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b1a1a7d9..cd2ae52d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,7 +25,9 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive - fetch-depth: 0 # required for version resolution + fetch-depth: 0 + # https://github.com/actions/checkout/issues/2041 isn't released yet + # fetch-tags: true # required for version resolution - name: Set 
up Conda uses: conda-incubator/setup-miniconda@v3.1.1 diff --git a/.gitignore b/.gitignore index 6b7d99f1..80ce22b8 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ coverage.xml *,cover .hypothesis/ cover/ +.pytest_cache/ # Cython annotation files numcodecs/*.html @@ -104,3 +105,6 @@ numcodecs/version.py # Cython generated numcodecs/*.c + +# Pyodide builds +/.pyodide-xbuildenv-* diff --git a/docs/contributing.rst b/docs/contributing.rst index e5cd936e..d323926f 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -144,12 +144,11 @@ docstrings. The simplest way to run the unit tests is to invoke:: $ pytest -v -NumCodecs currently supports Python 6-3.9, so the above command must +NumCodecs currently supports Python 3.8 and later, so the above command must succeed before code can be accepted into the main code base. -All tests are automatically run via Travis (Linux) and AppVeyor (Windows) continuous -integration services for every pull request. Tests must pass under both services before -code can be accepted. +All tests are automatically run for every pull request by a continuous integration +service, GitHub Actions. All CI checks must pass before code can be accepted. Code standards ~~~~~~~~~~~~~~ @@ -163,11 +162,11 @@ Conformance can be checked by running:: Test coverage ~~~~~~~~~~~~~ -NumCodecs maintains 100% test coverage under the latest Python stable release (currently -Python 3.9). Both unit tests and docstring doctests are included when computing -coverage. Running ``pytest -v`` will automatically run the test suite with coverage -and produce a coverage report. This should be 100% before code can be accepted into the -main code base. +NumCodecs maintains 100% test coverage under the latest Python stable release. +Both unit tests and docstring doctests are included when computing coverage. Running +``pytest -v`` will automatically run the test suite with coverage and produce a +coverage report. 
This should be 100% before code can be accepted into the main +code base. When submitting a pull request, coverage will also be collected across all supported Python versions via the Codecov service, and will be reported back within the pull @@ -179,7 +178,7 @@ Documentation Docstrings for user-facing classes and functions should follow the `numpydoc `_ standard, including sections for Parameters and Examples. All examples will be run as doctests -under Python 3.9. +under a stable version of Python. NumCodecs uses Sphinx for documentation, hosted on readthedocs.org. Documentation is written in the RestructuredText markup language (.rst files) in the ``docs`` folder. @@ -207,8 +206,8 @@ Pull requests submitted by an external contributor should be reviewed and approv one core developers before being merged. Ideally, pull requests submitted by a core developer should be reviewed and approved by at least one other core developers before being merged. -Pull requests should not be merged until all CI checks have passed (Travis, AppVeyor, -Codecov) against code that has had the latest main merged in. +Pull requests should not be merged until all CI checks have passed (GitHub Actions, +Codecov) against code that has had the latest main merged in. Compatibility and versioning policies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/release.rst b/docs/release.rst index 14fc9423..afecaf3f 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -20,8 +20,12 @@ Unreleased Enhancements ~~~~~~~~~~~~ +* Add official support for Emscripten (Pyodide) builds. These builds have + existed upstream/in-tree in Pyodide, and are now tested here more extensively. + By :user:`Agriya Khetarpal `. + * Add support for the Linux AArch64 architecture, and bump the minimum -macOS deployment target for x86_64 to 10.13. + macOS deployment target for x86_64 to 10.13. By :user:`Agriya Khetarpal `, :issue:`288`. 
Improvements diff --git a/numcodecs/tests/common.py b/numcodecs/tests/common.py index bb7c4780..857f61c9 100644 --- a/numcodecs/tests/common.py +++ b/numcodecs/tests/common.py @@ -1,6 +1,8 @@ import array import json as _json import os +import platform +import sys from glob import glob import numpy as np @@ -26,6 +28,8 @@ 'เฮลโลเวิลด์', ] +is_wasm = (sys.platform == 'emscripten') or (platform.machine() in ['wasm32', 'wasm64']) + def compare_arrays(arr, res, precision=None): # ensure numpy array with matching dtype diff --git a/numcodecs/tests/test_blosc.py b/numcodecs/tests/test_blosc.py index 0bc14010..d546d0cb 100644 --- a/numcodecs/tests/test_blosc.py +++ b/numcodecs/tests/test_blosc.py @@ -19,6 +19,7 @@ check_err_decode_object_buffer, check_err_encode_object_buffer, check_max_buffer_size, + is_wasm, ) codecs = [ @@ -223,6 +224,7 @@ def _decode_worker(enc): return compressor.decode(enc) +@pytest.mark.skipif(is_wasm, reason="WASM/Pyodide does not support multiprocessing") @pytest.mark.parametrize('pool', [Pool, ThreadPool]) def test_multiprocessing(use_threads, pool): data = np.arange(1000000) diff --git a/numcodecs/tests/test_entrypoints_backport.py b/numcodecs/tests/test_entrypoints_backport.py index 7e1c32bc..ded5d33a 100644 --- a/numcodecs/tests/test_entrypoints_backport.py +++ b/numcodecs/tests/test_entrypoints_backport.py @@ -6,6 +6,7 @@ import pytest import numcodecs.registry +from numcodecs.tests.common import is_wasm importlib_spec = importlib.util.find_spec("importlib_metadata") if importlib_spec is None or importlib_spec.loader is None: # pragma: no cover @@ -29,6 +30,7 @@ def get_entrypoints_with_importlib_metadata_loaded(): assert 
cls.codec_id == "test" +@pytest.mark.skipif(is_wasm, reason="Spawning processes is not supported in Pyodide/WASM") def test_entrypoint_codec_with_importlib_metadata(): p = Process(target=get_entrypoints_with_importlib_metadata_loaded) p.start() diff --git a/numcodecs/tests/test_shuffle.py b/numcodecs/tests/test_shuffle.py index 6e6d744a..dc2387b2 100644 --- a/numcodecs/tests/test_shuffle.py +++ b/numcodecs/tests/test_shuffle.py @@ -14,6 +14,7 @@ check_backwards_compatibility, check_config, check_encode_decode, + is_wasm, ) codecs = [ @@ -87,6 +88,7 @@ def _decode_worker(enc): return compressor.decode(enc) +@pytest.mark.skipif(is_wasm, reason="WASM/Pyodide does not support multiprocessing") @pytest.mark.parametrize('pool', [Pool, ThreadPool]) def test_multiprocessing(pool): data = np.arange(1000000) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 0d8ecc74..508f8563 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from numcodecs.tests.common import is_wasm + if TYPE_CHECKING: # pragma: no cover import zarr else: @@ -53,6 +55,7 @@ def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]): assert "See :class:`numcodecs." 
in codec_class.__doc__ # type: ignore[operator] +@pytest.mark.skipif(is_wasm, reason="Threads are not supported in Pyodide/WASM") @pytest.mark.parametrize( "codec_class", [ @@ -85,6 +88,8 @@ def test_generic_compressor( np.testing.assert_array_equal(data, a[:, :]) +# TODO: undo skips here when we can test async code in WASM +@pytest.mark.skipif(is_wasm, reason="testing async code not yet supported in Pyodide/WASM") @pytest.mark.parametrize( ("codec_class", "codec_config"), [ @@ -124,6 +129,8 @@ def test_generic_filter( np.testing.assert_array_equal(data, a[:, :]) +# TODO: undo skips here when we can test async code in WASM +@pytest.mark.skipif(is_wasm, reason="testing async code not yet supported in Pyodide/WASM") def test_generic_filter_bitround(store: StorePath): data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16)) @@ -142,6 +149,8 @@ def test_generic_filter_bitround(store: StorePath): assert np.allclose(data, a[:, :], atol=0.1) +# TODO: undo skips here when we can test async code in WASM +@pytest.mark.skipif(is_wasm, reason="testing async code not yet supported in Pyodide/WASM") def test_generic_filter_quantize(store: StorePath): data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) @@ -160,6 +169,8 @@ def test_generic_filter_quantize(store: StorePath): assert np.allclose(data, a[:, :], atol=0.001) +# TODO: undo skips here when we can test async code in WASM +@pytest.mark.skipif(is_wasm, reason="testing async code not yet supported in Pyodide/WASM") def test_generic_filter_packbits(store: StorePath): data = np.zeros((16, 16), dtype="bool") data[0:4, :] = True @@ -189,6 +200,8 @@ def test_generic_filter_packbits(store: StorePath): ) +# TODO: undo skips here when we can test async code in WASM +@pytest.mark.skipif(is_wasm, reason="testing async code not yet supported in Pyodide/WASM") @pytest.mark.parametrize( "codec_class", [ diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index f1743ffb..e4c10ae4 100644 --- a/numcodecs/zarr3.py +++ 
b/numcodecs/zarr3.py @@ -25,6 +25,20 @@ from __future__ import annotations +# Short workaround for skipping the doctest above in a WASM environment +# compiled via Emscripten where threads are not available, and accessing +# the pytest config has no effect and ignoring warnings does not work. +try: + import pytest + + from numcodecs.tests.common import is_wasm + + if is_wasm: # pragma: no cover + pytest.skip("zarr3 doctests not supported in WASM", allow_module_level=True) +# pytest or the test helpers are not importable, i.e. we are not running tests +except ImportError: # pragma: no cover + pass + import asyncio import math from dataclasses import dataclass, replace diff --git a/setup.py b/setup.py index c27fad35..4efaf6f0 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,8 @@ def blosc_extension(): define_macros = [] # ensure pthread is properly linked on POSIX systems - if os.name == 'posix': + # and not when compiling to WASM. + if os.name == 'posix' and "PYODIDE" not in os.environ: extra_compile_args.append('-pthread') extra_link_args.append('-pthread') diff --git a/tools/ci/patches/0001-disable-multiprocessing-and-pthreads.patch b/tools/ci/patches/0001-disable-multiprocessing-and-pthreads.patch new file mode 100644 index 00000000..dcab2de0 --- /dev/null +++ b/tools/ci/patches/0001-disable-multiprocessing-and-pthreads.patch @@ -0,0 +1,34 @@ +This patch disables multiprocessing and pthread for blosc. 
This file +is adapted from and attributed to the Pyodide developers and can be +viewed at the upstream Pyodide repository at the following link: + +https://github.com/pyodide/pyodide/blob/d32e376013d8977b66c6aa828042b1fee8047aea/packages/numcodecs/patches/fixblosc.patch + + +diff --git a/c-blosc/blosc/blosc.h b/c-blosc/blosc/blosc.h +index 40857d0..8a1e969 100644 +--- a/c-blosc/blosc/blosc.h ++++ b/c-blosc/blosc/blosc.h +@@ -50,7 +50,7 @@ extern "C" { + ((INT_MAX - BLOSC_MAX_TYPESIZE * sizeof(int32_t)) / 3) + + /* The maximum number of threads (for some static arrays) */ +-#define BLOSC_MAX_THREADS 256 ++#define BLOSC_MAX_THREADS 1 + + /* Codes for shuffling (see blosc_compress) */ + #define BLOSC_NOSHUFFLE 0 /* no shuffle */ + + diff --git a/c-blosc/blosc/blosc.c b/c-blosc/blosc/blosc.c +index a5a5bd5..2a7797c 100644 +--- a/c-blosc/blosc/blosc.c ++++ b/c-blosc/blosc/blosc.c +@@ -2236,6 +2236,7 @@ void blosc_atfork_child(void) { + + void blosc_init(void) + { ++ g_initlib = 1; + /* Return if we are already initialized */ + if (g_initlib) return; + + diff --git a/tools/ci/patches/0002-add-missing-unistd-headers.patch b/tools/ci/patches/0002-add-missing-unistd-headers.patch new file mode 100644 index 00000000..7bbc88c1 --- /dev/null +++ b/tools/ci/patches/0002-add-missing-unistd-headers.patch @@ -0,0 +1,51 @@ +This patch adds missing headers in vendored zlib as done in numcodecs.js. This file +is attributed to the Pyodide developers and can be viewed at the upstream +Pyodide repository at the following link: + +https://github.com/pyodide/pyodide/blob/d32e376013d8977b66c6aa828042b1fee8047aea/packages/numcodecs/patches/fixzlib.patch + +This patch is applied in the c-blosc/internal-complibs/zlib-*/ directory +in the .github/workflows/ci-emscripten.yaml workflow file. 
+ +diff --git a/gzlib.c b/gzlib.c +index fae202e..80606a6 100644 +--- a/gzlib.c ++++ b/gzlib.c +@@ -2,7 +2,7 @@ + * Copyright (C) 2004, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ +- ++#include <unistd.h> + #include "gzguts.h" + + #if defined(_WIN32) && !defined(__BORLANDC__) + +diff --git a/gzread.c b/gzread.c +index bf4538e..afe6acd 100644 +--- a/gzread.c ++++ b/gzread.c +@@ -2,7 +2,7 @@ + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ +- ++#include <unistd.h> + #include "gzguts.h" + + /* Local functions */ + + +diff --git a/gzwrite.c b/gzwrite.c +index aa767fb..a87676f 100644 +--- a/gzwrite.c ++++ b/gzwrite.c +@@ -2,7 +2,7 @@ + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ +- ++#include <unistd.h> + #include "gzguts.h" + + /* Local functions */