Skip to content

Commit

Permalink
enh: create a default basin-based output file for background computation
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Nov 13, 2023
1 parent ac89b22 commit feb4fee
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
instead of an input file path
- fix: BackgroundSparseMed did not work for datasets of length < 100
- fix: bad f-string in BackgroundSparseMed
- enh: create a default basin-based output file for background computation
- ref: remove functools.cache decorator from HDF5Data
- tests: add tests for BackgroundSparseMed
0.12.3
Expand Down
8 changes: 7 additions & 1 deletion dcnum/feat/feat_background/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import h5py

from ...meta import ppid
from ...write import create_with_basins


class Background(abc.ABC):
Expand Down Expand Up @@ -70,10 +71,15 @@ def __init__(self, input_data, output_path, num_cpus=None, **kwargs):
self.input_data = input_data

if self.h5out is None:
# "a", because output file is already an .rtdc file
if not output_path.exists():
# If the output path does not exist, then we create
# an output file with basins (for user convenience).
create_with_basins(path_out=output_path,
basin_paths=self.paths_ref)
# TODO:
# - properly setup HDF5 caching
# - create image_bg here instead of in subclasses
# "a", because output file is already an .rtdc file
self.h5out = h5py.File(output_path, "a", libver="latest")

@staticmethod
Expand Down
60 changes: 60 additions & 0 deletions tests/test_feat_background_bg_sparsemed.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,72 @@
import json

import h5py
import numpy as np
import pytest

from dcnum.feat.feat_background import bg_sparse_median
from dcnum.read import HDF5Data

from helper_methods import retrieve_data


def test_basic_background_output_basin_none(tmp_path):
    """Array input must not result in basins in the output file

    In dcnum 0.13.0, we introduced `create_with_basins`. When the
    input data are passed as an in-memory array (not as a file path),
    the output file is expected to contain no "basins" group.
    """
    num_events = 720
    path_out = tmp_path / "test.h5"
    # A single 5 * 7 frame, repeated for every event
    frame = np.arange(5*7).reshape(1, 5, 7)
    input_data = frame * np.ones((num_events, 1, 1))
    # sanity checks for the synthetic input data
    assert np.all(input_data[0] == input_data[1])
    assert np.all(input_data[0].flatten() == np.arange(5*7))

    bg_kwargs = dict(
        input_data=input_data,
        output_path=path_out,
        kernel_size=10,
        split_time=0.011,
        thresh_cleansing=0,
        frac_cleansing=.8,
    )
    with bg_sparse_median.BackgroundSparseMed(**bg_kwargs) as bic:
        bic.process()

    # Make sure that no basins were written to the output file
    with h5py.File(path_out) as h5:
        assert "basins" not in h5, "because the input is not a file"


def test_basic_background_output_basin_simple(tmp_path):
    """File input must result in a basin that refers to the input file

    In dcnum 0.13.0, we introduced `create_with_basins`. When the
    input data are passed as a file path, the output file is expected
    to contain a "basins" group whose first entry is a JSON document
    listing the input path first in its "paths" list.
    """
    num_events = 720
    path_out = tmp_path / "test.h5"
    path_in = tmp_path / "input.h5"
    # A single 5 * 7 frame, repeated for every event
    frames = np.arange(5*7).reshape(1, 5, 7) * np.ones((num_events, 1, 1))
    with h5py.File(path_in, "a") as h5:
        h5["events/image"] = frames

    bg_kwargs = dict(
        input_data=path_in,
        output_path=path_out,
        kernel_size=10,
        split_time=0.011,
        thresh_cleansing=0,
        frac_cleansing=.8,
    )
    with bg_sparse_median.BackgroundSparseMed(**bg_kwargs) as bic:
        bic.process()

    # Make sure the basins exist in the output file
    with h5py.File(path_out) as h5:
        assert "basins" in h5
        basins = h5["basins"]
        first_key = list(basins.keys())[0]
        # The basin dataset holds the lines of a JSON document as bytes
        json_lines = [raw.decode("utf-8") for raw in basins[first_key]]
        bdat = json.loads(" ".join(json_lines))
        assert bdat["paths"][0] == str(path_in)

    # Add a cherry on top (make sure everything is parseable with HDF5Data)
    with HDF5Data(path_out) as hd:
        assert "image" in hd
        assert "image_bg" in hd


@pytest.mark.parametrize("event_count,kernel_size,split_time",
[(720, 10, 0.01),
(730, 10, 0.01),
Expand Down

0 comments on commit feb4fee

Please sign in to comment.