Skip to content

Commit

Permalink
[df] Add DistRDF test for DefaultValueFor, FilterAvailable, FilterMis…
Browse files Browse the repository at this point in the history
…sing
  • Loading branch information
gpetruc authored and vepadulano committed Jan 23, 2025
1 parent 8a8c1fe commit 9368048
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 0 deletions.
23 changes: 23 additions & 0 deletions python/distrdf/backends/check_definepersample.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,29 @@ def declare_definepersample_code():
for count in samplescounts:
assert count.GetValue() == 10, f"{count.GetValue()=}"

def test_defaults_and_missing(self, payload):
    """
    Test the DefaultValueFor, FilterAvailable and FilterMissing operations.

    The dataset chains two files that provide different columns, so each
    column is missing for the entries read from the other file.

    Args:
        payload: test fixture; its first element is the connection passed
            to the distributed RDataFrame as ``executor``.
    """
    filenames = [
        # 10k entries, defining b1, b2, b3 (Int_t), all always equal to 42
        "../data/ttree/distrdf_roottest_check_rungraphs.root",
        # 100 entries defining 'v' (Double_t)
        "../data/ttree/distrdf_roottest_check_reducer_merge_1.root",
    ]
    connection, _ = payload
    df = ROOT.RDataFrame("tree", filenames, executor=connection)

    # Book every action before reading any result.
    c10k = df.FilterAvailable("b1").Count()
    c100 = df.FilterAvailable("v").Count()
    c100b = df.FilterMissing("b1").Count()
    # A default of 40 never passes the filter: only the real b1 values count.
    cD10k = df.DefaultValueFor("b1", 40).Filter("b1 == 42").Count()
    # A default of 42 passes the filter too: every entry counts.
    cD10100 = df.DefaultValueFor("b1", 42).Filter("b1 == 42").Count()
    sV = df.DefaultValueFor("v", 0.1).Sum("v")

    assert c10k.GetValue() == 10000, f"{c10k.GetValue()=}"
    assert c100.GetValue() == 100, f"{c100.GetValue()=}"
    assert c100b.GetValue() == 100, f"{c100b.GetValue()=}"
    assert cD10k.GetValue() == 10000, f"{cD10k.GetValue()=}"
    assert cD10100.GetValue() == 10100, f"{cD10100.GetValue()=}"
    # 10000 defaulted entries contribute 10000 * 0.1 = 1000 to the expected
    # total. The sum is a floating-point accumulation whose rounding depends
    # on how partial results are merged, so compare with a tolerance rather
    # than exact equality.
    assert sV.GetValue() == pytest.approx(5950.0), f"{sV.GetValue()=}"

if __name__ == "__main__":
    # Running this file as a script hands it to pytest as the only target.
    pytest.main([__file__])
37 changes: 37 additions & 0 deletions python/distrdf/backends/check_missing_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import pytest

import ROOT


class TestMissingValues:
    """Tests of dealing with missing values in the input dataset."""

    def test_defaults_and_missing(self, payload):
        """
        Test the DefaultValueFor, FilterAvailable and FilterMissing operations.

        The dataset chains two files that provide different columns, so each
        column is missing for the entries read from the other file.

        Args:
            payload: test fixture; its first element is the connection passed
                to the distributed RDataFrame as ``executor``.
        """
        filenames = [
            # 10k entries, defining b1, b2, b3 (Int_t), all always equal to 42
            "../data/ttree/distrdf_roottest_check_rungraphs.root",
            # 100 entries defining 'v' (Double_t)
            "../data/ttree/distrdf_roottest_check_reducer_merge_1.root",
        ]
        connection, _ = payload
        df = ROOT.RDataFrame("tree", filenames, executor=connection)

        # Book every action before reading any result.
        c10k = df.FilterAvailable("b1").Count()
        c100 = df.FilterAvailable("v").Count()
        c100b = df.FilterMissing("b1").Count()
        # A default of 40 never passes the filter: only real b1 values count.
        cD10k = df.DefaultValueFor("b1", 40).Filter("b1 == 42").Count()
        # A default of 42 passes the filter too: every entry counts.
        cD10100 = df.DefaultValueFor("b1", 42).Filter("b1 == 42").Count()
        sV = df.DefaultValueFor("v", 0.1).Sum("v")

        assert c10k.GetValue() == 10000, f"{c10k.GetValue()=}"
        assert c100.GetValue() == 100, f"{c100.GetValue()=}"
        assert c100b.GetValue() == 100, f"{c100b.GetValue()=}"
        assert cD10k.GetValue() == 10000, f"{cD10k.GetValue()=}"
        assert cD10100.GetValue() == 10100, f"{cD10100.GetValue()=}"
        # 10000 defaulted entries contribute 10000 * 0.1 = 1000 to the
        # expected total. The sum is a floating-point accumulation whose
        # rounding depends on how partial results are merged, so compare with
        # a tolerance rather than exact equality.
        assert sV.GetValue() == pytest.approx(5950.0), f"{sV.GetValue()=}"


if __name__ == "__main__":
    # Direct execution: let pytest collect and run the tests of this module.
    pytest.main([__file__])
1 change: 1 addition & 0 deletions python/distrdf/backends/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from check_distribute_headers_sharedlibs_files import *
from check_inv_mass import *
from check_live_visualize import *
from check_missing_values import *
from check_reducer_merge import *
from check_rungraphs import *
from check_variations import *
Expand Down

0 comments on commit 9368048

Please sign in to comment.