diff --git a/monai/data/utils.py b/monai/data/utils.py
index d03dbd3234..9915e0f363 100644
--- a/monai/data/utils.py
+++ b/monai/data/utils.py
@@ -1484,7 +1484,7 @@ def convert_tables_to_dicts(
                 rows.append(i)
 
     # convert to a list of dictionaries corresponding to every row
-    data_ = df.loc[rows] if col_names is None else df.loc[rows, col_names]
+    data_ = df.iloc[rows] if col_names is None else df.iloc[rows][col_names]
     if isinstance(col_types, dict):
         # fill default values for NaN
         defaults = {k: v["default"] for k, v in col_types.items() if v is not None and v.get("default") is not None}
@@ -1500,7 +1500,7 @@ def convert_tables_to_dicts(
     if col_groups is not None:
         groups: dict[str, list] = {}
         for name, cols in col_groups.items():
-            groups[name] = df.loc[rows, cols].values
+            groups[name] = df.iloc[rows][cols].values
         # invert items of groups to every row of data
         data = [dict(d, **{k: v[i] for k, v in groups.items()}) for i, d in enumerate(data)]
 
diff --git a/tests/data/test_csv_dataset.py b/tests/data/test_csv_dataset.py
index 71be4fdd22..1fec6b2bb5 100644
--- a/tests/data/test_csv_dataset.py
+++ b/tests/data/test_csv_dataset.py
@@ -179,6 +179,13 @@ def prepare_csv_file(data, filepath):
                 },
             )
 
+            # test pre-loaded DataFrame subset
+            df = pd.read_csv(filepath1)
+            df_subset = df.iloc[[1, 3, 4]]
+            dataset = CSVDataset(src=df_subset, col_groups={"ehr": [f"ehr_{i}" for i in range(3)]})
+            self.assertEqual(len(dataset), 3)
+            np.testing.assert_allclose([round(i, 4) for i in dataset[1]["ehr"]], [3.3333, 3.2353, 3.4000])
+
             # test pre-loaded multiple DataFrames, join tables with kwargs
             dfs = [pd.read_csv(i) for i in filepaths]
             dataset = CSVDataset(src=dfs, on="subject_id")