Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support DataFrames in features #829

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 48 additions & 14 deletions backend/hdf5/BaseTagHDF5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,21 +132,33 @@ ndsize_t BaseTagHDF5::featureCount() const {
std::shared_ptr<IFeature> BaseTagHDF5::getFeature(const std::string &name_or_id) const {
std::shared_ptr<FeatureHDF5> feature;
boost::optional<H5Group> g = feature_group(false);

if (g) {
boost::optional<H5Group> group = g->findGroupByNameOrAttribute("name", name_or_id);
if (group)
feature = std::make_shared<FeatureHDF5>(file(), block(), group.get());
else {
for (ndsize_t i = 0; i < g->objectCount(); i++) {
H5Group gr = g->openGroup(g->objectName(i), false);
std::shared_ptr<FeatureHDF5> feat = std::make_shared<FeatureHDF5>(file(), block(), gr);
std::shared_ptr<base::IDataArray> da = feat->data();
if (da->name() == name_or_id || da->id() == name_or_id) {
feature = std::make_shared<FeatureHDF5>(file(), block(), gr);
break;
bool found = false;
ndsize_t index;
for (index = 0; index < featureCount(); ++index) {
Feature feat = getFeature(index);
if (feat.targetType() == TargetType::DataArray) {
DataArray da = feat.dataArray();
found = da && (da.name() == name_or_id || da.id() == name_or_id);
if (found) {
break;
}
} else {
DataFrame df = feat.dataFrame();
found = df && (df.name() == name_or_id || df.id() == name_or_id);
if (found) {
break;
}
}
}
if (found) {
H5Group gr = g->openGroup(g->objectName(index), false);
feature = std::make_shared<FeatureHDF5>(file(), block(), gr);
}
}
}
return feature;
Expand All @@ -160,16 +172,38 @@ std::shared_ptr<IFeature> BaseTagHDF5::getFeature(ndsize_t index) const {
}


std::shared_ptr<IFeature> BaseTagHDF5::createFeature(const std::string &name_or_id, LinkType link_type) {
if(!block()->hasEntity({name_or_id, ObjectType::DataArray})) {
throw std::runtime_error("DataArray not found in Block!");
std::shared_ptr<IFeature> BaseTagHDF5::createFeature(const std::string &name_or_id, LinkType link_type, TargetType target_type) {
bool entity_valid = true;
std::shared_ptr<IFeature> ptr;
switch(target_type) {
case TargetType::DataArray:
entity_valid = block()->hasEntity({name_or_id, ObjectType::DataArray});
break;
case TargetType::DataFrame:
entity_valid = block()->hasEntity({name_or_id, ObjectType::DataFrame});
break;
}
if (!entity_valid) {
throw std::runtime_error("DataArray/DataFrame not found in Block!");
}
std::string rep_id = util::createId();
boost::optional<H5Group> g = feature_group(true);

H5Group group = g->openGroup(rep_id, true);
DataArray data = std::dynamic_pointer_cast<IDataArray>(block()->getEntity({name_or_id, ObjectType::DataArray}));
return std::make_shared<FeatureHDF5>(file(), block(), group, rep_id, data, link_type);

switch(target_type) {
case TargetType::DataArray: {
DataArray data = std::dynamic_pointer_cast<IDataArray>(block()->getEntity({name_or_id, ObjectType::DataArray}));
ptr = std::make_shared<FeatureHDF5>(file(), block(), group, rep_id, data, link_type);
break;
}
case TargetType::DataFrame: {
entity_valid = block()->hasEntity({name_or_id, ObjectType::DataFrame});
DataFrame data = std::dynamic_pointer_cast<IDataFrame>(block()->getEntity({name_or_id, ObjectType::DataFrame}));
ptr = std::make_shared<FeatureHDF5>(file(), block(), group, rep_id, data, link_type);
break;
}
}
return ptr;
}


Expand Down
2 changes: 1 addition & 1 deletion backend/hdf5/BaseTagHDF5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class BaseTagHDF5 : public EntityWithSourcesHDF5, virtual public base::IBaseTag
virtual std::shared_ptr<base::IFeature> getFeature(ndsize_t index) const;


virtual std::shared_ptr<base::IFeature> createFeature(const std::string &name_or_id, LinkType link_type);
virtual std::shared_ptr<base::IFeature> createFeature(const std::string &name_or_id, LinkType link_type, TargetType target_type);


virtual bool deleteFeature(const std::string &name_or_id);
Expand Down
123 changes: 117 additions & 6 deletions backend/hdf5/FeatureHDF5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <nix/util/util.hpp>
#include <nix/DataArray.hpp>
#include "DataArrayHDF5.hpp"
#include "DataFrameHDF5.hpp"


using namespace std;
Expand Down Expand Up @@ -38,6 +39,22 @@ LinkType linkTypeFromString(const string &str) {
}


/**
 * Converts a TargetType into its string name.
 *
 * The returned names are the ones targetTypeFromString() accepts
 * ("DataArray" and "DataFrame").
 *
 * @param target_type The type to convert.
 *
 * @return The name of the target type.
 *
 * @throws runtime_error if target_type is not one of the known enumerators.
 */
string targetTypeToString(TargetType target_type) {
    // A switch does not depend on the numeric values of the enumerators and
    // cannot index out of bounds the way a lookup table could.
    switch (target_type) {
        case TargetType::DataArray:
            return "DataArray";
        case TargetType::DataFrame:
            return "DataFrame";
    }
    throw runtime_error("Unable to convert the TargetType to a string: unknown target type");
}


/**
 * Maps the string name of a target type back to the enumerator.
 *
 * @param str Either "DataArray" or "DataFrame".
 *
 * @return The matching TargetType.
 *
 * @throws runtime_error if str is neither of the two known names.
 */
TargetType targetTypeFromString(const string &str) {
    if (str == "DataArray") {
        return TargetType::DataArray;
    }
    if (str == "DataFrame") {
        return TargetType::DataFrame;
    }
    throw runtime_error("Unable to create a TargetType from the string: " + str);
}


FeatureHDF5::FeatureHDF5(const shared_ptr<IFile> &file, const shared_ptr<IBlock> &block, const H5Group &group)
: EntityHDF5(file, group), block(block)
{
Expand All @@ -57,7 +74,25 @@ FeatureHDF5::FeatureHDF5(const shared_ptr<IFile> &file, const shared_ptr<IBlock>
linkType(link_type);
// TODO: the line below currently throws an exception if the DataArray
// is not in block - to consider if we prefer copying it to the block
this->data(data.id());
this->data(data);
}


// Constructor for a new Feature that links to a DataFrame. Delegates to the
// overload taking an explicit time, passing util::getTime() as that time.
FeatureHDF5::FeatureHDF5(const shared_ptr<IFile> &file, const shared_ptr<IBlock> &block, const H5Group &group,
const string &id, DataFrame data, LinkType link_type)
: FeatureHDF5(file, block, group, id, data, link_type, util::getTime())
{
}


// Constructor for a new Feature that links to a DataFrame, with an explicit
// time that is forwarded to the EntityHDF5 base. Stores the link type and
// then links the given DataFrame as this Feature's data.
FeatureHDF5::FeatureHDF5(const shared_ptr<IFile> &file, const shared_ptr<IBlock> &block, const H5Group &group,
const string &id, DataFrame data, LinkType link_type, time_t time)
: EntityHDF5(file, group, id, time), block(block)
{
linkType(link_type);
// TODO: the line below currently throws an exception if the DataFrame
// is not in block - to consider if we prefer copying it to the block
this->data(data);
}


Expand All @@ -68,8 +103,14 @@ void FeatureHDF5::linkType(LinkType link_type) {
}


void FeatureHDF5::data(const std::string &name_or_id) {
std::shared_ptr<IDataArray> ida = block->getEntity<IDataArray>(name_or_id);
// Stores the target type as the string attribute "target_type" on this
// Feature's group and refreshes the updated-at timestamp.
void FeatureHDF5::targetType(TargetType ttype) {
    const string type_name = targetTypeToString(ttype);
    group().setAttr("target_type", type_name);
    forceUpdatedAt();
}


void FeatureHDF5::data(const DataArray &data) {
std::shared_ptr<IDataArray> ida = block->getEntity<IDataArray>(data.name());
if (!ida) {
throw std::runtime_error("FeatureHDF5::data: DataArray not found in block!");
}
Expand All @@ -80,13 +121,57 @@ void FeatureHDF5::data(const std::string &name_or_id) {
auto target = dynamic_pointer_cast<DataArrayHDF5>(ida);

group().createLink(target->group(), "data");
targetType(TargetType::DataArray);

forceUpdatedAt();
}


shared_ptr<IDataArray> FeatureHDF5::data() const {
shared_ptr<IDataArray> da;
// Links this Feature to the given DataFrame. The DataFrame is looked up by
// name in the owning block; any existing "data" link is replaced, and the
// target type is recorded as DataFrame.
//
// Throws std::runtime_error if the block has no DataFrame with that name.
void FeatureHDF5::data(const DataFrame &data) {
    const std::shared_ptr<IDataFrame> frame_entity = block->getEntity<IDataFrame>(data.name());
    if (!frame_entity) {
        throw std::runtime_error("FeatureHDF5::data: DataFrame not found in block!");
    }

    // Drop a previous link so that createLink below can reuse the name.
    const bool has_previous_link = group().hasGroup("data");
    if (has_previous_link) {
        group().removeGroup("data");
    }

    auto hdf5_frame = dynamic_pointer_cast<DataFrameHDF5>(frame_entity);
    group().createLink(hdf5_frame->group(), "data");

    targetType(TargetType::DataFrame);
    forceUpdatedAt();
}


// Links this Feature to the entity with the given name or id. The block is
// searched for a DataArray first and for a DataFrame only if no DataArray
// matches. The target type attribute is set to match what was found.
//
// The lookup happens before the existing "data" link is removed, so a failed
// lookup leaves the current link untouched.
//
// Throws std::runtime_error if the block has neither a DataArray nor a
// DataFrame matching name_or_id.
void FeatureHDF5::data(const std::string &name_or_id) {
    TargetType tt = TargetType::DataArray;
    std::shared_ptr<IDataArray> ida = block->getEntity<IDataArray>(name_or_id);
    std::shared_ptr<IDataFrame> idf;
    if (!ida) {
        idf = block->getEntity<IDataFrame>(name_or_id);
        if (!idf) {
            throw std::runtime_error("FeatureHDF5::data: entity is not found in block, neither DataArray nor DataFrame!");
        }
        tt = TargetType::DataFrame;
    }

    // The new target is known to exist; only now drop the previous link.
    if (group().hasGroup("data")) {
        group().removeGroup("data");
    }

    if (tt == TargetType::DataFrame) {
        auto target = dynamic_pointer_cast<DataFrameHDF5>(idf);
        group().createLink(target->group(), "data");
    } else {
        auto target = dynamic_pointer_cast<DataArrayHDF5>(ida);
        group().createLink(target->group(), "data");
    }
    targetType(tt);
    forceUpdatedAt();
}


shared_ptr<IDataArray> FeatureHDF5::dataArray() const {
if (targetType() != TargetType::DataArray) {
throw std::runtime_error("Cannot convert Feature data to DataArray! Feature target is of type DataFrame!");
}
shared_ptr<IDataArray> da;
if (group().hasGroup("data")) {
H5Group other_group = group().openGroup("data", false);
da = make_shared<DataArrayHDF5>(file(), block, other_group);
Expand All @@ -98,16 +183,42 @@ shared_ptr<IDataArray> FeatureHDF5::data() const {
}


// Returns the DataFrame this Feature links to.
//
// Returns an empty pointer if the Feature has no "data" link.
// Throws std::runtime_error if the target type is not DataFrame, or if the
// linked DataFrame is not an entity of the owning block.
shared_ptr<IDataFrame> FeatureHDF5::dataFrame() const {
    if (targetType() != TargetType::DataFrame) {
        throw std::runtime_error("Cannot convert Feature data to DataFrame! Feature target is of type DataArray!");
    }

    shared_ptr<IDataFrame> linked_frame;
    if (!group().hasGroup("data")) {
        return linked_frame;
    }

    H5Group data_group = group().openGroup("data", false);
    linked_frame = make_shared<DataFrameHDF5>(file(), block, data_group);
    if (!block->hasEntity(linked_frame)) {
        throw std::runtime_error("FeatureHDF5::data: DataFrame not found!");
    }
    return linked_frame;
}


// Reads the link type of this Feature from the "link_type" attribute of its
// group.
//
// Throws MissingAttr naming "link_type" if the attribute is absent, and
// whatever linkTypeFromString throws if the stored string is not recognised.
LinkType FeatureHDF5::linkType() const {
    if (!group().hasAttr("link_type")) {
        // Report the attribute that is actually missing.
        throw MissingAttr("link_type");
    }
    string link_type;
    group().getAttr("link_type", link_type);
    return linkTypeFromString(link_type);
}

// Reads the target type of this Feature from the "target_type" attribute of
// its group. A Feature without that attribute is reported as linking to a
// DataArray.
TargetType FeatureHDF5::targetType() const {
    if (!group().hasAttr("target_type")) {
        return TargetType::DataArray;
    }
    string stored_type;
    group().getAttr("target_type", stored_type);
    return targetTypeFromString(stored_type);
}

FeatureHDF5::~FeatureHDF5() {}

Expand Down
52 changes: 48 additions & 4 deletions backend/hdf5/FeatureHDF5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ std::string linkTypeToString(LinkType link_type);
*/
LinkType linkTypeFromString(const std::string &str);

/**
* Converts a TargetType into a string.
*
* @param target_type The type to convert.
*
* @return The name of the target type as a string.
*/
std::string targetTypeToString(TargetType target_type);

/**
* Create a target type from a string. If no matching type
* exists, an exception will be thrown.
*
* @param str The name of the target type, "DataArray" or "DataFrame".
*
* @return The target type that matches the string.
*
* @throws std::runtime_error if the string matches no target type.
*/
TargetType targetTypeFromString(const std::string &str);


/**
* Class that represents a NIX feature entity
Expand All @@ -44,6 +59,7 @@ class FeatureHDF5 : virtual public base::IFeature, public EntityHDF5 {
private:

std::shared_ptr<base::IBlock> block;
void targetType(TargetType type);

public:

Expand All @@ -53,29 +69,57 @@ class FeatureHDF5 : virtual public base::IFeature, public EntityHDF5 {
FeatureHDF5(const std::shared_ptr<base::IFile> &file, const std::shared_ptr<base::IBlock> &block, const H5Group &group);

/**
* Standard constructor for new Feature
* Standard constructor for new Feature linking to a DataArray
*/
FeatureHDF5(const std::shared_ptr<base::IFile> &file, const std::shared_ptr<base::IBlock> &block, const H5Group &group,
const std::string &id, DataArray data, LinkType link_type);

/**
* Standard constructor for new Feature with time
* Standard constructor for new Feature linking to a DataFrame
*/
FeatureHDF5(const std::shared_ptr<base::IFile> &file, const std::shared_ptr<base::IBlock> &block, const H5Group &group,
const std::string &id, DataFrame data, LinkType link_type);

/**
* Standard constructor for new Feature linking a DataArray with time
*/
FeatureHDF5(const std::shared_ptr<base::IFile> &file, const std::shared_ptr<base::IBlock> &block, const H5Group &group,
const std::string &id, DataArray data, LinkType link_type, time_t time);

/**
* Standard constructor for new Feature linking a DataFrame with time
*/
FeatureHDF5(const std::shared_ptr<base::IFile> &file, const std::shared_ptr<base::IBlock> &block, const H5Group &group,
const std::string &id, DataFrame data, LinkType link_type, time_t time);


void linkType(LinkType type);


LinkType linkType() const;


TargetType targetType() const;

/**
* links to the given data (DataArray or DataFrame) method tries to find the given name_or_id
* first among the DataArrays and then the DataFrames. When passing a name there might be
* an ambiguity that cannot be resolved. Rather, use the id or use the data(DataArray) or
* data(DataFrame) overloads instead.
*/
DEPRECATED void data(const std::string &name_or_id);


void data(const DataArray &data);


void data(const DataFrame &data);

void data(const std::string &name_or_id);

std::shared_ptr<base::IDataArray> dataArray() const;

std::shared_ptr<base::IDataArray> data() const;

std::shared_ptr<base::IDataFrame> dataFrame() const;

virtual ~FeatureHDF5();

Expand Down
2 changes: 1 addition & 1 deletion backend/hdf5/h5x/H5Object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ void H5Object::close() {


// Releases the object on destruction by calling close() exactly once.
H5Object::~H5Object() {
    // Qualified call: names this class's own close() explicitly, so the
    // intent does not depend on dynamic dispatch (which does not reach
    // derived overrides from within a destructor anyway).
    // NOTE(review): whether close() is declared virtual is not visible here.
    H5Object::close();
}


Expand Down
Loading