Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/embeddings #99

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
d4b14ad
embeddings scratch
georgiannajames Jan 8, 2025
4d17146
dev version bump
georgiannajames Jan 13, 2025
105250a
dev bump
georgiannajames Jan 13, 2025
ddb222a
cargo lock
georgiannajames Jan 13, 2025
549a979
update dependencies
georgiannajames Jan 13, 2025
520da75
resolving mypy and ruff
georgiannajames Jan 14, 2025
7b3f954
lint
georgiannajames Jan 14, 2025
dfb4e2d
lint
georgiannajames Jan 14, 2025
63caf07
embeddings updates
georgiannajames Jan 14, 2025
87d8264
embeddings updates
georgiannajames Jan 14, 2025
0173e3b
download fix
georgiannajames Jan 15, 2025
47737f0
format index helper funcs
georgiannajames Jan 15, 2025
caa0fdf
embedder helper funcs
georgiannajames Jan 15, 2025
7107486
undo download change
georgiannajames Jan 15, 2025
7cd6575
remove unused import
georgiannajames Jan 16, 2025
a504898
name change
georgiannajames Jan 17, 2025
9a72c4a
embeddings updates
georgiannajames Jan 21, 2025
08c943b
update for body modifications
georgiannajames Jan 22, 2025
6f0c9bd
lint fix
georgiannajames Jan 22, 2025
a21d690
update haystack post modifications
georgiannajames Jan 22, 2025
a5dfc17
adding test cov to embeddings.py
georgiannajames Jan 22, 2025
892d6ff
adding tests
georgiannajames Jan 23, 2025
f2aac93
func name update
georgiannajames Jan 23, 2025
05fb2ef
tests
georgiannajames Jan 24, 2025
4e2fb95
adding tests
georgiannajames Jan 24, 2025
8a44f76
addings tests
georgiannajames Jan 27, 2025
48c2538
embeddings cov to 100
georgiannajames Jan 27, 2025
3068708
added tests
georgiannajames Jan 27, 2025
38334fe
download fix, needs test update
georgiannajames Jan 27, 2025
6219fa7
download fix
georgiannajames Jan 28, 2025
ca32c6d
integ test update
georgiannajames Jan 28, 2025
b81fd6f
test update
georgiannajames Jan 28, 2025
2ae5f56
fix download
georgiannajames Jan 28, 2025
b113c3b
Merge branch 'master' into feature/embeddings
georgiannajames Jan 28, 2025
31cb584
lock update
georgiannajames Jan 28, 2025
d44e0fd
duplicate test
georgiannajames Jan 28, 2025
d914516
lint
georgiannajames Jan 28, 2025
1a131c3
mypy
georgiannajames Jan 28, 2025
02154c3
lint
georgiannajames Jan 28, 2025
202a25e
fix test
georgiannajames Jan 28, 2025
cf960de
kwargs desc and to_df fix
georgiannajames Jan 31, 2025
57c0454
organize imports fix
georgiannajames Jan 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pyfusion"
version = "2.0.6"
version = "2.0.7-dev0"
edition = "2021"


Expand Down
2 changes: 1 addition & 1 deletion py_src/fusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = """Fusion Devs"""
__email__ = "[email protected]"
__version__ = "2.0.6"
__version__ = "2.0.7-dev0"

from fusion._fusion import FusionCredentials
from fusion.fs_sync import fsync
Expand Down
22 changes: 9 additions & 13 deletions py_src/fusion/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def _use_client(self, client: Fusion | None) -> Fusion:
if res is None:
raise ValueError("A Fusion client object is required.")
return res

@classmethod
def _from_series(
cls: type[Attribute],
Expand Down Expand Up @@ -436,7 +436,7 @@ def delete(
resp = client.session.delete(url)
requests_raise_for_status(resp)
return resp if return_resp_obj else None

def set_lineage(
self,
attributes: list[Attribute],
Expand Down Expand Up @@ -479,22 +479,18 @@ def set_lineage(
if attribute.application_id is None:
raise ValueError(f"The 'application_id' attribute is required for setting lineage.")
attr_dict = {
"catalog": catalog,
"attribute": attribute.identifier,
"applicationId": attribute.application_id
}
"catalog": catalog,
"attribute": attribute.identifier,
"applicationId": attribute.application_id,
}
target_attributes.append(attr_dict)

url = f"{client.root_url}catalogs/{catalog}/attributes/lineage"
data = [
{
"source": {
"catalog": catalog,
"attribute": self.identifier,
"applicationId": self.application_id
},
"targets": target_attributes
}
"source": {"catalog": catalog, "attribute": self.identifier, "applicationId": self.application_id},
"targets": target_attributes,
}
]
resp = client.session.post(url, json=data)
requests_raise_for_status(resp)
Expand Down
3 changes: 3 additions & 0 deletions py_src/fusion/dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class DataFlow(Dataset):
type_ (str | None): The type of dataset. Defaults to "Flow".

"""

producer_application_id: dict[str, str] | None = None
consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None
flow_details: dict[str, str] | None = None
Expand Down Expand Up @@ -77,6 +78,7 @@ def add_registered_attribute(
@dataclass
class InputDataFlow(DataFlow):
"""InputDataFlow class for maintaining input data flow metadata."""

flow_details: dict[str, str] | None = field(default_factory=lambda: {"flowDirection": "Input"})

def __repr__(self: InputDataFlow) -> str:
Expand All @@ -93,6 +95,7 @@ def __repr__(self: InputDataFlow) -> str:
@dataclass
class OutputDataFlow(DataFlow):
"""OutputDataFlow class for maintaining output data flow metadata."""

flow_details: dict[str, str] | None = field(default_factory=lambda: {"flowDirection": "Output"})

def __repr__(self: OutputDataFlow) -> str:
Expand Down
3 changes: 1 addition & 2 deletions py_src/fusion/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class Dataset(metaclass=CamelCaseMeta):
is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
is_active (bool | None, optional): is_active. Defaults to None.
owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
application_id (str | dict[str, str] | None, optional): The application (most commonly seal ID) that the
application_id (str | dict[str, str] | None, optional): The application (most commonly seal ID) that the
dataset/report/flow is owned by. Accepts string format for seal IDs, or a dictionary containing 'id' and
'type' as keys. Defaults to None.
_client (Any, optional): A Fusion client object. Defaults to None.
Expand Down Expand Up @@ -585,7 +585,6 @@ def create(
if data.get("report", None) and data["report"]["tier"] == "":
raise ValueError("Tier cannot be blank for reports.")


url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
resp: requests.Response = client.session.post(url, json=data)
requests_raise_for_status(resp)
Expand Down
Loading