Skip to content

Commit

Permalink
Implement serialization and deserialization of graph documents
Browse files Browse the repository at this point in the history
  • Loading branch information
sumanth-survey-sparrow committed Jan 20, 2025
1 parent b3172d8 commit 4af76b9
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 0 deletions.
42 changes: 42 additions & 0 deletions libs/experimental/langchain_experimental/graph_transformers/llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import asyncio
import json
from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union, cast
import pickle

from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_core.documents import Document
Expand Down Expand Up @@ -931,6 +933,46 @@ def convert_to_graph_documents(
"""
return [self.process_response(document, config) for document in documents]

def save_graph_documents(
    self,
    graph_documents: List[GraphDocument],
    file_name: str = "graph_document.pkl",
) -> None:
    """Serialize graph documents to a pickle file.

    Args:
        graph_documents: The graph documents to persist.
        file_name: Destination path. A relative path is resolved against the
            current working directory; an absolute path is used unchanged.

    Raises:
        OSError: If the destination cannot be opened for writing.
        pickle.PicklingError: If an object in ``graph_documents`` cannot be
            pickled.
    """
    import logging

    # os.path.join drops the cwd prefix when file_name is already absolute,
    # so both relative and absolute inputs end up at the intended location.
    target_path = os.path.join(os.getcwd(), file_name)

    # The context manager closes the handle even if pickling fails part-way.
    with open(target_path, "wb") as db_file:
        pickle.dump(graph_documents, db_file)

    # Library code reports through logging rather than print (Ruff T201).
    logging.getLogger(__name__).info(
        "Graph documents saved to %s", os.path.abspath(target_path)
    )

Check failure on line 952 in libs/experimental/langchain_experimental/graph_transformers/llm.py

View workflow job for this annotation

GitHub Actions / cd libs/experimental / make lint #3.11

Ruff (T201)

langchain_experimental/graph_transformers/llm.py:952:9: T201 `print` found

def load_graph_documents(
    self, file_name: str = "graph_document.pkl"
) -> List[GraphDocument]:
    """Deserialize graph documents from a pickle file.

    Args:
        file_name: Path of the pickle file to read. A relative path is
            resolved against the current working directory; an absolute path
            is used unchanged.

    Returns:
        The object stored in the file, expected to be the list of graph
        documents previously written by ``save_graph_documents``.

    Raises:
        FileNotFoundError: If no file exists at the resolved path.
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    # os.path.join returns file_name unchanged when it is absolute, so no
    # separate "full path" branch is needed.
    source_path = os.path.join(os.getcwd(), file_name)

    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by a trusted source.
    with open(source_path, "rb") as db_file:
        graph_documents: List[GraphDocument] = pickle.load(db_file)
    return graph_documents


async def aprocess_response(
self, document: Document, config: Optional[RunnableConfig] = None
) -> GraphDocument:
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI

# Shared objects built once when this module is imported: a chat model
# configured with temperature=0 and the graph transformer that wraps it.
# NOTE(review): the test below calls convert_to_graph_documents on this
# transformer, which presumably issues a live OpenAI request - confirm that
# credentials and network access are available wherever this test runs.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-2024-08-06")
llm_transformer = LLMGraphTransformer(llm=llm)

def test_save_load_graph_document():
    """Round-trip graph documents through save/load and expect equality.

    The documents are produced by ``llm_transformer`` from a short passage,
    written to a pickle file in the current working directory, read back, and
    compared with the originals. The file is removed afterwards even when
    saving or loading fails.
    """
    text = """
Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""
    documents = [Document(page_content=text)]
    graph_documents = llm_transformer.convert_to_graph_documents(documents)

    intermediate_file_name = "graph_document.pkl"
    try:
        llm_transformer.save_graph_documents(graph_documents, intermediate_file_name)
        loaded_graph_documents = llm_transformer.load_graph_documents(
            intermediate_file_name
        )
    finally:
        # Clean up on every path so a failed save or load does not leave a
        # stray pickle behind; the file may not exist if saving failed early.
        if os.path.exists(intermediate_file_name):
            os.remove(intermediate_file_name)

    # The reloaded documents must equal the ones that were saved.
    assert graph_documents == loaded_graph_documents

0 comments on commit 4af76b9

Please sign in to comment.