Skip to content

Commit

Permalink
Merge pull request #9 from langchain-ai/brace/verify-github-links
Browse files Browse the repository at this point in the history
Implement verify github & general links node
  • Loading branch information
bracesproul authored Nov 23, 2024
2 parents 62220cb + 6ff9d3f commit c4b06be
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 10 deletions.
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
},
"dependencies": {
"@langchain/anthropic": "^0.3.8",
"@langchain/community": "^0.3.15",
"@langchain/core": "^0.3.18",
"@langchain/google-vertexai-web": "^0.1.2",
"@langchain/langgraph": "^0.2.22",
"@mendable/firecrawl-js": "^1.8.5",
"@slack/web-api": "^7.7.0",
"moment": "^2.30.1"
"moment": "^2.30.1",
"zod": "^3.23.8"
},
"devDependencies": {
"@eslint/eslintrc": "^3.1.0",
Expand Down
79 changes: 76 additions & 3 deletions src/agent/subgraphs/generate-post/nodes/verify-general.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,90 @@
import { LangGraphRunnableConfig } from "@langchain/langgraph";
import { GraphAnnotation } from "../state.js";
import { GraphAnnotation, VerifyContentAnnotation } from "../state.js";
import { z } from "zod";
import { ChatAnthropic } from "@langchain/anthropic";
import { FireCrawlLoader } from "@langchain/community/document_loaders/web/firecrawl";

/**
 * State update returned by the general-content verification node: the subset
 * of graph state holding the links that passed the check and their contents.
 */
type VerifyGeneralContentReturn = Pick<
  typeof GraphAnnotation.State,
  "relevantLinks" | "pageContents"
>;

/**
 * Structured-output schema for the webpage relevancy check.
 * `reasoning` is declared before `relevant`, so the explanation field comes
 * first in the schema handed to the model.
 */
const RELEVANCY_SCHEMA = z
  .object({
    reasoning: z.string().describe(
      "Reasoning for why the webpage is or isn't relevant to LangChain's products.",
    ),
    relevant: z.boolean().describe(
      "Whether or not the webpage is relevant to LangChain's products.",
    ),
  })
  .describe("The relevancy of the content to LangChain's products.");

/**
 * System prompt for the general-webpage relevancy check. It describes
 * LangChain's products and asks for reasoning followed by a true/false verdict.
 * Built from one entry per prompt line, joined with newlines.
 */
const VERIFY_LANGCHAIN_RELEVANT_CONTENT_PROMPT = [
  "You are a highly regarded marketing employee at LangChain.",
  "You're provided with a webpage containing content a third party submitted to LangChain claiming it's relevant and implements LangChain's products.",
  "Your task is to carefully read over the entire page, and determine whether or not the content actually implements and is relevant to LangChain's products.",
  "You're doing this to ensure the content is relevant to LangChain, and it can be used as marketing material to promote LangChain.",
  "For context, LangChain has three main products you should be looking out for:",
  "- **LangChain** - the main open source libraries developers use for building AI applications. These are open source Python/JavaScript/TypeScript libraries.",
  "- **LangGraph** - an open source library for building agentic AI applications. This is a Python/JavaScript/TypeScript library.",
  "LangChain also offers a hosted cloud platform called 'LangGraph Cloud' or 'LangGraph Platform' which developers can use to host their LangGraph applications in production.",
  "- **LangSmith** - this is LangChain's SaaS product for building AI applications. It offers solutions for evaluating AI systems, observability, datasets and testing.",
  "Given this context, examine the webpage content closely, and determine if the content implements LangChain's products.",
  "You should provide reasoning as to why or why not the content implements LangChain's products, then a simple true or false for whether or not it implements some.",
].join("\n");

/**
 * Verifies that the webpage at `state.link` is relevant to LangChain products.
 *
 * The page is scraped with FireCrawl and its text is passed to an Anthropic
 * model constrained to `RELEVANCY_SCHEMA`.
 *
 * @param state - Verification state; only `state.link` (the URL to check) is read.
 * @param _config - Runnable config supplied by LangGraph (unused).
 * @returns The link and its page content when the model judges it relevant;
 *   otherwise empty arrays so the URL is not included downstream.
 * @throws Propagates errors raised by the FireCrawl loader or the model call.
 */
export async function verifyGeneralContent(
  state: typeof VerifyContentAnnotation.State,
  _config: LangGraphRunnableConfig,
): Promise<VerifyGeneralContentReturn> {
  const loader = new FireCrawlLoader({
    url: state.link, // The URL to scrape
    // "scrape" loads only this page; "crawl" would also pull in every
    // reachable sub-page and mix unrelated content into the relevancy check.
    mode: "scrape",
  });
  const docs = await loader.load();
  const pageContent = docs.map((d) => d.pageContent).join("\n");

  // Nothing was scraped: there is nothing to judge, so skip the model call
  // (an empty user message is not a meaningful input) and exclude the link.
  if (pageContent.trim() === "") {
    return {
      relevantLinks: [],
      pageContents: [],
    };
  }

  const relevancyModel = new ChatAnthropic({
    model: "claude-3-5-sonnet-20241022",
    temperature: 0,
  }).withStructuredOutput(RELEVANCY_SCHEMA, {
    name: "relevancy",
  });

  const { relevant } = await relevancyModel
    .withConfig({
      runName: "check-general-relevancy-model",
    })
    .invoke([
      {
        role: "system",
        content: VERIFY_LANGCHAIN_RELEVANT_CONTENT_PROMPT,
      },
      {
        role: "user",
        content: pageContent,
      },
    ]);

  if (relevant) {
    return {
      // TODO: Replace with actual relevant link/page content (summary in this case)
      relevantLinks: [state.link],
      pageContents: [pageContent],
    };
  }

  // Not relevant, return empty arrays so this URL is not included.
  return {
    relevantLinks: [],
    pageContents: [],
  };
}
99 changes: 96 additions & 3 deletions src/agent/subgraphs/generate-post/nodes/verify-github.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,110 @@
import { z } from "zod";
import { LangGraphRunnableConfig } from "@langchain/langgraph";
import { GraphAnnotation } from "../state.js";
import { GraphAnnotation, VerifyContentAnnotation } from "../state.js";
import { ChatAnthropic } from "@langchain/anthropic";

/**
 * State update returned by the GitHub verification node: the subset of graph
 * state holding the links that passed the check and their contents.
 */
type VerifyGitHubContentReturn = Pick<
  typeof GraphAnnotation.State,
  "relevantLinks" | "pageContents"
>;

/**
 * Structured-output schema for the GitHub Readme relevancy check.
 * `reasoning` is declared before `relevant`, so the explanation field comes
 * first in the schema handed to the model.
 */
const RELEVANCY_SCHEMA = z
  .object({
    reasoning: z.string().describe(
      "Reasoning for why the Readme of the GitHub repository is or isn't relevant to LangChain's products.",
    ),
    relevant: z.boolean().describe(
      "Whether or not the Readme of the GitHub repository is relevant to LangChain's products.",
    ),
  })
  .describe("The relevancy of the content to LangChain's products.");

/**
 * System prompt for the GitHub Readme relevancy check. It describes
 * LangChain's products and asks for reasoning followed by a true/false verdict.
 * Built from one entry per prompt line, joined with newlines.
 */
const VERIFY_LANGCHAIN_RELEVANT_CONTENT_PROMPT = [
  "You are a highly regarded marketing employee at LangChain.",
  "You're given the Readme of a GitHub repository and need to verify the repository implements LangChain's products.",
  "You're doing this to ensure the content is relevant to LangChain, and it can be used as marketing material to promote LangChain.",
  "For context, LangChain has three main products you should be looking out for:",
  "- **LangChain** - the main open source libraries developers use for building AI applications. These are open source Python/JavaScript/TypeScript libraries.",
  "- **LangGraph** - an open source library for building agentic AI applications. This is a Python/JavaScript/TypeScript library.",
  "LangChain also offers a hosted cloud platform called 'LangGraph Cloud' or 'LangGraph Platform' which developers can use to host their LangGraph applications in production.",
  "- **LangSmith** - this is LangChain's SaaS product for building AI applications. It offers solutions for evaluating AI systems, observability, datasets and testing.",
  "Given this context, examine the Readme closely, and determine if the repository implements LangChain's products.",
  "You should provide reasoning as to why or why not the repository implements LangChain's products, then a simple true or false for whether or not it implements some.",
].join("\n");

/** Origin that serves the raw contents of files in GitHub repositories. */
const RAW_GITHUB_ORIGIN = "https://raw.githubusercontent.com";

/**
 * Fetches `url` and returns the response body as text.
 *
 * `fetch` only rejects on network-level failures, so HTTP errors such as a 404
 * for a branch that does not exist are detected through `res.ok`.
 *
 * @returns The body text, or `undefined` when the request fails or the
 *   response status is not 2xx.
 */
async function fetchTextIfOk(url: string): Promise<string | undefined> {
  try {
    const res = await fetch(url);
    if (!res.ok) {
      return undefined;
    }
    return await res.text();
  } catch (e) {
    console.error(`Failed to fetch ${url}`, e);
    return undefined;
  }
}

/**
 * Verifies that the GitHub repository at `state.link` is relevant to
 * LangChain products.
 *
 * The repository Readme is read from the `main` branch, then `master`, and
 * finally the original URL is fetched as a last resort. The first non-empty
 * result is passed to an Anthropic model constrained to `RELEVANCY_SCHEMA`.
 *
 * @param state - Verification state; only `state.link` (the GitHub URL) is read.
 * @param _config - Runnable config supplied by LangGraph (unused).
 * @returns The link and the fetched content when the model judges it relevant;
 *   otherwise empty arrays so the URL is not included downstream. Empty arrays
 *   are also returned when the URL cannot be parsed or no content can be fetched.
 * @throws Propagates errors raised by the model call.
 */
export async function verifyGitHubContent(
  state: typeof VerifyContentAnnotation.State,
  _config: LangGraphRunnableConfig,
): Promise<VerifyGitHubContentReturn> {
  // Candidate locations for the content, in order of preference.
  const candidateUrls: string[] = [];
  try {
    // Only the leading owner/repo path segments identify the repository;
    // anything after them (tree/blob paths, etc.) is ignored.
    const [owner, repo] = new URL(state.link).pathname
      .split("/")
      .filter((segment) => segment.length > 0);
    if (owner !== undefined && repo !== undefined) {
      for (const branch of ["main", "master"]) {
        candidateUrls.push(
          `${RAW_GITHUB_ORIGIN}/${owner}/${repo}/refs/heads/${branch}/README.md`,
        );
      }
    }
  } catch (e) {
    console.error("Failed to parse GitHub URL", e);
    return {
      relevantLinks: [],
      pageContents: [],
    };
  }
  // Finally, just read the content of the original URL.
  candidateUrls.push(state.link);

  let readmeContent = "";
  for (const url of candidateUrls) {
    const text = await fetchTextIfOk(url);
    if (text !== undefined && text.trim() !== "") {
      readmeContent = text;
      break;
    }
  }

  // Nothing could be fetched: there is nothing to judge, so skip the model
  // call and exclude the link.
  if (readmeContent === "") {
    return {
      relevantLinks: [],
      pageContents: [],
    };
  }

  const relevancyModel = new ChatAnthropic({
    model: "claude-3-5-sonnet-20241022",
    temperature: 0,
  }).withStructuredOutput(RELEVANCY_SCHEMA, {
    name: "relevancy",
  });

  const { relevant } = await relevancyModel
    .withConfig({
      runName: "check-github-relevancy-model",
    })
    .invoke([
      {
        role: "system",
        content: VERIFY_LANGCHAIN_RELEVANT_CONTENT_PROMPT,
      },
      {
        role: "user",
        content: readmeContent,
      },
    ]);

  if (relevant) {
    return {
      // TODO: Replace with actual relevant link/page content (summary in this case)
      relevantLinks: [state.link],
      pageContents: [readmeContent],
    };
  }

  // Not relevant, return empty arrays so this URL is not included.
  return {
    relevantLinks: [],
    pageContents: [],
  };
}
Loading

0 comments on commit c4b06be

Please sign in to comment.