diff --git a/.env.example b/.env.example index ee75325..108014e 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,8 @@ TAVILY_API_KEY=... # To separate your traces from other application LANGCHAIN_PROJECT=data-enrichment +# LANGCHAIN_API_KEY=... +# LANGCHAIN_TRACING_V2=true # The following depend on your selected configuration diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 621cd63..43ff6b8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -50,4 +50,7 @@ jobs: path: src/ - name: Run tests + env: + ANTHROPIC_API_KEY: afakekey + TAVILY_API_KEY: anotherfakekey run: yarn test diff --git a/README.md b/README.md index bd69b84..6c307f8 100644 --- a/README.md +++ b/README.md @@ -93,50 +93,74 @@ End setup instructions 3. Consider a research topic and desired extraction schema. -As an example, here is a research topic we can consider. +As an example, here is a research topic we can consider: + +``` +"Autonomous agents" +``` + +With an `extractionSchema` of: + +```json +{ + "type": "object", + "properties": { + "facts": { + "type": "array", + "description": "An array of facts retrieved from the provided sources", + "items": { + "type": "string" + } + } + }, + "required": ["facts"] +} +``` + +Another example topic with a more complex schema is: ``` "Top 5 chip providers for LLM Training" ``` -And here is a desired extraction schema. +And here is a desired `extractionSchema`: ```json -"extractionSchema": { - "type": "object", - "properties": { - "companies": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Company name" - }, - "technologies": { - "type": "string", - "description": "Brief summary of key technologies used by the company" - }, - "market_share": { - "type": "string", - "description": "Overview of market share for this company" - }, - "future_outlook": { - "type": "string", - "description": "Brief summary of future prospects and developments in the field for this company" - }, - "key_powers": { - "type": "string", - "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage" - } - }, - "required": ["name", "technologies", "market_share", "future_outlook"] - }, - "description": "List of companies" - } - }, - "required": ["companies"] +{ + "type": "object", + "properties": { + "companies": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Company name" + }, + "technologies": { + "type": "string", + "description": "Brief summary of key technologies used by the company" + }, + "market_share": { + "type": "string", + "description": "Overview of market share for this company" + }, + "future_outlook": { + "type": "string", + "description": "Brief summary of future prospects and developments in the field for this company" + }, + "key_powers": { + "type": "string", + "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage" + } + }, + "required": ["name", "technologies", "market_share", "future_outlook"] + }, + "description": "List of companies" + } + }, + "required": ["companies"] } ``` @@ -145,7 +169,7 @@ And here is a desired extraction schema. ## How to customize 1. **Customize research targets**: Provide a custom JSON `extractionSchema` when calling the graph to gather different types of information. -2. **Select a different model**: We default to anthropic (claude-3-5-sonnet-20240620). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`. +2. **Select a different model**: We default to anthropic (`claude-3-5-sonnet-20240620`). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`. 3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration. For quick prototyping, these configurations can be set in the studio UI. @@ -163,7 +187,7 @@ While iterating on your graph, you can edit past state and rerun your app from p Follow up requests will be appended to the same thread. You can create an entirely new thread, clearing previous history, using the `+` button in the top right. -You can find the latest (under construction) docs on [LangGraph.JS](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case. +You can find the latest (under construction) docs on [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case. LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates. diff --git a/package.json b/package.json index 9260942..6fd50ae 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "dependencies": { "@langchain/anthropic": "^0.3.1", "@langchain/community": "^0.3.1", - "@langchain/core": "^0.3.2", + "@langchain/core": "^0.3.3", "@langchain/langgraph": "^0.2.8", "langchain": "^0.3.2", "langsmith": "^0.1.59", @@ -36,6 +36,7 @@ "@jest/globals": "^29.7.0", "@tsconfig/recommended": "^1.0.7", "@types/jest": "^29.5.0", + "@types/node": "^20.14.8", "@typescript-eslint/eslint-plugin": "^5.59.8", "@typescript-eslint/parser": "^5.59.8", "dotenv": "^16.4.5", diff --git a/src/enrichment_agent/configuration.ts b/src/enrichment_agent/configuration.ts index 063ab5e..e1dd081 100644 --- a/src/enrichment_agent/configuration.ts +++ b/src/enrichment_agent/configuration.ts @@ -49,15 +49,15 @@ export const ConfigurationAnnotation = Annotation.Root({ export function ensureConfiguration( config?: RunnableConfig, ): typeof ConfigurationAnnotation.State { - const configurable = (config?.configurable || {}) as Partial< + const configurable = (config?.configurable ?? {}) as Partial< typeof ConfigurationAnnotation.State >; return { - model: configurable.model || "anthropic/claude-3-5-sonnet-20240620", - prompt: configurable.prompt || MAIN_PROMPT, - maxSearchResults: configurable.maxSearchResults || 10, - maxInfoToolCalls: configurable.maxInfoToolCalls || 3, - maxLoops: configurable.maxLoops || 6, + model: configurable.model ?? "anthropic/claude-3-5-sonnet-20240620", + prompt: configurable.prompt ?? MAIN_PROMPT, + maxSearchResults: configurable.maxSearchResults ?? 5, + maxInfoToolCalls: configurable.maxInfoToolCalls ?? 3, + maxLoops: configurable.maxLoops ?? 6, }; } diff --git a/src/enrichment_agent/graph.ts b/src/enrichment_agent/graph.ts index 2a766a3..7d58c1e 100644 --- a/src/enrichment_agent/graph.ts +++ b/src/enrichment_agent/graph.ts @@ -14,12 +14,13 @@ import { RunnableConfig } from "@langchain/core/runnables"; import { tool } from "@langchain/core/tools"; import { StateGraph } from "@langchain/langgraph"; import { z } from "zod"; + import { ConfigurationAnnotation, ensureConfiguration, } from "./configuration.js"; import { AnyRecord, InputStateAnnotation, StateAnnotation } from "./state.js"; -import { toolNode, TOOLS } from "./tools.js"; +import { MODEL_TOOLS, toolNode } from "./tools.js"; import { loadChatModel } from "./utils.js"; /** @@ -43,15 +44,13 @@ import { loadChatModel } from "./utils.js"; async function callAgentModel( state: typeof StateAnnotation.State, config: RunnableConfig, -): Promise<{ - messages: BaseMessage[]; - info?: AnyRecord; - loopStep: number; -}> { +): Promise { const configuration = ensureConfiguration(config); // First, define the info tool. This uses the user-provided // json schema to define the research targets - const infoTool = tool(async (_args: AnyRecord) => {}, { + // We pass an empty function because we will not actually invoke this tool. + // We are just using it for formatting. + const infoTool = tool(async () => {}, { name: "Info", description: "Call this when you have gathered all the relevant info", schema: state.extractionSchema, @@ -61,7 +60,7 @@ async function callAgentModel( if (!rawModel.bindTools) { throw new Error("Chat model does not support tool binding"); } - const model = rawModel.bindTools([...TOOLS, infoTool], { + const model = rawModel.bindTools([...MODEL_TOOLS, infoTool], { tool_choice: "any", }); @@ -73,7 +72,7 @@ async function callAgentModel( // Next, we'll call the model. const response: AIMessage = await model.invoke(messages); - const response_messages = [response]; + const responseMessages = [response]; // If the model has collected enough information to fill uot // the provided schema, great! It will call the "Info" tool @@ -96,13 +95,13 @@ async function callAgentModel( } } else { // If LLM didn't respect the tool_choice - response_messages.push( + responseMessages.push( new HumanMessage("Please respond by calling one of the provided tools."), ); } return { - messages: response_messages, + messages: responseMessages, info, // This increments the step counter. // We configure a max step count to avoid infinite research loops @@ -187,7 +186,7 @@ If you don't think it is good, you should be very specific about what could be i ); messages.push({ role: "user", content: p1 }); - // Calll the model + // Call the model const response = await boundModel.invoke(messages); if (response.is_satisfactory && presumedInfo) { return { @@ -197,7 +196,7 @@ If you don't think it is good, you should be very specific about what could be i tool_call_id: lastMessage.tool_calls?.[0]?.id || "", content: response.reason.join("\n"), name: "Info", - additional_kwargs: { artifact: response }, + artifact: response, status: "success", }), ], @@ -209,7 +208,7 @@ If you don't think it is good, you should be very specific about what could be i tool_call_id: lastMessage.tool_calls?.[0]?.id || "", content: `Unsatisfactory response:\n${response.improvement_instructions}`, name: "Info", - additional_kwargs: { artifact: response }, + artifact: response, status: "error", }), ], diff --git a/src/enrichment_agent/state.ts b/src/enrichment_agent/state.ts index d40190f..11821f4 100644 --- a/src/enrichment_agent/state.ts +++ b/src/enrichment_agent/state.ts @@ -1,6 +1,5 @@ import { Annotation, messagesStateReducer } from "@langchain/langgraph"; import { type BaseMessage } from "@langchain/core/messages"; -import { z } from "zod"; // eslint-disable-next-line export type AnyRecord = Record; @@ -11,11 +10,11 @@ export const InputStateAnnotation = Annotation.Root({ * The info state trackes the current extracted data for the given topic, * conforming to the provided schema. */ - info: Annotation>>, + info: Annotation, /** * The schema defines the information the agent is tasked with filling out. */ - extractionSchema: Annotation>, + extractionSchema: Annotation, // Feel free to add additional attributes to your state as needed. // Common examples include retrieved documents, extracted entities, API connections, etc. }); @@ -60,17 +59,17 @@ export const StateAnnotation = Annotation.Root({ reducer: messagesStateReducer, default: () => [], }), + topic: Annotation, /** * The info state trackes the current extracted data for the given topic, * conforming to the provided schema. */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any info: Annotation, + /** * The schema defines the information the agent is tasked with filling out. */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any extractionSchema: Annotation, /** diff --git a/src/enrichment_agent/tools.ts b/src/enrichment_agent/tools.ts index 5a6a89b..27313e3 100644 --- a/src/enrichment_agent/tools.ts +++ b/src/enrichment_agent/tools.ts @@ -7,11 +7,12 @@ */ import { TavilySearchResults } from "@langchain/community/tools/tavily_search"; import { RunnableConfig } from "@langchain/core/runnables"; +import { tool } from "@langchain/core/tools"; +import { INFO_PROMPT } from "./prompts.js"; import { ensureConfiguration } from "./configuration.js"; -import { AnyRecord, StateAnnotation } from "./state.js"; -import { StructuredTool, tool } from "@langchain/core/tools"; -import { curry, getTextContent, loadChatModel } from "./utils.js"; +import { StateAnnotation } from "./state.js"; +import { getTextContent, loadChatModel } from "./utils.js"; import { AIMessage, isBaseMessage, @@ -19,10 +20,14 @@ import { } from "@langchain/core/messages"; import { z } from "zod"; -async function search( - { query }: { query: string }, - config: RunnableConfig, -): Promise | null> { +/** + * Initialize tools within a function so that they have access to the current + * state and config at runtime. + */ +function initializeTools( + state?: typeof StateAnnotation.State, + config?: RunnableConfig, +) { /** * Search for general results. * @@ -31,135 +36,93 @@ async function search( * for answering questions about current events. */ const configuration = ensureConfiguration(config); - const wrapped = new TavilySearchResults({ + const searchTool = new TavilySearchResults({ maxResults: configuration.maxSearchResults, }); - const result = await wrapped.invoke(query, config); - return result as Array | null; -} -const INFO_PROMPT = `You are doing web research on behalf of a user. You are trying to find out this information: + async function scrapeWebsite({ url }: { url: string }): Promise { + /** + * Scrape and summarize content from a given URL. + */ + const response = await fetch(url); + const content = await response.text(); + const truncatedContent = content.slice(0, 50000); + const p = INFO_PROMPT.replace( + "{info}", + JSON.stringify(state?.extractionSchema, null, 2), + ) + .replace("{url}", url) + .replace("{content}", truncatedContent); - -{info} - + const rawModel = await loadChatModel(configuration.model); + const result = await rawModel.invoke(p); + return getTextContent(result.content); + } -You just scraped the following website: {url} - -Based on the website content below, jot down some notes about the website. - - -{content} -`; - -async function scrapeWebsite( - { - url, - __state, - }: { - url: string; - __state?: typeof StateAnnotation.State; - }, - config: RunnableConfig, -): Promise { - /** - * Scrape and summarize content from a given URL. - */ - const response = await fetch(url); - const content = await response.text(); - const truncatedContent = content.slice(0, 50000); - const configuration = ensureConfiguration(config); - const p = INFO_PROMPT.replace( - "{info}", - JSON.stringify(__state?.extractionSchema, null, 2), - ) - .replace("{url}", url) - .replace("{content}", truncatedContent); + const scraperTool = tool(scrapeWebsite, { + name: "scrapeWebsite", + description: "Scrape content from a given website URL", + schema: z.object({ + url: z.string().url().describe("The URL of the website to scrape"), + }), + }); - const rawModel = await loadChatModel(configuration.model); - const result = await rawModel.invoke(p, { callbacks: config?.callbacks }); - return getTextContent(result.content); + return [searchTool, scraperTool]; } -export const createToolNode = (tools: StructuredTool[]) => { - const toolNode = async ( - state: typeof StateAnnotation.State, - config: RunnableConfig, - ) => { - const message = state.messages[state.messages.length - 1]; - const outputs = await Promise.all( - (message as AIMessage).tool_calls?.map(async (call) => { - const tool = tools.find((tool) => tool.name === call.name); - try { - if (tool === undefined) { - throw new Error(`Tool "${call.name}" not found.`); - } - const newCall = { - ...call, - args: { - __state: state, - ...call.args, - }, - }; - const output = await tool.invoke( - { ...newCall, type: "tool_call" }, - config, - ); - if (isBaseMessage(output) && output._getType() === "tool") { - return output; - } else { - return new ToolMessage({ - name: tool.name, - content: - typeof output === "string" ? output : JSON.stringify(output), - tool_call_id: call.id ?? "", - }); - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (e: any) { +export const toolNode = async ( + state: typeof StateAnnotation.State, + config: RunnableConfig, +) => { + const message = state.messages[state.messages.length - 1]; + // Initialize the tools within the context of the node so that the tools + // have the current state of the graph and the config in scope. + // See: https://js.langchain.com/docs/how_to/tool_runtime + const tools = initializeTools(state, config); + const outputs = await Promise.all( + (message as AIMessage).tool_calls?.map(async (call) => { + const tool = tools.find((tool) => tool.name === call.name); + try { + if (tool === undefined) { + throw new Error(`Tool "${call.name}" not found.`); + } + const newCall = { + ...call, + args: { + __state: state, + ...call.args, + }, + }; + const output = await tool.invoke( + { ...newCall, type: "tool_call" }, + config, + ); + if (isBaseMessage(output) && output._getType() === "tool") { + return output; + } else { return new ToolMessage({ - content: `Error: ${e.message}\n Please fix your mistakes.`, - name: call.name, + name: tool.name, + content: + typeof output === "string" ? output : JSON.stringify(output), tool_call_id: call.id ?? "", - status: "error", }); } - }) ?? [], - ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (e: any) { + return new ToolMessage({ + content: `Error: ${e.message}\n Please fix your mistakes.`, + name: call.name, + tool_call_id: call.id ?? "", + status: "error", + }); + } + }) ?? [], + ); - return { messages: outputs }; - }; - return toolNode; + return { messages: outputs }; }; -const searchTool = tool(search, { - name: "search", - description: "Search the internet for information on a given topic", - schema: z.object({ - query: z.string().describe("The search query to look up"), - }), -}); - -// Exposed to the -export const TOOLS = [ - searchTool, - tool(curry(scrapeWebsite, { __state: undefined }), { - name: "scrapeWebsite", - description: "Scrape content from a given website URL", - schema: z.object({ - url: z.string().url().describe("The URL of the website to scrape"), - }), - }), -]; - -export const toolNode = createToolNode([ - searchTool, - tool(scrapeWebsite, { - name: "scrapeWebsite", - description: "Scrape content from a given website URL", - schema: z.object({ - url: z.string().url().describe("The URL of the website to scrape"), - __state: z.any(), - }), - }), -]); +// No state or config required here since these are just bound to the chat model +// and are only used to define schema. +// The tool node above will actually call the functions. +export const MODEL_TOOLS = initializeTools(); diff --git a/src/enrichment_agent/utils.ts b/src/enrichment_agent/utils.ts index 39f341e..b13e164 100644 --- a/src/enrichment_agent/utils.ts +++ b/src/enrichment_agent/utils.ts @@ -5,21 +5,6 @@ import { } from "@langchain/core/messages"; import { initChatModel } from "langchain/chat_models/universal"; -export function curry< - // eslint-disable-next-line @typescript-eslint/no-explicit-any - F extends (...args: any[]) => any, - P extends Partial[0]> = Partial[0]>, ->(fn: F, partialArg: P) { - return function ( - this: unknown, - arg: Omit[0], keyof P> & Partial

, - ...rest: Parameters extends [unknown, ...infer R] ? R : never - ): ReturnType { - const mergedArg = { ...partialArg, ...arg } as Parameters[0]; - return fn.apply(this, [mergedArg, ...rest]) as ReturnType; - }; -} - /** * Helper function to extract text content from a complex message. * diff --git a/tests/agent.test.ts b/tests/agent.test.ts index af0eec4..4011dce 100644 --- a/tests/agent.test.ts +++ b/tests/agent.test.ts @@ -2,6 +2,10 @@ import { describe, it, expect } from "@jest/globals"; import { graph } from "../src/enrichment_agent/graph.js"; describe("Web Research Agent", () => { + beforeAll(() => { + process.env.TAVILY_API_KEY = "dummy"; + }); + it("should initialize and compile the graph", () => { expect(graph).toBeDefined(); expect(graph.name).toBe("ResearchTopic"); diff --git a/yarn.lock b/yarn.lock index fbfaa80..4e7ea17 100644 --- a/yarn.lock +++ b/yarn.lock @@ -656,10 +656,10 @@ zod "^3.22.3" zod-to-json-schema "^3.22.5" -"@langchain/core@^0.3.2": - version "0.3.2" - resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.2.tgz#aff6d83149a40e0e735910f583aca0f1dd7d1bab" - integrity sha512-FeoDOStP8l1YdxgykpXnVoEnl4lxGNSOdYzUJN/EdFtkc6cIjDDS5+xewajme0+egaUsO4tGLezKaFpoWxAyQA== +"@langchain/core@^0.3.3": + version "0.3.3" + resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.3.tgz#af12fd767ff2fcedb0a71bd79e6588d7dd52b6b6" + integrity sha512-WAtkmhbdl2T41qzimTzhb3pXCHQxO4onqxzPxgdf3KftQdTwLq0YYBDhozRMZLNAd/+cfH0ymZGaZSsnc9Ogsg== dependencies: ansi-styles "^5.0.0" camelcase "6" @@ -876,6 +876,13 @@ dependencies: undici-types "~5.26.4" +"@types/node@^20.14.8": + version "20.16.5" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.16.5.tgz#d43c7f973b32ffdf9aa7bd4f80e1072310fd7a53" + integrity sha512-VwYCweNo3ERajwy0IUlqqcyZ8/A7Zwa9ZP3MnENWcB11AejO+tLy3pu850goUW2FC/IJMdZUfKpX/yxL1gymCA== + dependencies: + undici-types "~6.19.2" + "@types/retry@0.12.0": version "0.12.0" resolved "https://registry.yarnpkg.com/@types/retry/-/retry-0.12.0.tgz#2b35eccfcee7d38cd72ad99232fbd58bffb3c84d"