diff --git a/.dockerignore b/.dockerignore index 288da10..3ffcc99 100644 --- a/.dockerignore +++ b/.dockerignore @@ -14,3 +14,5 @@ node_modules # data data +src/storage +dist diff --git a/CHANGELOG.md b/CHANGELOG.md index c973d29..3bc88ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ This changelog summarizes all changes of the RAG Web Browser -### 2024-11-13 +### 1.0.4 (2025-01-04) + +🚀 Features +- Include Model Context Protocol in Standby Mode + +### 1.0.3 (2024-11-13) 🚀 Features - Improve README.md and simplify configuration @@ -11,13 +16,13 @@ This changelog summarizes all changes of the RAG Web Browser - Rename googleSearchResults to searchResults and searchProxyGroup to serpProxyGroup - Implement input validation -### 2024-11-08 +### 0.1.4 (2024-11-08) 🚀 Features - Add functionality to extract content from a specific URL - Update README.md to include new functionality and provide examples -### 2024-10-17 +### 0.0.32 (2024-10-17) 🚀 Features - Handle errors when request is added to Playwright queue. diff --git a/README.md b/README.md index d6da491..2f8c72e 100644 --- a/README.md +++ b/README.md @@ -159,9 +159,45 @@ Learn more about [adding custom actions to your GPTs with Apify Actors](https:// The RAG Web Browser Actor can also be used as an [MCP server](https://github.com/modelcontextprotocol) and integrated with AI applications and agents, such as Claude Desktop. For example, in Claude Desktop, you can configure the MCP server in its settings to perform web searches and extract content. - Alternatively, you can develop a custom MCP client to interact with the RAG Web Browser Actor. +In the Standby mode, the Actor runs an HTTP server that supports the MCP protocol via SSE (Server-Sent Events). + +1. Initiate SSE connection: + ```shell + curl https://rag-web-browser.apify.actor/sse?token= + ``` + On connection, you’ll receive a `sessionId`: + ```text + event: endpoint + data: /message?sessionId=5b2 + ``` + +1. 
Send a message to the server by making a POST request with the `sessionId`, `APIFY-API-TOKEN` and your query: + ```shell + curl -X POST "https://rag-web-browser.apify.actor/message?sessionId=5b2&token=" -H "Content-Type: application/json" -d '{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/call", + "params": { + "arguments": { "query": "recent news about LLMs", "maxResults": 1 }, + "name": "rag-web-browser" + } + }' + ``` + For the POST request, the server will respond with: + ```text + Accepted + ``` + +1. Receive a response at the initiated SSE connection: + The server invoked `Actor` and its tool using the provided query and sent the response back to the client via SSE. + + ```text + event: message + data: {"result":{"content":[{"type":"text","text":"[{\"searchResult\":{\"title\":\"Language models recent news\",\"description\":\"Amazon Launches New Generation of LLM Foundation Model...\"}} + ``` + To learn more about MCP server integration, check out the [RAG Web Browser MCP server documentation](https://github.com/apify/mcp-server-rag-web-browser). 
## ⏳ Performance optimization @@ -277,3 +313,9 @@ And then you can run it locally using [Apify CLI](https://docs.apify.com/cli) as ```bash APIFY_META_ORIGIN=STANDBY apify run -p ``` + +Server will start on `http://localhost:3000` and you can send requests to it, for example: + +```bash +curl "http://localhost:3000/search?query=example.com" +``` diff --git a/package-lock.json b/package-lock.json index ef66f2e..2252ae2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,10 +10,12 @@ "license": "ISC", "dependencies": { "@crawlee/memory-storage": "^3.11.1", + "@modelcontextprotocol/sdk": "^1.0.4", "@mozilla/readability": "^0.5.0", "apify": "^3.2.6", "cheerio": "^1.0.0-rc.12", "crawlee": "^3.12.0", + "express": "^4.21.2", "joplin-turndown-plugin-gfm": "^1.0.12", "jsdom": "^24.1.1", "playwright": "^1.47.0", @@ -22,6 +24,7 @@ "devDependencies": { "@apify/eslint-config-ts": "^0.3.0", "@apify/tsconfig": "^0.1.0", + "@types/express": "^5.0.0", "@types/turndown": "^5.0.5", "@typescript-eslint/eslint-plugin": "^6.7.2", "@typescript-eslint/parser": "^6.7.2", @@ -1204,6 +1207,17 @@ "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", "license": "BSD-2-Clause" }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.0.4.tgz", + "integrity": "sha512-C+jw1lF6HSGzs7EZpzHbXfzz9rj9him4BaoumlTciW/IDDgIpweF/qiCWKlP02QKg5PPcgY6xY2WCt5y2tpYow==", + "license": "MIT", + "dependencies": { + "content-type": "^1.0.5", + "raw-body": "^3.0.0", + "zod": "^3.23.8" + } + }, "node_modules/@mozilla/readability": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz", @@ -1327,18 +1341,72 @@ "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==", "license": "MIT" }, + "node_modules/@types/body-parser": { + "version": "1.19.5", + 
"resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.5.tgz", + "integrity": "sha512-fB3Zu92ucau0iQ0JMCFQE7b/dv8Ot07NI3KaZIkIUNXq82k4eBAqUaneXfleGY9JWskeS9y+u0nXMyspcuQrCg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/connect": "*", + "@types/node": "*" + } + }, + "node_modules/@types/connect": { + "version": "3.4.38", + "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.38.tgz", + "integrity": "sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/content-type": { "version": "1.1.8", "resolved": "https://registry.npmjs.org/@types/content-type/-/content-type-1.1.8.tgz", "integrity": "sha512-1tBhmVUeso3+ahfyaKluXe38p+94lovUZdoVfQ3OnJo9uJC42JT7CBoN3k9HYhAae+GwiBYmHu+N9FZhOG+2Pg==", "license": "MIT" }, + "node_modules/@types/express": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@types/express/-/express-5.0.0.tgz", + "integrity": "sha512-DvZriSMehGHL1ZNLzi6MidnsDhUZM/x2pRdDIKdwbUNqqwHxMlRdkxtn6/EPKyqKpHqTl/4nRZsRNLpZxZRpPQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/body-parser": "*", + "@types/express-serve-static-core": "^5.0.0", + "@types/qs": "*", + "@types/serve-static": "*" + } + }, + "node_modules/@types/express-serve-static-core": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-5.0.2.tgz", + "integrity": "sha512-vluaspfvWEtE4vcSDlKRNer52DvOGrB2xv6diXy6UKyKW0lqZiWHGNApSyxOv+8DE5Z27IzVvE7hNkxg7EXIcg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "@types/qs": "*", + "@types/range-parser": "*", + "@types/send": "*" + } + }, "node_modules/@types/http-cache-semantics": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz", 
"integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==", "license": "MIT" }, + "node_modules/@types/http-errors": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@types/http-errors/-/http-errors-2.0.4.tgz", + "integrity": "sha512-D0CFMMtydbJAegzOyHjtiKPLlvnm3iTZyZRSZoLq2mRhDdmLfIWOCYPfQJ4cu2erKghU++QvjcUjp/5h7hESpA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/jsdom": { "version": "21.1.7", "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.7.tgz", @@ -1364,6 +1432,13 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/mime": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz", + "integrity": "sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/node": { "version": "22.9.3", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.9.3.tgz", @@ -1373,6 +1448,20 @@ "undici-types": "~6.19.8" } }, + "node_modules/@types/qs": { + "version": "6.9.17", + "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.17.tgz", + "integrity": "sha512-rX4/bPcfmvxHDv0XjfJELTTr+iB+tn032nPILqHm5wbthUUUuVtNGGqzhya9XUxjTP8Fpr0qYgSZZKxGY++svQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/range-parser": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.7.tgz", + "integrity": "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/sax": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/@types/sax/-/sax-1.2.7.tgz", @@ -1389,6 +1478,29 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/send": { + "version": "0.17.4", + "resolved": "https://registry.npmjs.org/@types/send/-/send-0.17.4.tgz", + "integrity": 
"sha512-x2EM6TJOybec7c52BX0ZspPodMsQUd5L6PRwOunVyVUhXiBSKf3AezDL8Dgvgt5o0UfKNfuA0eMLr2wLT4AiBA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/mime": "^1", + "@types/node": "*" + } + }, + "node_modules/@types/serve-static": { + "version": "1.15.7", + "resolved": "https://registry.npmjs.org/@types/serve-static/-/serve-static-1.15.7.tgz", + "integrity": "sha512-W8Ym+h8nhuRwaKPaDw34QUkwsGi6Rc4yYqvKFo5rm2FUEhCFbzVWrxXUxuKK8TASjWsysJY0nsmNCGhCOIsrOw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/http-errors": "*", + "@types/node": "*", + "@types/send": "*" + } + }, "node_modules/@types/tough-cookie": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz", @@ -1617,6 +1729,19 @@ "npm": ">=7.0.0" } }, + "node_modules/accepts": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", + "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", + "license": "MIT", + "dependencies": { + "mime-types": "~2.1.34", + "negotiator": "0.6.3" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/acorn": { "version": "8.14.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", @@ -1827,6 +1952,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", + "license": "MIT" + }, "node_modules/array-includes": { "version": "3.1.8", "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.8.tgz", @@ -2084,6 +2215,72 @@ "readable-stream": "^3.4.0" } }, + "node_modules/body-parser": { + "version": "1.20.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz", + "integrity": 
"sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==", + "license": "MIT", + "dependencies": { + "bytes": "3.1.2", + "content-type": "~1.0.5", + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "http-errors": "2.0.0", + "iconv-lite": "0.4.24", + "on-finished": "2.4.1", + "qs": "6.13.0", + "raw-body": "2.5.2", + "type-is": "~1.6.18", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/body-parser/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/body-parser/node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/body-parser/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, + "node_modules/body-parser/node_modules/raw-body": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz", + "integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==", + "license": "MIT", + "dependencies": { + "bytes": "3.1.2", + "http-errors": "2.0.0", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/boolbase": { "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", @@ -2168,6 +2365,15 @@ "ieee754": "^1.1.13" } }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/cacheable-lookup": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz", @@ -2199,7 +2405,6 @@ "version": "1.0.7", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", - "dev": true, "license": "MIT", "dependencies": { "es-define-property": "^1.0.0", @@ -2440,6 +2645,18 @@ "dev": true, "license": "MIT" }, + "node_modules/content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "license": "MIT", + "dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/content-type": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", @@ -2449,6 +2666,21 @@ "node": ">= 0.6" } }, + "node_modules/cookie": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz", + "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": 
"sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==", + "license": "MIT" + }, "node_modules/crawlee": { "version": "3.12.0", "resolved": "https://registry.npmjs.org/crawlee/-/crawlee-3.12.0.tgz", @@ -2712,7 +2944,6 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", - "dev": true, "license": "MIT", "dependencies": { "es-define-property": "^1.0.0", @@ -2753,6 +2984,25 @@ "node": ">=0.4.0" } }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/destroy": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", + "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==", + "license": "MIT", + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, "node_modules/devtools-protocol": { "version": "0.0.1387316", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1387316.tgz", @@ -2861,6 +3111,12 @@ "integrity": "sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==", "license": "MIT" }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, "node_modules/electron-to-chromium": { "version": "1.5.65", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.65.tgz", @@ -2874,6 +3130,15 @@ 
"dev": true, "license": "MIT" }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/enhanced-resolve": { "version": "5.17.1", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz", @@ -2965,7 +3230,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", - "dev": true, "license": "MIT", "dependencies": { "get-intrinsic": "^1.2.4" @@ -2978,7 +3242,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3116,6 +3379,12 @@ "node": ">=6" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -3711,6 +3980,15 @@ "node": ">=0.10.0" } }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/event-stream": { "version": "3.3.4", "resolved": 
"https://registry.npmjs.org/event-stream/-/event-stream-3.3.4.tgz", @@ -3726,6 +4004,67 @@ "through": "~2.3.1" } }, + "node_modules/express": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz", + "integrity": "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==", + "license": "MIT", + "dependencies": { + "accepts": "~1.3.8", + "array-flatten": "1.1.1", + "body-parser": "1.20.3", + "content-disposition": "0.5.4", + "content-type": "~1.0.4", + "cookie": "0.7.1", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "2.0.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "1.3.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "merge-descriptors": "1.0.3", + "methods": "~1.1.2", + "on-finished": "2.4.1", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.12", + "proxy-addr": "~2.0.7", + "qs": "6.13.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.2.1", + "send": "0.19.0", + "serve-static": "1.16.2", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/express/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, "node_modules/external-editor": { "version": "3.1.0", "resolved": 
"https://registry.npmjs.org/external-editor/-/external-editor-3.1.0.tgz", @@ -3892,6 +4231,39 @@ "node": ">=8" } }, + "node_modules/finalhandler": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz", + "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "on-finished": "2.4.1", + "parseurl": "~1.3.3", + "statuses": "2.0.1", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/finalhandler/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/finalhandler/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, "node_modules/find-up": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", @@ -4023,6 +4395,24 @@ "node": ">= 18" } }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/from": { 
"version": "0.1.7", "resolved": "https://registry.npmjs.org/from/-/from-0.1.7.tgz", @@ -4068,7 +4458,6 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -4126,7 +4515,6 @@ "version": "1.2.4", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -4319,7 +4707,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==", - "dev": true, "license": "MIT", "dependencies": { "get-intrinsic": "^1.1.3" @@ -4510,7 +4897,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", - "dev": true, "license": "MIT", "dependencies": { "es-define-property": "^1.0.0" @@ -4523,7 +4909,6 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4536,7 +4921,6 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz", "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4565,7 +4949,6 @@ "version": "2.0.2", "resolved": 
"https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, "license": "MIT", "dependencies": { "function-bind": "^1.1.2" @@ -4632,6 +5015,22 @@ "integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==", "license": "BSD-2-Clause" }, + "node_modules/http-errors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", + "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", + "license": "MIT", + "dependencies": { + "depd": "2.0.0", + "inherits": "2.0.4", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "toidentifier": "1.0.1" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -4846,6 +5245,15 @@ "node": ">= 12" } }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, "node_modules/is-any-array": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", @@ -5626,6 +6034,24 @@ "resolved": "https://registry.npmjs.org/map-stream/-/map-stream-0.1.0.tgz", "integrity": "sha512-CkYQrPYZfWnu/DAmVCpTSX/xHpKZ80eKh2lAkyA6AJTef6bW+6JpbQZN5rofum7da+SyN1bi5ctTm+lTfcCW3g==" }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 
0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/merge2": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", @@ -5636,6 +6062,15 @@ "node": ">= 8" } }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/micromatch": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", @@ -5650,6 +6085,18 @@ "node": ">=8.6" } }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -5802,6 +6249,15 @@ "dev": true, "license": "MIT" }, + "node_modules/negotiator": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", + "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/node-releases": { "version": "2.0.18", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", @@ -5852,7 +6308,6 @@ "version": "1.13.3", "resolved": 
"https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.3.tgz", "integrity": "sha512-kDCGIbxkDSXE3euJZZXzc6to7fCrKHNI/hSRQnRuQ+BWjFNzZwiFF8fj/6o2t2G9/jTj8PSIYTfCLelLZEeRpA==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -5957,6 +6412,18 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -6158,6 +6625,15 @@ "url": "https://github.com/inikulin/parse5?sponsor=1" } }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -6194,6 +6670,12 @@ "dev": true, "license": "MIT" }, + "node_modules/path-to-regexp": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz", + "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", + "license": "MIT" + }, "node_modules/path-type": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", @@ -6394,6 +6876,19 @@ "node": ">= 4" } }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": 
"sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/proxy-chain": { "version": "2.5.5", "resolved": "https://registry.npmjs.org/proxy-chain/-/proxy-chain-2.5.5.tgz", @@ -6432,6 +6927,21 @@ "node": ">=6" } }, + "node_modules/qs": { + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.0.6" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/querystringify": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz", @@ -6471,6 +6981,30 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.0.tgz", + "integrity": "sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==", + "license": "MIT", + "dependencies": { + "bytes": "3.1.2", + "http-errors": "2.0.0", + "iconv-lite": "0.6.3", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/react-is": { "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", @@ -6828,11 +7362,73 @@ "node": ">=10" } }, + "node_modules/send": { + "version": "0.19.0", + "resolved": 
"https://registry.npmjs.org/send/-/send-0.19.0.tgz", + "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "2.4.1", + "range-parser": "~1.2.1", + "statuses": "2.0.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/send/node_modules/debug/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, + "node_modules/send/node_modules/encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/serve-static": { + "version": "1.16.2", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz", + "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==", + "license": "MIT", + "dependencies": { + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.19.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/set-function-length": { "version": "1.2.2", "resolved": 
"https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", - "dev": true, "license": "MIT", "dependencies": { "define-data-property": "^1.1.4", @@ -6862,6 +7458,12 @@ "node": ">= 0.4" } }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -6889,7 +7491,6 @@ "version": "1.0.6", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", - "dev": true, "license": "MIT", "dependencies": { "call-bind": "^1.0.7", @@ -6976,6 +7577,15 @@ "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", "license": "BSD-3-Clause" }, + "node_modules/statuses": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", + "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/stream-chain": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/stream-chain/-/stream-chain-2.2.5.tgz", @@ -7298,6 +7908,15 @@ "node": ">=8.0" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + 
} + }, "node_modules/token-types": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.0.0.tgz", @@ -7453,6 +8072,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/typed-array-buffer": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz", @@ -7594,6 +8226,15 @@ "node": ">= 10.0.0" } }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/update-browserslist-db": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.1.tgz", @@ -7650,6 +8291,15 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", "license": "MIT" }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, "node_modules/vali-date": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/vali-date/-/vali-date-1.0.0.tgz", @@ -7659,6 +8309,15 @@ "node": ">=0.10.0" } }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + 
"integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/w3c-xmlserializer": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", @@ -8024,6 +8683,15 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "3.24.1", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.1.tgz", + "integrity": "sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } } } } diff --git a/package.json b/package.json index c10441b..5416c03 100644 --- a/package.json +++ b/package.json @@ -8,10 +8,12 @@ }, "dependencies": { "@crawlee/memory-storage": "^3.11.1", + "@modelcontextprotocol/sdk": "^1.0.4", "@mozilla/readability": "^0.5.0", "apify": "^3.2.6", "cheerio": "^1.0.0-rc.12", "crawlee": "^3.12.0", + "express": "^4.21.2", "joplin-turndown-plugin-gfm": "^1.0.12", "jsdom": "^24.1.1", "playwright": "^1.47.0", @@ -20,6 +22,7 @@ "devDependencies": { "@apify/eslint-config-ts": "^0.3.0", "@apify/tsconfig": "^0.1.0", + "@types/express": "^5.0.0", "@types/turndown": "^5.0.5", "@typescript-eslint/eslint-plugin": "^6.7.2", "@typescript-eslint/parser": "^6.7.2", @@ -31,7 +34,7 @@ }, "scripts": { "start": "npm run start:dev", - "start:prod": "node dist/main.js", + "start:prod": "node dist/src/main.js", "start:dev": "tsx src/main.ts", "build": "tsc", "lint": "eslint ./src --ext .ts", diff --git a/src/const.ts b/src/const.ts index f06ec3b..38c684f 100644 --- a/src/const.ts +++ b/src/const.ts @@ -4,6 +4,12 @@ export enum ContentCrawlerStatus { FAILED = 'failed', } +export enum Routes { + SEARCH = '/search', + SSE = '/sse', + MESSAGE = '/message', +} + export const PLAYWRIGHT_REQUEST_TIMEOUT_NORMAL_MODE_SECS = 60; // TODO: It would 
be better to simply use input_schema.json rather then hard-coding these values, diff --git a/src/crawlers.ts b/src/crawlers.ts index 36c79d1..19c9f30 100644 --- a/src/crawlers.ts +++ b/src/crawlers.ts @@ -11,11 +11,10 @@ import { PlaywrightCrawlingContext, RequestOptions, } from 'crawlee'; -import { ServerResponse } from 'http'; import { scrapeOrganicResults } from './google-search/google-extractors-urls.js'; import { failedRequestHandlerPlaywright, requestHandlerPlaywright } from './playwright-req-handler.js'; -import { createResponse, addEmptyResultToResponse, sendResponseError } from './responses.js'; +import { addEmptyResultToResponse, sendResponseError } from './responses.js'; import { PlaywrightScraperSettings, UserData } from './types.js'; import { addTimeMeasureEvent, createRequest } from './utils.js'; @@ -35,6 +34,7 @@ export function getPlaywrightCrawlerKey( /** * Creates and starts a Google search crawler and Playwright content crawler with the provided configurations. + * A crawler won't be created if it already exists. */ export async function createAndStartCrawlers( cheerioCrawlerOptions: CheerioCrawlerOptions, @@ -42,20 +42,21 @@ export async function createAndStartCrawlers( playwrightScraperSettings: PlaywrightScraperSettings, startCrawlers: boolean = true, ) { - const crawler1 = await createAndStartSearchCrawler( + const { crawler: searchCrawler } = await createAndStartSearchCrawler( cheerioCrawlerOptions, startCrawlers, ); - const crawler2 = await createAndStartCrawlerPlaywright( + const { key: playwrightCrawlerKey, crawler: playwrightCrawler } = await createAndStartCrawlerPlaywright( playwrightCrawlerOptions, playwrightScraperSettings, startCrawlers, ); - return [crawler1, crawler2]; + return { searchCrawler, playwrightCrawler, playwrightCrawlerKey }; } /** * Creates and starts a Google search crawler with the provided configuration. + * A crawler won't be created if it already exists. 
*/ async function createAndStartSearchCrawler( cheerioCrawlerOptions: CheerioCrawlerOptions, @@ -63,7 +64,7 @@ async function createAndStartSearchCrawler( ) { const key = getSearchCrawlerKey(cheerioCrawlerOptions); if (crawlers.has(key)) { - return crawlers.get(key); + return { key, crawler: crawlers.get(key) }; } log.info(`Creating new cheerio crawler with key ${key}`); @@ -88,7 +89,7 @@ async function createAndStartSearchCrawler( log.info(`Extracted ${results.length} results: \n${results.map((r) => r.url).join('\n')}`); addTimeMeasureEvent(request.userData!, 'before-playwright-queue-add'); - const responseId = request.uniqueKey; + const responseId = request.userData.responseId!; let rank = 1; for (const result of results) { result.rank = rank++; @@ -112,9 +113,13 @@ async function createAndStartSearchCrawler( } crawlers.set(key, crawler); log.info(`Number of crawlers ${crawlers.size}`); - return crawler; + return { key, crawler }; } +/** + * Creates and starts a Playwright content crawler with the provided configuration. + * A crawler won't be created if it already exists. 
+ */ async function createAndStartCrawlerPlaywright( crawlerOptions: PlaywrightCrawlerOptions, settings: PlaywrightScraperSettings, @@ -122,7 +127,7 @@ async function createAndStartCrawlerPlaywright( ) { const key = getPlaywrightCrawlerKey(crawlerOptions, settings); if (crawlers.has(key)) { - return crawlers.get(key); + return { key, crawler: crawlers.get(key) }; } log.info(`Creating new playwright crawler with key ${key}`); @@ -143,7 +148,7 @@ async function createAndStartCrawlerPlaywright( } crawlers.set(key, crawler); log.info(`Number of crawlers ${crawlers.size}`); - return crawler; + return { key, crawler }; } /** @@ -152,7 +157,6 @@ async function createAndStartCrawlerPlaywright( */ export const addSearchRequest = async ( request: RequestOptions, - response: ServerResponse | null, cheerioCrawlerOptions: CheerioCrawlerOptions, ) => { const key = getSearchCrawlerKey(cheerioCrawlerOptions); @@ -162,11 +166,6 @@ export const addSearchRequest = async ( log.error(`Cheerio crawler not found: key ${key}`); return; } - - if (response) { - createResponse(request.uniqueKey!, response); - log.info(`Created response for request ${request.uniqueKey}, request.url: ${request.url}`); - } addTimeMeasureEvent(request.userData!, 'before-cheerio-queue-add'); await crawler.requestQueue!.addRequest(request); log.info(`Added request to cheerio-google-search-crawler: ${request.url}`); diff --git a/src/main.ts b/src/main.ts index e2e58c9..9085f14 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,105 +1,60 @@ +import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js'; import { Actor } from 'apify'; import { log } from 'crawlee'; -import { createServer, IncomingMessage, ServerResponse } from 'http'; +import express, { Request, Response } from 'express'; -import { PLAYWRIGHT_REQUEST_TIMEOUT_NORMAL_MODE_SECS } from './const.js'; -import { addPlaywrightCrawlRequest, addSearchRequest, createAndStartCrawlers, getPlaywrightCrawlerKey } from './crawlers.js'; -import { 
UserInputError } from './errors.js'; +import { Routes } from './const.js'; +import { createAndStartCrawlers } from './crawlers.js'; import { processInput } from './input.js'; -import { addTimeoutToAllResponses, createResponse, sendResponseError } from './responses.js'; +import { RagWebBrowserServer } from './mcp/server.js'; +import { addTimeoutToAllResponses } from './responses.js'; +import { handleSearchRequest, handleSearchNormalMode } from './search.js'; import { Input } from './types.js'; -import { - addTimeMeasureEvent, - checkAndRemoveExtraParams, - createRequest, - createSearchRequest, - interpretAsUrl, - parseParameters, - randomId, -} from './utils.js'; await Actor.init(); -const ROUTE_SEARCH = '/search'; - Actor.on('migrating', () => { addTimeoutToAllResponses(60); }); -async function getSearch(request: IncomingMessage, response: ServerResponse) { - try { - const requestReceivedTime = Date.now(); - const params = parseParameters(request.url?.slice(ROUTE_SEARCH.length, request.url.length) ?? ''); - log.info(`Received query parameters: ${JSON.stringify(params)}`); - checkAndRemoveExtraParams(params); - - // Process the query parameters the same way se normal inputs - const { - input, - cheerioCrawlerOptions, - playwrightCrawlerOptions, - playwrightScraperSettings, - } = await processInput(params as Partial); - - // playwrightCrawlerKey is used to identify the crawler that should process the search results - const playwrightCrawlerKey = getPlaywrightCrawlerKey(playwrightCrawlerOptions, playwrightScraperSettings); - await createAndStartCrawlers(cheerioCrawlerOptions, playwrightCrawlerOptions, playwrightScraperSettings); +const app = express(); - const inputUrl = interpretAsUrl(input.query); - input.query = inputUrl ?? input.query; - // Create a request depending on whether the input is a URL or search query - const responseId = randomId(); - const req = inputUrl - ? 
createRequest({ url: input.query }, responseId, null) - : createSearchRequest( - input.query, - input.maxResults, - playwrightCrawlerKey, - cheerioCrawlerOptions.proxyConfiguration, - ); - addTimeMeasureEvent(req.userData!, 'request-received', requestReceivedTime); - if (inputUrl) { - // If the input query is a URL, we don't need to run the search crawler - log.info(`Skipping Google Search query as ${input.query} is a valid URL`); - createResponse(responseId, response); - await addPlaywrightCrawlRequest(req, responseId, playwrightCrawlerKey); - } else { - await addSearchRequest(req, response, cheerioCrawlerOptions); - } - setTimeout(() => { - sendResponseError(req.uniqueKey!, 'Timed out'); - }, input.requestTimeoutSecs * 1000); - } catch (e) { - const error = e as Error; - const errorMessage = { errorMessage: error.message }; - const statusCode = error instanceof UserInputError ? 400 : 500; - log.error(`UserInputError occurred: ${error.message}`); - response.writeHead(statusCode, { 'Content-Type': 'application/json' }); - response.end(JSON.stringify(errorMessage)); - } -} +const mcpServer = new RagWebBrowserServer(); +let transport: SSEServerTransport; -const server = createServer(async (req, res) => { - log.info(`Request received: ${req.method} ${req.url}`); - - if (req.url?.startsWith(ROUTE_SEARCH)) { - if (req.method === 'GET') { - await getSearch(req, res); - } else if (req.method === 'HEAD') { - res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(); - } else { - res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ errorMessage: 'Bad request' })); - } - } else { - res.writeHead(404, { 'Content-Type': 'application/json' }); - res.end( - JSON.stringify({ - message: `There is nothing at this HTTP endpoint. 
Send a GET request to ${process.env.ACTOR_STANDBY_URL}/search?query=hello+world instead`, - }), - ); - } +const HELP_MESSAGE = `Send a GET request to ${process.env.ACTOR_STANDBY_URL}/search?query=hello+world` + + ` or to ${process.env.ACTOR_STANDBY_URL}/sse to use the Model Context Protocol.`; + +app.get('/', async (req, res) => { + log.info(`Received GET message at: ${req.url}`); + res.status(200).json({ message: `Actor is running in Standby mode. ${HELP_MESSAGE}` }); +}); + +app.get(Routes.SEARCH, async (req: Request, res: Response) => { + log.info(`Received GET message at: ${req.url}`); + await handleSearchRequest(req, res); +}); + +app.head(Routes.SEARCH, async (req: Request, res: Response) => { + log.info(`Received HEAD message at: ${req.url}`); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(); +}); + +app.get(Routes.SSE, async (req: Request, res: Response) => { + log.info(`Received GET message at: ${req.url}`); + transport = new SSEServerTransport(Routes.MESSAGE, res); + await mcpServer.connect(transport); +}); + +app.post(Routes.MESSAGE, async (req: Request, res: Response) => { + log.info(`Received POST message at: ${req.url}`); + await transport.handlePostMessage(req, res); +}); + +// Catch-all for undefined routes +app.use((req, res) => { + res.status(404).json({ message: `There is nothing at route ${req.method} ${req.originalUrl}. ${HELP_MESSAGE}` }); }); const standbyMode = Actor.getEnv().metaOrigin === 'STANDBY'; @@ -117,55 +72,17 @@ log.info(`Loaded input: ${JSON.stringify(input)}, if (standbyMode) { log.info('Actor is running in the STANDBY mode.'); + const host = Actor.isAtHome() ? process.env.ACTOR_STANDBY_URL : 'http://localhost'; const port = Actor.isAtHome() ? 
process.env.ACTOR_STANDBY_PORT : 3000; - server.listen(port, async () => { + app.listen(port, async () => { // Pre-create default crawlers - log.info(`The Actor web server is listening for user requests at ${process.env.ACTOR_STANDBY_URL}.`); + log.info(`The Actor web server is listening for user requests at ${host}.`); await createAndStartCrawlers(cheerioCrawlerOptions, playwrightCrawlerOptions, playwrightScraperSettings); }); } else { log.info('Actor is running in the NORMAL mode.'); try { - const startedTime = Date.now(); - cheerioCrawlerOptions.keepAlive = false; - playwrightCrawlerOptions.keepAlive = false; - playwrightCrawlerOptions.requestHandlerTimeoutSecs = PLAYWRIGHT_REQUEST_TIMEOUT_NORMAL_MODE_SECS; - - // playwrightCrawlerKey is used to identify the crawler that should process the search results - const playwrightCrawlerKey = getPlaywrightCrawlerKey(playwrightCrawlerOptions, playwrightScraperSettings); - const [searchCrawler, playwrightCrawler] = await createAndStartCrawlers( - cheerioCrawlerOptions, - playwrightCrawlerOptions, - playwrightScraperSettings, - false, - ); - - const inputUrl = interpretAsUrl(input.query); - input.query = inputUrl ?? input.query; - // Create a request depending on whether the input is a URL or search query - const req = inputUrl - ? 
createRequest({ url: input.query }, randomId(), null) - : createSearchRequest( - input.query, - input.maxResults, - playwrightCrawlerKey, - cheerioCrawlerOptions.proxyConfiguration, - ); - addTimeMeasureEvent(req.userData!, 'actor-started', startedTime); - if (inputUrl) { - // If the input query is a URL, we don't need to run the search crawler - log.info(`Skipping Google Search query because "${input.query}" is a valid URL.`); - await addPlaywrightCrawlRequest(req, req.uniqueKey!, playwrightCrawlerKey); - } else { - await addSearchRequest(req, null, cheerioCrawlerOptions); - addTimeMeasureEvent(req.userData!, 'before-cheerio-run', startedTime); - log.info(`Running Google Search crawler with request: ${JSON.stringify(req)}`); - await searchCrawler!.run(); - } - - addTimeMeasureEvent(req.userData!, 'before-playwright-run', startedTime); - log.info(`Running target page crawler with request: ${JSON.stringify(req)}`); - await playwrightCrawler!.run(); + await handleSearchNormalMode(input, cheerioCrawlerOptions, playwrightCrawlerOptions, playwrightScraperSettings); } catch (e) { const error = e as Error; await Actor.fail(error.message as string); diff --git a/src/mcp/server.ts b/src/mcp/server.ts new file mode 100644 index 0000000..1487a1b --- /dev/null +++ b/src/mcp/server.ts @@ -0,0 +1,80 @@ +#!/usr/bin/env node + +/** + * Model Context Protocol (MCP) server for RAG Web Browser Actor + */ + +import { Server } from '@modelcontextprotocol/sdk/server/index.js'; +import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js'; +import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; + +import inputSchema from '../../.actor/input_schema.json' with { type: 'json' }; +import { handleModelContextProtocol } from '../search.js'; +import { Input } from '../types.js'; + +const TOOL_SEARCH = inputSchema.title.toLowerCase().replace(/ /g, '-'); + +const TOOLS = [ + { + name: TOOL_SEARCH, + description: 
inputSchema.description, + inputSchema, + }, +]; + +/** + * Create an MCP server with a tool to call RAG Web Browser Actor + */ +export class RagWebBrowserServer { + private server: Server; + + constructor() { + this.server = new Server( + { + name: 'mcp-server-rag-web-browser', + version: '0.1.0', + }, + { + capabilities: { + tools: {}, + }, + }, + ); + this.setupErrorHandling(); + this.setupToolHandlers(); + } + + private setupErrorHandling(): void { + this.server.onerror = (error) => { + console.error('[MCP Error]', error); // eslint-disable-line no-console + }; + process.on('SIGINT', async () => { + await this.server.close(); + process.exit(0); + }); + } + + private setupToolHandlers(): void { + this.server.setRequestHandler(ListToolsRequestSchema, async () => { + return { + tools: TOOLS, + }; + }); + this.server.setRequestHandler(CallToolRequestSchema, async (request) => { + const { name, arguments: args } = request.params; + switch (name) { + case TOOL_SEARCH: { + const content = await handleModelContextProtocol(args as unknown as Input); + return { content: content.map((message) => ({ type: 'text', text: JSON.stringify(message) })) }; + } + default: { + throw new Error(`Unknown tool: ${name}`); + } + } + }); + } + + async connect(transport: Transport): Promise<void> { + await this.server.connect(transport); + } +} diff --git a/src/responses.ts b/src/responses.ts index 2f74aed..67c98ee 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -1,19 +1,15 @@ import { log } from 'apify'; import { RequestOptions } from 'crawlee'; -import { ServerResponse } from 'http'; import { ContentCrawlerStatus } from './const.js'; import { Output, UserData } from './types.js'; -class ResponseData { - response: ServerResponse; +type ResponseData = { resultsMap: Map<string, Output>; - - constructor(response: ServerResponse) { - this.response = response; - this.resultsMap = new Map(); - } -} + resolve: (value: Output[]) => void; + reject: (reason?: unknown) => void; + timeoutId?: NodeJS.Timeout; +}; 
const responseData = new Map(); @@ -28,18 +24,31 @@ const getResponse = (responseId: string): ResponseData | null => { }; /** - * Create a response object for a search request + * Create a response promise * (for content crawler requests there is no need to create a response object). */ -export const createResponse = (responseId: string, response: ServerResponse) => { - responseData.set(responseId, new ResponseData(response)); -}; +export function createResponsePromise(responseId: string, timeoutSecs: number): Promise<Output[]> { + log.info(`Created responsePromise for response ID: ${responseId}`); + return new Promise((resolve, reject) => { + const data: ResponseData = { + resultsMap: new Map(), + resolve, + reject, + }; + responseData.set(responseId, data); + + // Set a timeout to reject the promise if it takes too long + data.timeoutId = setTimeout(() => { + sendResponseError(responseId, 'Timed out'); + }, timeoutSecs * 1000); + }); +} /** * Add empty result to response object when the content crawler request is created. * This is needed to keep track of all results and to know that all results have been handled. */ -export const addEmptyResultToResponse = (responseId: string, request: RequestOptions) => { +export function addEmptyResultToResponse(responseId: string, request: RequestOptions) { const res = getResponse(responseId); if (!res) return; @@ -49,51 +58,58 @@ export const addEmptyResultToResponse = (responseId: string, request: RequestOpt crawl: { createdAt: new Date(), requestStatus: ContentCrawlerStatus.PENDING, uniqueKey: request.uniqueKey! 
}, }; res.resultsMap.set(request.uniqueKey!, result as Output); -}; +} -export const addResultToResponse = (responseId: string, uniqueKey: string, result: Output) => { +export function addResultToResponse(responseId: string, uniqueKey: string, result: Output) { const res = getResponse(responseId); if (!res) return; - if (!res.resultsMap.get(uniqueKey)) { - log.info( - `Result for request ${result.metadata.url} (key: ${uniqueKey}) were not found in response ${responseId}`, - ); + const existing = res.resultsMap.get(uniqueKey); + if (!existing) { + log.info(`Result for request ${result.metadata.url} (key: ${uniqueKey}) not found in response ${responseId}`); return; } - res.resultsMap.set(uniqueKey, { ...res.resultsMap.get(uniqueKey), ...result }); - log.info(`Updated request ${responseId} with result.`); -}; + res.resultsMap.set(uniqueKey, { ...existing, ...result }); + log.info(`Updated response ${responseId} with a result from ${result.metadata.url}`); +} -export const sendResponseOk = (responseId: string, result: unknown, contentType: string) => { +export function sendResponseOk(responseId: string, result: string | Output[]) { const res = getResponse(responseId); if (!res) return; - res.response.writeHead(200, { 'Content-Type': contentType }); - res.response.end(result); - log.info(`Response for request ${responseId} has been sent`); + if (res.timeoutId) clearTimeout(res.timeoutId); + + let parsedResults: Output[]; + if (typeof result === 'string') { + parsedResults = JSON.parse(result) as Output[]; + } else { + parsedResults = result as Output[]; + } + + res.resolve(parsedResults); + log.info(`Response ${responseId} resolved successfully with ${parsedResults.length} results.`); responseData.delete(responseId); -}; +} /** * Check if all results have been handled. It is used to determine if the response can be sent. 
*/ -const checkAllResultsHandled = (responseId: string) => { +function checkAllResultsHandled(responseId: string): boolean { const res = getResponse(responseId); - if (!res) return; + if (!res) return false; - for (const key of res.resultsMap.keys()) { - if (res.resultsMap.get(key)!.crawl.requestStatus === ContentCrawlerStatus.PENDING) { + for (const value of res.resultsMap.values()) { + if (value.crawl.requestStatus === ContentCrawlerStatus.PENDING) { return false; } } return true; -}; +} /** * Sort results by rank. */ -const sortResultsByRank = (res: ResponseData): Output[] => { +function sortResultsByRank(res: ResponseData): Output[] { const resultsArray = Array.from(res.resultsMap.values()); resultsArray.sort((a, b) => { const ra = a.searchResult.rank ?? Infinity; @@ -101,53 +117,52 @@ const sortResultsByRank = (res: ResponseData): Output[] => { return ra - rb; }); return resultsArray; -}; +} /** * Send response with error status code. If the response contains some handled requests, * return 200 status otherwise 500. 
*/ -export const sendResponseError = (responseId: string, message: string) => { +export function sendResponseError(responseId: string, message: string) { const res = getResponse(responseId); if (!res) return; - let returnStatusCode = 500; - for (const key of res.resultsMap.keys()) { - const { requestStatus } = res.resultsMap.get(key)!.crawl; - if (requestStatus === ContentCrawlerStatus.PENDING) { - const r = res.resultsMap.get(key)!; - r.crawl.httpStatusCode = 500; - r.crawl.httpStatusMessage = message; - r.crawl.requestStatus = ContentCrawlerStatus.FAILED; - r.metadata.title = ''; - r.text = ''; - } else if (requestStatus === ContentCrawlerStatus.HANDLED) { - returnStatusCode = 200; + if (res.timeoutId) clearTimeout(res.timeoutId); + + let returnStatus = 500; + for (const [key, val] of res.resultsMap) { + if (val.crawl.requestStatus === ContentCrawlerStatus.PENDING) { + val.crawl.httpStatusCode = 500; + val.crawl.httpStatusMessage = message; + val.crawl.requestStatus = ContentCrawlerStatus.FAILED; + val.metadata.title = ''; + val.text = ''; + } else if (val.crawl.requestStatus === ContentCrawlerStatus.HANDLED) { + returnStatus = 200; } + res.resultsMap.set(key, val); } - res.response.writeHead(returnStatusCode, { 'Content-Type': 'application/json' }); - if (returnStatusCode === 200) { + if (returnStatus === 200) { log.warning(`Response for request ${responseId} has been sent with partial results`); - res.response.end(JSON.stringify(sortResultsByRank(res))); + res.resolve(sortResultsByRank(res)); } else { log.error(`Response for request ${responseId} has been sent with error: ${message}`); - res.response.end(JSON.stringify({ errorMessage: message })); + res.reject(new Error(message)); } responseData.delete(responseId); -}; +} /** * Send response if all results have been handled or failed. 
*/ -export const sendResponseIfFinished = (responseId: string) => { +export function sendResponseIfFinished(responseId: string) { const res = getResponse(responseId); if (!res) return; if (checkAllResultsHandled(responseId)) { - sendResponseOk(responseId, JSON.stringify(sortResultsByRank(res)), 'application/json'); - responseData.delete(responseId); + sendResponseOk(responseId, sortResultsByRank(res)); } -}; +} /** * Add timeout to all responses when actor is migrating (source: SuperScraper). */ diff --git a/src/search.ts b/src/search.ts new file mode 100644 index 0000000..9149b71 --- /dev/null +++ b/src/search.ts @@ -0,0 +1,164 @@ +import { CheerioCrawlerOptions, log, PlaywrightCrawlerOptions } from 'crawlee'; +import { IncomingMessage, ServerResponse } from 'http'; + +import { PLAYWRIGHT_REQUEST_TIMEOUT_NORMAL_MODE_SECS, Routes } from './const.js'; +import { addPlaywrightCrawlRequest, addSearchRequest, createAndStartCrawlers } from './crawlers.js'; +import { UserInputError } from './errors.js'; +import { processInput } from './input.js'; +import { createResponsePromise } from './responses.js'; +import { Input, Output, PlaywrightScraperSettings } from './types.js'; +import { + addTimeMeasureEvent, + checkAndRemoveExtraParams, + createRequest, + createSearchRequest, + interpretAsUrl, + parseParameters, + randomId, +} from './utils.js'; + +/** + * Prepares the request for the search. + * Decide whether input.query is a URL or a search query. If it's a URL, we don't need to run the search crawler. + * Return the request, isUrl and responseId. + */ +function prepareRequest( + input: Input, + cheerioCrawlerOptions: CheerioCrawlerOptions, + playwrightCrawlerKey: string, +) { + const interpretedUrl = interpretAsUrl(input.query); + const query = interpretedUrl ?? input.query; + const responseId = randomId(); + + const req = interpretedUrl + ? 
createRequest( + { url: query }, + responseId, + null, + ) + : createSearchRequest( + query, + responseId, + input.maxResults, + playwrightCrawlerKey, + cheerioCrawlerOptions.proxyConfiguration, + ); + + addTimeMeasureEvent(req.userData!, 'request-received', Date.now()); + return { req, isUrl: !!interpretedUrl, responseId }; +} + +/** + * Internal function that handles the common logic for search. + * Returns a promise that resolves to the final results array of Output objects. + */ +async function runSearchProcess(params: Partial<Input>): Promise<Output[]> { + // Process the query parameters the same way as normal inputs + const { + input, + cheerioCrawlerOptions, + playwrightCrawlerOptions, + playwrightScraperSettings, + } = await processInput(params); + + // Create and start crawlers + const { playwrightCrawlerKey } = await createAndStartCrawlers( + cheerioCrawlerOptions, + playwrightCrawlerOptions, + playwrightScraperSettings, + ); + + const { req, isUrl, responseId } = prepareRequest(input, cheerioCrawlerOptions, playwrightCrawlerKey); + + // Create a promise that resolves when all requests are processed + const resultsPromise = createResponsePromise(responseId, input.requestTimeoutSecs); + + if (isUrl) { + // If input is a direct URL, skip the search crawler + log.info(`Skipping Google Search query as "${input.query}" is a valid URL`); + await addPlaywrightCrawlRequest(req, responseId, playwrightCrawlerKey); + } else { + // If input is a search query, run the search crawler first + await addSearchRequest(req, cheerioCrawlerOptions); + } + + // Return promise that resolves when all requests are processed + return resultsPromise; +} + +/** + * Handles the search request at the /search endpoint (HTTP scenario). + * Uses the unified runSearchProcess function and then sends an HTTP response. + */ +export async function handleSearchRequest(request: IncomingMessage, response: ServerResponse) { + try { + const params = parseParameters(request.url?.slice(Routes.SEARCH.length) ?? 
''); + log.info(`Received query parameters: ${JSON.stringify(params)}`); + checkAndRemoveExtraParams(params); + + const results = await runSearchProcess(params); + + response.writeHead(200, { 'Content-Type': 'application/json' }); + response.end(JSON.stringify(results)); + } catch (e) { + const error = e as Error; + const statusCode = error instanceof UserInputError ? 400 : 500; + log.error(`Error occurred: ${error.message}`); + response.writeHead(statusCode, { 'Content-Type': 'application/json' }); + response.end(JSON.stringify({ errorMessage: error.message })); + } +} + +/** + * Handles the model context protocol scenario (non-HTTP scenario). + * Uses the same runSearchProcess function but just returns the results as a promise. + */ +export async function handleModelContextProtocol(params: Partial): Promise { + try { + log.info(`Received parameters: ${JSON.stringify(params)}`); + return await runSearchProcess(params); + } catch (e) { + const error = e as Error; + log.error(`UserInputError occurred: ${error.message}`); + return [{ text: error.message }] as Output[]; + } +} + +/** + * Runs the search and scrape in normal mode. 
+ */ +export async function handleSearchNormalMode(input: Input, + cheerioCrawlerOptions: CheerioCrawlerOptions, + playwrightCrawlerOptions: PlaywrightCrawlerOptions, + playwrightScraperSettings: PlaywrightScraperSettings, +) { + const startedTime = Date.now(); + cheerioCrawlerOptions.keepAlive = false; + playwrightCrawlerOptions.keepAlive = false; + playwrightCrawlerOptions.requestHandlerTimeoutSecs = PLAYWRIGHT_REQUEST_TIMEOUT_NORMAL_MODE_SECS; + + // playwrightCrawlerKey is used to identify the crawler that should process the search results + const { playwrightCrawlerKey, searchCrawler, playwrightCrawler } = await createAndStartCrawlers( + cheerioCrawlerOptions, + playwrightCrawlerOptions, + playwrightScraperSettings, + false, + ); + + const { req, isUrl } = prepareRequest(input, cheerioCrawlerOptions, playwrightCrawlerKey); + if (isUrl) { + // If the input query is a URL, we don't need to run the search crawler + log.info(`Skipping Google Search query because "${input.query}" is a valid URL.`); + await addPlaywrightCrawlRequest(req, '', playwrightCrawlerKey); + } else { + await addSearchRequest(req, cheerioCrawlerOptions); + addTimeMeasureEvent(req.userData!, 'before-cheerio-run', startedTime); + log.info(`Running Google Search crawler with request: ${JSON.stringify(req)}`); + await searchCrawler!.run(); + } + + addTimeMeasureEvent(req.userData!, 'before-playwright-run', startedTime); + log.info(`Running target page crawler with request: ${JSON.stringify(req)}`); + await playwrightCrawler!.run(); +} diff --git a/src/utils.ts b/src/utils.ts index e71902b..d41d944 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -43,6 +43,7 @@ export function randomId() { */ export function createSearchRequest( query: string, + responseId: string, maxResults: number, playwrightCrawlerKey: string, proxyConfiguration: ProxyConfiguration | undefined, @@ -58,7 +59,7 @@ export function createSearchRequest( return { url: urlSearch, uniqueKey: randomId(), - userData: { maxResults, 
timeMeasures: [], query, playwrightCrawlerKey }, + userData: { maxResults, timeMeasures: [], query, playwrightCrawlerKey, responseId }, }; } @@ -71,7 +72,7 @@ export function createSearchRequest( export function createRequest( result: OrganicResult, responseId: string, - timeMeasures: TimeMeasure[] | null, + timeMeasures: TimeMeasure[] | null = null, ): RequestOptions { return { url: result.url!,