diff --git a/.actor/input_schema.json b/.actor/input_schema.json index 00620cc..15b30b9 100644 --- a/.actor/input_schema.json +++ b/.actor/input_schema.json @@ -38,7 +38,7 @@ "description": "The maximum time (in seconds) allowed for request. If the request exceeds this time, it will be marked as failed and only already finished results will be returned", "minimum": 1, "maximum": 600, - "default": 60 + "default": 45 }, "proxyGroupSearch": { "title": "Search Proxy Group", diff --git a/CHANGELOG.md b/CHANGELOG.md index 31af11c..aaf1791 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ This changelog summarizes all changes of the RAG Web Browser +### 2024-10-07 + +πŸš€ Features +- Add a short description of how to create a custom action + ### 2024-09-24 πŸš€ Features diff --git a/README.md b/README.md index b2ac0f5..3510815 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ curl -X GET https://rag-web-browser.apify.actor?token=APIFY_API_TOKEN Then, you can send requests to the `/search` path along with your `query` and the number of results (`maxResults`) you want to retrieve. ```shell -curl -X GET https://rag-web-browser.apify.actor/search?token=APIFY_API_TOKEN&query=apify&maxResults=1 +curl -X GET https://rag-web-browser.apify.actor/search?token=APIFY_API_TOKEN\&query=apify\&maxResults=1 ``` Here’s an example of the server response (truncated for brevity): @@ -182,10 +182,22 @@ For example, the following outputs (truncated for brevity) illustrate this behav ] ``` -## ֎ How to use RAG Web Browser in your GPT as custom action? +## ֎ How to use RAG Web Browser in your GPT as a custom action? -You can easily call the RAG Web Browser to your GPT by uploading its OpenAPI specification and creating a custom action. -Follow the steps in the article [Add custom actions to your GPTs with Apify Actors](https://blog.apify.com/add-custom-actions-to-your-gpts/). 
+You can easily add the RAG Web Browser to your GPT by uploading its OpenAPI specification and creating a custom action. +Follow the detailed guide in the article [Add custom actions to your GPTs with Apify Actors](https://blog.apify.com/add-custom-actions-to-your-gpts/). + +Here's a quick guide to adding the RAG Web Browser to your GPT as a custom action: + +1. Click on **Explore GPTs** in the left sidebar, then select **+ Create** in the top right corner. +1. Complete all required details in the form. +1. Under the **Actions** section, click **Create new action**. +1. In the Action settings, set **Authentication** to **API key** and choose **Bearer** as **Auth Type**. +1. In the **schema** field, paste the OpenAPI specification for the RAG Web Browser. + 1. **Normal mode**: Copy the OpenAPI schema from the [RAG-Web-Browser Actor](https://console.apify.com/actors/3ox4R101TgZz67sLr/input) under the API -> OpenAPI specification. + 1. **Standby mode**: Copy the OpenAPI schema from the [OpenAPI standby mode](https://raw.githubusercontent.com/apify/rag-web-browser/refs/heads/master/docs/standby-openapi.json) JSON file. + +![Apify-RAG-Web-Browser-custom-action](https://raw.githubusercontent.com/apify/rag-web-browser/refs/heads/master/docs/apify-gpt-custom-action.png) ## πŸ‘·πŸΌ Development diff --git a/docs/apify-gpt-custom-action.png b/docs/apify-gpt-custom-action.png new file mode 100644 index 0000000..a5a2b26 Binary files /dev/null and b/docs/apify-gpt-custom-action.png differ diff --git a/docs/standby-openapi.json b/docs/standby-openapi.json new file mode 100644 index 0000000..b4e7e56 --- /dev/null +++ b/docs/standby-openapi.json @@ -0,0 +1,200 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "RAG Web Browser", + "description": "Web browser for OpenAI Assistants API and RAG pipelines, similar to a web browser in ChatGPT. 
It queries Google Search, scrapes the top N pages from the results, and returns their cleaned content as Markdown for further processing by an LLM.", + "version": "v1" + }, + "servers": [ + { + "url": "https://rag-web-browser.apify.actor" + } + ], + "paths": { + "/search": { + "get": { + "operationId": "apify_rag-web-browser", + "x-openai-isConsequential": false, + "summary": "Web browser for OpenAI Assistants API and RAG pipelines, similar to a web browser in ChatGPT. It queries Google Search, scrapes the top N pages from the results, and returns their cleaned content as Markdown for further processing by an LLM.", + "parameters": [ + { + "name": "query", + "in": "query", + "description": "Use regular search words or enter Google Search URLs. You can also apply advanced Google search techniques, such as AI site:twitter.com or javascript OR python", + "required": true, + "schema": { + "type": "string", + "pattern": "[^\\s]+" + } + }, + { + "name": "maxResults", + "in": "query", + "description": "The number of top organic search results to return and scrape text from", + "required": false, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 50 + } + }, + { + "name": "outputFormats", + "in": "query", + "description": "Select the desired output formats for the retrieved content", + "required": false, + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "text", + "markdown", + "html" + ] + }, + "default": [ + "text" + ] + }, + "style": "form", + "explode": false + }, + { + "name": "requestTimeoutSecs", + "in": "query", + "description": "The maximum time (in seconds) allowed for request. 
If the request exceeds this time, it will be marked as failed and only already finished results will be returned", + "required": false, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 600, + "default": 45 + } + }, + { + "name": "proxyGroupSearch", + "in": "query", + "description": "Proxy group for loading search results.", + "required": false, + "schema": { + "type": "string", + "enum": [ + "GOOGLE_SERP", + "SHADER" + ], + "default": "GOOGLE_SERP" + } + }, + { + "name": "maxRequestRetriesSearch", + "in": "query", + "description": "Maximum retries for Google search requests on errors.", + "required": false, + "schema": { + "type": "integer", + "minimum": 0, + "maximum": 3, + "default": 1 + } + }, + { + "name": "initialConcurrency", + "in": "query", + "description": "Initial number of browsers running in parallel.", + "required": false, + "schema": { + "type": "integer", + "minimum": 0, + "maximum": 50, + "default": 5 + } + }, + { + "name": "minConcurrency", + "in": "query", + "description": "Minimum number of browsers running in parallel.", + "required": false, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 3 + } + }, + { + "name": "maxConcurrency", + "in": "query", + "description": "Maximum number of browsers running in parallel.", + "required": false, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 10 + } + }, + { + "name": "maxRequestRetries", + "in": "query", + "description": "Maximum retries for Playwright content crawler.", + "required": false, + "schema": { + "type": "integer", + "minimum": 0, + "maximum": 3, + "default": 1 + } + }, + { + "name": "requestTimeoutContentCrawlSecs", + "in": "query", + "description": "Timeout for content crawling (seconds).", + "required": false, + "schema": { + "type": "integer", + "minimum": 1, + "maximum": 60, + "default": 30 + } + }, + { + "name": "dynamicContentWaitSecs", + "in": "query", + "description": "Time to wait for dynamic content to 
load (seconds).", + "required": false, + "schema": { + "type": "integer", + "default": 10 + } + }, + { + "name": "removeCookieWarnings", + "in": "query", + "description": "Removes cookie consent dialogs to improve text extraction.", + "required": false, + "schema": { + "type": "boolean", + "default": true + } + }, + { + "name": "debugMode", + "in": "query", + "description": "Stores debugging information in dataset if enabled.", + "required": false, + "schema": { + "type": "boolean", + "default": false + } + } + ], + "responses": { + "200": { + "description": "OK" + } + } + } + } + } +} diff --git a/src/defaults.json b/src/defaults.json index 34b4607..f7cae11 100644 --- a/src/defaults.json +++ b/src/defaults.json @@ -14,6 +14,6 @@ "query": "", "readableTextCharThreshold": 100, "removeCookieWarnings": true, - "requestTimeoutSecs": 60, + "requestTimeoutSecs": 45, "requestTimeoutContentCrawlSecs": 30 }