Add a short description how to create a custom action (#19)

apify · Oct 7, 2024 · 4119b47 · 4119b47
1 parent 932a411
commit 4119b47
Show file tree

Hide file tree

Showing 6 changed files with 223 additions and 6 deletions.
diff --git a/.actor/input_schema.json b/.actor/input_schema.json
@@ -38,7 +38,7 @@
             "description": "The maximum time (in seconds) allowed for the request. If the request exceeds this time, it will be marked as failed and only already finished results will be returned",
             "minimum": 1,
             "maximum": 600,
-            "default": 60
+            "default": 45
         },
         "proxyGroupSearch": {
             "title": "Search Proxy Group",

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 This changelog summarizes all changes of the RAG Web Browser
 
+### 2024-10-07
+
+🚀 Features
+- Add a short description of how to create a custom action
+
 ### 2024-09-24
 
 🚀 Features

diff --git a/README.md b/README.md
@@ -38,7 +38,7 @@ curl -X GET https://rag-web-browser.apify.actor?token=APIFY_API_TOKEN
 
 Then, you can send requests to the `/search` path along with your `query` and the number of results (`maxResults`) you want to retrieve.
 ```shell
-curl -X GET https://rag-web-browser.apify.actor/search?token=APIFY_API_TOKEN&query=apify&maxResults=1
+curl -X GET https://rag-web-browser.apify.actor/search?token=APIFY_API_TOKEN\&query=apify\&maxResults=1
 ```
 
 Here’s an example of the server response (truncated for brevity):
@@ -182,10 +182,22 @@ For example, the following outputs (truncated for brevity) illustrate this behav
 ]
 ```
 
-## ֎  How to use RAG Web Browser in your GPT as custom action?
+## ֎  How to use RAG Web Browser in your GPT as a custom action?
 
-You can easily call the RAG Web Browser to your GPT by uploading its OpenAPI specification and creating a custom action.
-Follow the steps in the article [Add custom actions to your GPTs with Apify Actors](https://blog.apify.com/add-custom-actions-to-your-gpts/).
+You can easily add the RAG Web Browser to your GPT by uploading its OpenAPI specification and creating a custom action.
+Follow the detailed guide in the article [Add custom actions to your GPTs with Apify Actors](https://blog.apify.com/add-custom-actions-to-your-gpts/).
+
+Here's a quick guide to adding the RAG Web Browser to your GPT as a custom action:
+
+1. Click on **Explore GPTs** in the left sidebar, then select **+ Create** in the top right corner.
+1. Complete all required details in the form.
+1. Under the **Actions** section, click **Create new action**.
+1. In the Action settings, set **Authentication** to **API key** and choose Bearer as **Auth Type**.
+1. In the **schema** field, paste the OpenAPI specification for the RAG Web Browser.
+   1. **Normal mode**: Copy the OpenAPI schema from the [RAG-Web-Browser Actor](https://console.apify.com/actors/3ox4R101TgZz67sLr/input) under the API -> OpenAPI specification.
+   1. **Standby mode**: Copy the OpenAPI schema from the [OpenAPI standby mode](https://raw.githubusercontent.com/apify/rag-web-browser/refs/heads/master/docs/standby-openapi.json) JSON file.
+
+![Apify-RAG-Web-Browser-custom-action](https://raw.githubusercontent.com/apify/rag-web-browser/refs/heads/master/docs/apify-gpt-custom-action.png)
 
 ## 👷🏼 Development
 

diff --git a/docs/apify-gpt-custom-action.png b/docs/apify-gpt-custom-action.png
diff --git a/docs/standby-openapi.json b/docs/standby-openapi.json
@@ -0,0 +1,200 @@
+{
+    "openapi": "3.1.0",
+    "info": {
+        "title": "RAG Web Browser",
+        "description": "Web browser for OpenAI Assistants API and RAG pipelines, similar to a web browser in ChatGPT. It queries Google Search, scrapes the top N pages from the results, and returns their cleaned content as Markdown for further processing by an LLM.",
+        "version": "v1"
+    },
+    "servers": [
+        {
+            "url": "https://rag-web-browser.apify.actor"
+        }
+    ],
+    "paths": {
+        "/search": {
+            "get": {
+                "operationId": "apify_rag-web-browser",
+                "x-openai-isConsequential": false,
+                "summary": "Web browser for OpenAI Assistants API and RAG pipelines, similar to a web browser in ChatGPT. It queries Google Search, scrapes the top N pages from the results, and returns their cleaned content as Markdown for further processing by an LLM.",
+                "parameters": [
+                    {
+                        "name": "query",
+                        "in": "query",
+                        "description": "Use regular search words or enter Google Search URLs. You can also apply advanced Google search techniques, such as AI site:twitter.com or javascript OR python",
+                        "required": true,
+                        "schema": {
+                            "type": "string",
+                            "pattern": "[^\\s]+"
+                        }
+                    },
+                    {
+                        "name": "maxResults",
+                        "in": "query",
+                        "description": "The number of top organic search results to return and scrape text from",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 1,
+                            "maximum": 50
+                        }
+                    },
+                    {
+                        "name": "outputFormats",
+                        "in": "query",
+                        "description": "Select the desired output formats for the retrieved content",
+                        "required": false,
+                        "schema": {
+                            "type": "array",
+                            "items": {
+                                "type": "string",
+                                "enum": [
+                                    "text",
+                                    "markdown",
+                                    "html"
+                                ]
+                            },
+                            "default": [
+                                "text"
+                            ]
+                        },
+                        "style": "form",
+                        "explode": false
+                    },
+                    {
+                        "name": "requestTimeoutSecs",
+                        "in": "query",
+                        "description": "The maximum time (in seconds) allowed for the request. If the request exceeds this time, it will be marked as failed and only already finished results will be returned",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 1,
+                            "maximum": 600,
+                            "default": 45
+                        }
+                    },
+                    {
+                        "name": "proxyGroupSearch",
+                        "in": "query",
+                        "description": "Proxy group for loading search results.",
+                        "required": false,
+                        "schema": {
+                            "type": "string",
+                            "enum": [
+                                "GOOGLE_SERP",
+                                "SHADER"
+                            ],
+                            "default": "GOOGLE_SERP"
+                        }
+                    },
+                    {
+                        "name": "maxRequestRetriesSearch",
+                        "in": "query",
+                        "description": "Maximum retries for Google search requests on errors.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 0,
+                            "maximum": 3,
+                            "default": 1
+                        }
+                    },
+                    {
+                        "name": "initialConcurrency",
+                        "in": "query",
+                        "description": "Initial number of browsers running in parallel.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 0,
+                            "maximum": 50,
+                            "default": 5
+                        }
+                    },
+                    {
+                        "name": "minConcurrency",
+                        "in": "query",
+                        "description": "Minimum number of browsers running in parallel.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 1,
+                            "maximum": 50,
+                            "default": 3
+                        }
+                    },
+                    {
+                        "name": "maxConcurrency",
+                        "in": "query",
+                        "description": "Maximum number of browsers running in parallel.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 1,
+                            "maximum": 50,
+                            "default": 10
+                        }
+                    },
+                    {
+                        "name": "maxRequestRetries",
+                        "in": "query",
+                        "description": "Maximum retries for Playwright content crawler.",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 0,
+                            "maximum": 3,
+                            "default": 1
+                        }
+                    },
+                    {
+                        "name": "requestTimeoutContentCrawlSecs",
+                        "in": "query",
+                        "description": "Timeout for content crawling (seconds).",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "minimum": 1,
+                            "maximum": 60,
+                            "default": 30
+                        }
+                    },
+                    {
+                        "name": "dynamicContentWaitSecs",
+                        "in": "query",
+                        "description": "Time to wait for dynamic content to load (seconds).",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "default": 10
+                        }
+                    },
+                    {
+                        "name": "removeCookieWarnings",
+                        "in": "query",
+                        "description": "Removes cookie consent dialogs to improve text extraction.",
+                        "required": false,
+                        "schema": {
+                            "type": "boolean",
+                            "default": true
+                        }
+                    },
+                    {
+                        "name": "debugMode",
+                        "in": "query",
+                        "description": "Stores debugging information in dataset if enabled.",
+                        "required": false,
+                        "schema": {
+                            "type": "boolean",
+                            "default": false
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/defaults.json b/src/defaults.json
@@ -14,6 +14,6 @@
     "query": "",
     "readableTextCharThreshold": 100,
     "removeCookieWarnings": true,
-    "requestTimeoutSecs": 60,
+    "requestTimeoutSecs": 45,
     "requestTimeoutContentCrawlSecs": 30
 }