From 8595eccca6a73df9a748c7f6b81a4b06b952b66a Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev <eyurtsev@gmail.com>
Date: Mon, 3 Apr 2023 13:34:43 -0400
Subject: [PATCH] Add type descriptors documentation (#113)

Add documentation for type descriptors
---
 docs/source/type_descriptors.ipynb | 347 +++++++++++++++++++++++++++++
 1 file changed, 347 insertions(+)
 create mode 100644 docs/source/type_descriptors.ipynb

diff --git a/docs/source/type_descriptors.ipynb b/docs/source/type_descriptors.ipynb
new file mode 100644
index 0000000..4f1d2f8
--- /dev/null
+++ b/docs/source/type_descriptors.ipynb
@@ -0,0 +1,347 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4b3a0584-b52c-4873-abb8-8382e13ff5c0",
+   "metadata": {},
+   "source": [
+    "# Type Descriptors\n",
+    "\n",
+    "Let's explore type-descriptors for a bit.\n",
+    "\n",
+    "Many LLMs are somewhat finicky, and a slightly better-phrased prompt may help improve results. So if you want\n",
+    "to use your own type descriptor, you can define a custom one and pass it as an argument when creating an extraction chain.\n",
+    "\n",
+    "At the moment, Kor only uses a very limited number of internal types. There's no way to represent a `Union` or even a `Boolean`. For the time being, use a `Text` node (or `str` in pydantic) to capture more complex / missing types."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0b4597b2-2a43-4491-8830-bf9f79428074",
+   "metadata": {
+    "nbsphinx": "hidden",
+    "tags": [
+     "remove-cell"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import sys\n",
+    "\n",
+    "sys.path.insert(0, \"../../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c719e4fc-3ccf-4633-a787-b2fe0d1eac65",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import enum\n",
+    "from typing import Optional, List\n",
+    "\n",
+    "from kor import from_pydantic\n",
+    "from pydantic import BaseModel, Field"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a59195d-bdef-47ad-a568-0984ada9259a",
+   "metadata": {},
+   "source": [
+    "## Let's define a schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "fface268-cda5-430e-a0dc-c354ee4cfe2f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class Action(enum.Enum):\n",
+    "    play = \"play\"\n",
+    "    stop = \"stop\"\n",
+    "    previous = \"previous\"\n",
+    "    next_ = \"next\"\n",
+    "\n",
+    "\n",
+    "class MusicRequest(BaseModel):\n",
+    "    song: Optional[List[str]] = Field(\n",
+    "        description=\"The song(s) that the user would like to be played.\"\n",
+    "    )\n",
+    "    album: Optional[List[str]] = Field(\n",
+    "        description=\"The album(s) that the user would like to be played.\"\n",
+    "    )\n",
+    "    artist: Optional[List[str]] = Field(\n",
+    "        description=\"The artist(s) whose music the user would like to hear.\",\n",
+    "        examples=[(\"Songs by paul simon\", \"paul simon\")],\n",
+    "    )\n",
+    "    action: Optional[Action] = Field(\n",
+    "        description=\"The action that should be taken; one of `play`, `stop`, `next`, `previous`\",\n",
+    "        examples=[\n",
+    "            (\"Please stop the music\", \"stop\"),\n",
+    "            (\"play something\", \"play\"),\n",
+    "            (\"play a song\", \"play\"),\n",
+    "            (\"next song\", \"next\"),\n",
+    "        ],\n",
+    "    )\n",
+    "    volume: Optional[float] = Field(\n",
+    "        description=\"Set the volume\",\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4fe1ec70-1428-4433-acac-c190674a666e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "schema, validator = from_pydantic(MusicRequest, description=\"Music recorder\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5472725a-95ec-4601-acd0-7e87890a6360",
+   "metadata": {},
+   "source": [
+    "## TypeScript"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8301c624-6933-4cd5-b5b6-1640c19ff32b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from kor import TypeScriptDescriptor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "42e007b7-7aab-4468-9793-7e7514bea98e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "descriptor = TypeScriptDescriptor()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "99ed0af7-b608-4b46-96fa-ea9798113760",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "```TypeScript\n",
+      "\n",
+      "musicrequest: { // Music recorder\n",
+      " song: Array<string> // The song(s) that the user would like to be played.\n",
+      " album: Array<string> // The album(s) that the user would like to be played.\n",
+      " artist: Array<string> // The artist(s) whose music the user would like to hear.\n",
+      " action: \"play\" | \"stop\" | \"previous\" | \"next\" // The action that should be taken; one of `play`, `stop`, `next`, `previous`\n",
+      " volume: number // Set the volume\n",
+      "}\n",
+      "```\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(descriptor.describe(schema))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1618bc04-ea40-4fdb-837f-e85e266299dd",
+   "metadata": {},
+   "source": [
+    "## BulletPoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "de9683ac-e5b3-46b2-a456-6beb7a5f38aa",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from kor import BulletPointDescriptor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "19fd08fb-0098-46c8-a540-9d998bc6ee16",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "descriptor = BulletPointDescriptor()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "c0788a1d-8eb7-442b-be98-1680501befae",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* musicrequest: Object # Music recorder\n",
+      "*  song: Text # The song(s) that the user would like to be played.\n",
+      "*  album: Text # The album(s) that the user would like to be played.\n",
+      "*  artist: Text # The artist(s) whose music the user would like to hear.\n",
+      "*  action: Selection # The action that should be taken; one of `play`, `stop`, `next`, `previous`\n",
+      "*  volume: Number # Set the volume\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(descriptor.describe(schema))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5b213137-cdb4-4309-ba2e-3b6bf716a2e2",
+   "metadata": {},
+   "source": [
+    "## Custom\n",
+    "\n",
+    "Here's an example of how to define your own type descriptor."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "966b3818-b591-41f5-b70e-fb51415bc3ae",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from typing import Any"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "6f7526ff-0ee0-4a17-8478-36e1c4445ca7",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from kor import TypeScriptDescriptor, Object\n",
+    "from kor.nodes import AbstractSchemaNode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "93ad957a-7d6f-4ad3-b824-384cca1aa682",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class MeowDescriptor(TypeScriptDescriptor):\n",
+    "    def visit_default(self, node: \"AbstractSchemaNode\", **kwargs: Any) -> List[str]:\n",
+    "        \"\"\"Default action for a node.\"\"\"\n",
+    "        depth = kwargs[\"depth\"]\n",
+    "        space = \" \" + depth * \" ~(^._.)\" + \" \"\n",
+    "        return [f\"{space}{node.id}: {node.__class__.__name__} # {node.description}\"]\n",
+    "    \n",
+    "    def visit_object(self, node: Object, **kwargs: Any) -> List[str]:\n",
+    "        \"\"\"Visit an object node.\"\"\"\n",
+    "        depth = kwargs[\"depth\"]\n",
+    "        code_lines = self.visit_default(node, depth=depth)\n",
+    "        for child in node.attributes:\n",
+    "            code_lines.extend(child.accept(self, depth=depth + 1))\n",
+    "        return code_lines\n",
+    "    \n",
+    "    def describe(self, node: Object) -> str:\n",
+    "        \"\"\"Describe the type of the given node.\"\"\"\n",
+    "        code_lines = node.accept(self, depth=0)\n",
+    "        return \"\\n\".join(code_lines)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "225782dd-20c7-4d96-b10c-57bc8dceb683",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  musicrequest: Object # Music recorder\n",
+      "  ~(^._.) song: Text # The song(s) that the user would like to be played.\n",
+      "  ~(^._.) album: Text # The album(s) that the user would like to be played.\n",
+      "  ~(^._.) artist: Text # The artist(s) whose music the user would like to hear.\n",
+      "  ~(^._.) action: Selection # The action that should be taken; one of `play`, `stop`, `next`, `previous`\n",
+      "  ~(^._.) volume: Number # Set the volume\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(MeowDescriptor().describe(schema))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}