From 8595eccca6a73df9a748c7f6b81a4b06b952b66a Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Mon, 3 Apr 2023 13:34:43 -0400 Subject: [PATCH] Add type descriptors documentation (#113) Add documentation for type descriptors --- docs/source/type_descriptors.ipynb | 347 +++++++++++++++++++++++++++++ 1 file changed, 347 insertions(+) create mode 100644 docs/source/type_descriptors.ipynb diff --git a/docs/source/type_descriptors.ipynb b/docs/source/type_descriptors.ipynb new file mode 100644 index 0000000..4f1d2f8 --- /dev/null +++ b/docs/source/type_descriptors.ipynb @@ -0,0 +1,347 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4b3a0584-b52c-4873-abb8-8382e13ff5c0", + "metadata": {}, + "source": [ + "# Type Descriptors\n", + "\n", + "Let's explore type-descriptors for a bit.\n", + "\n", + "Many LLMs are somewhat finicky, and a slightly better phrased prompt may help improve results. So if you want\n", + "to use your own type-descriptions, you can define a custom one and pass it as an argument when creating an extraction chain.\n", + "\n", + "At the moment, Kor only uses a very limited number of internal types. There's no way to represent a `Union` or even a `Boolean`. For the time being use a `Text` node (or `str` in pydnatic) to capture more complex / missing types." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0b4597b2-2a43-4491-8830-bf9f79428074", + "metadata": { + "nbsphinx": "hidden", + "tags": [ + "remove-cell" + ] + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import sys\n", + "\n", + "sys.path.insert(0, \"../../\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c719e4fc-3ccf-4633-a787-b2fe0d1eac65", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import enum\n", + "from typing import Optional, List\n", + "\n", + "from kor import from_pydantic\n", + "from pydantic import BaseModel, Field" + ] + }, + { + "cell_type": "markdown", + "id": "3a59195d-bdef-47ad-a568-0984ada9259a", + "metadata": {}, + "source": [ + "## Let's define a schema" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fface268-cda5-430e-a0dc-c354ee4cfe2f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "class Action(enum.Enum):\n", + " play = \"play\"\n", + " stop = \"stop\"\n", + " previous = \"previous\"\n", + " next_ = \"next\"\n", + "\n", + "\n", + "class MusicRequest(BaseModel):\n", + " song: Optional[List[str]] = Field(\n", + " description=\"The song(s) that the user would like to be played.\"\n", + " )\n", + " album: Optional[List[str]] = Field(\n", + " description=\"The album(s) that the user would like to be played.\"\n", + " )\n", + " artist: Optional[List[str]] = Field(\n", + " description=\"The artist(s) whose music the user would like to hear.\",\n", + " examples=[(\"Songs by paul simon\", \"paul simon\")],\n", + " )\n", + " action: Optional[Action] = Field(\n", + " description=\"The action that should be taken; one of `play`, `stop`, `next`, `previous`\",\n", + " examples=[\n", + " (\"Please stop the music\", \"stop\"),\n", + " (\"play something\", \"play\"),\n", + " (\"play a song\", \"play\"),\n", + " (\"next song\", \"next\"),\n", + " ],\n", + " )\n", + " volume: Optional[float] = Field(\n", + " description=\"Set the volume\",\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4fe1ec70-1428-4433-acac-c190674a666e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "schema, validator = from_pydantic(MusicRequest, description=\"Music recorder\")" + ] + }, + { + "cell_type": "markdown", + "id": "5472725a-95ec-4601-acd0-7e87890a6360", + "metadata": {}, + "source": [ + "## TypeScript" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8301c624-6933-4cd5-b5b6-1640c19ff32b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from kor import TypeScriptDescriptor" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "42e007b7-7aab-4468-9793-7e7514bea98e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "descriptor = TypeScriptDescriptor()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "99ed0af7-b608-4b46-96fa-ea9798113760", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```TypeScript\n", + "\n", + "musicrequest: { // Music recorder\n", + " song: Array // The song(s) that the user would like to be played.\n", + " album: Array // The album(s) that the user would like to be played.\n", + " artist: Array // The artist(s) whose music the user would like to hear.\n", + " action: \"play\" | \"stop\" | \"previous\" | \"next\" // The action that should be taken; one of `play`, `stop`, `next`, `previous`\n", + " volume: number // Set the volume\n", + "}\n", + "```\n", + "\n" + ] + } + ], + "source": [ + "print(descriptor.describe(schema))" + ] + }, + { + "cell_type": "markdown", + "id": "1618bc04-ea40-4fdb-837f-e85e266299dd", + "metadata": {}, + "source": [ + "## BulletPoint" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "de9683ac-e5b3-46b2-a456-6beb7a5f38aa", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from kor import BulletPointDescriptor" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "19fd08fb-0098-46c8-a540-9d998bc6ee16", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "descriptor = BulletPointDescriptor()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c0788a1d-8eb7-442b-be98-1680501befae", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* musicrequest: Object # Music recorder\n", + "* song: Text # The song(s) that the user would like to be played.\n", + "* album: Text # The album(s) that the user would like to be played.\n", + "* artist: Text # The artist(s) whose music the user would like to hear.\n", + "* action: Selection # The action that should be taken; one of `play`, `stop`, `next`, `previous`\n", + "* volume: Number # Set the volume\n" + ] + } + ], + "source": [ + "print(descriptor.describe(schema))" + ] + }, + { + "cell_type": "markdown", + "id": "5b213137-cdb4-4309-ba2e-3b6bf716a2e2", + "metadata": {}, + "source": [ + "## Custom\n", + "\n", + "Here's an example on how to define your own type-description." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "966b3818-b591-41f5-b70e-fb51415bc3ae", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from typing import Any" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6f7526ff-0ee0-4a17-8478-36e1c4445ca7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from kor import TypeScriptDescriptor, Object\n", + "from kor.nodes import AbstractSchemaNode" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "93ad957a-7d6f-4ad3-b824-384cca1aa682", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "class MeowDescriptor(TypeScriptDescriptor):\n", + " def visit_default(self, node: \"AbstractSchemaNode\", **kwargs: Any) -> List[str]:\n", + " \"\"\"Default action for a node.\"\"\"\n", + " depth = kwargs[\"depth\"]\n", + " space = \" \" + depth * \" ~(^._.)\" + \" \"\n", + " return [f\"{space}{node.id}: {node.__class__.__name__} # {node.description}\"]\n", + " \n", + " def visit_object(self, node: Object, **kwargs: Any) -> List[str]:\n", + " \"\"\"Visit an object node.\"\"\"\n", + " depth = kwargs[\"depth\"]\n", + " code_lines = self.visit_default(node, depth=depth)\n", + " for child in node.attributes:\n", + " code_lines.extend(child.accept(self, depth=depth + 1))\n", + " return code_lines\n", + " \n", + " def describe(self, node: Object) -> str:\n", + " \"\"\"Describe the type of the given node.\"\"\"\n", + " code_lines = node.accept(self, depth=0)\n", + " return \"\\n\".join(code_lines)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "225782dd-20c7-4d96-b10c-57bc8dceb683", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " musicrequest: Object # Music recorder\n", + " ~(^._.) song: Text # The song(s) that the user would like to be played.\n", + " ~(^._.) album: Text # The album(s) that the user would like to be played.\n", + " ~(^._.) artist: Text # The artist(s) whose music the user would like to hear.\n", + " ~(^._.) action: Selection # The action that should be taken; one of `play`, `stop`, `next`, `previous`\n", + " ~(^._.) volume: Number # Set the volume\n" + ] + } + ], + "source": [ + "print(MeowDescriptor().describe(schema))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}