From 5e40290682fd6dba4f7c82634654718b896785e7 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Sun, 7 May 2023 15:15:13 -0400 Subject: [PATCH] Add kor to sys path when generating docs (#116) Add kor to sys path when generating docs to make it possible to generate docs even if the package isn't installed. --- docs/source/conf.py | 10 +- docs/source/prompt_examples.ipynb | 293 ------------------------------ 2 files changed, 5 insertions(+), 298 deletions(-) delete mode 100644 docs/source/prompt_examples.ipynb diff --git a/docs/source/conf.py b/docs/source/conf.py index f013a79..01e6453 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -10,15 +10,15 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) +import os +import sys +from typing import List +import toml # -- Project information ----------------------------------------------------- -from typing import List -import toml +sys.path.insert(0, os.path.abspath("../../")) with open("../../pyproject.toml") as f: data = toml.load(f) diff --git a/docs/source/prompt_examples.ipynb b/docs/source/prompt_examples.ipynb deleted file mode 100644 index 5298302..0000000 --- a/docs/source/prompt_examples.ipynb +++ /dev/null @@ -1,293 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6fcfbb4c-e91d-46aa-8653-38f094146c0c", - "metadata": {}, - "source": [ - "# Prompt Examples\n", - "\n", - "Here, we'll be extracting phone numbers from text. We'll do it with an without examples to illustrate the idea. " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "db4e848b-a369-4452-9df5-1b305f44ef6b", - "metadata": { - "nbsphinx": "hidden", - "tags": [ - "remove-cell" - ] - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "import sys\n", - "\n", - "sys.path.insert(0, \"../../\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "718c66a7-6186-4ed8-87e9-5ed28e3f209e", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from kor.extraction import Extractor\n", - "from kor.nodes import Object, Text, Number\n", - "from langchain.chat_models import ChatOpenAI" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9666b3b9-e48e-41ab-91b5-7bc6ec5983df", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "llm = ChatOpenAI(\n", - " model_name=\"gpt-3.5-turbo\",\n", - " temperature=0,\n", - " max_tokens=2000,\n", - " frequency_penalty=0,\n", - " presence_penalty=0,\n", - " top_p=1.0,\n", - ")\n", - "model = Extractor(llm)" - ] - }, - { - "cell_type": "markdown", - "id": "dacba845-d621-446c-a3cb-1a63092d574b", - "metadata": { - "tags": [] - }, - "source": [ - "## No Examples\n", - "\n", - "Sometimes it's sufficient to specify what to extract without providing any examples." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "086f5465-6455-4e90-9984-a65cbc2cf6fa", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "schema = Text(\n", - " id=\"phone_number\",\n", - " description=(\n", - " \"Any phone numbers found in the text. Should be output in the format:\"\n", - " \" (xxx)-xxx-xxxx\"\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f44d099a-0878-4d4b-b9a7-8fb14848d6f4", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'phone_number': ['(123)-444-9999', '(333)-123-4321', '888-111-1222']}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model(\n", - " (\n", - " \"My phone number is (123)-444-9999. I found my true love on a blue sunday.\"\n", - " \" Her number was (333)1234321. I had no phone when yesterday. \"\n", - " \"I received a call from 8881111222\"\n", - " ),\n", - " schema,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b94aaf17-8abc-4199-8ed7-263c342b82b5", - "metadata": {}, - "source": [ - "And the prompt that was used is:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "796cc8f4-0105-4dba-a62c-3167d30579fd", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly. Do not add any attributes that do not appear in the schema shown below.\n", - "\n", - "```TypeScript\n", - "\n", - "{\n", - " phone_number: string[] // Any phone numbers found in the text. Should be output in the format: (xxx)-xxx-xxxx\n", - "}\n", - "```\n", - "\n", - "\n", - "For Union types the output must EXACTLY match one of the members of the Union type.\n", - "\n", - "Please enclose the extracted information in HTML style tags with the tag name corresponding to the corresponding component ID. Use angle style brackets for the tags ('>' and '<'). Only output tags when you're confident about the information that was extracted from the user's query. If you can extract several pieces of relevant information from the query, then include all of them. If the type is an array, please repeat the corresponding tag name multiple times once for each relevant extraction. Do NOT output anything except for the extracted information. Only output information inside the HTML style tags. Do not include any notes or any clarifications. \n", - "\n", - "Input: [user input goes here]\n", - "Output:\n" - ] - } - ], - "source": [ - "print(model.prompt_generator.format_as_string(\"[user input goes here]\", schema))" - ] - }, - { - "cell_type": "markdown", - "id": "5418f208-7e65-4691-af0b-743628d2cd76", - "metadata": { - "tags": [] - }, - "source": [ - "## Add One Example" - ] - }, - { - "cell_type": "markdown", - "id": "e37dc306-0080-4973-9576-7705c65fc75e", - "metadata": {}, - "source": [ - "Let's add one example to see whether it helps format the last phone number correctly." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "3c5a4b67-d830-4d1b-9feb-5076490b9781", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'phone_number': ['(123)-444-9999', '(333)-123-4321', '(888)-111-1222']}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "schema = Text(\n", - " id=\"phone_number\",\n", - " description=(\n", - " \"Any phone numbers found in the text. Should be output in the format:\"\n", - " \" (xxx)-xxx-xxxx\"\n", - " ),\n", - " examples=[(\"Someone called me from 1231231234\", \"(123)-123-1234\")],\n", - ")\n", - "\n", - "model(\n", - " (\n", - " \"My phone number is (123)-444-9999. I found my true love on a blue sunday.\"\n", - " \" Her number was (333)1234321. I had no phone when yesterday. \"\n", - " \"I received a call from 8881111222\"\n", - " ),\n", - " schema,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "7ffc5d55-f144-4251-875a-6ba27a5a1b17", - "metadata": {}, - "source": [ - "And the prompt that was used is:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f4bf56fb-02f0-48e7-bc0c-b1e1126cdeb8", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly. Do not add any attributes that do not appear in the schema shown below.\n", - "\n", - "```TypeScript\n", - "\n", - "{\n", - " phone_number: string[] // Any phone numbers found in the text. Should be output in the format: (xxx)-xxx-xxxx\n", - "}\n", - "```\n", - "\n", - "\n", - "For Union types the output must EXACTLY match one of the members of the Union type.\n", - "\n", - "Please enclose the extracted information in HTML style tags with the tag name corresponding to the corresponding component ID. Use angle style brackets for the tags ('>' and '<'). Only output tags when you're confident about the information that was extracted from the user's query. If you can extract several pieces of relevant information from the query, then include all of them. If the type is an array, please repeat the corresponding tag name multiple times once for each relevant extraction. Do NOT output anything except for the extracted information. Only output information inside the HTML style tags. Do not include any notes or any clarifications. \n", - "\n", - "Input: Someone called me from 1231231234\n", - "Output: (123)-123-1234\n", - "Input: [user input goes here]\n", - "Output:\n" - ] - } - ], - "source": [ - "print(model.prompt_generator.format_as_string(\"[user input goes here]\", schema))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}