From ec4cda7a97b7e6c43271a5d830de0a2cc364108c Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Fri, 15 Mar 2024 19:33:12 -0500
Subject: [PATCH] feat: Add jupytext support for notebooks (#7)

* Add jupytext to requirements and rebuild lock files.
* Add jupytext .ipynb to .py pairing rules to pyproject.toml.
* Convert .ipynb to .py:percent with jupytext.
* Ignore .ipynb files to make versioning easier.
* Apply ruff formatting.
* Add section on using jupytext to README.
---
 .gitignore                      |   3 +-
 README.md                       |   7 +
 notebooks/Pre-processing.ipynb  | 728 --------------------------------
 notebooks/Run-assignments.ipynb | 620 ---------------------------
 notebooks/pre-processing.py     | 415 ++++++++++++++++++
 notebooks/run-assignments.py    | 331 +++++++++++++++
 pyproject.toml                  |   5 +
 requirements.lock               |  27 +-
 requirements.txt                |   1 +
 9 files changed, 787 insertions(+), 1350 deletions(-)
 delete mode 100644 notebooks/Pre-processing.ipynb
 delete mode 100644 notebooks/Run-assignments.ipynb
 create mode 100644 notebooks/pre-processing.py
 create mode 100644 notebooks/run-assignments.py

diff --git a/.gitignore b/.gitignore
index 8599432..0a6fdcf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ __pycache__
 *.json
 venv/
 *ipynb_checkpoints
-.DS_Store
\ No newline at end of file
+.DS_Store
+*.ipynb
diff --git a/README.md b/README.md
index 531ed08..b1c852c 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,13 @@ When you're ready to run tests, run:
 python3 -m pytest
 ```
 
+## Using `jupytext` with Jupyter notebooks
+
+[`jupytext`](https://jupytext.readthedocs.io/) allows for easier versioning of Jupyter notebooks by saving their inputs (code and Markdown cells, but not cell outputs) in [specially formatted](https://jupytext.readthedocs.io/en/latest/formats-scripts.html#the-percent-format) `.py` files and then generating the notebook representation when you [select them in a Jupyter interface](https://jupytext.readthedocs.io/en/latest/text-notebooks.html#how-to-open-a-text-notebook-in-jupyter-lab).
+Version the `.py` files as you normally would with any other text file.
+To run the `.py` files as Jupyter notebooks, select them in the Jupyter file browser, right-click, and then select _Open With → Notebook_.
+Any changes made in a Jupyter notebook will be automatically synced to the [paired](https://jupytext.readthedocs.io/en/latest/paired-notebooks.html) `.py` file each time the notebook is saved.
+
 # File breakdown
 
 Here's a short explanation of each file/folder in this template:
diff --git a/notebooks/Pre-processing.ipynb b/notebooks/Pre-processing.ipynb
deleted file mode 100644
index 91269fc..0000000
--- a/notebooks/Pre-processing.ipynb
+++ /dev/null
@@ -1,728 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2f0334ca",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import duckdb\n",
-    "\n",
-    "# Raw data to import\n",
-    "raw_files = dict(\n",
-    "    scipy_reviewers = \"../data/scipy_reviewers.csv\", # people who signed up as reviewers\n",
-    "    pretalx_sessions = \"../data/sessions.csv\", # all proposal exported from pretalx\n",
-    "    pretalx_speakers = \"../data/speakers.csv\", # all speakers exported from pretalx\n",
-    "    pretalx_reviewers = \"../data/pretalx_reviewers.csv\", # all reviewers copy-pasted from pretalx\n",
-    "    coi_reviewers = \"../data/scipy_coi_export.csv\", # all responses to the coi form\n",
-    "    coi_authors = \"../data/coi_authors.csv\", # copy pasted values of author names from coi form\n",
-    "    tracks = \"../data/tracks.csv\" # manually entered track IDs\n",
-    ")\n",
-    "\n",
-    "# Output\n",
-    "database_file = \"../data/assign_reviews.db\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "03d0f519",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con = duckdb.connect(database_file)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9ce918a8-7441-4e64-bcf6-a3e6e8f864aa",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def create_and_show_table(file_name, table_name, show=True):\n",
-    "    con.sql(f\"create or replace table {table_name} as select * from read_csv(\\\"{file_name}\\\", header=true)\")\n",
-    "    if show is True:\n",
-    "        return con.sql(f\"table {table_name}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bd7267b9-c1f5-45c1-996b-af93c7b02f08",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from IPython import display\n",
-    "for table_name, file_name in raw_files.items():\n",
-    "    print(table_name)\n",
-    "    display.display(create_and_show_table(file_name, table_name).df())\n",
-    "    print(\"\\n\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a7570eed-6187-4b40-ac8c-ed25e4beb8cb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "table tracks\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2924fa43-915c-4c63-b715-08e2075983e4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "with dupes as\n",
-    "    (\n",
-    "        select\n",
-    "            name,\n",
-    "            num,\n",
-    "            email\n",
-    "        from\n",
-    "            (\n",
-    "                select\n",
-    "                    name,\n",
-    "                    count(*) as num,\n",
-    "                    string_agg(Email) as email\n",
-    "                from\n",
-    "                    scipy_reviewers\n",
-    "                    group by Name\n",
-    "            )\n",
-    "            where\n",
-    "                num>1\n",
-    "        )\n",
-    "\n",
-    "select * from dupes\n",
-    "\"\"\").df()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "04ea463b-4151-4dfe-ae2d-08b1c763e9a0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "select count(*) from scipy_reviewers\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dd8ddb47-daf1-4ac7-abfa-39f7d839be13",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "select count(*) from pretalx_reviewers\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "11439a50-cfbb-403b-a0fa-cf4b91f31e11",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "select count(*) from coi_reviewers\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "55e81f6d-c8c9-4cfc-9770-db731068c55f",
-   "metadata": {},
-   "source": [
-    "This is a table with all reviewers who\n",
-    "1. signed up\n",
-    "2. created an account on pretalx\n",
-    "3. submitted the COI form"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7b9ecee5-ae65-4c49-8e6c-4ebcd6a796f2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "create or replace table reviewers as\n",
-    "    select\n",
-    "        scipy_reviewers.Name as name,\n",
-    "        scipy_reviewers.Email as email,\n",
-    "        \\\"Track(s) to review for (check all that apply)\\\" as tracks,\n",
-    "        \\\"Mark the speaker(s) or company/organization/affiliation(s) that could pose a conflict of interest\\\" as coi\n",
-    "    from scipy_reviewers\n",
-    "    join pretalx_reviewers on scipy_reviewers.Email = pretalx_reviewers.Email\n",
-    "    join coi_reviewers on coi_reviewers.Email = pretalx_reviewers.Email\n",
-    "\"\"\")\n",
-    "\n",
-    "df = con.sql(\"select distinct * from reviewers\").df()\n",
-    "num_reviewers = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "461de254-93e5-4a97-b7d5-3074df212fdb",
-   "metadata": {},
-   "source": [
-    "Reviewers who signed up for pretalx but did not fill in COI"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4221ff7c-c824-4517-820c-1f8323d95970",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con = duckdb.connect(database_file)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b70bc5b8-118a-43a4-9b07-2d6dbc59eb1a",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email\").df()\n",
-    "num_pretalx_no_coi = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "64d329e5-88f2-4107-a33f-59bdf1a98c8d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df.to_csv(\"input/signed_up_for_pretalx_no_coi.csv\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5c475b2b-ac98-495d-94e8-5135366523e2",
-   "metadata": {},
-   "source": [
-    "Reviewers who filled in COI but did not sign up for pretalx"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "868dcd1e-8f4a-4e01-8ad2-0538d8b4557d",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email\").df()\n",
-    "num_coi_no_pretalx = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c1b6f1c8-e00f-4e79-ad6c-f509591a362a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df.to_csv(\"input/submitted_coi_no_pretalx.csv\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c493bc49-adbc-4173-a9c0-38178c7e8135",
-   "metadata": {},
-   "source": [
-    "People who signed up as reviewer"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "edf7bf56-7884-49fa-811c-0e5b3c3c5e92",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"\"\"\n",
-    "select distinct * from scipy_reviewers\n",
-    "\"\"\").df()\n",
-    "num_signed_up = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0025d41a-ecf6-4c08-985f-3e30807675e3",
-   "metadata": {},
-   "source": [
-    "People who signed up as reviewer and signed up for pretalx and submitted COI but used different email addresses"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4feef06c-985a-485c-b4f1-73d87c2f8f1c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"\"\"\n",
-    "create or replace table reviewers_with_email_typos as\n",
-    "(with no_coi as\n",
-    "(select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email),\n",
-    "no_pretalx as\n",
-    "(select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email)\n",
-    "select distinct scipy_reviewers.Name, scipy_reviewers.Email, no_pretalx.Email as no_pretalx_email, no_coi.email as no_coi_email from scipy_reviewers\n",
-    "join no_coi on no_coi.Name = scipy_reviewers.Name\n",
-    "join no_pretalx on no_pretalx.Name = no_coi.Name)\n",
-    "\"\"\")\n",
-    "df = con.sql(\"table reviewers_with_email_typos\").df()\n",
-    "num_typos = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5a2c83c8-29e2-412e-b84e-ab4944e650c4",
-   "metadata": {},
-   "source": [
-    "People who signed up as reviewer and signed up for pretalx and submitted COI but used different names"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cf854063-4a88-4be2-8cd9-04fd78f1cf9f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"\"\"\n",
-    "(with no_coi as\n",
-    "(select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email),\n",
-    "no_pretalx as\n",
-    "(select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email)\n",
-    "select distinct scipy_reviewers.Name, scipy_reviewers.Email, no_pretalx.Name as no_pretalx_name, no_coi.name as no_coi_name from scipy_reviewers\n",
-    "join no_coi on no_coi.Email = scipy_reviewers.Email\n",
-    "join no_pretalx on no_pretalx.Email = no_coi.Email)\n",
-    "\"\"\").df()\n",
-    "num_typos_name = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "eb7b392a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df.to_csv(\"input/reviewers_multi_email.csv\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4e7c6208-0e73-4447-b749-578b7f84f5a9",
-   "metadata": {},
-   "source": [
-    "People who signed up as reviewer and didn't sign up for pretalx nor submitted COI"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d7bf3576-78e6-4367-abc0-f48e250807e4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"\"\"\n",
-    "(with no_coi as\n",
-    "(select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email),\n",
-    "no_pretalx as\n",
-    "(select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email)\n",
-    "select distinct scipy_reviewers.Name, scipy_reviewers.Email from scipy_reviewers\n",
-    "anti join reviewers on reviewers.Name = scipy_reviewers.Name\n",
-    "anti join no_coi on no_coi.Name = scipy_reviewers.Name\n",
-    "anti join no_pretalx on no_pretalx.Name = scipy_reviewers.Name)\n",
-    "\"\"\").df()\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "29faf2df-6c56-447a-9303-30e297ad4ca7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = con.sql(\"\"\"\n",
-    "select distinct * from scipy_reviewers\n",
-    "anti join reviewers on scipy_reviewers.Email = reviewers.email\n",
-    "\"\"\").df()\n",
-    "num_no_show = len(df)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ad4b6bb5-2698-4246-afd0-2003c9ac6bda",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# df.to_csv(\"input/all_reviewers_without_assignments.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b4e2ebd4-1c7c-47b2-88a5-f12f52e3b132",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "num_no_show = num_signed_up-num_reviewers-num_pretalx_no_coi-num_coi_no_pretalx\n",
-    "num_partial = sum([num_pretalx_no_coi, num_coi_no_pretalx, num_no_show])\n",
-    "num_reviewers, num_signed_up, num_pretalx_no_coi, num_coi_no_pretalx, num_no_show, num_partial"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "af12d40a-1f28-4154-9ded-1f358d1d3e06",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"select * from reviewers where instr(name, 'eli')\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "223b6992-1f77-44f7-83df-8fae78b30d29",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# con.sql(\"table reviewers\").df().to_csv(\"input/reviewers_to_assign_with_name.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dbb586ee-df00-4fb9-9d16-e3e5d570c3de",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"select * from reviewers where instr(Name, 'Wu')\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d82591ec-8532-4b0b-bcf5-ebbc7275c266",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sum([num_pretalx_no_coi, num_coi_no_pretalx, num_reviewers])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e1b3cbed-547b-49b8-80e6-4ab0df57b08f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "with dupes as\n",
-    "    (\n",
-    "        select\n",
-    "            *\n",
-    "        from\n",
-    "            (\n",
-    "                select\n",
-    "                    name,\n",
-    "                    count(*) as num,\n",
-    "                    string_agg(email) as email,\n",
-    "                    string_agg(tracks) as tracks,\n",
-    "                    string_agg(coi) as coi\n",
-    "                from\n",
-    "                    reviewers\n",
-    "                    group by name\n",
-    "            )\n",
-    "            where\n",
-    "                num>1\n",
-    "        )\n",
-    "\n",
-    "select * from dupes\n",
-    "\"\"\").df().T.to_json()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fa1a29e2-b27c-4ed4-a075-4c726e6dda48",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"create or replace table reviewers as (select distinct * from reviewers)\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2a548f2e-6b79-4c28-8b57-1efae53694ec",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "create or replace table reviewers_with_tracks as\n",
-    "with reviewers_no_dupes as (select distinct * from reviewers)\n",
-    "select reviewers_no_dupes.name, email, list(tracks.name) as tracks, list(tracks.track_id) as track_ids from reviewers_no_dupes\n",
-    "    join tracks on instr(reviewers_no_dupes.tracks, tracks.name)\n",
-    "    group by reviewers_no_dupes.name, email\n",
-    "\"\"\"\n",
-    ")\n",
-    "\n",
-    "con.sql(\"select distinct * from reviewers_with_tracks\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3427dba1-d840-46a2-a304-7a643c1aeee0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"select ID as submission_id, \\\"Speaker IDs\\\" as speaker_ids from pretalx_sessions\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0657b537-bd53-426a-9345-ceca87f36a73",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(f\"\"\"\n",
-    "create or replace table reviewers_with_coi as\n",
-    "\n",
-    "with submissions_with_authors as (\n",
-    "    select\n",
-    "        ID as submission_id,\n",
-    "        \\\"Speaker IDs\\\" as speaker_ids\n",
-    "    from\n",
-    "        pretalx_sessions\n",
-    ")\n",
-    "select\n",
-    "    reviewers.name,\n",
-    "    reviewers.email,\n",
-    "    list(pretalx_speakers.Name) as speakers,\n",
-    "    list(pretalx_speakers.ID) AS speaker_ids,\n",
-    "    list(submissions_with_authors.submission_id) as submission_ids\n",
-    "from\n",
-    "    reviewers\n",
-    "    left join coi_authors on instr(coi, coi_authors.author)\n",
-    "    left join pretalx_speakers on contains(coi_authors.author, pretalx_speakers.Name)\n",
-    "    left join submissions_with_authors on contains(submissions_with_authors.speaker_ids, pretalx_speakers.ID)\n",
-    "group by reviewers.name, reviewers.email\n",
-    "order by reviewers.name\n",
-    "\"\"\"\n",
-    ")\n",
-    "\n",
-    "con.sql(\"table reviewers_with_coi\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ffef4114-385b-44b0-9bc1-f21a8a1402ed",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "with reviewers_with_coi_pre as (\n",
-    "    select name, email, author\n",
-    "    from reviewers\n",
-    "    join coi_authors on instr(coi, coi_authors.author)\n",
-    ")\n",
-    "select count(*), author from reviewers_with_coi_pre anti join pretalx_speakers on contains(reviewers_with_coi_pre.author, pretalx_speakers.Name) group by author\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "acdd01c4-eb7b-4f80-a8dc-462a9bc5508b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"table reviewers_with_tracks\").df()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "90185548-bfdd-47d1-bdac-d95af43b8c0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"select email as reviewer_id, list(track_id) as tracks from reviewers_with_tracks group by email\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "adeb8dcc-614c-430e-a65f-932ee13a881c",
-   "metadata": {},
-   "source": [
-    "# Final tables for script"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cac7157c-c2eb-4f2e-bd02-cc2fe788afc4",
-   "metadata": {},
-   "source": [
-    "## reviewers_to_assign"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "183fabc0-a5fa-446d-9375-381cd75ff7c7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table reviewers_to_assign as\n",
-    "select\n",
-    "    reviewers_with_coi.email as reviewer_id,\n",
-    "    reviewers_with_tracks.track_ids as tracks,\n",
-    "    reviewers_with_coi.submission_ids as conflicts_submission_ids\n",
-    "from reviewers_with_coi\n",
-    "join reviewers_with_tracks on reviewers_with_tracks.email = reviewers_with_coi.email\n",
-    "\"\"\")\n",
-    "\n",
-    "con.sql(\"table reviewers_to_assign\").df()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4c1facf8-046c-49e0-b21b-67c2d7b5dc64",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# con.sql(\"table reviewers_to_assign\").df().to_csv(\"input/reviewers_to_assign.csv\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5f717e8c-9b97-4666-a2c7-80ab491d61af",
-   "metadata": {},
-   "source": [
-    "## submissions_to_assign"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "34f07bb1-101e-45f1-8598-e6a459393de9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table submissions_to_assign as\n",
-    "select\n",
-    "    ID as submission_id,\n",
-    "    string_split(\\\"Speaker IDs\\\", '\\n') as author_ids,\n",
-    "    track_id as track\n",
-    "from pretalx_sessions\n",
-    "    join tracks on pretalx_sessions.Track = tracks.name\n",
-    "\"\"\")\n",
-    "\n",
-    "con.sql(\"table submissions_to_assign\").df()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "59c00158-ce1b-4f58-9cff-dbb34a5cd8af",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# con.sql(\"table submissions_to_assign\").df().to_csv(\"input/submissions_to_assign.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d2c113b9-e32f-40c9-ab97-df975d12f741",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# con.sql(\"table submissions_to_assign\").df().author_ids.iloc[1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a5c7a0fd-b18e-4f57-b0ef-23fdd06531ff",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.close()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e6dd57d1-49cb-42e3-a774-81b2d2dca2a0",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/Run-assignments.ipynb b/notebooks/Run-assignments.ipynb
deleted file mode 100644
index 59e2b88..0000000
--- a/notebooks/Run-assignments.ipynb
+++ /dev/null
@@ -1,620 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bbfa2c54-c1f1-4241-bc27-91fc1b1265bf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# %%\n",
-    "####################\n",
-    "## ASSIGN REVIEWS ##\n",
-    "####################\n",
-    "# Imports\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import duckdb\n",
-    "\n",
-    "import sys\n",
-    "sys.path.append(\"..\")\n",
-    "from assign_reviews import create_objective_fun, create_lb_ub, create_constraints, solve_milp, format_and_output_result"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ad38296b-0b09-4c67-bff3-37943db918b1",
-   "metadata": {},
-   "source": [
-    "# Start script"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1b72db3b-80e4-41b5-992c-e01edc0c223e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mkdir output"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1e0ec716-20a4-470b-9aea-5dc5853be804",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ASSIGN_TUTORIALS_TO_ANYONE = False\n",
-    "TUTORIAL_COEFF = 0.8\n",
-    "\n",
-    "DEBUG = True\n",
-    "\n",
-    "database_file = \"../data/assign_reviews.db\"\n",
-    "con = duckdb.connect(database_file)\n",
-    "df_submissions = con.sql(\"table submissions_to_assign\").df()\n",
-    "df_reviewers = con.sql(\"table reviewers_to_assign\").df()\n",
-    "\n",
-    "df_submissions = df_submissions.assign(assigned_reviewer_ids=[[]] * len(df_submissions))\n",
-    "df_reviewers = df_reviewers.assign(assigned_submission_ids=[[]] * len(df_reviewers))\n",
-    "\n",
-    "len(df_submissions), len(df_reviewers)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ef711304-a232-4b5f-a5f2-08ae410c8d62",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_submissions[df_submissions.track==\"TUT\"]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "34384f90-6f5a-42b6-b6d6-b27c756ee9b8",
-   "metadata": {},
-   "source": [
-    "## Step 1. Assign tutorial reviewers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e6e46cab-603a-4c28-8d75-bcbf982cdb56",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "MIN_TUTORIALS_PER_PERSON = 0\n",
-    "MAX_TUTORIALS_PER_PERSON = 5\n",
-    "MIN_REVIEWERS_PER_TUTORIAL = 3\n",
-    "MAX_REVIEWERS_PER_TUTORIAL = 4\n",
-    "\n",
-    "df_submissions_tutorials = df_submissions[df_submissions.track==\"TUT\"]\n",
-    "\n",
-    "solution = solve_milp(\n",
-    "    df_reviewers,\n",
-    "    df_submissions_tutorials,\n",
-    "    MIN_TUTORIALS_PER_PERSON,\n",
-    "    MAX_TUTORIALS_PER_PERSON,\n",
-    "    MIN_REVIEWERS_PER_TUTORIAL,\n",
-    "    MAX_REVIEWERS_PER_TUTORIAL,\n",
-    "    TUTORIAL_COEFF,\n",
-    "    ASSIGN_TUTORIALS_TO_ANYONE\n",
-    ")\n",
-    "reviewers, submissions = format_and_output_result(df_reviewers, df_submissions_tutorials, solution, post_fix=\"00\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7109479e-3e56-4ef0-9f17-1be5e9d6f7f9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(reviewers)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "029c405c-48fb-4598-896e-e0227e9b6987",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_reviewers_with_tut = df_reviewers.assign(assigned_submission_ids=df.assigned_submission_ids)\n",
-    "df_reviewers_with_tut"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9ee582a7-e840-4273-ae03-aa88d1912157",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"select * from df_reviewers_with_tut\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9c3195a4-2baf-4dd0-a7b3-62ce193e00fc",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"create or replace table reviewer_assignments_00 as select * from df_reviewers_with_tut\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1f2fcddf-8573-4ce5-9e33-ab3c2ff73cc5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(submissions)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "03146222-8257-446e-820c-89710391d08d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table submission_assignments_00 as\n",
-    "select df_submissions.submission_id, df_submissions.author_ids, df_submissions.track,\n",
-    "list_concat(df_submissions.assigned_reviewer_ids, df.assigned_reviewer_ids) as assigned_reviewer_ids\n",
-    "from df_submissions\n",
-    "left join df on df.submission_id = df_submissions.submission_id\n",
-    "\"\"\")\n",
-    "con.sql(\"table submission_assignments_00\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d303b378-63ff-405c-aaf1-4af51f09bfea",
-   "metadata": {},
-   "source": [
-    "## Step 2. Assign talk reviewers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e9695336-6696-4dc3-86eb-9d0eea82bcea",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_reviewers_with_tut[df_reviewers_with_tut.assigned_submission_ids.apply(len) == 0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0174c6e7-0feb-4073-9ee7-0f994439dd2c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "MIN_REVIEWS_PER_PERSON = 5\n",
-    "MAX_REVIEWS_PER_PERSON = 9\n",
-    "MIN_REVIEWERS_PER_SUBMISSION = 2\n",
-    "MAX_REVIEWERS_PER_SUBMISSION = 4\n",
-    "\n",
-    "df_reviewers_no_submissions = df_reviewers_with_tut[df_reviewers_with_tut.assigned_submission_ids.apply(len) == 0]\n",
-    "df_submissions_no_tutorials = df_submissions[df_submissions.track!=\"TUT\"]\n",
-    "\n",
-    "solution = solve_milp(\n",
-    "    df_reviewers_no_submissions,\n",
-    "    df_submissions_no_tutorials,\n",
-    "    MIN_REVIEWS_PER_PERSON,\n",
-    "    MAX_REVIEWS_PER_PERSON,\n",
-    "    MIN_REVIEWERS_PER_SUBMISSION,\n",
-    "    MAX_REVIEWERS_PER_SUBMISSION,\n",
-    "    TUTORIAL_COEFF,\n",
-    "    ASSIGN_TUTORIALS_TO_ANYONE\n",
-    ")\n",
-    "if solution is not None:\n",
-    "    reviewers, submissions = format_and_output_result(df_reviewers_no_submissions, df_submissions_no_tutorials, solution, post_fix=\"01\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "941e4ea6-e3d0-4da5-bb32-9c1bbb6698bf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_reviewers_with_tut"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e56630ab-54b5-4c81-a91b-0ee795f6ac35",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(reviewers)[[\"reviewer_id\", \"assigned_submission_ids\"]]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "aa3bc6b2-00a1-4c63-b8c4-816bce8ecead",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d4a8cf52-4eba-4231-a779-fcfcd4ae3387",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table reviewer_assignments_01 as\n",
-    "select\n",
-    "    df_reviewers_with_tut.reviewer_id, tracks, conflicts_submission_ids,\n",
-    "    list_concat(df_reviewers_with_tut.assigned_submission_ids, df.assigned_submission_ids) as assigned_submission_ids\n",
-    "from df_reviewers_with_tut\n",
-    "left join df on df.reviewer_id = df_reviewers_with_tut.reviewer_id\n",
-    "\"\"\")\n",
-    "con.sql(\"table reviewer_assignments_01\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "656b2edc-0374-4c66-b7aa-19fb315c73d1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(submissions)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9a1716c3-d879-4d91-9aae-69601f372f77",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table submission_assignments_01 as\n",
-    "select submission_assignments_00.submission_id, submission_assignments_00.author_ids, submission_assignments_00.track,\n",
-    "list_concat(submission_assignments_00.assigned_reviewer_ids, df.assigned_reviewer_ids) as assigned_reviewer_ids\n",
-    "from submission_assignments_00\n",
-    "left join df on df.submission_id = submission_assignments_00.submission_id\n",
-    "\"\"\")\n",
-    "con.sql(\"table submission_assignments_01\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d2cc2801-c6c5-417a-9f5d-cf60ee8d3dc7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(submissions)\n",
-    "df = df.assign(num_reviewers=df.assigned_reviewer_ids.apply(len))\n",
-    "df[df.num_reviewers>2]\n",
-    "df[df.num_reviewers==2]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b7c3c63e-908b-474a-ab02-02873b89b942",
-   "metadata": {},
-   "source": [
-    "## Step 3. Assign talks to tutorial reviewers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b1e3d00e-f3bd-4166-b00d-10c9c5c4c2ec",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(submissions)\n",
-    "df = df.assign(num_reviewers=df.assigned_reviewer_ids.apply(len))\n",
-    "df_submissions_few_reviewers = df[df.num_reviewers==2]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2afdf9a9-b4bd-4cbf-b8ec-9148236cc53f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "MIN_REVIEWS_PER_PERSON = 0\n",
-    "MAX_REVIEWS_PER_PERSON = 4\n",
-    "MIN_REVIEWERS_PER_SUBMISSION = 1\n",
-    "MAX_REVIEWERS_PER_SUBMISSION = 2\n",
-    "\n",
-    "df_reviewers_only_tut = df_reviewers_with_tut[df_reviewers_with_tut.assigned_submission_ids.apply(len) > 0]\n",
-    "\n",
-    "solution = solve_milp(\n",
-    "    df_reviewers_only_tut,\n",
-    "    df_submissions_few_reviewers,\n",
-    "    MIN_REVIEWS_PER_PERSON,\n",
-    "    MAX_REVIEWS_PER_PERSON,\n",
-    "    MIN_REVIEWERS_PER_SUBMISSION,\n",
-    "    MAX_REVIEWERS_PER_SUBMISSION,\n",
-    "    TUTORIAL_COEFF,\n",
-    "    ASSIGN_TUTORIALS_TO_ANYONE\n",
-    ")\n",
-    "\n",
-    "if solution is not None:\n",
-    "    reviewers, submissions = format_and_output_result(df_reviewers_only_tut, df_submissions_few_reviewers, solution, post_fix=\"02\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8ddc69f0-6408-4d4a-91eb-7e1d967aa6ab",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(submissions)\n",
-    "df = df.assign(num_reviewers=df.assigned_reviewer_ids.apply(len))\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e6dcb32f-b32a-42ea-b36c-49674f39ecb2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(reviewers)\n",
-    "df = df[[\"reviewer_id\", \"assigned_submission_ids\"]]\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "132eb314-268d-4778-bce1-dda40cbdd2a3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table reviewer_assignments_02 as\n",
-    "select\n",
-    "    reviewer_assignments_01.reviewer_id, tracks, conflicts_submission_ids,\n",
-    "    list_concat(reviewer_assignments_01.assigned_submission_ids, df.assigned_submission_ids) as assigned_submission_ids\n",
-    "from reviewer_assignments_01\n",
-    "left join df on df.reviewer_id = reviewer_assignments_01.reviewer_id\n",
-    "\"\"\")\n",
-    "con.sql(\"table reviewer_assignments_02\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a5397c3f-d63f-493e-908a-47e857e09f57",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"select count(*), string_agg(reviewer_id), len(assigned_submission_ids) as num_submissions from reviewer_assignments_02 group by num_submissions\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e57c7c6-d2f4-4742-83cb-f876f01574ce",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(submissions)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "12ecbfa5-60bc-4c13-b209-cb686aa04c7f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "create or replace table submission_assignments_02 as\n",
-    "select submission_assignments_01.submission_id, submission_assignments_01.author_ids, submission_assignments_01.track,\n",
-    "list_concat(submission_assignments_01.assigned_reviewer_ids, df.assigned_reviewer_ids) as assigned_reviewer_ids\n",
-    "from submission_assignments_01\n",
-    "left join df on df.submission_id = submission_assignments_01.submission_id\n",
-    "\"\"\")\n",
-    "con.sql(\"table submission_assignments_02\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f8186a06-981a-4f88-bde7-be4479c57697",
-   "metadata": {},
-   "source": [
-    "## Final counts/checks"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "135ef2a0-69ab-4b76-afc4-41e898ee9083",
-   "metadata": {},
-   "source": [
-    "All submissions have at least 3 reviewers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d8719243-645d-40d1-97ee-36e21f6cfdc9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "select string_agg(submission_id), count(track), len(assigned_reviewer_ids) from submission_assignments_02 group by len(assigned_reviewer_ids)\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "de1edcf3-2866-422b-ade4-b9c4caa5bbfd",
-   "metadata": {},
-   "source": [
-    "Step 1: Only tutorial assignments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2e3747df-88a0-4ebd-9f3c-4f0d73c4d9ce",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "select string_agg(reviewer_id), count(reviewer_id), string_agg(tracks), len(assigned_submission_ids) from reviewer_assignments_00 group by len(assigned_submission_ids)\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4ff68efd-6ef6-44c0-ae5e-16d8e22e230c",
-   "metadata": {},
-   "source": [
-    "Step 2: Add talks assignments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "40ec3a7e-e444-4890-b03b-54abdc90a13d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "select string_agg(reviewer_id), count(reviewer_id), string_agg(tracks), len(assigned_submission_ids) from reviewer_assignments_01 group by len(assigned_submission_ids)\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7be6ab3a-21d5-46f5-bcb1-5d1ed79573f7",
-   "metadata": {},
-   "source": [
-    "Step 3: Assign talks to tutorial reviewers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "acea73f1-c7b1-4d32-8cca-2ab37ef6ab67",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.sql(\"\"\"\n",
-    "select string_agg(reviewer_id), count(reviewer_id), string_agg(tracks), len(assigned_submission_ids) from reviewer_assignments_02 group by len(assigned_submission_ids)\n",
-    "\"\"\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "aa5491c1-7086-4b54-ac34-8dd541f94767",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.close()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f993a8fb-25e1-4250-bbd6-8e67cf662659",
-   "metadata": {},
-   "source": [
-    "## Final export"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e67db9e7-5ca8-40e9-b90b-1a4dd97fb05d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import duckdb\n",
-    "database_file = \"../data/assign_reviews.db\"\n",
-    "con = duckdb.connect(database_file)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "585bbb58-3582-41b9-b805-b9a607508f5e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "reviewer_assignments_final = {\n",
-    "    item[\"reviewer_id\"]: item[\"assigned_submission_ids\"]\n",
-    "    for item in\n",
-    "    con.sql(\"table reviewer_assignments_02\").df()[[\"reviewer_id\", \"assigned_submission_ids\"]].to_dict(\"records\")\n",
-    "}\n",
-    "with open(f\"output/reviewer-assignments.json\", \"w\") as fp:\n",
-    "        fp.write(json.dumps(reviewer_assignments_final, indent=4))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c502cf9b-f573-4565-bc59-b98e0c0070b1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "con.close()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4f88e36e-9b7e-4c4e-a43d-b7248a2f0f1e",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/pre-processing.py b/notebooks/pre-processing.py
new file mode 100644
index 0000000..9f6c49f
--- /dev/null
+++ b/notebooks/pre-processing.py
@@ -0,0 +1,415 @@
+# ---
+# jupyter:
+#   jupytext:
+#     notebook_metadata_filter: all,-jupytext.text_representation.jupytext_version
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+#   language_info:
+#     codemirror_mode:
+#       name: ipython
+#       version: 3
+#     file_extension: .py
+#     mimetype: text/x-python
+#     name: python
+#     nbconvert_exporter: python
+#     pygments_lexer: ipython3
+#     version: 3.12.1
+# ---
+
+# %%
+import duckdb
+from IPython import display
+
+# Raw CSV exports to import, keyed by the DuckDB table name each one is loaded into
+raw_files = dict(
+    scipy_reviewers="../data/scipy_reviewers.csv",  # people who signed up as reviewers
+    pretalx_sessions="../data/sessions.csv",  # all proposals exported from pretalx
+    pretalx_speakers="../data/speakers.csv",  # all speakers exported from pretalx
+    pretalx_reviewers="../data/pretalx_reviewers.csv",  # all reviewers copy-pasted from pretalx
+    coi_reviewers="../data/scipy_coi_export.csv",  # all responses to the COI form
+    coi_authors="../data/coi_authors.csv",  # copy-pasted values of author names from the COI form
+    tracks="../data/tracks.csv",  # manually entered track IDs
+)
+
+# Output
+database_file = "../data/assign_reviews.db"
+
+# %%
+con = duckdb.connect(database_file)
+
+
+# %%
+def create_and_show_table(file_name, table_name, show=True):
+    """(Re)create table `table_name` from the CSV `file_name`; return the table query only if `show is True`."""
+    con.sql(f'create or replace table {table_name} as select * from read_csv("{file_name}", header=true)')
+    return con.sql(f"table {table_name}") if show is True else None
+
+
+# %%
+for table_name, file_name in raw_files.items():
+    print(table_name)
+    display.display(create_and_show_table(file_name, table_name).df())
+    print("\n")
+
+# %%
+con.sql(
+    """
+table tracks
+"""
+)
+
+# %%
+con.sql(
+    """
+with dupes as
+    (
+        select
+            name,
+            num,
+            email
+        from
+            (
+                select
+                    name,
+                    count(*) as num,
+                    string_agg(Email) as email
+                from
+                    scipy_reviewers
+                    group by Name
+            )
+            where
+                num>1
+        )
+
+select * from dupes
+"""
+).df()
+
+# %%
+con.sql(
+    """
+select count(*) from scipy_reviewers
+"""
+)
+
+# %%
+con.sql(
+    """
+select count(*) from pretalx_reviewers
+"""
+)
+
+# %%
+con.sql(
+    """
+select count(*) from coi_reviewers
+"""
+)
+
+# %% [markdown]
+# This is a table with all reviewers who
+# 1. signed up
+# 2. created an account on pretalx
+# 3. submitted the COI form
+
+# %%
+con.sql(
+    """
+create or replace table reviewers as
+    select
+        scipy_reviewers.Name as name,
+        scipy_reviewers.Email as email,
+        \"Track(s) to review for (check all that apply)\" as tracks,
+        \"Mark the speaker(s) or company/organization/affiliation(s) that could pose a conflict of interest\" as coi
+    from scipy_reviewers
+    join pretalx_reviewers on scipy_reviewers.Email = pretalx_reviewers.Email
+    join coi_reviewers on coi_reviewers.Email = pretalx_reviewers.Email
+"""
+)
+
+df = con.sql("select distinct * from reviewers").df()
+num_reviewers = len(df)
+df
+
+# %% [markdown]
+# Reviewers who signed up for pretalx but did not fill in COI
+
+# %%
+con = duckdb.connect(database_file)
+
+# %%
+df = con.sql(
+    "select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email"
+).df()
+num_pretalx_no_coi = len(df)
+df
+
+# %%
+# df.to_csv("input/signed_up_for_pretalx_no_coi.csv")
+
+# %% [markdown]
+# Reviewers who filled in COI but did not sign up for pretalx
+
+# %%
+df = con.sql(
+    "select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email"
+).df()
+num_coi_no_pretalx = len(df)
+df
+
+# %%
+# df.to_csv("input/submitted_coi_no_pretalx.csv")
+
+# %% [markdown]
+# People who signed up as reviewer
+
+# %%
+df = con.sql(
+    """
+select distinct * from scipy_reviewers
+"""
+).df()
+num_signed_up = len(df)
+df
+
+# %% [markdown]
+# People who signed up as reviewer and signed up for pretalx and submitted COI but used different email addresses
+
+# %%
+df = con.sql(
+    """
+create or replace table reviewers_with_email_typos as
+(with no_coi as
+(select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email),
+no_pretalx as
+(select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email)
+select distinct scipy_reviewers.Name, scipy_reviewers.Email, no_pretalx.Email as no_pretalx_email, no_coi.email as no_coi_email from scipy_reviewers
+join no_coi on no_coi.Name = scipy_reviewers.Name
+join no_pretalx on no_pretalx.Name = no_coi.Name)
+"""  # noqa: E501
+)
+df = con.sql("table reviewers_with_email_typos").df()
+num_typos = len(df)
+df
+
+# %% [markdown]
+# People who signed up as reviewer and signed up for pretalx and submitted COI but used different names
+
+# %%
+df = con.sql(
+    """
+(with no_coi as
+(select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email),
+no_pretalx as
+(select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email)
+select distinct scipy_reviewers.Name, scipy_reviewers.Email, no_pretalx.Name as no_pretalx_name, no_coi.name as no_coi_name from scipy_reviewers
+join no_coi on no_coi.Email = scipy_reviewers.Email
+join no_pretalx on no_pretalx.Email = no_coi.Email)
+"""  # noqa: E501
+).df()
+num_typos_name = len(df)
+df
+
+# %%
+# df.to_csv("input/reviewers_multi_email.csv")
+
+# %% [markdown]
+# People who signed up as reviewers but neither signed up for pretalx nor submitted the COI form
+
+# %%
+df = con.sql(
+    """
+(with no_coi as
+(select * from pretalx_reviewers anti join coi_reviewers on pretalx_reviewers.Email = coi_reviewers.Email),
+no_pretalx as
+(select * from coi_reviewers anti join pretalx_reviewers on coi_reviewers.Email = pretalx_reviewers.Email)
+select distinct scipy_reviewers.Name, scipy_reviewers.Email from scipy_reviewers
+anti join reviewers on reviewers.Name = scipy_reviewers.Name
+anti join no_coi on no_coi.Name = scipy_reviewers.Name
+anti join no_pretalx on no_pretalx.Name = scipy_reviewers.Name)
+"""
+).df()
+df
+
+# %%
+df = con.sql(
+    """
+select distinct * from scipy_reviewers
+anti join reviewers on scipy_reviewers.Email = reviewers.email
+"""
+).df()
+num_no_show = len(df)
+df
+
+# %%
+# df.to_csv("input/all_reviewers_without_assignments.csv")
+
+# %%
+num_no_show = num_signed_up - num_reviewers - num_pretalx_no_coi - num_coi_no_pretalx
+num_partial = sum([num_pretalx_no_coi, num_coi_no_pretalx, num_no_show])
+num_reviewers, num_signed_up, num_pretalx_no_coi, num_coi_no_pretalx, num_no_show, num_partial
+
+# %%
+con.sql("select * from reviewers where instr(name, 'eli')")
+
+# %%
+# con.sql("table reviewers").df().to_csv("input/reviewers_to_assign_with_name.csv")
+
+# %%
+con.sql("select * from reviewers where instr(Name, 'Wu')")
+
+# %%
+sum([num_pretalx_no_coi, num_coi_no_pretalx, num_reviewers])
+
+# %%
+con.sql(
+    """
+with dupes as
+    (
+        select
+            *
+        from
+            (
+                select
+                    name,
+                    count(*) as num,
+                    string_agg(email) as email,
+                    string_agg(tracks) as tracks,
+                    string_agg(coi) as coi
+                from
+                    reviewers
+                    group by name
+            )
+            where
+                num>1
+        )
+
+select * from dupes
+"""
+).df().T.to_json()
+
+# %%
+con.sql("create or replace table reviewers as (select distinct * from reviewers)")
+
+# %%
+con.sql(
+    """
+create or replace table reviewers_with_tracks as
+with reviewers_no_dupes as (select distinct * from reviewers)
+select reviewers_no_dupes.name, email, list(tracks.name) as tracks, list(tracks.track_id) as track_ids from reviewers_no_dupes
+    join tracks on instr(reviewers_no_dupes.tracks, tracks.name)
+    group by reviewers_no_dupes.name, email
+"""  # noqa: E501
+)
+
+con.sql("select distinct * from reviewers_with_tracks")
+
+# %%
+con.sql('select ID as submission_id, "Speaker IDs" as speaker_ids from pretalx_sessions')
+
+# %%
+con.sql(
+    """
+create or replace table reviewers_with_coi as
+
+with submissions_with_authors as (
+    select
+        ID as submission_id,
+        \"Speaker IDs\" as speaker_ids
+    from
+        pretalx_sessions
+)
+select
+    reviewers.name,
+    reviewers.email,
+    list(pretalx_speakers.Name) as speakers,
+    list(pretalx_speakers.ID) AS speaker_ids,
+    list(submissions_with_authors.submission_id) as submission_ids
+from
+    reviewers
+    left join coi_authors on instr(coi, coi_authors.author)
+    left join pretalx_speakers on contains(coi_authors.author, pretalx_speakers.Name)
+    left join submissions_with_authors on contains(submissions_with_authors.speaker_ids, pretalx_speakers.ID)
+group by reviewers.name, reviewers.email
+order by reviewers.name
+"""
+)
+
+con.sql("table reviewers_with_coi")
+
+# %%
+con.sql(
+    """
+with reviewers_with_coi_pre as (
+    select name, email, author
+    from reviewers
+    join coi_authors on instr(coi, coi_authors.author)
+)
+select count(*), author from reviewers_with_coi_pre anti join pretalx_speakers on contains(reviewers_with_coi_pre.author, pretalx_speakers.Name) group by author
+"""  # noqa: E501
+)
+
+# %%
+con.sql("table reviewers_with_tracks").df()
+
+# %%
+con.sql("select email as reviewer_id, list(track_id) as tracks from reviewers_with_tracks group by email")
+
+# %% [markdown]
+# # Final tables for script
+
+# %% [markdown]
+# ## reviewers_to_assign
+
+# %%
+con.sql(
+    """
+create or replace table reviewers_to_assign as
+select
+    reviewers_with_coi.email as reviewer_id,
+    reviewers_with_tracks.track_ids as tracks,
+    reviewers_with_coi.submission_ids as conflicts_submission_ids
+from reviewers_with_coi
+join reviewers_with_tracks on reviewers_with_tracks.email = reviewers_with_coi.email
+"""
+)
+
+con.sql("table reviewers_to_assign").df()
+
+# %%
+# con.sql("table reviewers_to_assign").df().to_csv("input/reviewers_to_assign.csv")
+
+# %% [markdown]
+# ## submissions_to_assign
+
+# %%
+con.sql(
+    """
+create or replace table submissions_to_assign as
+select
+    ID as submission_id,
+    string_split(\"Speaker IDs\", '\n') as author_ids,
+    track_id as track
+from pretalx_sessions
+    join tracks on pretalx_sessions.Track = tracks.name
+"""
+)
+
+con.sql("table submissions_to_assign").df()
+
+# %%
+# con.sql("table submissions_to_assign").df().to_csv("input/submissions_to_assign.csv")
+
+# %%
+# con.sql("table submissions_to_assign").df().author_ids.iloc[1]
+
+# %%
+con.close()
+
+# %%
diff --git a/notebooks/run-assignments.py b/notebooks/run-assignments.py
new file mode 100644
index 0000000..a4fc383
--- /dev/null
+++ b/notebooks/run-assignments.py
@@ -0,0 +1,331 @@
+# ---
+# jupyter:
+#   jupytext:
+#     notebook_metadata_filter: all,-jupytext.text_representation.jupytext_version
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+#   language_info:
+#     codemirror_mode:
+#       name: ipython
+#       version: 3
+#     file_extension: .py
+#     mimetype: text/x-python
+#     name: python
+#     nbconvert_exporter: python
+#     pygments_lexer: ipython3
+#     version: 3.12.1
+# ---
+
+# %%
+####################
+## ASSIGN REVIEWS ##
+####################
+# Imports
+import json
+import sys
+
+import duckdb
+import pandas as pd
+
+sys.path.append("..")
+from assign_reviews import format_and_output_result, solve_milp
+
+# %% [markdown]
+# # Start script
+
+# %%
+# mkdir output
+
+# %%
+ASSIGN_TUTORIALS_TO_ANYONE = False
+TUTORIAL_COEFF = 0.8
+
+DEBUG = True
+
+database_file = "../data/assign_reviews.db"
+con = duckdb.connect(database_file)
+df_submissions = con.sql("table submissions_to_assign").df()
+df_reviewers = con.sql("table reviewers_to_assign").df()
+# Build one fresh list per row; `[[]] * n` would put the same list object in every row.
+df_submissions = df_submissions.assign(assigned_reviewer_ids=[[] for _ in range(len(df_submissions))])
+df_reviewers = df_reviewers.assign(assigned_submission_ids=[[] for _ in range(len(df_reviewers))])
+
+len(df_submissions), len(df_reviewers)
+
+# %%
+df_submissions[df_submissions.track == "TUT"]
+
+# %% [markdown]
+# ## Step 1. Assign tutorial reviewers
+
+# %%
+MIN_TUTORIALS_PER_PERSON = 0
+MAX_TUTORIALS_PER_PERSON = 5
+MIN_REVIEWERS_PER_TUTORIAL = 3
+MAX_REVIEWERS_PER_TUTORIAL = 4
+
+df_submissions_tutorials = df_submissions[df_submissions.track == "TUT"]
+
+solution = solve_milp(
+    df_reviewers,
+    df_submissions_tutorials,
+    MIN_TUTORIALS_PER_PERSON,
+    MAX_TUTORIALS_PER_PERSON,
+    MIN_REVIEWERS_PER_TUTORIAL,
+    MAX_REVIEWERS_PER_TUTORIAL,
+    TUTORIAL_COEFF,
+    ASSIGN_TUTORIALS_TO_ANYONE,
+)
+reviewers, submissions = format_and_output_result(df_reviewers, df_submissions_tutorials, solution, post_fix="00")
+
+# %%
+df = pd.DataFrame(reviewers)
+df
+
+# %%
+df_reviewers_with_tut = df_reviewers.assign(assigned_submission_ids=df.assigned_submission_ids)
+df_reviewers_with_tut
+
+# %%
+con.sql("select * from df_reviewers_with_tut")
+
+# %%
+con.sql("create or replace table reviewer_assignments_00 as select * from df_reviewers_with_tut")
+
+# %%
+df = pd.DataFrame(submissions)
+df
+
+# %%
+con.sql(
+    """
+create or replace table submission_assignments_00 as
+select df_submissions.submission_id, df_submissions.author_ids, df_submissions.track,
+list_concat(df_submissions.assigned_reviewer_ids, df.assigned_reviewer_ids) as assigned_reviewer_ids
+from df_submissions
+left join df on df.submission_id = df_submissions.submission_id
+"""
+)
+con.sql("table submission_assignments_00")
+
+# %% [markdown]
+# ## Step 2. Assign talk reviewers
+
+# %%
+df_reviewers_with_tut[df_reviewers_with_tut.assigned_submission_ids.apply(len) == 0]
+
+# %%
+MIN_REVIEWS_PER_PERSON = 5
+MAX_REVIEWS_PER_PERSON = 9
+MIN_REVIEWERS_PER_SUBMISSION = 2
+MAX_REVIEWERS_PER_SUBMISSION = 4
+
+df_reviewers_no_submissions = df_reviewers_with_tut[df_reviewers_with_tut.assigned_submission_ids.apply(len) == 0]
+df_submissions_no_tutorials = df_submissions[df_submissions.track != "TUT"]
+
+solution = solve_milp(
+    df_reviewers_no_submissions,
+    df_submissions_no_tutorials,
+    MIN_REVIEWS_PER_PERSON,
+    MAX_REVIEWS_PER_PERSON,
+    MIN_REVIEWERS_PER_SUBMISSION,
+    MAX_REVIEWERS_PER_SUBMISSION,
+    TUTORIAL_COEFF,
+    ASSIGN_TUTORIALS_TO_ANYONE,
+)
+if solution is not None:
+    reviewers, submissions = format_and_output_result(
+        df_reviewers_no_submissions, df_submissions_no_tutorials, solution, post_fix="01"
+    )
+
+# %%
+df_reviewers_with_tut
+
+# %%
+df = pd.DataFrame(reviewers)[["reviewer_id", "assigned_submission_ids"]]
+
+# %%
+df
+
+# %%
+con.sql(
+    """
+create or replace table reviewer_assignments_01 as
+select
+    df_reviewers_with_tut.reviewer_id, tracks, conflicts_submission_ids,
+    list_concat(df_reviewers_with_tut.assigned_submission_ids, df.assigned_submission_ids) as assigned_submission_ids
+from df_reviewers_with_tut
+left join df on df.reviewer_id = df_reviewers_with_tut.reviewer_id
+"""
+)
+con.sql("table reviewer_assignments_01")
+
+# %%
+df = pd.DataFrame(submissions)
+df
+
+# %%
+con.sql(
+    """
+create or replace table submission_assignments_01 as
+select submission_assignments_00.submission_id, submission_assignments_00.author_ids, submission_assignments_00.track,
+list_concat(submission_assignments_00.assigned_reviewer_ids, df.assigned_reviewer_ids) as assigned_reviewer_ids
+from submission_assignments_00
+left join df on df.submission_id = submission_assignments_00.submission_id
+"""
+)
+con.sql("table submission_assignments_01")
+
+# %%
+df = pd.DataFrame(submissions)
+df = df.assign(num_reviewers=df.assigned_reviewer_ids.apply(len))
+df[df.num_reviewers > 2]
+df[df.num_reviewers == 2]
+
+# %% [markdown]
+# ## Step 3. Assign talks to tutorial reviewers
+
+# %%
+df = pd.DataFrame(submissions)
+df = df.assign(num_reviewers=df.assigned_reviewer_ids.apply(len))
+df_submissions_few_reviewers = df[df.num_reviewers == 2]
+
+# %%
+MIN_REVIEWS_PER_PERSON = 0
+MAX_REVIEWS_PER_PERSON = 4
+MIN_REVIEWERS_PER_SUBMISSION = 1
+MAX_REVIEWERS_PER_SUBMISSION = 2
+
+df_reviewers_only_tut = df_reviewers_with_tut[df_reviewers_with_tut.assigned_submission_ids.apply(len) > 0]
+
+solution = solve_milp(
+    df_reviewers_only_tut,
+    df_submissions_few_reviewers,
+    MIN_REVIEWS_PER_PERSON,
+    MAX_REVIEWS_PER_PERSON,
+    MIN_REVIEWERS_PER_SUBMISSION,
+    MAX_REVIEWERS_PER_SUBMISSION,
+    TUTORIAL_COEFF,
+    ASSIGN_TUTORIALS_TO_ANYONE,
+)
+
+if solution is not None:
+    reviewers, submissions = format_and_output_result(
+        df_reviewers_only_tut, df_submissions_few_reviewers, solution, post_fix="02"
+    )
+
+# %%
+df = pd.DataFrame(submissions)
+df = df.assign(num_reviewers=df.assigned_reviewer_ids.apply(len))
+df
+
+# %%
+df = pd.DataFrame(reviewers)
+df = df[["reviewer_id", "assigned_submission_ids"]]
+df
+
+# %%
+con.sql(
+    """
+create or replace table reviewer_assignments_02 as
+select
+    reviewer_assignments_01.reviewer_id, tracks, conflicts_submission_ids,
+    list_concat(reviewer_assignments_01.assigned_submission_ids, df.assigned_submission_ids) as assigned_submission_ids
+from reviewer_assignments_01
+left join df on df.reviewer_id = reviewer_assignments_01.reviewer_id
+"""
+)
+con.sql("table reviewer_assignments_02")
+
+# %%
+con.sql(
+    "select count(*), string_agg(reviewer_id), len(assigned_submission_ids) as num_submissions from reviewer_assignments_02 group by num_submissions"  # noqa: E501
+)
+
+# %%
+df = pd.DataFrame(submissions)
+df
+
+# %%
+con.sql(
+    """
+create or replace table submission_assignments_02 as
+select submission_assignments_01.submission_id, submission_assignments_01.author_ids, submission_assignments_01.track,
+list_concat(submission_assignments_01.assigned_reviewer_ids, df.assigned_reviewer_ids) as assigned_reviewer_ids
+from submission_assignments_01
+left join df on df.submission_id = submission_assignments_01.submission_id
+"""
+)
+con.sql("table submission_assignments_02")
+
+# %% [markdown]
+# ## Final counts/checks
+
+# %% [markdown]
+# All submissions have at least 3 reviewers
+
+# %%
+con.sql(
+    """
+select string_agg(submission_id), count(track), len(assigned_reviewer_ids) from submission_assignments_02 group by len(assigned_reviewer_ids)
+"""  # noqa: E501
+)
+
+# %% [markdown]
+# Step 1: Only tutorial assignments
+
+# %%
+con.sql(
+    """
+select string_agg(reviewer_id), count(reviewer_id), string_agg(tracks), len(assigned_submission_ids) from reviewer_assignments_00 group by len(assigned_submission_ids)
+"""  # noqa: E501
+)
+
+# %% [markdown]
+# Step 2: Add talks assignments
+
+# %%
+con.sql(
+    """
+select string_agg(reviewer_id), count(reviewer_id), string_agg(tracks), len(assigned_submission_ids) from reviewer_assignments_01 group by len(assigned_submission_ids)
+"""  # noqa: E501
+)
+
+# %% [markdown]
+# Step 3: Assign talks to tutorial reviewers
+
+# %%
+con.sql(
+    """
+select string_agg(reviewer_id), count(reviewer_id), string_agg(tracks), len(assigned_submission_ids) from reviewer_assignments_02 group by len(assigned_submission_ids)
+"""  # noqa: E501
+)
+
+# %%
+con.close()
+
+# %% [markdown]
+# ## Final export
+
+# %%
+database_file = "../data/assign_reviews.db"
+con = duckdb.connect(database_file)
+
+# %%
+reviewer_assignments_final = {
+    item["reviewer_id"]: item["assigned_submission_ids"]
+    for item in con.sql("table reviewer_assignments_02")
+    .df()[["reviewer_id", "assigned_submission_ids"]]
+    .to_dict("records")
+}
+with open("output/reviewer-assignments.json", "w", encoding="utf-8") as fp:
+    json.dump(reviewer_assignments_final, fp, indent=4)  # maps reviewer_id -> assigned_submission_ids
+
+# %%
+con.close()
diff --git a/pyproject.toml b/pyproject.toml
index e16a72a..5743198 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,3 +17,8 @@ pythonpath = ['.']
 
 [tool.coverage.report]
 show_missing = true
+
+[tool.jupytext]
+# Always pair ipynb notebooks to py:percent files
+formats = ["ipynb", "py:percent"]
+notebook_metadata_filter = "all,-jupytext.text_representation.jupytext_version"
diff --git a/requirements.lock b/requirements.lock
index dd8c12b..aeb45fe 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -492,6 +492,15 @@ jupyterlab-server==2.25.4 \
     # via
     #   jupyterlab
     #   notebook
+jupytext==1.16.1 \
+    --hash=sha256:68c7b68685e870e80e60fda8286fbd6269e9c74dc1df4316df6fe46eabc94c99 \
+    --hash=sha256:796ec4f68ada663569e5d38d4ef03738a01284bfe21c943c485bc36433898bd0
+markdown-it-py==3.0.0 \
+    --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \
+    --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb
+    # via
+    #   jupytext
+    #   mdit-py-plugins
 markupsafe==2.1.5 \
     --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \
     --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \
@@ -562,6 +571,14 @@ matplotlib-inline==0.1.6 \
     # via
     #   ipykernel
     #   ipython
+mdit-py-plugins==0.4.0 \
+    --hash=sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9 \
+    --hash=sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b
+    # via jupytext
+mdurl==0.1.2 \
+    --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \
+    --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba
+    # via markdown-it-py
 mistune==3.0.2 \
     --hash=sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205 \
     --hash=sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8
@@ -579,6 +596,7 @@ nbformat==5.10.3 \
     --hash=sha256:d9476ca28676799af85385f409b49d95e199951477a159a576ef2a675151e5e8
     # via
     #   jupyter-server
+    #   jupytext
     #   nbclient
     #   nbconvert
 nest-asyncio==1.6.0 \
@@ -646,6 +664,7 @@ packaging==24.0 \
     #   jupyter-server
     #   jupyterlab
     #   jupyterlab-server
+    #   jupytext
     #   nbconvert
 pandas==2.2.1 \
     --hash=sha256:04f6ec3baec203c13e3f8b139fb0f9f86cd8c0b94603ae3ae8ce9a422e9f5bee \
@@ -806,7 +825,9 @@ pyyaml==6.0.1 \
     --hash=sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585 \
     --hash=sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d \
     --hash=sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f
-    # via jupyter-events
+    # via
+    #   jupyter-events
+    #   jupytext
 pyzmq==25.1.2 \
     --hash=sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565 \
     --hash=sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b \
@@ -1144,6 +1165,10 @@ tinycss2==1.2.1 \
     --hash=sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847 \
     --hash=sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627
     # via nbconvert
+toml==0.10.2 \
+    --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \
+    --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f
+    # via jupytext
 tornado==6.4 \
     --hash=sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0 \
     --hash=sha256:10aeaa8006333433da48dec9fe417877f8bcc21f48dda8d661ae79da357b2a63 \
diff --git a/requirements.txt b/requirements.txt
index f8cac43..a60c50d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ notebook
 jupyterlab
 duckdb
 duckdb_engine
+jupytext