diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..4f72bc6
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,123 @@
+# See: https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/#the-whole-ci-cd-workflow
+name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
+
+on:
+  push:
+    tags:
+      - "*"
+
+jobs:
+  build:
+    name: Build distribution 📦
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/')  # only on tag pushes
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install pypa/build
+        run: >-
+          python3 -m
+          pip install
+          build
+          --user
+      - name: Build a binary wheel and a source tarball
+        run: python3 -m build
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v4  # v3 is deprecated; keep in step with download-artifact below
+        with:
+          name: python-package-distributions
+          path: dist/
+
+  # publish-to-testpypi:
+  #   name: Publish Python 🐍 distribution 📦 to TestPyPI
+  #   needs:
+  #     - build
+  #   runs-on: ubuntu-latest
+
+  #   environment:
+  #     name: testpypi
+  #     url: https://test.pypi.org/p/swap-anything
+
+  #   permissions:
+  #     id-token: write  # IMPORTANT: mandatory for trusted publishing
+
+  #   steps:
+  #     - name: Download all the dists
+  #       uses: actions/download-artifact@v4
+  #       with:
+  #         name: python-package-distributions
+  #         path: dist/
+  #     - name: Publish distribution 📦 to TestPyPI
+  #       uses: pypa/gh-action-pypi-publish@release/v1
+  #       with:
+  #         repository-url: https://test.pypi.org/legacy/
+
+  publish-to-pypi:
+    name: >-
+      Publish Python 🐍 distribution 📦 to PyPI
+    if: startsWith(github.ref, 'refs/tags/')  # only publish to PyPI on tag pushes
+    needs:
+      - build
+      # - publish-to-testpypi
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/swap-anything
+    permissions:
+      id-token: write  # IMPORTANT: mandatory for trusted publishing
+
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4  # must match the upload-artifact major version
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Publish distribution 📦 to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+
+  github-release:
+    name: >-
+      Sign the Python 🐍 distribution 📦 with Sigstore
+      and upload them to GitHub Release
+    needs:
+      - publish-to-pypi
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: write  # IMPORTANT: mandatory for making GitHub Releases
+      id-token: write  # IMPORTANT: mandatory for sigstore
+
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4  # must match the upload-artifact major version
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Sign the dists with Sigstore
+        uses: sigstore/gh-action-sigstore-python@v3.0.0
+        with:
+          inputs: >-
+            ./dist/*.tar.gz
+            ./dist/*.whl
+      - name: Create GitHub Release
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run: >-
+          gh release create
+          "$GITHUB_REF_NAME"
+          --repo "$GITHUB_REPOSITORY"
+          --notes ""
+      - name: Upload artifact signatures to GitHub Release
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        # Upload to GitHub Release using the `gh` CLI.
+        # `dist/` contains the built packages, and the
+        # sigstore-produced signatures and certificates.
+        run: >-
+          gh release upload
+          "$GITHUB_REF_NAME" dist/**
+          --repo "$GITHUB_REPOSITORY"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index ce92016..2e742e6 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -14,8 +14,7 @@ concurrency:
cancel-in-progress: false
jobs:
- # Single deploy job since we're just deploying
- deploy:
+ test:
runs-on: ubuntu-latest
timeout-minutes: 10
@@ -31,7 +30,7 @@ jobs:
python-version-file: pyproject.toml
- name: Install Python dependencies
- run: pip install --disable-pip-version-check -e .[test]
+ run: pip install --disable-pip-version-check -e .[all,test]
- name: Run tests
run: pytest --cov=src --cov-report=xml tests
diff --git a/README.md b/README.md
index 51b14f2..63b2e87 100644
--- a/README.md
+++ b/README.md
@@ -5,17 +5,146 @@ A mix and match (swap) library to empower swapping-based projects.
[![Docs](https://github.com/founderswap/swap-anything/actions/workflows/build_docs.yaml/badge.svg)](https://founderswap.github.io/swap-anything/)
[![Tests](https://github.com/founderswap/swap-anything/actions/workflows/test.yaml/badge.svg)](https://github.com/founderswap/swap-anything/actions/workflows/test.yaml)
[![codecov](https://codecov.io/gh/founderswap/swap-anything/graph/badge.svg?token=QF6L5Y8EPM)](https://codecov.io/gh/founderswap/swap-anything)
+[![PyPI version](https://badge.fury.io/py/swap-anything.svg)](https://badge.fury.io/py/swap-anything)
-> NOTE: `swapanything` is still in its proof-of-concept phase (some
-> of the things in readme are not implemented yet!). If you want to
+> NOTE: `swapanything` is still in its early steps. If you want to
> contribute or sponsor this project, visit
> [www.founderswap.xyz](https://www.founderswap.xyz)
## Quickstart
-Check the [developer guide](./docs/about/developer-guide.md)
+> Want to develop with us?
+> Check the [developer guide](./docs/about/developer-guide.md)
-### Using CLI (example)
+
+### Your first matching round
+
+This library allows you to match subjects (people, things, whatever) depending
+on their availability slots (calendar slots, timeframe, location,
+any combination of the abovementioned). Truly, you can use this library as
+backend for any sort of matching need.
+
+The simplest way to test this library is to use the `swapanything` python
+package to make a simple swapping exercise.
+
+```python
+from swapanything.backend import simple as backend
+from swapanything.select import select_matches
+import pandas as pd
+
+availabilities = [
+ ["KungFury", "9:00"],
+ ["KungFury", "10:00"],
+ ["KungFury", "13:00"],
+ ["KungFury", "14:00"],
+ ["Triceracop", "9:00"],
+ ["Triceracop", "11:00"],
+ ["Hackerman", "10:00"],
+ ["Hackerman", "11:00"],
+ ["Katana", "12:00"],
+ ["Barbarianna", "12:00"],
+ ["Thor", "13:00"],
+ ["Thor", "14:00"],
+ ["Thor", "15:00"],
+ ["T-Rex", "15:00"],
+ ["T-Rex", "16:00"],
+ ["Hoff 9000", "16:00"],
+]
+
+availabilities_df = pd.DataFrame(
+ availabilities, columns=["subject", "availability"]
+)
+
+be = backend.SimpleBackend(
+ availabilities=availabilities_df,
+ availabilities_column="availability",
+ availability_subject_column="subject",
+)
+
+all_possible_matches = be.get_all_matches()
+# subject availability
+# 0 (Barbarianna, Katana) (12:00,)
+# 1 (Hackerman, KungFury) (10:00,)
+# 2 (Hackerman, Triceracop) (11:00,)
+# 3 (Hoff 9000, T-Rex) (16:00,)
+# 4 (KungFury, Thor) (13:00, 14:00)
+# 5 (KungFury, Triceracop) (9:00,)
+# 6 (T-Rex, Thor) (15:00,)
+
+select_matches(all_possible_matches, backend=be)
+# subject availability
+# 0 (Barbarianna, Katana) (12:00,)
+# 1 (Hackerman, Triceracop) (11:00,)
+# 2 (Hoff 9000, T-Rex) (16:00,)
+# 3 (KungFury, Thor) (13:00, 14:00)
+
+```
+
+Imagine now that we want to provide a super high importance
+to the match `(KungFury, Triceracop)`.
+With `select_matches` you can use match scores, and the
+algorithm will try to maximize number of matches and total
+score!
+
+This way we ensure that high quality matches are selected.
+
+```python
+scores = [1, 1, 1, 1, 1, 9001, 1]
+# (KungFury, Triceracop)... it's over 9000!
+select_matches(all_possible_matches, backend=be, match_scores=scores)
+# subject availability
+# 0 (Barbarianna, Katana) (12:00,)
+# 1 (KungFury, Triceracop) (9:00,)
+# 2 (T-Rex, Thor) (15:00,)
+
+```
+
+### Advanced Backends
+
+With python, it is possible to integrate `swapanything` in your application
+or custom tool. `swapanything` comes with some pre-configured data backends
+(e.g. Airtable, Excel Spreadsheets, SQL) that you can easily use to
+kickstart your swapping-based app!
+
+#### Airtable
+
+Install airtable dependencies:
+
+```shell
+pip install swap-anything[airtable]
+```
+
+```python
+from swapanything.backend import airtable
+from swapanything.select import select_matches
+import os
+
+
+airtable_backend = airtable.AirTableBackend(
+ # subject_id is the record id of the subjects table
+ subject_features=["Interests", "Tags", "Score1", "Score2"],
+ availability_subject_column="AvailabilitiesSubjectId",
+ availabilities_column="Availabilities",
+ exclusions_subject_columns=["Subject1", "Subject2"],
+ # Tables
+ subjects_table_name="Subjects",
+ availabilities_table_name="Availabilities",
+ exclusions_table_name="Matches",
+ # Airtable credentials
+ client_id=os.environ["AIRTABLE_BASE_ID"],
+ client_secret=os.environ["AIRTABLE_API_KEY"],
+)
+
+subjects = airtable_backend.get_subjects()
+availabilities = airtable_backend.get_availabilities()
+
+all_matches = airtable_backend.get_all_matches(exclusions=True)
+selected = select_matches(all_matches, backend=airtable_backend)
+```
+
+### Using CLI (POC)
+
+> This part is in proof of concept stage. Yet to be done!
You can start swapping using spreadsheets as sources/destinations of data.
Let's prepare 3 files:
@@ -70,44 +199,3 @@ This will result in the following `output.xlsx`, containing all new matches:
| subject1 | subject2 | slot |
| :------- | :------- | :--------------- |
| sub001 | sub002 | 2023-01-01 15:30 |
-
-### Using the Python API
-
-With python, it is possible to integrate `swapanything` in your application
-or custom tool. `swapanything` comes with some pre-configured data backends
-(e.g. Airtable, Excel Spreadsheets, SQL) that you can easily use to
-kickstart your swaping-based app!
-
-```python
-from swapanything.backend import airtable as be
-from swapanything import Scorer, Selector, Swapper
-import os
-
-
-data_backend = be.AirTableBackend(
- # subject_id is the record id of the subjects table
- subject_features=["Interests", "Tags", "Score1", "Score2"],
- availability_subject_column="AvailabilitiesSubjectId",
- availabilities_column="Availabilities",
- exclusions_subject_columns=["Subject1", "Subject2"]
- # Tables
- subjects_table_name="Subjects",
- availabilities_table_name="Availabilities",
- exclusions_table_name="Matches",
- # Airtable credentials
- client_id=os.environ["AIRTABLE_BASE_ID"],
- client_secret=os.environ["AIRTABLE_API_KEY"],
-)
-
-match_scorer = Scorer(model="simple")
-selector = Selector(relevance_weight=.5, total_number_weight=.5)
-model = Swapper(scorer=match_scorer, selector=selector)
-
-exclusions = data_backend.get_exclusions()
-all_possible_matches = data_backend.get_matches(exclusions=exclusions)
-subjects = data_backend.get_subjects()
-
-match_scores = model.score(all_possible_matches, subjects)
-matches = model.select(match_scores)
-
-```
diff --git a/docs/api-reference/swapanything/backend/airtable.md b/docs/api-reference/swapanything/backend/airtable.md
new file mode 100644
index 0000000..6aa50ae
--- /dev/null
+++ b/docs/api-reference/swapanything/backend/airtable.md
@@ -0,0 +1,3 @@
+# airtable
+
+::: swapanything.backend.airtable
diff --git a/docs/api-reference/swapanything/backend/simple.md b/docs/api-reference/swapanything/backend/simple.md
new file mode 100644
index 0000000..2367feb
--- /dev/null
+++ b/docs/api-reference/swapanything/backend/simple.md
@@ -0,0 +1,3 @@
+# simple
+
+::: swapanything.backend.simple
diff --git a/docs/api-reference/swapanything/index.md b/docs/api-reference/swapanything/index.md
new file mode 100644
index 0000000..b564453
--- /dev/null
+++ b/docs/api-reference/swapanything/index.md
@@ -0,0 +1,3 @@
+# swapanything
+
+::: swapanything
diff --git a/docs/api-reference/swapanything/select.md b/docs/api-reference/swapanything/select.md
new file mode 100644
index 0000000..fa8acfb
--- /dev/null
+++ b/docs/api-reference/swapanything/select.md
@@ -0,0 +1,3 @@
+# select
+
+::: swapanything.select
diff --git a/docs/getting-started.md b/docs/getting-started.md
new file mode 100644
index 0000000..5037e99
--- /dev/null
+++ b/docs/getting-started.md
@@ -0,0 +1,127 @@
+# Getting Started
+
+
+### Your first matching round
+
+This library allows you to match subjects (people, things, whatever) depending
+on their availability slots (calendar slots, timeframe, location,
+any combination of the abovementioned). Truly, you can use this library as
+backend for any sort of matching need.
+
+The simplest way to test this library is to use the `swapanything` python
+package to make a simple swapping exercise.
+
+```python
+from swapanything.backend import simple as backend
+from swapanything.select import select_matches
+import pandas as pd
+
+availabilities = [
+ ["KungFury", "9:00"],
+ ["KungFury", "10:00"],
+ ["KungFury", "13:00"],
+ ["KungFury", "14:00"],
+ ["Triceracop", "9:00"],
+ ["Triceracop", "11:00"],
+ ["Hackerman", "10:00"],
+ ["Hackerman", "11:00"],
+ ["Katana", "12:00"],
+ ["Barbarianna", "12:00"],
+ ["Thor", "13:00"],
+ ["Thor", "14:00"],
+ ["Thor", "15:00"],
+ ["T-Rex", "15:00"],
+ ["T-Rex", "16:00"],
+ ["Hoff 9000", "16:00"],
+]
+
+availabilities_df = pd.DataFrame(
+ availabilities, columns=["subject", "availability"]
+)
+
+be = backend.SimpleBackend(
+ availabilities=availabilities_df,
+ availabilities_column="availability",
+ availability_subject_column="subject",
+)
+
+all_possible_matches = be.get_all_matches()
+# subject availability
+# 0 (Barbarianna, Katana) (12:00,)
+# 1 (Hackerman, KungFury) (10:00,)
+# 2 (Hackerman, Triceracop) (11:00,)
+# 3 (Hoff 9000, T-Rex) (16:00,)
+# 4 (KungFury, Thor) (13:00, 14:00)
+# 5 (KungFury, Triceracop) (9:00,)
+# 6 (T-Rex, Thor) (15:00,)
+
+select_matches(all_possible_matches, backend=be)
+# subject availability
+# 0 (Barbarianna, Katana) (12:00,)
+# 1 (Hackerman, Triceracop) (11:00,)
+# 2 (Hoff 9000, T-Rex) (16:00,)
+# 3 (KungFury, Thor) (13:00, 14:00)
+
+```
+
+Imagine now that we want to provide a super high importance
+to the match `(KungFury, Triceracop)`.
+With `select_matches` you can use match scores, and the
+algorithm will try to maximize number of matches and total
+score!
+
+This way we ensure that high quality matches are selected.
+
+```python
+scores = [1, 1, 1, 1, 1, 9001, 1]
+# (KungFury, Triceracop)... it's over 9000!
+select_matches(all_possible_matches, backend=be, match_scores=scores)
+# subject availability
+# 0 (Barbarianna, Katana) (12:00,)
+# 1 (KungFury, Triceracop) (9:00,)
+# 2 (T-Rex, Thor) (15:00,)
+
+```
+
+### Advanced Backends
+
+With python, it is possible to integrate `swapanything` in your application
+or custom tool. `swapanything` comes with some pre-configured data backends
+(e.g. Airtable, Excel Spreadsheets, SQL) that you can easily use to
+kickstart your swapping-based app!
+
+#### Airtable
+
+Install airtable dependencies:
+
+```shell
+pip install swap-anything[airtable]
+```
+
+```python
+from swapanything.backend import airtable
+from swapanything.select import select_matches
+import os
+
+
+airtable_backend = airtable.AirTableBackend(
+ # subject_id is the record id of the subjects table
+ subject_features=["Interests", "Tags", "Score1", "Score2"],
+ availability_subject_column="AvailabilitiesSubjectId",
+ availabilities_column="Availabilities",
+ exclusions_subject_columns=["Subject1", "Subject2"],
+ # Tables
+ subjects_table_name="Subjects",
+ availabilities_table_name="Availabilities",
+ exclusions_table_name="Matches",
+ # Airtable credentials
+ client_id=os.environ["AIRTABLE_BASE_ID"],
+ client_secret=os.environ["AIRTABLE_API_KEY"],
+)
+
+subjects = airtable_backend.get_subjects()
+availabilities = airtable_backend.get_availabilities()
+
+all_matches = airtable_backend.get_all_matches(exclusions=True)
+selected = select_matches(all_matches, backend=airtable_backend)
+```
diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md
deleted file mode 100644
index bad5562..0000000
--- a/docs/getting-started/index.md
+++ /dev/null
@@ -1 +0,0 @@
-# Getting Started
diff --git a/docs/how-to/index.md b/docs/how-to/index.md
deleted file mode 100644
index 56d3f55..0000000
--- a/docs/how-to/index.md
+++ /dev/null
@@ -1 +0,0 @@
-# How-to...
diff --git a/docs/index.md b/docs/index.md
index 67e2c1f..f0aa4c9 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,7 +6,21 @@ hide:
# Swap Anything Docs
-
+
![swap-anything-logo](/static/founderswap-logo.jpg)
-
Welcome to Swap Anything!
-
+
+# Welcome to Swap Anything!
+
+A mix and match (swap) library to empower swapping-based projects.
+
+[![Docs](https://github.com/founderswap/swap-anything/actions/workflows/build_docs.yaml/badge.svg)](https://founderswap.github.io/swap-anything/)
+[![Tests](https://github.com/founderswap/swap-anything/actions/workflows/test.yaml/badge.svg)](https://github.com/founderswap/swap-anything/actions/workflows/test.yaml)
+[![codecov](https://codecov.io/gh/founderswap/swap-anything/graph/badge.svg?token=QF6L5Y8EPM)](https://codecov.io/gh/founderswap/swap-anything)
+[![PyPI version](https://badge.fury.io/py/swap-anything.svg)](https://badge.fury.io/py/swap-anything)
+
+NOTE: `swapanything` is still in its early steps. If you want to
+contribute or sponsor this project, visit
+[www.founderswap.xyz](https://www.founderswap.xyz)
+
+
+
diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md
deleted file mode 100644
index cd3d452..0000000
--- a/docs/user-guide/index.md
+++ /dev/null
@@ -1 +0,0 @@
-# User Guide
diff --git a/environment.yml b/environment.yml
index 395c87a..8466ba8 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,4 +9,4 @@ dependencies:
- python-dotenv
- matplotlib
- pip:
- - -e .[dev,test,docs]
+ - -e .[all,dev,test,docs]
diff --git a/mkdocs.yml b/mkdocs.yml
index 1e0421a..e352abd 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -50,6 +50,7 @@ plugins:
markdown_extensions:
- def_list
+ - md_in_html
- pymdownx.superfences:
custom_fences:
- name: mermaid
@@ -68,3 +69,14 @@ markdown_extensions:
# embed code
- pymdownx.snippets
+
+nav:
+- Swap Anything: index.md
+- Getting Started: getting-started.md
+- Api Reference:
+ - swapanything:
+ - api-reference/swapanything/index.md
+ - backend:
+ - api-reference/swapanything/backend/simple.md
+ - api-reference/swapanything/backend/airtable.md
+ - select: api-reference/swapanything/select.md
diff --git a/pyproject.toml b/pyproject.toml
index cddbce0..7003ea4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,12 +2,13 @@
# Software usage is hereby granted to the customer according to the terms in LICENSE file.
[build-system]
-requires = ["setuptools>=61.0.0"]
+requires = ["setuptools>=61", "setuptools-scm>=8.0"]  # setuptools 61 is the first release that reads [project] metadata
build-backend = "setuptools.build_meta"
[project]
name = "swap-anything"
-version = "0.0.1"
+# version = ""
+dynamic = ["version"]
description = "A mix and match (swap) library to empower swapping-based projects."
authors = [
{ name = "@ggbaro", email = "46573388+ggbaro@users.noreply.github.com" },
@@ -16,7 +17,7 @@ maintainers = [
{ name = "@ggbaro", email = "46573388+ggbaro@users.noreply.github.com" },
]
readme = "README.md"
-requires-python = ">=3.11,<3.12"
+requires-python = ">=3.11,<3.13"
keywords = []
classifiers = [
"Intended Audience :: Information Technology",
@@ -40,17 +41,17 @@ license = { file = "LICENSE" }
dependencies = [
"pandas>=2,<3",
"pydantic>=2,<3",
- "requests",
"networkx>=3.1,<3.2",
- "pyairtable>=1.5.0,<1.6",
"pydantic-settings",
]
-[project.optional-dependencies]
-dev = ["black", "ruff", "pre-commit"]
-
-test = ["pytest", "pytest-cov", "Faker"]
+[tool.setuptools_scm]
+[project.optional-dependencies]
+all = ["pyairtable>=1.5.0,<1.6"]
+airtable = ["pyairtable>=1.5.0,<1.6"]
+dev = ["black", "ruff", "pre-commit", "setuptools-scm"]
+test = ["requests", "pytest", "pytest-cov", "Faker"]
docs = [
"mkdocs",
"mkdocs-material",
diff --git a/src/swapanything/backend/__init__.py b/src/swapanything/backend/__init__.py
index e69de29..e21c127 100644
--- a/src/swapanything/backend/__init__.py
+++ b/src/swapanything/backend/__init__.py
@@ -0,0 +1 @@
+from ._base import BackendType
diff --git a/src/swapanything/backend/_base.py b/src/swapanything/backend/_base.py
index ebe26a6..324acc4 100644
--- a/src/swapanything/backend/_base.py
+++ b/src/swapanything/backend/_base.py
@@ -1,10 +1,14 @@
from abc import ABC, abstractmethod
from itertools import combinations
-from typing import Annotated, Iterable
+from typing import Annotated, Iterable, TypeVar
import pandas as pd
+class BackendError(Exception):
+ pass
+
+
def _get_matching_subjects_by_slot(
availabilities: pd.DataFrame,
availabilities_column: str,
@@ -34,8 +38,8 @@ def _get_matches_from_slots(
.reset_index()
.explode(availability_subject_column)
)
- matches[availability_subject_column] = matches[availability_subject_column].apply(
- lambda x: tuple(sorted(x))
+ matches[availability_subject_column] = (
+ matches[availability_subject_column].apply(sorted).apply(tuple)
)
return matches
@@ -102,7 +106,27 @@ def get_all_matches(
exclusions_subject_columns=self.exclusions_subject_columns,
)
+ # go from:
+ # [{"avail": "A", "subj": (1, 2)},
+ # {"avail": "B", "subj": (1, 2)}]
+ # to:
+ # [{"subj": (1, 2), "avail": ("A", "B")}]
+ matches = matches.sort_values(
+ # Sort to guarantee idempotency downstream
+ [self.availability_subject_column, self.availabilities_column]
+ )
+ matches = (
+ matches.groupby(self.availability_subject_column)[
+ [self.availabilities_column]
+ ]
+ .agg(tuple)
+ .reset_index()
+ )
+
if return_matching_subjects_by_slot:
return matches, matching_subjects_by_slot
else:
return matches
+
+
+BackendType = TypeVar("BackendType", bound=BackendBase)
diff --git a/src/swapanything/backend/simple.py b/src/swapanything/backend/simple.py
new file mode 100644
index 0000000..81f73fa
--- /dev/null
+++ b/src/swapanything/backend/simple.py
@@ -0,0 +1,77 @@
+from typing import Annotated, Iterable, Optional
+
+import pandas as pd
+
+from ._base import BackendBase
+
+
+class SimpleBackend(BackendBase):
+    """Backend that serves caller-supplied, in-memory pandas DataFrames.
+
+    The frames passed to the constructor are stored and handed back as-is
+    by the ``get_*`` accessors.
+    """
+
+    subject_features: list[str]
+    availability_subject_column: str
+    availabilities_column: str
+    exclusions_subject_columns: Annotated[Iterable[str], 2]
+
+    def __init__(
+        self,
+        availabilities: pd.DataFrame,
+        availability_subject_column: str,
+        availabilities_column: str,
+        subject_features: Optional[list[str]] = None,
+        subjects: Optional[pd.DataFrame] = None,
+        exclusions: Optional[pd.DataFrame] = None,
+        exclusions_subject_columns: Optional[Annotated[Iterable[str], 2]] = None,
+    ) -> None:
+        """Store the data frames and the column names used to read them.
+
+        Args:
+            availabilities: Table with a subject column and an availability
+                column.
+            availability_subject_column: Name of the subject column in
+                ``availabilities``.
+            availabilities_column: Name of the availability column in
+                ``availabilities``.
+            subject_features: Feature column names; ``None`` means no features.
+            subjects: Subjects table. When it is not a DataFrame, the distinct
+                subjects found in ``availabilities`` are used instead.
+            exclusions: Exclusions table; stored as given (may be ``None``).
+            exclusions_subject_columns: Subject column names of
+                ``exclusions``; ``None`` means an empty list.
+        """
+        self.availabilities = availabilities
+        self.availability_subject_column = availability_subject_column
+        self.availabilities_column = availabilities_column
+        self.exclusions = exclusions
+        # Build the default per instance: a shared ``[]`` default argument
+        # would leak mutations from one backend into every other one.
+        self.exclusions_subject_columns = (
+            [] if exclusions_subject_columns is None else exclusions_subject_columns
+        )
+        # Always a list, so the attribute matches its ``list[str]`` annotation.
+        self.subject_features = subject_features or []
+
+        if isinstance(subjects, pd.DataFrame):
+            self.subjects = subjects
+        else:
+            # No subjects table: derive one from the availabilities. The
+            # original row labels are kept (no index reset).
+            self.subjects = availabilities[
+                [availability_subject_column]
+            ].drop_duplicates()
+
+    def get_subjects(self, *args, **kwargs) -> pd.DataFrame:
+        """Return the subjects table; extra arguments are ignored."""
+        return self.subjects
+
+    def get_availabilities(self, *args, **kwargs) -> pd.DataFrame:
+        """Return the availabilities table; extra arguments are ignored."""
+        return self.availabilities
+
+    def get_exclusions(self, *args, **kwargs) -> Optional[pd.DataFrame]:
+        """Return the exclusions table, or ``None`` if none was provided."""
+        return self.exclusions
diff --git a/src/swapanything/select.py b/src/swapanything/select.py
new file mode 100644
index 0000000..b2e2e42
--- /dev/null
+++ b/src/swapanything/select.py
@@ -0,0 +1,75 @@
+from typing import Iterable, Optional
+
+import networkx as nx
+import numpy as np
+import pandas as pd
+
+from .backend import BackendType
+
+
+def select_matches(
+    matches: pd.DataFrame,
+    backend: BackendType,
+    match_scores: Optional[pd.Series] = None,
+    maxcardinality: Optional[bool] = None,
+) -> pd.DataFrame:
+    """Select matches so that no subject is used in more than one of them.
+
+    The candidate pairs become the edges of a graph that is solved with
+    networkx ``max_weight_matching``.
+
+    Args:
+        matches: Candidate matches. The column named
+            ``backend.availability_subject_column`` must hold unique
+            2-tuples of subjects.
+        backend: Only its ``availability_subject_column`` is read.
+        match_scores: Optional scores, aligned with the rows of ``matches``
+            by position (any index is ignored). Without scores every match
+            weighs 1.
+        maxcardinality: Forwarded to networkx when scores are given
+            (``None`` counts as ``False``). Without scores it is always
+            ``True``.
+
+    Returns:
+        The selected rows of ``matches``, sorted by subject pair, with the
+        subject column first and a fresh index.
+
+    Raises:
+        AssertionError: If the subject pairs are not unique.
+    """
+    subject_col = backend.availability_subject_column
+    assert matches[subject_col].is_unique
+
+    if matches.empty:
+        # Nothing to select; return the same column layout as the normal path.
+        return matches.set_index(subject_col).reset_index()
+
+    # Split each pair into the two edge endpoints. Building the frame from a
+    # list avoids ``apply(pd.Series)``, which creates one Series per row.
+    edges = pd.DataFrame(matches[subject_col].tolist(), columns=["s1", "s2"])
+    if isinstance(match_scores, Iterable):
+        # np.array drops any index so the scores line up by position.
+        edges["score"] = np.array(match_scores)
+        maxcardinality = maxcardinality or False
+    else:
+        edges["score"] = 1
+        maxcardinality = True
+
+    graph = nx.from_pandas_edgelist(edges, "s1", "s2", ["score"])
+    selected_pairs = nx.algorithms.matching.max_weight_matching(
+        graph, maxcardinality=maxcardinality, weight="score"
+    )
+    # Sort each returned pair before the lookup below.
+    # NOTE(review): assumes the pairs stored in ``matches`` are sorted tuples.
+    selected_index = pd.Index(
+        {tuple(sorted(pair)) for pair in selected_pairs},
+        tupleize_cols=False,
+        name=subject_col,
+    )
+
+    return (
+        matches.set_index(subject_col)
+        .reindex(selected_index)
+        .sort_index()
+        .reset_index()
+    )
diff --git a/tests/unit_test/swapanything_test/backend/test__base.py b/tests/unit_test/swapanything_test/backend/test__base.py
index 035b247..3bd94cb 100644
--- a/tests/unit_test/swapanything_test/backend/test__base.py
+++ b/tests/unit_test/swapanything_test/backend/test__base.py
@@ -108,10 +108,10 @@ def get_subjects(self) -> None:
def get_availabilities(self) -> pd.DataFrame:
return pd.DataFrame(
[
+ ["sub3", "A"],
["sub1", "A"],
["sub2", "B"],
["sub5", "A"],
- ["sub3", "A"],
["sub4", "C"],
],
columns=[SUBJ_COL, AVAIL_COL],
@@ -127,11 +127,10 @@ def get_exclusions(self) -> pd.DataFrame:
expected_result = pd.DataFrame(
[
- ["A", ("sub1", "sub5")],
- ["A", ("sub3", "sub5")],
+ [("sub1", "sub5"), ("A",)],
+ [("sub3", "sub5"), ("A",)],
],
- columns=["avail", SUBJ_COL],
- index=[0, 0],
+ columns=[SUBJ_COL, "avail"],
)
be = TestBackend()
diff --git a/tests/unit_test/swapanything_test/backend/test_simple.py b/tests/unit_test/swapanything_test/backend/test_simple.py
new file mode 100644
index 0000000..cd95596
--- /dev/null
+++ b/tests/unit_test/swapanything_test/backend/test_simple.py
@@ -0,0 +1,58 @@
+import pandas as pd
+from swapanything.backend import simple as backend
+
+
+def test_simple_backend():
+ availabilities = pd.DataFrame(
+ [
+ {"sub": "sub1", "avail": "a1"},
+ {"sub": "sub1", "avail": "a2"},
+ {"sub": "sub2", "avail": "a1"},
+ ]
+ )
+ be = backend.SimpleBackend(
+ availabilities=availabilities,
+ availabilities_column="avail",
+ availability_subject_column="sub",
+ )
+
+ expected_subjects = pd.DataFrame([{"sub": "sub1"}, {"sub": "sub2"}], index=[0, 2])
+
+ subjects = be.get_subjects()
+ assert subjects.equals(expected_subjects)
+
+ assert be.get_availabilities().equals(availabilities)
+
+ assert not be.get_exclusions()
+
+
+def test_simple_backend_subjects():
+ availabilities = pd.DataFrame(
+ [
+ {"sub": "sub1", "avail": "a1"},
+ {"sub": "sub1", "avail": "a2"},
+ {"sub": "sub2", "avail": "a1"},
+ ]
+ )
+
+ orig_subjects = pd.DataFrame(
+ [
+ {"sub": "sub1", "feat": 1},
+ {"sub": "sub2", "feat": 2},
+ ]
+ )
+
+ be = backend.SimpleBackend(
+ availabilities=availabilities,
+ availabilities_column="avail",
+ availability_subject_column="sub",
+ subjects=orig_subjects,
+ subject_features=["feat"],
+ )
+
+ subjects = be.get_subjects()
+ assert subjects.equals(orig_subjects)
+
+ assert be.get_availabilities().equals(availabilities)
+
+ assert not be.get_exclusions()
diff --git a/tests/unit_test/swapanything_test/test_large_data.py b/tests/unit_test/swapanything_test/test_large_data.py
new file mode 100644
index 0000000..4fc5a9b
--- /dev/null
+++ b/tests/unit_test/swapanything_test/test_large_data.py
@@ -0,0 +1,210 @@
+from random import randint, seed
+
+import pandas as pd
+import pytest
+from faker import Faker
+from faker.providers import DynamicProvider
+from swapanything.backend import simple as backend
+from swapanything.select import select_matches
+
+n = 42
+samples = 1000
+
+seed(n)
+index = [f"Sub{i+1}" for i in range(samples)]
+# Create faker object
+industries_provider = DynamicProvider(
+ provider_name="industry",
+ elements=[
+ "AI/ML",
+ "AR/VR",
+ "Agritech",
+ "Altro",
+ "Blockchain",
+ "Clean Energy",
+ "Cybersecurity",
+ "Data Analytics",
+ "E-Commerce/Online Marketplaces",
+ "Edtech",
+ "Fintech",
+ "Foodtech",
+ "Gaming",
+ "Greentech",
+ "Healthtech/Medtech",
+ "IoT",
+ "ML",
+ "Media & Entertainment",
+ "Mobile Apps",
+ "No Code/ Low Code",
+ "Online Marketplaces",
+ "SaaS",
+ "Social Media",
+ "Sportstech",
+ "Wearable tech",
+ "Web Development",
+ "Web3",
+ ],
+)
+
+roles_provider = DynamicProvider(
+ provider_name="role",
+ elements=[
+ "CEO",
+ "CTO",
+ "COO",
+ "CFO",
+ "Marketing Manager",
+ "Sales Manager",
+ "Business Development Manager",
+ "Product Manager",
+ "Human Resources Manager",
+ "Software Developers/Engineers",
+ "UX/UI Designers",
+ "Customer Support Manager",
+ "Data Analyst",
+ "Digital Content Specialist",
+ "Public Relations Specialist",
+ ],
+)
+
+growth_provider = DynamicProvider(
+ provider_name="growth",
+ elements=["Pre-Seed", "Seed", "Bootstrap", "Series A", "Verso l'infinito ed oltre"],
+)
+
+language_provider = DynamicProvider(
+ provider_name="language",
+ elements=["Italian", "English", "Spanish", "Frech", "German"],
+)
+
+availabilities_provider = DynamicProvider(
+ provider_name="availability",
+ elements=[
+ "Mon 10:00",
+ "Mon 12:00",
+ "Mon 16:00",
+ "Mon 18:00",
+ "Thu 10:00",
+ "Thu 12:00",
+ "Thu 16:00",
+ "Thu 18:00",
+ "Fri 10:00",
+ "Fri 12:00",
+ "Fri 16:00",
+ "Fri 18:00",
+ ],
+)
+
+goals_provider = DynamicProvider(
+ provider_name="goals",
+ elements=[
+ "Creare partnership e sinergie con altri Founders",
+ "Fare del buon sano networking",
+ "Trovare supporto su temi specifici",
+ "Confrontarmi con founders nel mio stesso stadio di crescita",
+ "Confrontarmi con founders piΓΉ avanti di me",
+ "Dare supporto a founders piΓΉ indietro di me",
+ "Trovare un Co-Founder",
+ "Altro",
+ ],
+)
+
+index_provider = DynamicProvider(
+ provider_name="index",
+ elements=index,
+)
+
+
+def fake_ind(min, max):
+ return [fake.industry() for _ in range(randint(min, max))]
+
+
+def fake_lang(min, max):
+ return [fake.language() for _ in range(randint(min, max))]
+
+
+def fake_goal(min, max):
+ return [fake.goals() for _ in range(randint(min, max))]
+
+
+def fake_avail(min, max):
+ return [fake.availability() for _ in range(randint(min, max))]
+
+
+fake = Faker("it_IT")
+Faker.seed(n)
+fake.add_provider(industries_provider)
+fake.add_provider(roles_provider)
+fake.add_provider(availabilities_provider)
+fake.add_provider(index_provider)
+fake.add_provider(growth_provider)
+fake.add_provider(language_provider)
+fake.add_provider(goals_provider)
+
+
+@pytest.fixture(scope="session")
+def get_large_datasets() -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """Build synthetic (subjects, availabilities, exclusions) frames with Faker."""
+    name = [fake.unique.name() for _ in range(samples)]
+
+    subjects_data = {
+        "Index": index,
+        "Nome e Cognome": name,
+        "Industry": [
+            fake_ind(0, 3) for _ in range(samples)
+        ],  # 0 to 3 random industries per subject
+        "Role": [fake.role() for _ in range(samples)],
+        "Growth Stage": [fake.growth() for _ in range(samples)],
+        "Languages": [
+            fake_lang(1, 3) for _ in range(samples)
+        ],  # 1 to 3 random languages per subject
+        "Goal": [fake_goal(1, 8) for _ in range(samples)],
+    }  # 1 to 8 random goals per subject
+
+    availabilities_data = {
+        "Index": index,
+        "Nome e Cognome": name,
+        "Availabilities": [fake_avail(1, 5) for _ in range(samples)],
+    }  # 1 to 5 random availability slots per subject
+
+    exclusions_data = {
+        "SubjectA": [fake.unique.index() for _ in range(3)] + index,
+        "SubjectB": [fake.unique.index() for _ in range(3)] + index,
+    }
+
+    # One row per subject/exclusion; availabilities get one row per slot.
+    subjects = pd.DataFrame(subjects_data)
+    availabilities = pd.DataFrame(availabilities_data).explode("Availabilities")
+    exclusions = pd.DataFrame(exclusions_data)
+
+    # Put the lower-numbered subject in SubjectA. Done with a boolean mask and
+    # .loc because chained assignment (df[col][i] = ...) is unreliable in pandas.
+    a_num = exclusions["SubjectA"].str[3:].astype(int)
+    b_num = exclusions["SubjectB"].str[3:].astype(int)
+    swap = a_num > b_num
+    cols = ["SubjectA", "SubjectB"]
+    exclusions.loc[swap, cols] = exclusions.loc[swap, cols[::-1]].to_numpy()
+
+    exclusions.drop_duplicates(inplace=True)
+    return subjects, availabilities, exclusions
+
+
+def test_large_dateset(
+ get_large_datasets: tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame],
+):
+ _, availabilities, exclusions = get_large_datasets
+
+ be = backend.SimpleBackend(
+ availabilities=availabilities,
+ availabilities_column="Availabilities",
+ availability_subject_column="Index",
+ )
+
+ all_possible_matches = be.get_all_matches()
+ assert isinstance(all_possible_matches, pd.DataFrame)
+ assert all_possible_matches["Index"].is_unique
+
+ selected = select_matches(all_possible_matches, backend=be)
+ assert isinstance(selected, pd.DataFrame)
+ assert selected["Index"].is_unique
+ assert len(selected) < len(all_possible_matches)
diff --git a/tests/unit_test/swapanything_test/test_select.py b/tests/unit_test/swapanything_test/test_select.py
new file mode 100644
index 0000000..c5401a7
--- /dev/null
+++ b/tests/unit_test/swapanything_test/test_select.py
@@ -0,0 +1,118 @@
+from copy import deepcopy
+
+import pandas as pd
+import pytest
+from swapanything import select
+from swapanything.backend import _base
+
+
+@pytest.fixture
+def subject_features() -> list[str]:
+ return ["a", "b", "c"]
+
+
+@pytest.fixture
+def availability_subject_column() -> str:
+ return "subj"
+
+
+@pytest.fixture
+def availabilities_column() -> str:
+ return "avail"
+
+
+@pytest.fixture
+def exclusions_subject_columns() -> list[str]:
+ return ["es1", "es2"]
+
+
+@pytest.fixture
+def dummy_backend(
+ subject_features: list[str],
+ availability_subject_column: str,
+ availabilities_column: str,
+ exclusions_subject_columns: list[str],
+) -> _base.BackendType:
+ class TestBackend(_base.BackendBase):
+ def __init__(self) -> None:
+ self.subject_features = subject_features
+ self.availability_subject_column = availability_subject_column
+ self.availabilities_column = availabilities_column
+ self.exclusions_subject_columns = exclusions_subject_columns
+
+ def get_subjects(self) -> None:
+ raise NotImplementedError()
+
+ def get_availabilities(self) -> pd.DataFrame:
+ raise NotImplementedError()
+
+ def get_exclusions(self) -> pd.DataFrame:
+ raise NotImplementedError()
+
+ return TestBackend()
+
+
+_M = [
+ [("sub1", "sub2"), ("A",)],
+ [("sub1", "sub3"), ("B",)],
+ [("sub2", "sub3"), ("C",)],
+ [("sub4", "sub5"), ("D",)],
+ [("sub1", "sub6"), ("E", "F")],
+ [("sub6", "sub7"), ("G",)],
+ [("sub7", "sub8"), ("H",)],
+]
+
+
+@pytest.fixture
+def possible_matchings() -> list[list[tuple]]:
+ return deepcopy(_M)
+
+
+@pytest.mark.parametrize(
+ "scores,expected_result_ixs",
+ [
+ (
+ None,
+ [4, 2, 3, 6],
+ # [("sub1", "sub6"), ("E", "F")],
+ # [("sub2", "sub3"), ("C",)],
+ # [("sub4", "sub5"), ("D",)],
+ # [("sub7", "sub8"), ("H",)],
+ ),
+ (
+ # to test mismatching index
+ pd.Series([1.0, 1.0, 1.0, 1.0, 1.0, 9001.0, 1.0]),
+ [0, 3, 5]
+ # [("sub1", "sub2"), ("A",)],
+ # [("sub4", "sub5"), ("D",)],
+ # [("sub6", "sub7"), ("G",)],
+ ),
+ ([1.0, 1.0, 1.0, 1.0, 1.0, 9001.0, 1.0], [0, 3, 5]),
+ ],
+)
+def test_select_matches(
+ scores,
+ expected_result_ixs,
+ possible_matchings: list[list[tuple]],
+ dummy_backend: _base.BackendType,
+) -> None:
+ matches = pd.DataFrame(
+ possible_matchings,
+ columns=[
+ dummy_backend.availability_subject_column,
+ dummy_backend.availabilities_column,
+ ],
+ index=[0] * 7,
+ )
+
+ results = select.select_matches(
+ matches,
+ backend=dummy_backend,
+ match_scores=scores,
+ )
+
+ expected_result = matches.iloc[expected_result_ixs].reset_index(drop=True)
+ assert isinstance(results, pd.DataFrame)
+ assert results.columns[0] == dummy_backend.availability_subject_column
+ assert results.columns[1] == dummy_backend.availabilities_column
+ assert results.equals(expected_result)