Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add support for tulu.la event aggregator #64

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ You can get the latest events using:
$ conrad refresh
</pre>

You can also fetch the latest events (more than 800 of them) from the event aggregator https://tulu.la:

<pre>
$ conrad refresh --source=tulula
</pre>

### Continuous updates (upcoming)

The event list is maintained in `data/events.json`. This list is continuously updated using the available `scrapers`.
Expand Down
58 changes: 50 additions & 8 deletions conrad/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
import click
import requests
import sqlalchemy
from sqlalchemy.exc import IntegrityError
from colorama import Fore, Style
from cli_helpers import tabular_output

from . import __version__, CONRAD_HOME
from .db import engine, Session
from .models import Base, Event, Reminder
from .utils import initialize_database, validate
from .tulula import Tulula


def set_default_pager():
Expand All @@ -31,6 +33,30 @@ def get_events():
f.write(json.dumps(response.json()))


def get_tulula_events():
    """Fetch events from https://tulu.la and convert them to event dicts.

    Each ``TululaEvent`` returned by ``Tulula().get_new_events()`` is mapped
    to a dict using the key names that ``refresh_database`` reads.
    """
    converted = []
    for item in Tulula().get_new_events():
        record = {
            "name": item.name,
            "url": item.url,
            "city": item.city,
            "state": item.state,
            "country": item.country,
            "cfp_open": item.cfp_is_active,
            "cfp_start_date": item.cfp_date_start,
            "cfp_end_date": item.cfp_date_end,
            "start_date": item.date_start,
            "end_date": item.date_end,
            # tags arrive as a list; serialize them to a JSON string to stay
            # compatible with the format used in events.json
            "tags": json.dumps(item.tags),
            "kind": item.kind,
            "source": item.source,
        }
        converted.append(record)
    return converted


def refresh_database(events):
session = Session()
for event in events:
Expand All @@ -45,16 +71,25 @@ def refresh_database(events):
state=event["state"],
country=event["country"],
cfp_open=event["cfp_open"],
cfp_start_date=dt.datetime.strptime(event["cfp_start_date"], "%Y-%m-%d"),
cfp_end_date=dt.datetime.strptime(event["cfp_end_date"], "%Y-%m-%d"),
start_date=dt.datetime.strptime(event["start_date"], "%Y-%m-%d"),
end_date=dt.datetime.strptime(event["end_date"], "%Y-%m-%d"),
cfp_start_date=dt.datetime.strptime(event["cfp_start_date"], "%Y-%m-%d")
if event["cfp_start_date"] else None,
cfp_end_date=dt.datetime.strptime(event["cfp_end_date"], "%Y-%m-%d")
if event["cfp_end_date"] else None,
start_date=dt.datetime.strptime(event["start_date"], "%Y-%m-%d")
if event["start_date"] else None,
end_date=dt.datetime.strptime(event["end_date"], "%Y-%m-%d")
if event["end_date"] else None,
source=event["source"],
tags=event["tags"],
kind=event["kind"],
)
session.add(e)
session.commit()
try:
session.commit()
except IntegrityError:
# ignore duplicate error
session.rollback()

session.close()


Expand All @@ -67,21 +102,28 @@ def cli(ctx, *args, **kwargs):


@cli.command("refresh", short_help="Refresh event database.")
@click.option('--source', default="conrad",
type=click.Choice(['conrad', 'tulula'], case_sensitive=False))
@click.confirmation_option(prompt="Would you like conrad to look for new events?")
@click.pass_context
def _refresh(ctx, *args, **kwargs):
if not os.path.exists(CONRAD_HOME):
os.makedirs(CONRAD_HOME)

get_events()
if not os.path.exists(os.path.join(CONRAD_HOME, "conrad.db")):
initialize_database()
else:
Event.__table__.drop(engine)
Base.metadata.tables["event"].create(bind=engine)

with open(os.path.join(CONRAD_HOME, "events.json"), "r") as f:
events = json.load(f)
events = []
if kwargs["source"] == "conrad":
get_events()
with open(os.path.join(CONRAD_HOME, "events.json"), "r") as f:
events = json.load(f)
elif kwargs["source"] == "tulula":
events = get_tulula_events()

refresh_database(events)

# TODO: print("10 new events found!")
Expand Down
184 changes: 184 additions & 0 deletions conrad/tulula.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import requests
import datetime
import base64
from collections import namedtuple
import logging
import time

# User-Agent header value sent with every API request.
# this agent is required to allow proper rate limiting.
TULULA_CONRAD_USER_AGENT = "conrad.api.v1"
# Endpoint that the GraphQL queries are POSTed to.
TULULA_API_ENDPOINT = "https://tulu.la/api/public"
# rate limited by server, 5 requests per second
# (hence a pause of 0.2 seconds between consecutive page requests)
TULULA_TIME_BETWEEN_REQUESTS = 0.2
# Upper bound on the number of pages fetched in a single run.
TULULA_MAX_PAGES = 100

# Immutable record describing a single event as returned by tulu.la.
# Dates and the CFP fields are stored exactly as received from the API.
TululaEvent = namedtuple(
    "TululaEvent",
    "name date_start date_end "
    "cfp_date_start cfp_date_end cfp_is_active "
    "country state city "
    "tags source url kind",
)

# Emit bare messages (no level/name prefix) at INFO and above.
# NOTE(review): this configures the root logger as a side effect of importing
# this module — consider moving it to the CLI entry point.
logging.basicConfig(format="%(message)s", level=logging.INFO)


class Tulula:
    """Small client for the public tulu.la GraphQL API.

    Fetches upcoming events page by page and converts each approved event
    into a ``TululaEvent``.
    """

    # Seconds to wait for the server on a single request. Without a timeout
    # ``requests`` can block indefinitely on a stalled connection.
    request_timeout = 30

    def __init__(
        self, user_agent=TULULA_CONRAD_USER_AGENT, api_endpoint=TULULA_API_ENDPOINT
    ):
        """Store the User-Agent header value and the API endpoint URL."""
        self.user_agent = user_agent
        self.api_endpoint = api_endpoint

    def get_new_events(self):
        """Return all events starting today or later as ``TululaEvent`` tuples.

        Pages are requested one after another (cursor pagination) until the
        server reports no further page, ``TULULA_MAX_PAGES`` pages have been
        read, or a page could not be retrieved. Events collected before a
        failure are still returned.
        """
        now = datetime.date.today()
        has_next_page = True
        after = None
        i = 0
        events = []
        while has_next_page and i < TULULA_MAX_PAGES:
            logging.info("Get %d page", i + 1)
            data = self._get_page_data(now, after=after)
            # A failed request yields None; a GraphQL error response may
            # carry a null/missing "events" entry. Stop in both cases.
            events_page = data.get("events") if data else None
            if not events_page:
                break
            page_info = events_page["pageInfo"]
            has_next_page, after = page_info["hasNextPage"], page_info["endCursor"]
            events.extend(self._parse_edges(events_page["edges"]))
            i += 1
            if has_next_page:
                # throttle only when another request will actually follow
                time.sleep(TULULA_TIME_BETWEEN_REQUESTS)
        logging.info("%d events has been fetched from tulu.la", len(events))
        return events

    def _get_page_data(self, date_from, after=None):
        """Request one page of events and return the response's ``data`` part.

        :param date_from: ``datetime.date``; only events from this date on
            are requested.
        :param after: cursor of the previous page, or ``None`` for the first
            page.
        :return: the ``data`` member of the GraphQL response, or ``None``
            when the HTTP status is unexpected, the body is not JSON, or the
            response has no ``data`` member.
        :raises requests.exceptions.RequestException: on connection problems
            or when the request exceeds ``request_timeout``.
        """
        operation = {
            "operationName": "QueryEventsList",
            "query": GQL_EVENT_QUERY,
            "variables": {
                "first": 25,
                "after": after,
                "filter": {"dateFrom": date_from.strftime("%Y-%m-%d")},
                "order": "ASC",
                "sort": "DATE_START",
                "searchView": "EVENTS_GRID",
            },
        }
        resp = requests.post(
            self.api_endpoint,
            json=operation,
            headers={
                "User-Agent": self.user_agent,
                "Accept-Language": "en-US,en;q=0.9",
            },
            timeout=self.request_timeout,
        )
        # 422 is let through so that the GraphQL errors in its body get logged
        if resp.status_code not in [200, 422]:
            logging.error("Something went wrong. status_code %s", resp.status_code)
            return None

        try:
            response_data = resp.json()
        except ValueError:
            logging.error("Something went wrong. Response body is not valid JSON")
            return None

        if "errors" in response_data:
            logging.error("%s", response_data)
            for err in response_data["errors"]:
                logging.error("Graphql error: %s", err["message"])

        # an error response may omit "data" entirely; report that as None
        return response_data.get("data")

    @staticmethod
    def _parse_edges(edges):
        """Convert GraphQL event edges into a list of ``TululaEvent`` tuples.

        Events whose ``dateIsApproved`` flag is falsy are skipped. Location
        fields are ``None`` when the event has no venue.
        """
        events = []
        for event_node in edges or []:
            node = event_node["node"]
            if not node["dateIsApproved"]:
                # an event is in maintenance mode
                continue
            # venue may be null; fall back to an empty mapping so that the
            # location fields become None instead of a falsy placeholder
            venue = node["venue"] or {}
            events.append(
                TululaEvent(
                    name=node["name"],
                    date_start=node["dateStart"],
                    date_end=node["dateEnd"],
                    cfp_date_start=node["cfpDateStart"],
                    cfp_date_end=node["cfpDateEnd"],
                    cfp_is_active=node["cfpIsActive"],
                    country=venue.get("country"),
                    state=venue.get("state"),
                    city=venue.get("city"),
                    tags=node["tags"],
                    source="https://tulu.la",
                    url=make_url(node["slug"], node["id"]),
                    kind="conference",
                )
            )
        return events


def base36encode(number):
    """
    Encode an integer as a lowercase base36 string.

    Based on https://github.com/tonyseek/python-base36/blob/master/base36.py
    :param number: integer to encode; negative values get a leading "-"
    :return: base36 encoded string
    :raises TypeError: if ``number`` is not an integer
    """
    if not isinstance(number, int):
        raise TypeError("number must be an integer")

    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    radix = len(digits)

    negative = number < 0
    remaining = -number if negative else number

    # collect digits least-significant first, then reverse once at the end
    collected = []
    while remaining:
        remaining, index = divmod(remaining, radix)
        collected.append(digits[index])

    if not collected:
        # zero has no digits from the loop above
        return digits[0]

    encoded = "".join(reversed(collected))
    return "-" + encoded if negative else encoded

def make_url(slug, event_id):
    """Build the public tulu.la URL of an event.

    :param slug: URL slug of the event
    :param event_id: base64 encoded identifier; the decoded value is split
        on ":" and its second part is read as the numeric id
    :return: event URL ending in the slug and the base36 id, with the id
        left-padded with zeros to six characters
    """
    # base64("event:id") > base36(id)
    raw = base64.b64decode(event_id)
    numeric_id = int(raw.split(b":")[1])
    encoded_id = base36encode(numeric_id)
    template = "https://tulu.la/events/{slug}-{id:0>6}"
    return template.format(slug=slug, id=encoded_id)


# GraphQL query sent by Tulula._get_page_data. It requests one page of events
# (range: first/after) with the node fields read by Tulula._parse_edges, plus
# pageInfo (endCursor, hasNextPage), which drives the cursor pagination.
GQL_EVENT_QUERY = """
query QueryEventsList($first: Int, $after: String, $filter: EventFilter,
$sort: EventSortField, $order: SortOrder,
$searchView: EventSearch) {
events: events(range: {first: $first, after: $after},
filter: $filter, sortField: $sort,
sortOrder: $order, searchView: $searchView) {
edges {
node {
id
name
slug
dateStart
dateEnd
dateIsApproved
cfpDateStart
cfpDateEnd
cfpIsActive
venue {
country
state
city
}
tags
}
}
pageInfo {
endCursor
hasNextPage
}
}
}
"""