Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New script to export data for school's newsletter #146

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions scripts/school_and_ax/export_school_newsletter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""Export the data needed for the school to send its newsletter to the subscribers

Requires:
pip install PyMySQL
platal.conf file in the parent folder (shared with ../export_platal_to_json.py)
"""
import configparser
import csv
import datetime
import os.path
import re

import pymysql


# Load platal.conf (shared with ../export_platal_to_json.py) from the parent folder.
# ConfigParser is used instead of SafeConfigParser, which was a deprecated
# alias of it since Python 3.2 and has been removed in Python 3.12.
conf = configparser.ConfigParser()
conf_path = os.path.join(os.path.dirname(__file__), os.path.pardir, 'platal.conf')
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it parsed: fail here rather than later with KeyError: 'Core'.
if not conf.read(conf_path):
    raise RuntimeError("Unable to read configuration file {}".format(conf_path))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Use os.path.pardir instead of '..'

# Connect to the database using the credentials of the [Core] section.
# Double quotes surrounding the values are stripped (presumably platal.conf
# writes them quoted -- confirm against the actual file).
core_conf = conf['Core']
db = pymysql.connect(
    host=core_conf['dbhost'].strip('"'),
    user=core_conf['dbuser'].strip('"'),
    password=core_conf['dbpwd'].strip('"'),
    # "database" is the documented keyword, "db" is only a deprecated alias
    database=core_conf['dbdb'].strip('"'),
    charset='utf8mb4',
)


# Check that the school newsletter has the expected ID: the export below
# selects subscribers by this ID only, so abort if it designates another
# newsletter.
SCHOOL_NL_ID = 2
with db.cursor() as cursor:
    # `groups` is back-quoted because GROUPS is a reserved word since MySQL 8.0.2
    sql = """
        SELECT  n.name, g.diminutif
          FROM  newsletters AS n
    INNER JOIN  `groups` AS g ON (g.id = n.group_id)
         WHERE  n.id = %s
    """
    # Query arguments are passed as a sequence, as documented by PyMySQL
    cursor.execute(sql, (SCHOOL_NL_ID,))
    rows = cursor.fetchall()

# Exactly one row with the expected name and group is accepted
if rows != (("Lettre de l'École polytechnique", "Ecole"), ):
    raise RuntimeError("The school newsletter does not have ID {}: {}".format(SCHOOL_NL_ID, rows))


# Create a CSV file named after the current date, in the current directory
csv_name = datetime.datetime.now().strftime('%Y-%m-%d_export_dixit.csv')
with open(csv_name, 'w', newline='', encoding='utf-8') as fout:
    csv_stream = csv.writer(fout, delimiter=';', quotechar='"', escapechar='\\', quoting=csv.QUOTE_MINIMAL)
    csv_stream.writerow(("Prénom", "Nom", "Promotion", "Adresse"))

    # Export subscribers: one row per account subscribed to the school
    # newsletter, restricted to profiles without a death date, with a
    # promotion and with a forlife address in alumni.polytechnique.org.
    with db.cursor() as cursor:
        # The domain literal uses single quotes like the other literals of
        # the query: double-quoted strings are rejected in ANSI_QUOTES mode.
        sql = """
            SELECT  a.firstname, a.lastname, pd.promo, CONCAT(s.email, '_ecole@', d.name) AS email
              FROM  newsletter_ins AS ni
         LEFT JOIN  accounts AS a ON (a.uid = ni.uid)
         LEFT JOIN  account_profiles AS ap ON (ap.uid = a.uid AND FIND_IN_SET('owner', ap.perms))
         LEFT JOIN  profiles AS p ON (p.pid = ap.pid)
         LEFT JOIN  profile_display AS pd ON (pd.pid = p.pid)
         LEFT JOIN  email_source_account AS s ON (s.uid = a.uid AND s.type = 'forlife')
         LEFT JOIN  email_virtual_domains AS m ON (s.domain = m.id)
         LEFT JOIN  email_virtual_domains AS d ON (d.aliasing = m.id)
             WHERE  ni.nlid = %s
                    AND p.deathdate IS NULL
                    AND pd.promo IS NOT NULL
                    AND d.name = 'alumni.polytechnique.org'
          GROUP BY  a.uid
          ORDER BY  pd.promo, pd.sort_name, a.uid
        """
        # Query arguments are passed as a sequence, as documented by PyMySQL
        cursor.execute(sql, (SCHOOL_NL_ID,))
        for row in cursor:
            # Sanity checks before adding a row, in order to make sure we do not transmit invalid data to the school
            firstname, lastname, promo, email = row

            # Negated character class: it matches every character which is NOT accepted in a name
            name_charset = r"[^'.a-zA-ZÁÇÉÖàáâãäåçèéêëíîïñóôöúüÿ -]"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this set of valid chars?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it is the charset of all the names currently in the database. If you know a reference set of characters that are acceptable in names, it can be changed to it.

# First name and last name share the same charset: warn about every character
# matched by name_charset (which matches the characters that are not accepted).
for warning_template, name in (
    ("Warning: invalid characters in firstname %r: %r", firstname),
    ("Warning: invalid characters in lastname %r: %r", lastname),
):
    unexpected = set(re.findall(name_charset, name))
    if unexpected:
        print(warning_template % (name, unexpected))

# The promotion may only hold digits, spaces and the letters B, D, E, G, M, X,
# once the literal text 'D (en cours)' has been removed from it.
promo_without_suffix = promo.replace('D (en cours)', '')
unexpected = set(re.findall(r'[^0-9BDEGMX ]', promo_without_suffix))
if unexpected:
    print("Warning: invalid characters in promo %r: %r" % (promo, unexpected))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would those be? Shouldn't we crash?


if not email.endswith('[email protected]'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should crash here: this case is impossible from the query construction.

print("Warning: invalid email suffix: %r" % email)
else:
invalid_characters = set(re.findall(r'[^0-9a-z.-]', email[:-len('[email protected]')]))
if invalid_characters:
print("Warning: invalid characters in email %r: %r" % (email, invalid_characters))
else:
if not re.match(r'^([a-z-]+\.){2,3}[a-z0-9]+_ecole@alumni\.polytechnique\.org$', email):
print("Warning: unexpect format of email address %r" % email)

csv_stream.writerow(row)

# Report the name of the CSV file which has been written
print("Exported data in {}".format(csv_name))