-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New script to export data for school's newsletter #146
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env python3 | ||
"""Export the data needed for the school to send its newsletter to the subscribers | ||
|
||
Requires: | ||
pip install PyMySQL | ||
platal.conf file in the parent folder (shared with ../export_platal_to_json.py) | ||
""" | ||
import configparser | ||
import csv | ||
import datetime | ||
import os.path | ||
import re | ||
|
||
import pymysql | ||
|
||
|
||
# Load platal.conf and connect | ||
conf = configparser.SafeConfigParser() | ||
conf.read(os.path.join(os.path.dirname(__file__), '..', 'platal.conf')) | ||
db = pymysql.connect( | ||
host=conf['Core']['dbhost'].strip('"'), | ||
user=conf['Core']['dbuser'].strip('"'), | ||
password=conf['Core']['dbpwd'].strip('"'), | ||
db=conf['Core']['dbdb'].strip('"'), | ||
charset='utf8mb4', | ||
) | ||
|
||
|
||
# Check that the school newsletter has the expected ID | ||
SCHOOL_NL_ID = 2 | ||
with db.cursor() as cursor: | ||
sql = """ | ||
SELECT n.name, g.diminutif | ||
FROM newsletters AS n | ||
INNER JOIN groups AS g ON (g.id = n.group_id) | ||
WHERE n.id = %s | ||
""" | ||
cursor.execute(sql, SCHOOL_NL_ID) | ||
rows = cursor.fetchall() | ||
if rows != (("Lettre de l'École polytechnique", "Ecole"), ): | ||
raise RuntimeError("The school newsletter does not have ID {}: {}".format(SCHOOL_NL_ID, rows)) | ||
|
||
|
||
# Create a CSV file | ||
csv_name = datetime.datetime.now().strftime('%Y-%m-%d_export_dixit.csv') | ||
with open(csv_name, 'w', newline='', encoding='utf-8') as fout: | ||
csv_stream = csv.writer(fout, delimiter=';', quotechar='"', escapechar='\\', quoting=csv.QUOTE_MINIMAL) | ||
csv_stream.writerow(("Prénom", "Nom", "Promotion", "Adresse")) | ||
|
||
# Export subscribers | ||
with db.cursor() as cursor: | ||
sql = """ | ||
SELECT a.firstname, a.lastname, pd.promo, CONCAT(s.email, '_ecole@', d.name) AS email | ||
FROM newsletter_ins AS ni | ||
LEFT JOIN accounts AS a ON (a.uid = ni.uid) | ||
LEFT JOIN account_profiles AS ap ON (ap.uid = a.uid AND FIND_IN_SET('owner', ap.perms)) | ||
LEFT JOIN profiles AS p ON (p.pid = ap.pid) | ||
LEFT JOIN profile_display AS pd ON (pd.pid = p.pid) | ||
LEFT JOIN email_source_account AS s ON (s.uid = a.uid AND s.type = 'forlife') | ||
LEFT JOIN email_virtual_domains AS m ON (s.domain = m.id) | ||
LEFT JOIN email_virtual_domains AS d ON (d.aliasing = m.id) | ||
WHERE ni.nlid = %s | ||
AND p.deathdate IS NULL | ||
AND pd.promo IS NOT NULL | ||
AND d.name = "alumni.polytechnique.org" | ||
GROUP BY a.uid | ||
ORDER BY pd.promo, pd.sort_name, a.uid | ||
""" | ||
cursor.execute(sql, SCHOOL_NL_ID) | ||
for row in cursor: | ||
# Sanity checks before adding a row, in order to make sure we do not transmit invalid data to the school | ||
firstname, lastname, promo, email = row | ||
|
||
name_charset = r"[^'.a-zA-ZÁÇÉÖàáâãäåçèéêëíîïñóôöúüÿ -]" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why this set of valid chars? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because it is the charset of all the names currently in the database. If you know a reference set of characters that are acceptable in names, it can be changed to it. |
||
invalid_characters = set(re.findall(name_charset, firstname)) | ||
if invalid_characters: | ||
print("Warning: invalid characters in firstname %r: %r" % (firstname, invalid_characters)) | ||
|
||
invalid_characters = set(re.findall(name_charset, lastname)) | ||
if invalid_characters: | ||
print("Warning: invalid characters in lastname %r: %r" % (lastname, invalid_characters)) | ||
|
||
invalid_characters = set(re.findall(r'[^0-9BDEGMX ]', promo.replace('D (en cours)', ''))) | ||
if invalid_characters: | ||
print("Warning: invalid characters in promo %r: %r" % (promo, invalid_characters)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What would those be? Shouldn't we crash? |
||
|
||
if not email.endswith('[email protected]'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should crash here: this case is impossible from the query construction. |
||
print("Warning: invalid email suffix: %r" % email) | ||
else: | ||
invalid_characters = set(re.findall(r'[^0-9a-z.-]', email[:-len('[email protected]')])) | ||
if invalid_characters: | ||
print("Warning: invalid characters in email %r: %r" % (email, invalid_characters)) | ||
else: | ||
if not re.match(r'^([a-z-]+\.){2,3}[a-z0-9]+_ecole@alumni\.polytechnique\.org$', email): | ||
print("Warning: unexpect format of email address %r" % email) | ||
|
||
csv_stream.writerow(row) | ||
|
||
print("Exported data in {}".format(csv_name)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Use
os.path.pardir
instead of '..'