Skip to content

Commit

Permalink
Merge pull request #546 from rcpch/mbarton/store-csv-files-in-db
Browse files Browse the repository at this point in the history
Store CSV uploads in the database
  • Loading branch information
mbarton authored Feb 4, 2025
2 parents f8d2d8a + e9ef37b commit 60bc45b
Show file tree
Hide file tree
Showing 13 changed files with 138 additions and 123 deletions.
4 changes: 0 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ WORKDIR /app/
# (Excludes any files/dirs matched by patterns in .dockerignore)
COPY . /app/

# Ensure the media directory exists - csv files are stored here
RUN mkdir -p /media/submissions/csv/


# Install Tailwind CSS and DaisyUI
RUN npm install

Expand Down
1 change: 0 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ services:
- postgis
volumes:
- .:/app/
- ./media/submissions/csv/:/app/media/submissions/csv/
command: s/start-dev
restart: always

Expand Down
29 changes: 18 additions & 11 deletions project/npda/general_functions/csv/csv_download.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
import json

from django.apps import apps
from django.http import HttpResponse
from django.shortcuts import get_object_or_404

def download_file(file_path, file_name):
    """
    Serve a file from disk as a CSV attachment.

    Reads the whole file at `file_path` in binary mode and returns it in an
    HttpResponse whose Content-Disposition names the download `file_name`.
    """
    with open(file_path, "rb") as source:
        payload = source.read()

    reply = HttpResponse(payload, content_type="text/csv")
    reply["Content-Disposition"] = f'attachment; filename="{file_name}"'
    return reply
from ..write_errors_to_xlsx import write_errors_to_xlsx

def download_csv(request, submission_id):
    """
    Download the CSV file that was uploaded for a submission.

    The CSV bytes are read from the Submission row (`csv_file`) rather than
    from disk, and the download is named after the original upload
    (`csv_file_name`).

    Args:
        request: the incoming HttpRequest (unused).
        submission_id: primary key of the Submission to download.

    Returns:
        HttpResponse with the CSV bytes as a `text/csv` attachment.

    Raises:
        Http404: if the submission does not exist or has no CSV stored.
    """
    # Local imports keep this block self-contained.
    from django.http import Http404
    from django.utils.http import content_disposition_header

    Submission = apps.get_model(app_label="npda", model_name="Submission")
    submission = get_object_or_404(Submission, id=submission_id)

    # csv_file is nullable; HttpResponse(None) would otherwise serve the
    # literal text "None" as the file body.
    if not submission.csv_file:
        raise Http404("No CSV file is stored for this submission.")

    # csv_file_name is nullable too, so fall back to a generated name.
    file_name = submission.csv_file_name or f"submission_{submission_id}.csv"

    response = HttpResponse(submission.csv_file, content_type="text/csv")
    # The name comes from the uploader, so let Django quote/encode it.
    response["Content-Disposition"] = content_disposition_header(True, file_name)
    return response

def download_xlsx(request, submission_id):
    """
    Download the data quality report for a submission as an .xlsx workbook.

    The workbook is generated on demand from the CSV bytes and the errors
    stored on the Submission row; nothing is read from or written to disk.

    Args:
        request: the incoming HttpRequest (unused).
        submission_id: primary key of the Submission to report on.

    Returns:
        HttpResponse with the workbook bytes as an attachment named
        `<uploaded name without extension>_data_quality_report.xlsx`.

    Raises:
        Http404: if the submission does not exist or has no CSV stored.
    """
    # Local imports keep this block self-contained.
    import os

    from django.http import Http404
    from django.utils.http import content_disposition_header

    Submission = apps.get_model(app_label="npda", model_name="Submission")
    submission = get_object_or_404(Submission, id=submission_id)

    # Without the original CSV there is nothing to build the report from.
    if not submission.csv_file:
        raise Http404("No CSV file is stored for this submission.")

    # csv_file_name is nullable; splitext also keeps names that have no
    # extension intact instead of reducing them to an empty string.
    csv_file_name = submission.csv_file_name or f"submission_{submission_id}.csv"
    filename_without_extension = os.path.splitext(csv_file_name)[0]
    xlsx_file_name = f"{filename_without_extension}_data_quality_report.xlsx"

    errors = json.loads(submission.errors) if submission.errors else {}

    xlsx_file = write_errors_to_xlsx(errors, submission.csv_file)

    # Serve with the spreadsheet MIME type, not text/csv.
    response = HttpResponse(
        xlsx_file,
        content_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
    # The name derives from the uploader's file name, so let Django quote/encode it.
    response["Content-Disposition"] = content_disposition_header(True, xlsx_file_name)
    return response
23 changes: 5 additions & 18 deletions project/npda/general_functions/csv/csv_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import httpx

# RCPCH imports
from project.npda.general_functions.write_errors_to_xlsx import write_errors_to_xlsx
from project.constants import CSV_HEADINGS

# Logging setup
Expand All @@ -29,7 +28,7 @@
from project.npda.forms.external_visit_validators import validate_visit_async


async def csv_upload(user, dataframe, csv_file, pdu_pz_code, audit_year):
async def csv_upload(user, dataframe, csv_file_name, csv_file_bytes, pdu_pz_code, audit_year):
"""
Processes standardised NPDA csv file and persists results in NPDA tables
Returns an empty dict if successful, otherwise ValidationErrors indexed by the row they occurred at
Expand Down Expand Up @@ -191,18 +190,10 @@ def record_errors_from_form(errors_to_return, row_index, form):
submission_date=timezone.now(),
submission_by=user, # user is the user who is logged in. Passed in as a parameter
submission_active=True,
csv_file=csv_file_bytes,
csv_file_name=csv_file_name
)

if csv_file:
# save the csv file with a custom name
new_filename = (
f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv"
)

# save=False so it doesn't try to save the parent, which would cause an error in an async context
# we save immediately after this anyway
new_submission.csv_file.save(new_filename, csv_file, save=False)

await new_submission.asave()

except Exception as e:
Expand Down Expand Up @@ -283,7 +274,7 @@ def record_errors_from_form(errors_to_return, row_index, form):

await new_submission.patients.aadd(patient)
except Exception as error:
logger.exception(f"Error saving patient for {pdu_pz_code} from {csv_file}[{patient_row_index}]: {error}")
logger.exception(f"Error saving patient for {pdu_pz_code} from {csv_file_name}[{patient_row_index}]: {error}")

# We don't know what field caused the error so add to __all__
errors_to_return[patient_row_index]["__all__"].append(str(error))
Expand All @@ -298,12 +289,8 @@ def record_errors_from_form(errors_to_return, row_index, form):

await sync_to_async(lambda: visit_form.save())()
except Exception as error:
logger.exception(f"Error saving visit for {pdu_pz_code} from {csv_file}[{visit_row_index}]: {error}")
logger.exception(f"Error saving visit for {pdu_pz_code} from {csv_file_name}[{visit_row_index}]: {error}")
errors_to_return[visit_row_index]["__all__"].append(str(error))

# Only create xlsx file if the csv file was created.
if new_submission.csv_file:
_ = write_errors_to_xlsx(errors_to_return, new_submission)

# Store the errors to report back to the user in the Data Quality Report
if errors_to_return:
Expand Down
2 changes: 1 addition & 1 deletion project/npda/general_functions/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_submission_actions(pz_code, audit_year):
can_upload_csv = True

if submission:
if submission.csv_file and submission.csv_file.name:
if submission.csv_file:
can_upload_csv = True
can_complete_questionnaire = False
else:
Expand Down
22 changes: 11 additions & 11 deletions project/npda/general_functions/write_errors_to_xlsx.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# import types
from collections import defaultdict
from typing import Any, Dict, List, Union
import io

from openpyxl.worksheet.worksheet import Worksheet

# import models
from ..models.submission import Submission

# import functions
from project.npda.general_functions.csv import csv_parse
from project.npda.general_functions.csv.csv_parse import csv_parse

# import third-party libraries
import pandas as pd
Expand All @@ -22,19 +23,20 @@


def write_errors_to_xlsx(
errors: defaultdict[Any, defaultdict[Any, list]], new_submission: Submission
) -> bool:
errors: dict[str, dict[str, list[str]]], original_csv_file_bytes: bytes
) -> bytes:
"""
Write errors to an Excel file. This .xlsx file can later be downloaded by the user to highlight invalid cells when attempting to upload CSV data.
Write errors to an Excel file. Highlight invalid cells in the source CSV.
Args:
errors (defaultdict[Any, defaultdict[Any, list]]): A dictionary containing errors grouped by row index and field.
errors: A nested dictionary containing errors grouped by row index, then field.
"""
xlsx_file: str = new_submission.csv_file.path.replace(".csv", ".xlsx")

xlsx_file = io.BytesIO()

# Get original data
df = csv_parse(new_submission.csv_file).df
df = csv_parse(io.BytesIO(initial_bytes=original_csv_file_bytes)).df
# Write an xlsx of the original data.
df.to_excel(xlsx_file, sheet_name="Uploaded data (raw)", index=False)

Expand Down Expand Up @@ -89,9 +91,7 @@ def write_errors_to_xlsx(
# Save the styled sheet.
wb.save(xlsx_file)

# Return True/False based on successful .xlsx creation.
print("Running write_errors_to_xlsx")
return True
return xlsx_file.getvalue()


def find_column_index_by_name(column_name: str, ws: Worksheet) -> int | None:
Expand All @@ -106,7 +106,7 @@ def find_column_index_by_name(column_name: str, ws: Worksheet) -> int | None:


def flatten_errors(
errors: defaultdict[int, defaultdict[Any, list]],
errors: dict[str, dict[str, list[str]]],
uploaded_nhs_numbers: "pd.Series[str]",
) -> "List[Dict[str, Union[int, str]]]":
"""
Expand Down
19 changes: 2 additions & 17 deletions project/npda/management/commands/seed_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
python manage.py seed_submission \
--pts=50 \
--visits="CDCD DHPC ACDC CDCD" \
--hb_target=T
--hb_target=T \
--user_pk=1 \
--submission_date="2024-10-18" \
Expand Down Expand Up @@ -56,11 +56,6 @@
--submission_date (str, optional):
The submission date in YYYY-MM-DD format. Defaults to today. This
date is used to set the audit period's start and end dates.
Notes:
- Submission requires an associated `csv_file`. A dummy value is set to
project/npda/dummy_sheets/dummy_sheet.csv.
"""

from datetime import datetime
Expand Down Expand Up @@ -227,22 +222,12 @@ def handle(self, *args, **options):
visit_kwargs={"is_valid": True},
)



# Need a mock csv
with open("project/npda/dummy_sheets/dummy_sheet.csv", "rb") as f:
mock_csv = SimpleUploadedFile(
name="dummy_sheet.csv",
content=f.read(),
content_type="text/csv",
)
new_submission = Submission.objects.create(
paediatric_diabetes_unit=primary_pdu_for_user,
audit_year=audit_start_date.year,
submission_date=submission_date,
submission_by=submission_by,
submission_active=True,
csv_file=mock_csv,
submission_active=True
)

# Add patients to submission
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 5.1.5 on 2025-01-31 17:17

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add `Submission.csv_file_name` and alter `Submission.csv_file` to a BinaryField."""

    # Applied after the preceding npda migration.
    dependencies = [
        ("npda", "0020_patient_location_bng_patient_location_wgs_and_more"),
    ]

    operations = [
        # New nullable column holding the name of the uploaded CSV.
        migrations.AddField(
            model_name="submission",
            name="csv_file_name",
            field=models.CharField(
                help_text="Name of the uploaded CSV file",
                null=True,
                verbose_name="CSV file name",
            ),
        ),
        # csv_file becomes a nullable binary column.
        migrations.AlterField(
            model_name="submission",
            name="csv_file",
            field=models.BinaryField(
                help_text="CSV file containing the audit data for this submission",
                null=True,
            ),
        ),
    ]
18 changes: 10 additions & 8 deletions project/npda/models/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,17 @@ class Submission(models.Model):
to="npda.NPDAUser",
)

csv_file = models.FileField(
upload_to=f"submissions/csv/",
csv_file = models.BinaryField(
help_text="CSV file containing the audit data for this submission",
null=True, # submissions that are not active will have their csv file deleted
)

csv_file_name = models.CharField(
"CSV file name",
help_text="Name of the uploaded CSV file",
null=True,
)

errors = models.JSONField(
"Errors",
help_text="Errors that have been found in the uploaded CSV file",
Expand Down Expand Up @@ -72,11 +78,7 @@ def delete(self, *args, **kwargs):

def save(self, *args, **kwargs):
    """
    Save the submission, dropping the stored CSV bytes if it is inactive.

    `csv_file` is a BinaryField, so clearing it is a plain assignment;
    there is no file object on which to call `.delete()`.
    `csv_file_name` is deliberately left untouched so the name of the
    upload is kept for our records.
    """
    # Explicit comparison kept so that an unset (None) flag is not
    # treated as inactive.
    if self.submission_active == False:  # noqa: E712
        self.csv_file = None

    super().save(*args, **kwargs)
Loading

0 comments on commit 60bc45b

Please sign in to comment.