Merge pull request #546 from rcpch/mbarton/store-csv-files-in-db

Store CSV uploads in the database
rcpch · Feb 4, 2025 · 60bc45b · 60bc45b
2 parents f8d2d8a + e9ef37b
commit 60bc45b
Show file tree

Hide file tree

Showing 13 changed files with 138 additions and 123 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -34,10 +34,6 @@ WORKDIR /app/
 # (Excludes any files/dirs matched by patterns in .dockerignore)
 COPY . /app/
 
-# Ensure the media directory exists - csv files are stored here
-RUN mkdir -p /media/submissions/csv/
-
-
 # Install Tailwind CSS and DaisyUI
 RUN npm install
 

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -28,7 +28,6 @@ services:
       - postgis
     volumes:
       - .:/app/
-      - ./media/submissions/csv/:/app/media/submissions/csv/
     command: s/start-dev
     restart: always
 

diff --git a/project/npda/general_functions/csv/csv_download.py b/project/npda/general_functions/csv/csv_download.py
@@ -1,23 +1,21 @@
+import json
+
 from django.apps import apps
 from django.http import HttpResponse
 from django.shortcuts import get_object_or_404
 
-def download_file(file_path, file_name):
-    with open(file_path, "rb") as f:
-        response = HttpResponse(f.read(), content_type="text/csv")
-        response["Content-Disposition"] = f'attachment; filename="{file_name}"'
-        return response
+from ..write_errors_to_xlsx import write_errors_to_xlsx
 
 def download_csv(request, submission_id):
     """
     Download a CSV file.
     """
     Submission = apps.get_model(app_label="npda", model_name="Submission")
     submission = get_object_or_404(Submission, id=submission_id)
-    file_path = submission.csv_file.path
-    file_name = submission.csv_file.name.split("/")[-1]
 
-    return download_file(file_path, file_name)
+    response = HttpResponse(submission.csv_file, content_type="text/csv")
+    response["Content-Disposition"] = f'attachment; filename="{submission.csv_file_name}"'
+    return response
 
 def download_xlsx(request, submission_id):
     """
@@ -26,7 +24,16 @@ def download_xlsx(request, submission_id):
     """
     Submission = apps.get_model(app_label="npda", model_name="Submission")
     submission = get_object_or_404(Submission, id=submission_id)
-    file_path = submission.csv_file.path.replace('.csv','.xlsx')
-    file_name = submission.csv_file.name.split("/")[-1].replace('.csv','.xlsx')
 
-    return download_file(file_path, file_name)
+    filename_without_extension = ".".join(submission.csv_file_name.split(".")[:-1])
+    xlsx_file_name = f"{filename_without_extension}_data_quality_report.xlsx"
+
+    errors = {}
+    if submission.errors:
+        errors = json.loads(submission.errors)
+
+    xlsx_file = write_errors_to_xlsx(errors or {}, submission.csv_file)
+
+    response = HttpResponse(xlsx_file, content_type="text/csv")
+    response["Content-Disposition"] = f'attachment; filename="{xlsx_file_name}"'
+    return response
diff --git a/project/npda/general_functions/csv/csv_upload.py b/project/npda/general_functions/csv/csv_upload.py
@@ -17,7 +17,6 @@
 import httpx
 
 # RCPCH imports
-from project.npda.general_functions.write_errors_to_xlsx import write_errors_to_xlsx
 from project.constants import CSV_HEADINGS
 
 # Logging setup
@@ -29,7 +28,7 @@
 from project.npda.forms.external_visit_validators import validate_visit_async
 
 
-async def csv_upload(user, dataframe, csv_file, pdu_pz_code, audit_year):
+async def csv_upload(user, dataframe, csv_file_name, csv_file_bytes, pdu_pz_code, audit_year):
     """
     Processes standardised NPDA csv file and persists results in NPDA tables
     Returns the empty dict if successful, otherwise ValidationErrors indexed by the row they occurred at
@@ -191,18 +190,10 @@ def record_errors_from_form(errors_to_return, row_index, form):
             submission_date=timezone.now(),
             submission_by=user,  # user is the user who is logged in. Passed in as a parameter
             submission_active=True,
+            csv_file=csv_file_bytes,
+            csv_file_name=csv_file_name
         )
 
-        if csv_file:
-            # save the csv file with a custom name
-            new_filename = (
-                f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv"
-            )
-
-            # save=False so it doesn't try to save the parent, which would cause an error in an async context
-            # we save immediately after this anyway
-            new_submission.csv_file.save(new_filename, csv_file, save=False)
-
         await new_submission.asave()
 
     except Exception as e:
@@ -283,7 +274,7 @@ def record_errors_from_form(errors_to_return, row_index, form):
 
                     await new_submission.patients.aadd(patient)
             except Exception as error:
-                logger.exception(f"Error saving patient for {pdu_pz_code} from {csv_file}[{patient_row_index}]: {error}")
+                logger.exception(f"Error saving patient for {pdu_pz_code} from {csv_file_name}[{patient_row_index}]: {error}")
 
                 # We don't know what field caused the error so add to __all__
                 errors_to_return[patient_row_index]["__all__"].append(str(error))
@@ -298,12 +289,8 @@ def record_errors_from_form(errors_to_return, row_index, form):
 
                         await sync_to_async(lambda: visit_form.save())()
                     except Exception as error:
-                        logger.exception(f"Error saving visit for {pdu_pz_code} from {csv_file}[{visit_row_index}]: {error}")
+                        logger.exception(f"Error saving visit for {pdu_pz_code} from {csv_file_name}[{visit_row_index}]: {error}")
                         errors_to_return[visit_row_index]["__all__"].append(str(error))
-
-    # Only create xlsx file if the csv file was created.
-    if new_submission.csv_file:
-        _ = write_errors_to_xlsx(errors_to_return, new_submission)
 
     # Store the errors to report back to the user in the Data Quality Report
     if errors_to_return:

diff --git a/project/npda/general_functions/session.py b/project/npda/general_functions/session.py
@@ -28,7 +28,7 @@ def get_submission_actions(pz_code, audit_year):
     can_upload_csv = True
 
     if submission:
-        if submission.csv_file and submission.csv_file.name:
+        if submission.csv_file:
             can_upload_csv = True
             can_complete_questionnaire = False
         else:

diff --git a/project/npda/general_functions/write_errors_to_xlsx.py b/project/npda/general_functions/write_errors_to_xlsx.py
@@ -1,14 +1,15 @@
 # import types
 from collections import defaultdict
 from typing import Any, Dict, List, Union
+import io
 
 from openpyxl.worksheet.worksheet import Worksheet
 
 # import models
 from ..models.submission import Submission
 
 # import functions
-from project.npda.general_functions.csv import csv_parse
+from project.npda.general_functions.csv.csv_parse import csv_parse
 
 # import third-party libaries
 import pandas as pd
@@ -22,19 +23,20 @@
 
 
 def write_errors_to_xlsx(
-    errors: defaultdict[Any, defaultdict[Any, list]], new_submission: Submission
-) -> bool:
+    errors: dict[str, dict[str, list[str]]], original_csv_file_bytes: bytes
+) -> bytes:
     """
-    Write errors to an Excel file. This .xlsx file can later be downloaded by the user to highlight invalid cells when attempting to upload CSV data.
+    Write errors to an Excel file. Highlight invalid cells in the source CSV.
 
     Args:
-      errors (defaultdict[Any, defaultdict[Any, list]]): A dictionary containing errors grouped by row index and field.
+      errors: A nested dictionary containing errors grouped by row index, then field.
 
     """
-    xlsx_file: str = new_submission.csv_file.path.replace(".csv", ".xlsx")
+
+    xlsx_file = io.BytesIO()
 
     # Get original data
-    df = csv_parse(new_submission.csv_file).df
+    df = csv_parse(io.BytesIO(initial_bytes=original_csv_file_bytes)).df
     # Write an xlsx of the original data.
     df.to_excel(xlsx_file, sheet_name="Uploaded data (raw)", index=False)
 
@@ -89,9 +91,7 @@ def write_errors_to_xlsx(
     # Save the styled sheet.
     wb.save(xlsx_file)
 
-    # Return True/False based on successful .xlsx creation.
-    print("Running write_errors_to_xlsx")
-    return True
+    return xlsx_file.getvalue()
 
 
 def find_column_index_by_name(column_name: str, ws: Worksheet) -> int | None:
@@ -106,7 +106,7 @@ def find_column_index_by_name(column_name: str, ws: Worksheet) -> int | None:
 
 
 def flatten_errors(
-    errors: defaultdict[int, defaultdict[Any, list]],
+    errors: dict[str, dict[str, list[str]]],
     uploaded_nhs_numbers: "pd.Series[str]",
 ) -> "List[Dict[str, Union[int, str]]]":
     """

diff --git a/project/npda/management/commands/seed_submission.py b/project/npda/management/commands/seed_submission.py
@@ -6,7 +6,7 @@
     python manage.py seed_submission \
         --pts=50 \
         --visits="CDCD DHPC ACDC CDCD" \
-        --hb_target=T
+        --hb_target=T \
         --user_pk=1 \
         --submission_date="2024-10-18" \
 
@@ -56,11 +56,6 @@
     --submission_date (str, optional):
         The submission date in YYYY-MM-DD format. Defaults to today. This
         date is used to set the audit period's start and end dates.
-
-
-Notes:
-    - Submission requires an associated `csv_file`. A dummy value is set to
-      project/npda/dummy_sheets/dummy_sheet.csv.
 """
 
 from datetime import datetime
@@ -227,22 +222,12 @@ def handle(self, *args, **options):
             visit_kwargs={"is_valid": True},
         )
 
-
-
-        # Need a mock csv
-        with open("project/npda/dummy_sheets/dummy_sheet.csv", "rb") as f:
-            mock_csv = SimpleUploadedFile(
-                name="dummy_sheet.csv",
-                content=f.read(),
-                content_type="text/csv",
-            )
         new_submission = Submission.objects.create(
             paediatric_diabetes_unit=primary_pdu_for_user,
             audit_year=audit_start_date.year,
             submission_date=submission_date,
             submission_by=submission_by,
-            submission_active=True,
-            csv_file=mock_csv,
+            submission_active=True
         )
 
         # Add patients to submission

diff --git a/project/npda/migrations/0021_submission_csv_file_name_alter_submission_csv_file.py b/project/npda/migrations/0021_submission_csv_file_name_alter_submission_csv_file.py
@@ -0,0 +1,30 @@
+# Generated by Django 5.1.5 on 2025-01-31 17:17
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("npda", "0020_patient_location_bng_patient_location_wgs_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="submission",
+            name="csv_file_name",
+            field=models.CharField(
+                help_text="Name of the uploaded CSV file",
+                null=True,
+                verbose_name="CSV file name",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="submission",
+            name="csv_file",
+            field=models.BinaryField(
+                help_text="CSV file containing the audit data for this submission",
+                null=True,
+            ),
+        ),
+    ]
diff --git a/project/npda/models/submission.py b/project/npda/models/submission.py
@@ -35,11 +35,17 @@ class Submission(models.Model):
         to="npda.NPDAUser",
     )
 
-    csv_file = models.FileField(
-        upload_to=f"submissions/csv/",
+    csv_file = models.BinaryField(
         help_text="CSV file containing the audit data for this submission",
         null=True,  # submissions that are not active will have their csv file deleted
     )
+
+    csv_file_name = models.CharField(
+        "CSV file name",
+        help_text="Name of the uploaded CSV file",
+        null=True,
+    )
+
     errors = models.JSONField(
         "Errors",
         help_text="Errors that have been found in the uploaded CSV file",
@@ -72,11 +78,7 @@ def delete(self, *args, **kwargs):
 
     def save(self, *args, **kwargs):
         if self.submission_active == False:
-            self.csv_file.delete(
-                save=True
-            )  # delete the csv file if the submission is not active
-            self.csv_file = (
-                None  # set the csv file to None if the submission is not active
-            )
+            self.csv_file = None
+            # keep filename for our records
 
         super().save(*args, **kwargs)