diff --git a/Dockerfile b/Dockerfile index 6914ba53..8b728964 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,10 +34,6 @@ WORKDIR /app/ # (Excludes any files/dirs matched by patterns in .dockerignore) COPY . /app/ -# Ensure the media directory exists - csv files are stored here -RUN mkdir -p /media/submissions/csv/ - - # Install Tailwind CSS and DaisyUI RUN npm install diff --git a/docker-compose.yml b/docker-compose.yml index b4e62f1b..d12949e2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,7 +28,6 @@ services: - postgis volumes: - .:/app/ - - ./media/submissions/csv/:/app/media/submissions/csv/ command: s/start-dev restart: always diff --git a/project/npda/general_functions/csv/csv_download.py b/project/npda/general_functions/csv/csv_download.py index 806432bd..f1bd56df 100644 --- a/project/npda/general_functions/csv/csv_download.py +++ b/project/npda/general_functions/csv/csv_download.py @@ -1,12 +1,10 @@ +import json + from django.apps import apps from django.http import HttpResponse from django.shortcuts import get_object_or_404 -def download_file(file_path, file_name): - with open(file_path, "rb") as f: - response = HttpResponse(f.read(), content_type="text/csv") - response["Content-Disposition"] = f'attachment; filename="{file_name}"' - return response +from ..write_errors_to_xlsx import write_errors_to_xlsx def download_csv(request, submission_id): """ @@ -14,10 +12,10 @@ def download_csv(request, submission_id): """ Submission = apps.get_model(app_label="npda", model_name="Submission") submission = get_object_or_404(Submission, id=submission_id) - file_path = submission.csv_file.path - file_name = submission.csv_file.name.split("/")[-1] - return download_file(file_path, file_name) + response = HttpResponse(submission.csv_file, content_type="text/csv") + response["Content-Disposition"] = f'attachment; filename="{submission.csv_file_name}"' + return response def download_xlsx(request, submission_id): """ @@ -26,7 +24,16 @@ def download_xlsx(request, submission_id): """ Submission = apps.get_model(app_label="npda", model_name="Submission") submission = get_object_or_404(Submission, id=submission_id) - file_path = submission.csv_file.path.replace('.csv','.xlsx') - file_name = submission.csv_file.name.split("/")[-1].replace('.csv','.xlsx') - return download_file(file_path, file_name) + filename_without_extension = ".".join(submission.csv_file_name.split(".")[:-1]) + xlsx_file_name = f"{filename_without_extension}_data_quality_report.xlsx" + + errors = {} + if submission.errors: + errors = json.loads(submission.errors) + + xlsx_file = write_errors_to_xlsx(errors or {}, submission.csv_file) + + response = HttpResponse(xlsx_file, content_type="text/csv") + response["Content-Disposition"] = f'attachment; filename="{xlsx_file_name}"' + return response diff --git a/project/npda/general_functions/csv/csv_upload.py b/project/npda/general_functions/csv/csv_upload.py index 7b01e358..5cec1057 100644 --- a/project/npda/general_functions/csv/csv_upload.py +++ b/project/npda/general_functions/csv/csv_upload.py @@ -17,7 +17,6 @@ import httpx # RCPCH imports -from project.npda.general_functions.write_errors_to_xlsx import write_errors_to_xlsx from project.constants import CSV_HEADINGS # Logging setup @@ -29,7 +28,7 @@ from project.npda.forms.external_visit_validators import validate_visit_async -async def csv_upload(user, dataframe, csv_file, pdu_pz_code, audit_year): +async def csv_upload(user, dataframe, csv_file_name, csv_file_bytes, pdu_pz_code, audit_year): """ Processes standardised NPDA csv file and persists results in NPDA tables Returns the empty dict if successful, otherwise ValidationErrors indexed by the row they occurred at @@ -191,18 +190,10 @@ def record_errors_from_form(errors_to_return, row_index, form): submission_date=timezone.now(), submission_by=user, # user is the user who is logged in. Passed in as a parameter submission_active=True, + csv_file=csv_file_bytes, + csv_file_name=csv_file_name ) - if csv_file: - # save the csv file with a custom name - new_filename = ( - f"{pdu.pz_code}_{timezone.now().strftime('%Y%m%d_%H%M%S')}.csv" - ) - - # save=False so it doesn't try to save the parent, which would cause an error in an async context - # we save immediately after this anyway - new_submission.csv_file.save(new_filename, csv_file, save=False) - await new_submission.asave() except Exception as e: @@ -283,7 +274,7 @@ def record_errors_from_form(errors_to_return, row_index, form): await new_submission.patients.aadd(patient) except Exception as error: - logger.exception(f"Error saving patient for {pdu_pz_code} from {csv_file}[{patient_row_index}]: {error}") + logger.exception(f"Error saving patient for {pdu_pz_code} from {csv_file_name}[{patient_row_index}]: {error}") # We don't know what field caused the error so add to __all__ errors_to_return[patient_row_index]["__all__"].append(str(error)) @@ -298,12 +289,8 @@ def record_errors_from_form(errors_to_return, row_index, form): await sync_to_async(lambda: visit_form.save())() except Exception as error: - logger.exception(f"Error saving visit for {pdu_pz_code} from {csv_file}[{visit_row_index}]: {error}") + logger.exception(f"Error saving visit for {pdu_pz_code} from {csv_file_name}[{visit_row_index}]: {error}") errors_to_return[visit_row_index]["__all__"].append(str(error)) - - # Only create xlsx file if the csv file was created. - if new_submission.csv_file: - _ = write_errors_to_xlsx(errors_to_return, new_submission) # Store the errors to report back to the user in the Data Quality Report if errors_to_return: diff --git a/project/npda/general_functions/session.py b/project/npda/general_functions/session.py index fdbcca8f..1232cd29 100644 --- a/project/npda/general_functions/session.py +++ b/project/npda/general_functions/session.py @@ -28,7 +28,7 @@ def get_submission_actions(pz_code, audit_year): can_upload_csv = True if submission: - if submission.csv_file and submission.csv_file.name: + if submission.csv_file: can_upload_csv = True can_complete_questionnaire = False else: diff --git a/project/npda/general_functions/write_errors_to_xlsx.py b/project/npda/general_functions/write_errors_to_xlsx.py index dcd15e12..05ff2641 100644 --- a/project/npda/general_functions/write_errors_to_xlsx.py +++ b/project/npda/general_functions/write_errors_to_xlsx.py @@ -1,6 +1,7 @@ # import types from collections import defaultdict from typing import Any, Dict, List, Union +import io from openpyxl.worksheet.worksheet import Worksheet @@ -8,7 +9,7 @@ from ..models.submission import Submission # import functions -from project.npda.general_functions.csv import csv_parse +from project.npda.general_functions.csv.csv_parse import csv_parse # import third-party libaries import pandas as pd @@ -22,19 +23,20 @@ def write_errors_to_xlsx( - errors: defaultdict[Any, defaultdict[Any, list]], new_submission: Submission -) -> bool: + errors: dict[str, dict[str, list[str]]], original_csv_file_bytes: bytes +) -> bytes: """ - Write errors to an Excel file. This .xlsx file can later be downloaded by the user to highlight invalid cells when attempting to upload CSV data. + Write errors to an Excel file. Highlight invalid cells in the source CSV. Args: - errors (defaultdict[Any, defaultdict[Any, list]]): A dictionary containing errors grouped by row index and field. + errors A nested dictionary containing errors grouped by row index, then field. """ - xlsx_file: str = new_submission.csv_file.path.replace(".csv", ".xlsx") + + xlsx_file = io.BytesIO() # Get original data - df = csv_parse(new_submission.csv_file).df + df = csv_parse(io.BytesIO(initial_bytes=original_csv_file_bytes)).df # Write an xlsx of the original data. df.to_excel(xlsx_file, sheet_name="Uploaded data (raw)", index=False) @@ -89,9 +91,7 @@ def write_errors_to_xlsx( # Save the styled sheet. wb.save(xlsx_file) - # Return True/False based on successful .xlsx creation. - print("Running write_errors_to_xlsx") - return True + return xlsx_file.getvalue() def find_column_index_by_name(column_name: str, ws: Worksheet) -> int | None: @@ -106,7 +106,7 @@ def find_column_index_by_name(column_name: str, ws: Worksheet) -> int | None: def flatten_errors( - errors: defaultdict[int, defaultdict[Any, list]], + errors: dict[str, dict[str, list[str]]], uploaded_nhs_numbers: "pd.Series[str]", ) -> "List[Dict[str, Union[int, str]]]": """ diff --git a/project/npda/management/commands/seed_submission.py b/project/npda/management/commands/seed_submission.py index 40d8f52a..de04b456 100644 --- a/project/npda/management/commands/seed_submission.py +++ b/project/npda/management/commands/seed_submission.py @@ -6,7 +6,7 @@ python manage.py seed_submission \ --pts=50 \ --visits="CDCD DHPC ACDC CDCD" \ - --hb_target=T + --hb_target=T \ --user_pk=1 \ --submission_date="2024-10-18" \ @@ -56,11 +56,6 @@ --submission_date (str, optional): The submission date in YYYY-MM-DD format. Defaults to today. This date is used to set the audit period's start and end dates. - - -Notes: - - Submission requires an associated `csv_file`. A dummy value is set to - project/npda/dummy_sheets/dummy_sheet.csv. """ from datetime import datetime @@ -227,22 +222,12 @@ def handle(self, *args, **options): visit_kwargs={"is_valid": True}, ) - - - # Need a mock csv - with open("project/npda/dummy_sheets/dummy_sheet.csv", "rb") as f: - mock_csv = SimpleUploadedFile( - name="dummy_sheet.csv", - content=f.read(), - content_type="text/csv", - ) new_submission = Submission.objects.create( paediatric_diabetes_unit=primary_pdu_for_user, audit_year=audit_start_date.year, submission_date=submission_date, submission_by=submission_by, - submission_active=True, - csv_file=mock_csv, + submission_active=True ) # Add patients to submission diff --git a/project/npda/migrations/0021_submission_csv_file_name_alter_submission_csv_file.py b/project/npda/migrations/0021_submission_csv_file_name_alter_submission_csv_file.py new file mode 100644 index 00000000..bb27a7e0 --- /dev/null +++ b/project/npda/migrations/0021_submission_csv_file_name_alter_submission_csv_file.py @@ -0,0 +1,30 @@ +# Generated by Django 5.1.5 on 2025-01-31 17:17 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("npda", "0020_patient_location_bng_patient_location_wgs_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="submission", + name="csv_file_name", + field=models.CharField( + help_text="Name of the uploaded CSV file", + null=True, + verbose_name="CSV file name", + ), + ), + migrations.AlterField( + model_name="submission", + name="csv_file", + field=models.BinaryField( + help_text="CSV file containing the audit data for this submission", + null=True, + ), + ), + ] diff --git a/project/npda/models/submission.py b/project/npda/models/submission.py index 544695f4..272ede4e 100644 --- a/project/npda/models/submission.py +++ b/project/npda/models/submission.py @@ -35,11 +35,17 @@ class Submission(models.Model): to="npda.NPDAUser", ) - csv_file = models.FileField( - upload_to=f"submissions/csv/", + csv_file = models.BinaryField( help_text="CSV file containing the audit data for this submission", null=True, # submissions that are not active will have their csv file deleted ) + + csv_file_name = models.CharField( + "CSV file name", + help_text="Name of the uploaded CSV file", + null=True, + ) + errors = models.JSONField( "Errors", help_text="Errors that have been found in the uploaded CSV file", @@ -72,11 +78,7 @@ def delete(self, *args, **kwargs): def save(self, *args, **kwargs): if self.submission_active == False: - self.csv_file.delete( - save=True - ) # delete the csv file if the submission is not active - self.csv_file = ( - None # set the csv file to None if the submission is not active - ) + self.csv_file = None + # keep filename for our records super().save(*args, **kwargs) diff --git a/project/npda/tests/test_csv_upload.py b/project/npda/tests/test_csv_upload.py index 2ab98125..6eaf15b0 100644 --- a/project/npda/tests/test_csv_upload.py +++ b/project/npda/tests/test_csv_upload.py @@ -136,8 +136,15 @@ def test_user(seed_groups_fixture, seed_users_fixture): # The database is not rolled back if we used the built in async support for pytest # https://github.com/pytest-dev/pytest-asyncio/issues/226 @async_to_sync -async def csv_upload_sync(user, dataframe, csv_file, pdu_pz_code, audit_year): - return await csv_upload(user, dataframe, csv_file, pdu_pz_code, audit_year) +async def csv_upload_sync(user, dataframe): + return await csv_upload( + user, + dataframe, + csv_file_name=None, + csv_file_bytes=None, + pdu_pz_code=ALDER_HEY_PZ_CODE, + audit_year=2024 + ) def read_csv_from_str(contents): @@ -151,7 +158,7 @@ def read_csv_from_str(contents): @pytest.mark.django_db def test_create_patient(test_user, single_row_valid_df): - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() assert patient.nhs_number == nhs_number.standardise_format( @@ -171,7 +178,7 @@ def test_create_patient_with_death_date(test_user, single_row_valid_df): death_date = VALID_FIELDS["diagnosis_date"] + relativedelta(years=1) single_row_valid_df.loc[0, "Death Date"] = pd.to_datetime(death_date) - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() assert patient.death_date == single_row_valid_df["Death Date"][0].date() @@ -186,7 +193,7 @@ def test_multiple_patients( assert df["NHS Number"][0] == df["NHS Number"][1] assert df["NHS Number"][0] != df["NHS Number"][2] - csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, df) assert Patient.objects.count() == 2 [first_patient, second_patient] = Patient.objects.all() @@ -227,7 +234,7 @@ def test_missing_mandatory_field(seed_groups_per_function_fixture, seed_users_pe single_row_valid_df.loc[0, column] = None - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert model_field in errors[0] @@ -239,7 +246,7 @@ def test_missing_mandatory_field(seed_groups_per_function_fixture, seed_users_pe def test_error_in_single_visit(test_user, single_row_valid_df): single_row_valid_df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "treatment" in errors[0] visit = Visit.objects.first() @@ -253,7 +260,7 @@ def test_error_in_multiple_visits(test_user, one_patient_two_visits): df = one_patient_two_visits df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 - errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, df) assert "treatment" in errors[0] assert Visit.objects.count() == 2 @@ -281,7 +288,7 @@ def test_multiple_patients_where_one_has_visit_errors_and_the_other_does_not( df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 - errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, df) assert "treatment" in errors[0] [patient_one, patient_two] = Patient.objects.all() @@ -319,7 +326,7 @@ def test_multiple_patients_with_visit_errors( df.loc[0, "Diabetes Treatment at time of Hba1c measurement"] = 45 df.loc[1, "Diabetes Treatment at time of Hba1c measurement"] = 45 - errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, df) assert "treatment" in errors[0] assert "treatment" in errors[1] @@ -343,7 +350,7 @@ def test_invalid_nhs_number(test_user, single_row_valid_df): invalid_nhs_number = "123456789" single_row_valid_df["NHS Number"] = invalid_nhs_number - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "nhs_number" in errors[0] # Not catastrophic - error saved in model and raised back to caller @@ -360,7 +367,7 @@ def test_future_date_of_birth(test_user, single_row_valid_df): date_of_birth = TODAY + relativedelta(days=1) single_row_valid_df["Date of Birth"] = pd.to_datetime(date_of_birth) - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "date_of_birth" in errors[0] patient = Patient.objects.first() @@ -377,7 +384,7 @@ def test_over_25(test_user, single_row_valid_df): date_of_birth = TODAY + -relativedelta(years=25, days=1) single_row_valid_df["Date of Birth"] = pd.to_datetime(date_of_birth) - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "date_of_birth" in errors[0] patient = Patient.objects.first() @@ -393,7 +400,7 @@ def test_over_25(test_user, single_row_valid_df): def test_invalid_diabetes_type(test_user, single_row_valid_df): single_row_valid_df["Diabetes Type"] = 45 - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "diabetes_type" in errors[0] patient = Patient.objects.first() @@ -407,7 +414,7 @@ def test_future_diagnosis_date(test_user, single_row_valid_df): diagnosis_date = TODAY + relativedelta(days=1) single_row_valid_df["Date of Diabetes Diagnosis"] = pd.to_datetime(diagnosis_date) - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "diagnosis_date" in errors[0] patient = Patient.objects.first() @@ -426,7 +433,7 @@ def test_diagnosis_date_before_date_of_birth(test_user, single_row_valid_df): single_row_valid_df["Date of Diabetes Diagnosis"] = pd.to_datetime(diagnosis_date) - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "diagnosis_date" in errors[0] patient = Patient.objects.first() @@ -446,7 +453,7 @@ def test_diagnosis_date_before_date_of_birth(test_user, single_row_valid_df): def test_invalid_sex(test_user, single_row_valid_df): single_row_valid_df["Stated gender"] = 45 - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "sex" in errors[0] patient = Patient.objects.first() @@ -459,7 +466,7 @@ def test_invalid_sex(test_user, single_row_valid_df): def test_invalid_ethnicity(test_user, single_row_valid_df): single_row_valid_df["Ethnic Category"] = "45" - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "ethnicity" in errors[0] patient = Patient.objects.first() @@ -472,7 +479,7 @@ def test_invalid_ethnicity(test_user, single_row_valid_df): def test_missing_gp_ods_code(test_user, single_row_valid_df): single_row_valid_df["GP Practice Code"] = None - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "gp_practice_ods_code" in errors[0] patient = Patient.objects.first() @@ -493,7 +500,7 @@ def test_future_death_date(test_user, single_row_valid_df): single_row_valid_df["Death Date"] = pd.to_datetime(death_date) - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "death_date" in errors[0] patient = Patient.objects.first() @@ -512,7 +519,7 @@ def test_death_date_before_date_of_birth(test_user, single_row_valid_df): single_row_valid_df["Death Date"] = pd.to_datetime(death_date) - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "death_date" in errors[0] patient = Patient.objects.first() @@ -538,7 +545,7 @@ def test_death_date_before_date_of_birth(test_user, single_row_valid_df): def test_invalid_postcode(test_user, single_row_valid_df): single_row_valid_df["Postcode of usual address"] = "not a postcode" - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "postcode" in errors[0] patient = Patient.objects.first() @@ -555,7 +562,7 @@ def test_invalid_postcode(test_user, single_row_valid_df): def test_error_validating_postcode(test_user, single_row_valid_df): single_row_valid_df["Postcode of usual address"] = "WC1X 8SH" - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert len(errors) == 0 patient = Patient.objects.first() @@ -572,7 +579,7 @@ def test_error_validating_postcode(test_user, single_row_valid_df): def test_invalid_gp_ods_code(test_user, single_row_valid_df): single_row_valid_df["GP Practice Code"] = "not a GP code" - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "gp_practice_ods_code" in errors[0] patient = Patient.objects.first() @@ -589,7 +596,7 @@ def test_invalid_gp_ods_code(test_user, single_row_valid_df): def test_error_validating_gp_ods_code(test_user, single_row_valid_df): single_row_valid_df["GP Practice Code"] = "G85023" - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert len(errors) == 0 patient = Patient.objects.first() @@ -598,7 +605,7 @@ def test_error_validating_gp_ods_code(test_user, single_row_valid_df): @pytest.mark.django_db def test_lookup_index_of_multiple_deprivation(test_user, single_row_valid_df): - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() assert ( @@ -615,7 +622,7 @@ def test_lookup_index_of_multiple_deprivation(test_user, single_row_valid_df): ), ) def test_error_looking_up_index_of_multiple_deprivation(test_user, single_row_valid_df): - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() assert patient.index_of_multiple_deprivation_quintile is None @@ -623,7 +630,7 @@ def test_error_looking_up_index_of_multiple_deprivation(test_user, single_row_va @pytest.mark.django_db def test_save_location_from_postcode(test_user, single_row_valid_df): - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() assert patient.location_bng == MOCK_PATIENT_EXTERNAL_VALIDATION_RESULT.location_bng @@ -639,7 +646,7 @@ def test_save_location_from_postcode(test_user, single_row_valid_df): ), ) def test_missing_location_from_postcode(test_user, single_row_valid_df): - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() assert patient.location_bng is None @@ -653,7 +660,7 @@ def test_strip_first_spaces_in_column_name(test_user, dummy_sheet_csv): assert df.columns[0] == "NHS Number" - csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, df) patient = Patient.objects.first() assert patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][0]) @@ -666,7 +673,7 @@ def test_strip_last_spaces_in_column_name(test_user, dummy_sheet_csv): assert df.columns[0] == "NHS Number" - csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, df) patient = Patient.objects.first() assert patient.nhs_number == nhs_number.standardise_format(df["NHS Number"][0]) @@ -679,7 +686,7 @@ def test_spaces_in_date_column_name(test_user, dummy_sheet_csv): csv = dummy_sheet_csv.replace("Date of Birth", " Date of Birth") df = read_csv_from_str(csv).df - csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, df) patient = Patient.objects.first() assert patient.date_of_birth == df["Date of Birth"][0].date() @@ -693,7 +700,7 @@ def test_different_column_order(test_user, single_row_valid_df): columns = columns[1:] + columns[:1] df = single_row_valid_df[columns] - csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, df) assert Patient.objects.count() == 1 @@ -748,7 +755,7 @@ def test_case_insensitive_column_headers(test_user, dummy_sheet_csv): df = read_csv_from_str(csv).df - errors = csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, df) assert len(errors) == 0 @@ -827,7 +834,7 @@ def test_upload_without_headers(test_user, one_patient_two_visits): match="The first row of the csv file does not match any of the predefined column names. Please include these and upload the file again.", ): df = read_csv_from_str(csv).df - csv_upload_sync(test_user, df, None, ALDER_HEY_PZ_CODE) + csv_upload_sync(test_user, df) # No patients or associated visits should be saved assert Patient.objects.count() == 0 @@ -838,7 +845,7 @@ def test_upload_without_headers(test_user, one_patient_two_visits): def test_upload_csv_with_bool_values_instead_of_int(test_user, single_row_valid_df): single_row_valid_df["Has the patient been recommended a Gluten-free diet?"] = True - errors = csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + errors = csv_upload_sync(test_user, single_row_valid_df) assert "gluten_free_diet" in errors[0] visit = Visit.objects.first() @@ -850,7 +857,7 @@ def test_height_is_rounded_to_one_decimal(test_user, single_row_valid_df): single_row_valid_df["Patient Height (cm)"] = 123.456 single_row_valid_df["Patient Weight (kg)"] = 7.89 - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) visit = Visit.objects.first() @@ -884,7 +891,7 @@ def test_cleaned_fields_are_stored_when_other_fields_are_invalid(test_user, sing # - Invalid - cannot be less than 40 single_row_valid_df["Patient Height (cm)"] = 38 - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) patient = Patient.objects.first() visit = Visit.objects.first() @@ -898,7 +905,7 @@ def test_cleaned_fields_are_stored_when_other_fields_are_invalid(test_user, sing @pytest.mark.django_db def test_async_visit_fields_are_saved(test_user, single_row_valid_df): - csv_upload_sync(test_user, single_row_valid_df, None, ALDER_HEY_PZ_CODE, 2024) + csv_upload_sync(test_user, single_row_valid_df) visit = Visit.objects.first() assert(visit.height_centile == MOCK_VISIT_EXTERNAL_VALIDATION_RESULT.height_result.centile) diff --git a/project/npda/tests/view_tests/test_upload.py b/project/npda/tests/view_tests/test_upload.py index a3b284ec..35f13c5d 100644 --- a/project/npda/tests/view_tests/test_upload.py +++ b/project/npda/tests/view_tests/test_upload.py @@ -12,7 +12,6 @@ from project.npda.tests.test_csv_upload import mock_remote_calls -# @pytest.mark.skip(reason="CSV upload validation errors") @pytest.mark.django_db def test_generate_csv_upload_to_view( seed_groups_fixture, diff --git a/project/npda/views/home.py b/project/npda/views/home.py index 8555398e..47329da2 100644 --- a/project/npda/views/home.py +++ b/project/npda/views/home.py @@ -3,6 +3,7 @@ import datetime import logging import json +import io from datetime import date @@ -48,12 +49,17 @@ async def home(request): if request.method == "POST": form = UploadFileForm(request.POST, request.FILES) + user_csv = request.FILES["csv_upload"] + user_csv_filename = user_csv.name + # We are eventually storing the CSV file as a BinaryField so have to hold it in memory + user_csv_bytes = user_csv.read() + pz_code = request.session.get("pz_code") if request.session.get("can_upload_csv") is True: # check to see if the CSV is valid - cannot accept CSVs with no header. All other header errors are non-lethal but are reported back to the user try: - parsed_csv = csv_parse(user_csv) + parsed_csv = csv_parse(io.BytesIO(user_csv_bytes)) except ValueError as e: messages.error( request=request, @@ -87,7 +93,8 @@ async def home(request): errors_by_row_index = await csv_upload( user=request.user, dataframe=parsed_csv.df, - csv_file=user_csv, + csv_file_name=user_csv_filename, + csv_file_bytes=user_csv_bytes, pdu_pz_code=pz_code, audit_year=audit_year, ) diff --git a/project/npda/views/submissions.py b/project/npda/views/submissions.py index c1583227..94768a0a 100644 --- a/project/npda/views/submissions.py +++ b/project/npda/views/submissions.py @@ -90,16 +90,12 @@ def get_context_data(self, **kwargs: Any) -> dict: paediatric_diabetes_unit__pz_code=self.request.session.get("pz_code"), ).first() # there can be only one of these if requested_active_submission: - # If a submission exists and it was created by uploading a csv, summarize the csv data - if self.request.session.get("can_upload_csv"): - # check if the user has permission to upload csv (not this function is not available in this brance but is in live) - parsed_csv = csv_parse(requested_active_submission.csv_file) - if requested_active_submission.errors: - deserialized_errors = json.loads(requested_active_submission.errors) - context["submission_errors"] = deserialized_errors - else: - context["submission_errors"] = None - + if requested_active_submission.errors: + deserialized_errors = json.loads(requested_active_submission.errors) + context["submission_errors"] = deserialized_errors + else: + context["submission_errors"] = None + # Get some summary data about the patients in the submission... context["patients"] = Patient.objects.filter( submissions=requested_active_submission