add unit-tests for dealing w/ corrupt header (#30)

* Update
* Add missing line break
eyurtsev · Aug 13, 2021 · 396524e
1 parent 5ef1590
commit 396524e
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 8 deletions.
diff --git a/fcsparser/api.py b/fcsparser/api.py
@@ -209,14 +209,15 @@ def _extract_text_dict(raw_text):
             if raw_text[-1] != delimiter:
                 msg = (u'The first two characters were:\n {}. The last two characters were: {}\n'
                        u'Parser expects the same delimiter character in beginning '
-                       u'and end of TEXT segment. This file may be parsed incorrectly!'.format(raw_text[:2], raw_text[-2:]))
+                       u'and end of TEXT segment. '
+                       u'This file may be parsed incorrectly!'.format(raw_text[:2], raw_text[-2:]))
                 warnings.warn(msg)
                 raw_text = raw_text[1:]
             else:
                 raw_text = raw_text[1:-1]
         else:
             raw_text = raw_text[1:-1]
-            
+
         # 1:-1 above removes the first and last characters which are reserved for the delimiter.
 
         # The delimiter is escaped by being repeated (two consecutive delimiters). This code splits
@@ -601,3 +602,4 @@ def parse(path, meta_data_only=False, compensate=False, channel_naming='$PnS',
         df = fcs_parser.dataframe
         df = df.astype(dtype) if dtype else df
         return meta, df
+
diff --git a/fcsparser/tests/test_fcs_reader.py b/fcsparser/tests/test_fcs_reader.py
@@ -1,18 +1,18 @@
 from __future__ import print_function
 
-import warnings
+import os
 import timeit
 import unittest
 
 import numpy
+import pytest
 from numpy import array
-import os
 
-from fcsparser import parse as parse_fcs
-from fcsparser.api import FCSParser
+from .. import parse as parse_fcs
+from ..api import FCSParser
 
-BASE_PATH = os.path.abspath(os.path.dirname(__file__))
-BASE_PATH = os.path.join(BASE_PATH, 'data', 'FlowCytometers')
+HERE = os.path.abspath(os.path.dirname(__file__))
+BASE_PATH = os.path.join(HERE, 'data', 'FlowCytometers')
 
 # Used for checking data segments in fcs files generated by different machines.
 FILE_IDENTIFIER_TO_PATH = {
@@ -370,3 +370,22 @@ def test_reading_in_memory_fcs_file(self):
         matrix = FCSParser.from_data(data).data
         diff = numpy.abs(values - matrix[0:4, :])
         self.assertTrue(numpy.all(diff < 10 ** -8))
+
+
+# FCS file that contains only the header.
+CYTEK_NL_2000_sample_header = os.path.join(BASE_PATH, 'cytek-nl-2000', 'sample_header.fcs')
+
+
+class TestHeaderParsing(unittest.TestCase):
+    def test_parsing_headers(self):
+        """Attempt to parse a corrupted header from a cytek-nl-2000 flow cytometer."""
+        # Validate that a user warning is raised.
+        with pytest.warns(UserWarning):
+            fcs_parser = FCSParser(read_data=False, path=CYTEK_NL_2000_sample_header)
+
+        # Check that some of the header has been parsed
+        self.assertIsInstance(fcs_parser.annotation, dict)
+        # Verify number of keys
+        self.assertEqual(200, len(fcs_parser.annotation))
+        # Check one hard-coded key
+        self.assertEqual(fcs_parser.annotation['$P9B'], 32)