Skip to content

Commit

Permalink
add unit-tests for dealing w/ corrupt header (#30)
Browse files Browse the repository at this point in the history
* Update

* Add missing line break
  • Loading branch information
eyurtsev authored Aug 13, 2021
1 parent 5ef1590 commit 396524e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
6 changes: 4 additions & 2 deletions fcsparser/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,15 @@ def _extract_text_dict(raw_text):
if raw_text[-1] != delimiter:
msg = (u'The first two characters were:\n {}. The last two characters were: {}\n'
u'Parser expects the same delimiter character in beginning '
u'and end of TEXT segment. This file may be parsed incorrectly!'.format(raw_text[:2], raw_text[-2:]))
u'and end of TEXT segment. '
u'This file may be parsed incorrectly!'.format(raw_text[:2], raw_text[-2:]))
warnings.warn(msg)
raw_text = raw_text[1:]
else:
raw_text = raw_text[1:-1]
else:
raw_text = raw_text[1:-1]

# 1:-1 above removes the first and last characters which are reserved for the delimiter.

# The delimiter is escaped by being repeated (two consecutive delimiters). This code splits
Expand Down Expand Up @@ -601,3 +602,4 @@ def parse(path, meta_data_only=False, compensate=False, channel_naming='$PnS',
df = fcs_parser.dataframe
df = df.astype(dtype) if dtype else df
return meta, df

31 changes: 25 additions & 6 deletions fcsparser/tests/test_fcs_reader.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from __future__ import print_function

import warnings
import os
import timeit
import unittest

import numpy
import pytest
from numpy import array
import os

from fcsparser import parse as parse_fcs
from fcsparser.api import FCSParser
from .. import parse as parse_fcs
from ..api import FCSParser

BASE_PATH = os.path.abspath(os.path.dirname(__file__))
BASE_PATH = os.path.join(BASE_PATH, 'data', 'FlowCytometers')
HERE = os.path.abspath(os.path.dirname(__file__))
BASE_PATH = os.path.join(HERE, 'data', 'FlowCytometers')

# Used for checking data segments in fcs files generated by different machines.
FILE_IDENTIFIER_TO_PATH = {
Expand Down Expand Up @@ -370,3 +370,22 @@ def test_reading_in_memory_fcs_file(self):
matrix = FCSParser.from_data(data).data
diff = numpy.abs(values - matrix[0:4, :])
self.assertTrue(numpy.all(diff < 10 ** -8))


# Path to a sample FCS file, stored under the 'cytek-nl-2000' data directory,
# that contains only the header. TestHeaderParsing uses it as its
# corrupted-header fixture.
CYTEK_NL_2000_sample_header = os.path.join(BASE_PATH, 'cytek-nl-2000', 'sample_header.fcs')


class TestHeaderParsing(unittest.TestCase):
    """Checks that a corrupted FCS header can still be parsed into annotations."""

    def test_parsing_headers(self):
        """Parse the corrupted cytek-nl-2000 sample header without reading data.

        Building the parser must emit a ``UserWarning``; the annotation it
        produces is then inspected for type, size, and one known entry.
        """
        # Constructing the parser on this file is expected to warn the user.
        with pytest.warns(UserWarning):
            parser = FCSParser(path=CYTEK_NL_2000_sample_header, read_data=False)

        annotation = parser.annotation

        # The header must still have been turned into a dictionary ...
        self.assertIsInstance(annotation, dict)
        # ... holding exactly this many keys ...
        self.assertEqual(200, len(annotation))
        # ... including this hard-coded key/value pair.
        self.assertEqual(annotation['$P9B'], 32)

0 comments on commit 396524e

Please sign in to comment.