From f16f21c8d6ff9c5313316ca09b67b867ab310647 Mon Sep 17 00:00:00 2001 From: Nathanael Lampe Date: Thu, 22 Dec 2016 15:37:40 +0200 Subject: [PATCH] added try/except statement for invalid unicode characters in header --- fcsparser/api.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fcsparser/api.py b/fcsparser/api.py index cf94fa2..2a635bd 100644 --- a/fcsparser/api.py +++ b/fcsparser/api.py @@ -163,7 +163,14 @@ def read_text(self, file_handle): # There are some differences in how the file_handle.seek(header['text start'], 0) raw_text = file_handle.read(header['text end'] - header['text start'] + 1) - raw_text = raw_text.decode('utf-8') + try: + raw_text = raw_text.decode('utf-8') + except UnicodeDecodeError as e: + print("Encountered an illegal utf-8 byte in the header.\n" + + "Illegal utf-8 characters will be ignored.\n" + + "The illegal byte was {} at position {}".format( + repr(e.object[e.start]), e.start)) + raw_text = raw_text.decode('utf-8', 'ignore') ##### # Parse the TEXT segment of the FCS file into a python dictionary