From 5b44d09e937d16bf692eca586c9a1bc144adcfc9 Mon Sep 17 00:00:00 2001 From: Gio Gottardi Date: Thu, 30 Aug 2018 16:49:13 -0500 Subject: [PATCH 1/6] Add transcriptions data to ResourceObject. --- .gitignore | 1 + aubreylib/resource.py | 39 ++++++++++++++++++++++++++++++++++++-- tests/test_resource.py | 43 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index cebbdd8..76ad980 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ dist/ .tox/* __pycache__/* +.pytest_cache/ diff --git a/aubreylib/resource.py b/aubreylib/resource.py index 3618b86..1368fab 100644 --- a/aubreylib/resource.py +++ b/aubreylib/resource.py @@ -73,7 +73,7 @@ def get_getCopy_data(getCopy_url, meta_id): # Try returning the getCopy data try: return json.loads(urllib2.urlopen(record_url).read()) - except: + except urllib2.URLError: # Otherwise, return an empty dictionary return {} @@ -120,6 +120,18 @@ def get_dimensions_data(mets_file): return None +def get_transcriptions_data(meta_id, resource_type, transcriptions_server_url): + """Return the JSON transcriptions structure if it exists. Only for sounds and videos.""" + if resource_type not in ['sound', 'video'] or not transcriptions_server_url: + return {} + transcriptions_url = '{}/{}/'.format(transcriptions_server_url.rstrip('/'), meta_id) + try: + return json.loads(urllib2.urlopen(transcriptions_url).read()) + except urllib2.URLError: + # Otherwise, return an empty dictionary + return {} + + class ResourceObject(object): def __init__(self, identifier, metadataLocations, staticFileLocations, @@ -174,6 +186,12 @@ def __init__(self, identifier, metadataLocations, staticFileLocations, # Get the descriptive metadata self.desc_MD = get_desc_metadata(self.metadata_file, self.metadata_type) + # Get transcriptions data + self.transcriptions = get_transcriptions_data( + meta_id=self.meta_id, + resource_type=self.desc_MD['resourceType'][0]['content'], + transcriptions_server_url=kwargs.get('transcriptions_server_url'), + ) # Get the fileSets within the fileSec self.get_structMap(parsed_mets) # Get the embargo information, if it exists @@ -410,8 +428,13 @@ def get_fileSets(self, manifest, fileSec, file_index): for fileSet in list(manifest): # Get the fileSet order number fileSet_num = int(fileSet.get("ORDER", '1')) + # Get the transcriptions data (if any) for this fileSet + fileSet_transcriptions = self.transcriptions.get(str(manifest_num), {}).get( + str(fileSet_num), []) # Get the file pointers and fileSet view type fileSet_data = self.get_file_pointers(fileSet, fileSec, file_index) + # Add the transcriptions (if any) to the file_ptrs list. + fileSet_data['file_ptrs'].extend(fileSet_transcriptions) # Create the fileSet data dictionary manifestation_dict[fileSet_num] = { 'file_ptrs': fileSet_data['file_ptrs'], @@ -419,6 +442,12 @@ def get_fileSets(self, manifest, fileSec, file_index): 'label': fileSet.get("LABEL"), 'fileSet_view_type': fileSet_data['fileSet_view_type'], 'zoom': fileSet_data['zoom'], + 'has_vtt_captions': self.has_vtt_type(fileSet_transcriptions, 'captions'), + 'has_vtt_subtitles': self.has_vtt_type(fileSet_transcriptions, 'subtitles'), + 'has_vtt_descriptions': self.has_vtt_type(fileSet_transcriptions, 'descriptions'), + 'has_vtt_chapters': self.has_vtt_type(fileSet_transcriptions, 'chapters'), + 'has_vtt_thumbnails': self.has_vtt_type(fileSet_transcriptions, 'thumbnails'), + 'has_vtt_metadata': self.has_vtt_type(fileSet_transcriptions, 'metadata'), } # If the manifestation doesn't have a view # type (return as a regular file) @@ -448,6 +477,12 @@ def get_fileSets(self, manifest, fileSec, file_index): self.manifestation_labels[manifest_num] = manifest.get("LABEL", None) return manifestation_dict + def has_vtt_type(self, transcriptions_list, vtt_type): + for transcription_dict in transcriptions_list: + if transcription_dict.get('vtt_kind') == vtt_type: + return True + return False + # Gets the file pointers from the given fileset # (searches for the fileset starting from the fileSec node or fileGrp node) # Slowest part of getting the resource object @@ -557,7 +592,7 @@ def get_embargo(self): try: embargo_date = datetime.datetime.strptime( date_string, "%Y-%m-%d") - except: + except ValueError: pass else: self.embargo_info['embargo_until_date'] = date_string diff --git a/tests/test_resource.py b/tests/test_resource.py index 3bb0d11..9e90c3d 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -93,3 +93,46 @@ def testResourceObjectDimensions(self, mocked_fileSet_file): 'USE': '4', 'flocat': 'file://web/pf_b-229.txt'} assert no_dimensions_data in ro.manifestation_dict[1][1]['file_ptrs'] + + @patch('aubreylib.resource.get_transcriptions_data') + @patch.object(resource.ResourceObject, 'get_fileSet_file') + def testResourceObjectTranscriptions(self, mocked_fileSet_file, + mocked_get_transcriptions_data): + """Verifies accurate transcriptions data is provided.""" + mocked_fileSet_file.return_value = {'file_mimetype': '', + 'file_name': '', + 'files_system': ''} + expected_transcription_data = { + 'MIMETYPE': 'text/vtt', + 'SIZE': 3618, + 'USE': 'vtt', + 'flocat': 'http://example.com/over/there', + 'language': 'eng', + 'vtt_kind': 'captions', + } + mocked_get_transcriptions_data.return_value = { + '1': { + '1': [ + expected_transcription_data + ] + } + } + + # Use the METs file from our test data to make resource object. + current_directory = os.path.dirname(os.path.abspath(__file__)) + mets_path = '{0}/data/metapth12434.mets.xml'.format(current_directory) + + ro = resource.ResourceObject(identifier=mets_path, metadataLocations=[], + staticFileLocations=[], + mimetypeIconsPath='', use=USE) + assert expected_transcription_data in ro.manifestation_dict[1][1]['file_ptrs'] + + # Check all the 'has_vtt...' values. + # This record does have captions. + assert ro.manifestation_dict[1][1]['has_vtt_captions'] + # No other types of transcriptions exist for this record. + assert not ro.manifestation_dict[1][1]['has_vtt_subtitles'] + assert not ro.manifestation_dict[1][1]['has_vtt_descriptions'] + assert not ro.manifestation_dict[1][1]['has_vtt_chapters'] + assert not ro.manifestation_dict[1][1]['has_vtt_thumbnails'] + assert not ro.manifestation_dict[1][1]['has_vtt_metadata'] From eb7c7caaa9d42a4b54fb6a94e57aeebd5426858d Mon Sep 17 00:00:00 2001 From: Gio Gottardi Date: Thu, 30 Aug 2018 16:56:03 -0500 Subject: [PATCH 2/6] Remove Python 2.6 Travis build and allow flake8 failures for now. --- .travis.yml | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b546ae8..7eb1968 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,13 +2,13 @@ language: python sudo: false python: 2.7 env: - - TOX_ENV=py26 - TOX_ENV=py27 - TOX_ENV=py34 - TOX_ENV=flake8 matrix: allow_failures: - env: TOX_ENV=py34 + - env: TOX_ENV=flake8 install: - pip install tox script: diff --git a/tox.ini b/tox.ini index c2f984f..0e5a57e 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ max-line-length = 99 [tox] -envlist = py26,py27,flake8 +envlist = py27,flake8 [testenv] usedevelop=True From c54ca081e31234a1c27c2fcd8dfb1c9dbda07f57 Mon Sep 17 00:00:00 2001 From: Gio Gottardi Date: Fri, 31 Aug 2018 11:17:04 -0500 Subject: [PATCH 3/6] Catch all exceptions and add more tests. --- aubreylib/resource.py | 12 ++++---- tests/test_resource.py | 66 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/aubreylib/resource.py b/aubreylib/resource.py index 1368fab..bf4ed39 100644 --- a/aubreylib/resource.py +++ b/aubreylib/resource.py @@ -73,7 +73,7 @@ def get_getCopy_data(getCopy_url, meta_id): # Try returning the getCopy data try: return json.loads(urllib2.urlopen(record_url).read()) - except urllib2.URLError: + except Exception: # Otherwise, return an empty dictionary return {} @@ -116,7 +116,7 @@ def get_dimensions_data(mets_file): dimensions_file = mets_file.replace('.mets.xml', '.json') try: return json.load(open_system_file(dimensions_file)) - except: + except Exception: return None @@ -127,7 +127,7 @@ def get_transcriptions_data(meta_id, resource_type, transcriptions_server_url): transcriptions_url = '{}/{}/'.format(transcriptions_server_url.rstrip('/'), meta_id) try: return json.loads(urllib2.urlopen(transcriptions_url).read()) - except urllib2.URLError: + except Exception: # Otherwise, return an empty dictionary return {} @@ -174,7 +174,7 @@ def __init__(self, identifier, metadataLocations, staticFileLocations, # Open the METS document try: mets_filehandle = open_system_file(self.mets_filename) - except: + except Exception: raise ResourceObjectException("Could not open the Mets " + "document: %s" % (self.meta_id)) # Parse the mets document @@ -592,7 +592,7 @@ def get_embargo(self): try: embargo_date = datetime.datetime.strptime( date_string, "%Y-%m-%d") - except ValueError: + except Exception: pass else: self.embargo_info['embargo_until_date'] = date_string @@ -617,7 +617,7 @@ def get_embargo(self): 'REPOSITORY_ADMIN_DICT', default_contact, ) - except: + except Exception: self.embargo_info['repository_admin_contact'] =\ default_contact # Attempt to get the author e-mails from the creator field diff --git a/tests/test_resource.py b/tests/test_resource.py index 9e90c3d..ca7b4b7 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -2,7 +2,8 @@ import os import pytest -from mock import mock_open, patch +import urllib2 +from mock import mock_open, patch, MagicMock from aubreylib import resource, USE @@ -63,6 +64,61 @@ def test_get_dimensions_data_absent(self, mock_exists): assert returned_json is None +class TestGetTranscriptionsData: + + def test_get_transcriptions_data_wrong_resource_type(self): + result = resource.get_transcriptions_data('metadc123', 'text', 'http://example.com') + assert result == {} + + @pytest.mark.parametrize('url', [ + '', + None, + ]) + def test_no_transcriptions_server_url(self, url): + result = resource.get_transcriptions_data('metadc123', 'text', url) + assert result == {} + + @pytest.mark.parametrize('url', [ + 'http://example.com', + 'http://example.com/', + ]) + @patch('urllib2.urlopen') + def test_no_double_slash(self, mock_urlopen, url): + mock_urlopen.return_value = '{}' + resource.get_transcriptions_data('metadc123', 'video', url) + mock_urlopen.assert_called_once_with('http://example.com/metadc123/') + + @patch('urllib2.urlopen') + def test_catches_urlopen_exceptions(self, mock_urlopen): + mock_urlopen.side_effect = [ + urllib2.HTTPError, + ValueError, + TypeError, + AttributeError, + ] + for i in range(5): + result = resource.get_transcriptions_data('metadc123', 'video', 'bad_url') + assert result == {} + + @patch('json.loads') + @patch('urllib2.urlopen') + def test_catches_loads_exceptions(self, mock_urlopen, mock_loads): + mock_loads.side_effect = [ + ValueError, + TypeError, + ] + mock_urlopen.return_value = '' + for i in range(2): + result = resource.get_transcriptions_data('metadc123', 'video', 'bad_json') + assert result == {} + + @patch('urllib2.urlopen') + def test_returns_expected_data(self, mock_urlopen): + mock_urlopen.return_value = MagicMock(read=lambda: '{"some": "data"}') + result = resource.get_transcriptions_data('metadc123', 'video', 'http://example.com') + assert result == {'some': 'data'} + + class TestResourceObject: @patch.object(resource.ResourceObject, 'get_fileSet_file') @@ -123,8 +179,12 @@ def testResourceObjectTranscriptions(self, mocked_fileSet_file, mets_path = '{0}/data/metapth12434.mets.xml'.format(current_directory) ro = resource.ResourceObject(identifier=mets_path, metadataLocations=[], - staticFileLocations=[], - mimetypeIconsPath='', use=USE) + staticFileLocations=[], mimetypeIconsPath='', use=USE, + transcriptions_server_url='http://example.com') + + mocked_get_transcriptions_data.assert_called_once_with( + meta_id='metapth12434', resource_type='image_photo', + transcriptions_server_url='http://example.com') assert expected_transcription_data in ro.manifestation_dict[1][1]['file_ptrs'] # Check all the 'has_vtt...' values. From c9a5c9c9c5cd76e7b721d5ab4c58da7b313ea44a Mon Sep 17 00:00:00 2001 From: Gio Gottardi Date: Fri, 31 Aug 2018 11:17:57 -0500 Subject: [PATCH 4/6] Catch all exceptions in system too and remove flake8 env from allowed failures in Travis. --- .travis.yml | 1 - aubreylib/system.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7eb1968..8e86181 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,6 @@ env: matrix: allow_failures: - env: TOX_ENV=py34 - - env: TOX_ENV=flake8 install: - pip install tox script: diff --git a/aubreylib/system.py b/aubreylib/system.py index 6058c68..dda93f3 100644 --- a/aubreylib/system.py +++ b/aubreylib/system.py @@ -68,7 +68,7 @@ def get_file_system(meta_id, file_path, location_tuple): break else: system_path = None - except: + except Exception: pass # returns the file name return system_path, file_location @@ -112,7 +112,7 @@ def open_system_file(file_name): valid_url = create_valid_url(file_name) try: return urllib2.urlopen(valid_url) - except: + except Exception: return get_other_system(valid_url) # open it over the file system else: @@ -165,7 +165,7 @@ def open_file_range(file_name, range_tuple): req = urllib2.Request(valid_url, None, headers) try: return urllib2.urlopen(req) - except: + except Exception: raise SystemMethodsException("Specified Range (%s,%s) not valid." % range_tuple) # open it over the file system else: @@ -179,13 +179,13 @@ def get_other_system(failed_url): # Determine meta/static servers locations try: from django.conf import settings - except: + except Exception: from aubreylib import METADATA_LOCATIONS, STATIC_FILE_LOCATIONS else: try: METADATA_LOCATIONS = settings.METADATA_LOCATIONS STATIC_FILE_LOCATIONS = settings.STATIC_FILE_LOCATIONS - except: + except Exception: from aubreylib import METADATA_LOCATIONS, STATIC_FILE_LOCATIONS # Combine the metadata locations with static locations all_locations = METADATA_LOCATIONS + STATIC_FILE_LOCATIONS @@ -197,6 +197,6 @@ def get_other_system(failed_url): new_url = failed_url.replace(host, replacement_host) try: return urllib2.urlopen(new_url, timeout=3) - except: + except Exception: pass raise SystemMethodsException("Can't locate file: %s" % (failed_url)) From c889f1f2ee72daee22ebadc8c31e6e18bf4c1782 Mon Sep 17 00:00:00 2001 From: Gio Gottardi Date: Fri, 31 Aug 2018 11:19:55 -0500 Subject: [PATCH 5/6] Correct range. --- tests/test_resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_resource.py b/tests/test_resource.py index ca7b4b7..9f53e0d 100644 --- a/tests/test_resource.py +++ b/tests/test_resource.py @@ -96,7 +96,7 @@ def test_catches_urlopen_exceptions(self, mock_urlopen): TypeError, AttributeError, ] - for i in range(5): + for i in range(4): result = resource.get_transcriptions_data('metadc123', 'video', 'bad_url') assert result == {} From f8ce45f8ac8aec491d4f8b74804c65723011bef0 Mon Sep 17 00:00:00 2001 From: Gio Gottardi Date: Fri, 31 Aug 2018 14:53:35 -0500 Subject: [PATCH 6/6] Update aubreylib version to 1.2.0 --- CHANGELOG.md | 7 +++++++ setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee6ea67..ae592d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ Change Log ========== +1.2.0 +----- + +* Added transcriptions data to ResourceObject instances. +* Fixed flake8 failures dealing with bare excepts. Now those excepts catch all Exception instances. + + 1.1.0 ----- diff --git a/setup.py b/setup.py index 58fd10d..f8477ec 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name='aubreylib', - version='1.1.0', + version='1.2.0', description='A helper library for the Aubrey access system.', author='University of North Texas Libraries', author_email='mark.phillips@unt.edu',