Merge branch 'develop'

int-brain-lab · May 12, 2021 · bdaac77 · bdaac77
2 parents 7c32a0e + 62885df
commit bdaac77
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 57 deletions.
diff --git a/iblvideo/motion_energy.py b/iblvideo/motion_energy.py
@@ -8,43 +8,46 @@
 left(right)Camera: cut whisker pad region
 """
 
-import os
 import time
 import numpy as np
 import pandas as pd
 import cv2
+import logging
 
 from oneibl.one import ONE
-from ibllib.io.video import get_video_frames_preload, url_from_eid, label_from_path
+from ibllib.io.video import get_video_frames_preload, label_from_path
 from ibllib.io.extractors.camera import get_video_length
 from oneibl.stream import VideoStreamer
 
+_log = logging.getLogger('ibllib')
+
 
 def grayscale(x):
     return cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)
 
 
-def get_dlc_midpoints(dlc_pqt):
+def get_dlc_midpoints(dlc_pqt, targets):
     # Load dataframe
     dlc_df = pd.read_parquet(dlc_pqt)
-    # Set values to nan if likelihood is too low and calcualte midpoints
-    targets = np.unique(['_'.join(col.split('_')[:-1]) for col in dlc_df.columns])
     mloc = {}
     for t in targets:
+        # Set values to nan if likelihood is too low and calculate midpoints
         idx = dlc_df.loc[dlc_df[f'{t}_likelihood'] < 0.9].index
         dlc_df.loc[idx, [f'{t}_x', f'{t}_y']] = np.nan
-        mloc[t] = [int(np.nanmean(dlc_df[f'{t}_x'])), int(np.nanmean(dlc_df[f'{t}_y']))]
+        if all(np.isnan(dlc_df[f'{t}_x'])) or all(np.isnan(dlc_df[f'{t}_y'])):
+            raise ValueError(f'Failed to calculate midpoint, {t} all NaN in {dlc_pqt}')
+        else:
+            mloc[t] = [int(np.nanmean(dlc_df[f'{t}_x'])), int(np.nanmean(dlc_df[f'{t}_y']))]
     return mloc
 
 
-def motion_energy(session_path, dlc_pqt, frames=10000, one=None):
+def motion_energy(file_mp4, dlc_pqt, frames=10000):
     """
     Compute motion energy on cropped frames of a single video
 
-    :param session_path: Path to session.
-    :param dlc_pqt: Path to dlc result in pqt file format. If None all frames are loaded at once.
-    :param frames: Number of frames to load into memory at once.
-    :param one: ONE instance
+    :param file_mp4: Video file to run motion energy for
+    :param dlc_pqt: Path to dlc result in pqt file format.
+    :param frames: Number of frames to load into memory at once. If None all frames are loaded.
     :return me_file: Path to numpy file containing motion energy.
     :return me_roi: Path to numpy file containing ROI coordinates.
 
@@ -56,24 +59,17 @@ def motion_energy(session_path, dlc_pqt, frames=10000, one=None):
     None    :  25 GB (body), 17.5 GB (left), 12.5 GB (right)
     """
 
-    one = one or ONE()
     start_T = time.time()
-
-    # Get label from dlc_df
     label = label_from_path(dlc_pqt)
-    video_path = session_path.joinpath('raw_video_data', f'_iblrig_{label}Camera.raw.mp4')
-    # Check if video available locally, else create url
-    if not os.path.isfile(video_path):
-        eid = one.eid_from_path(session_path)
-        video_path = url_from_eid(eid, label=label, one=one)
 
     # Crop ROI
-    mloc = get_dlc_midpoints(dlc_pqt)
     if label == 'body':
+        mloc = get_dlc_midpoints(dlc_pqt, targets=['tail_start'])
         anchor = np.array(mloc['tail_start'])
         w, h = int(anchor[0] * 3 / 5), 210
         x, y = int(anchor[0] - anchor[0] * 3 / 5), int(anchor[1] - 120)
     else:
+        mloc = get_dlc_midpoints(dlc_pqt, targets=['nose_tip', 'pupil_top_r'])
         anchor = np.mean([mloc['nose_tip'], mloc['pupil_top_r']], axis=0)
         dist = np.sqrt(np.sum((np.array(mloc['nose_tip']) - np.array(mloc['pupil_top_r']))**2,
                        axis=0))
@@ -84,15 +80,15 @@ def motion_energy(session_path, dlc_pqt, frames=10000, one=None):
     mask = np.s_[y:y + h, x:x + w]
     # save ROI coordinates
     roi = np.asarray([w, h, x, y])
-    alf_path = session_path.joinpath('alf')
+    alf_path = file_mp4.parent.parent.joinpath('alf')
+    alf_path.mkdir(exist_ok=True)
     roi_file = alf_path.joinpath(f'{label}ROIMotionEnergy.position.npy')
     np.save(roi_file, roi)
 
-    frame_count = get_video_length(video_path)
+    frame_count = get_video_length(file_mp4)
     me = np.zeros(frame_count,)
 
-    is_url = isinstance(video_path, str) and video_path.startswith('http')
-    cap = VideoStreamer(video_path).cap if is_url else cv2.VideoCapture(str(video_path))
+    cap = cv2.VideoCapture(str(file_mp4))
     if frames:
         n, keep_reading = 0, True
         while keep_reading:

diff --git a/iblvideo/run.py b/iblvideo/run.py
@@ -4,6 +4,7 @@
 import traceback
 import time
 import cv2
+import warnings
 from glob import glob
 from datetime import datetime
 from collections import OrderedDict
@@ -65,25 +66,26 @@ def _video_intact(self, file_mp4):
     def _run(self, cams=('left', 'body', 'right'), version=__version__, frames=None, **kwargs):
         session_id = self.one.eid_from_path(self.session_path)
         overwrite = kwargs.pop('overwrite', None)
-        # Create dictionary for logging time spent on each task
         timer = OrderedDict()
+        dlc_results = me_results = me_rois = []
+
         # Loop through cams
-        dlc_results, me_results, me_rois = [], [], []
         for cam in cams:
             timer[f'{cam}'] = OrderedDict()
             # Check if dlc and me results are available locally or in database, if latter download
             if overwrite:
-                # If it's a rerun, pretend the data doesn't exist yet
-                dlc_result = None
-                # me_result, me_roi = None, None
+                dlc_result = me_result = me_roi = None
             else:
                 dlc_result = self._result_exists(session_id, f'_ibl_{cam}Camera.dlc.pqt')
-                # me_result = self._result_exists(session_id, f'{cam}Camera.ROIMotionEnergy.npy')
-                # me_roi = self._result_exists(session_id, f'{cam}ROIMotionEnergy.position.npy')
+                # If dlc needs to be rerun, me should be rerun as well, regardless if it exists
+                if dlc_result is None:
+                    me_result = me_roi = None
+                else:
+                    me_result = self._result_exists(session_id, f'{cam}Camera.ROIMotionEnergy.npy')
+                    me_roi = self._result_exists(session_id, f'{cam}ROIMotionEnergy.position.npy')
 
-            # If dlc_result doesn't exist or should be overwritten, run DLC
-            if dlc_result is None:
-                # Download the camera data if not available locally
+            # If either dlc or me needs to be rerun, check if raw video exists, else download
+            if dlc_result is None or me_result is None or me_roi is None:
                 time_on = time.time()
                 _logger.info(f'Downloading {cam}Camera.')
                 video_intact, clobber_vid, attempt = False, False, 0
@@ -101,11 +103,11 @@ def _run(self, cams=('left', 'body', 'right'), version=__version__, frames=None,
                     continue
                 time_off = time.time()
                 timer[f'{cam}'][f'Download video'] = time_off - time_on
+
+            # If dlc_result doesn't exist or should be overwritten, run DLC
+            if dlc_result is None:
                 # Download weights if not exist locally
-                time_on = time.time()
                 path_dlc = download_weights(version=version)
-                time_off = time.time()
-                timer[f'{cam}']['Download DLC weights'] = time_off - time_on
                 _logger.info(f'Running DLC on {cam}Camera.')
                 try:
                     dlc_result, timer[f'{cam}'] = dlc(file_mp4, path_dlc=path_dlc, force=overwrite,
@@ -115,24 +117,23 @@ def _run(self, cams=('left', 'body', 'right'), version=__version__, frames=None,
                     _logger.error(f'DLC {cam}Camera failed.\n' + traceback.format_exc())
                     self.status = -1
                     continue
-            dlc_results.append(dlc_result)
 
-            # Currently defaulting to recalculating ME even if exists locally, as last DLC step
-            # also defaults to rerun
-            # if me_result is None or me_roi is None:
-            _logger.info(f'Computing motion energy for {cam}Camera')
-            try:
-                time_on = time.time()
-                me_result, me_roi = motion_energy(self.session_path, dlc_result, frames=frames,
-                                                  one=self.one)
-                time_off = time.time()
-                timer[f'{cam}']['Compute motion energy'] = time_off - time_on
-                _logger.info(me_result)
-                _logger.info(me_roi)
-            except BaseException:
-                _logger.error(f'Motion energy {cam}Camera failed.\n' + traceback.format_exc())
-                self.status = -1
-                continue
+            # If me outputs don't exist or should be overwritten, run me
+            if me_result is None or me_roi is None:
+                _logger.info(f'Computing motion energy for {cam}Camera')
+                try:
+                    time_on = time.time()
+                    me_result, me_roi = motion_energy(file_mp4, dlc_result, frames=frames)
+                    time_off = time.time()
+                    timer[f'{cam}']['Compute motion energy'] = time_off - time_on
+                    _logger.info(me_result)
+                    _logger.info(me_roi)
+                except BaseException:
+                    _logger.error(f'Motion energy {cam}Camera failed.\n' + traceback.format_exc())
+                    self.status = -1
+                    continue
+
+            dlc_results.append(dlc_result)
             me_results.append(me_result)
             me_rois.append(me_roi)
         _logger.info(_format_timer(timer))
@@ -221,7 +222,9 @@ def run_session(session_id, machine=None, cams=('left', 'body', 'right'), one=No
                     # Only run if dlc actually exists
                     if alf_path.joinpath(f'_ibl_{cam}Camera.dlc.pqt').exists():
                         qc = DlcQC(session_id, cam, one=one, download_data=False)
-                        qc.run(update=True)
+                        with warnings.catch_warnings():
+                            warnings.simplefilter("ignore", category=RuntimeWarning)
+                            qc.run(update=True)
             except AssertionError:
                 # If the camera.times don't exist we cannot run QC, but the DLC task shouldn't fail
                 # Make sure to not overwrite the task log if that has already been updated

diff --git a/iblvideo/tests/__init__.py b/iblvideo/tests/__init__.py
@@ -1 +1,3 @@
 from iblvideo.tests.download_test_data import _download_dlc_test_data, _download_me_test_data
+from iblvideo.tests.test_choiceworld import *
+from iblvideo.tests.test_motion_energy import *
diff --git a/iblvideo/tests/test_motion_energy.py b/iblvideo/tests/test_motion_energy.py
@@ -1,5 +1,7 @@
 import os
+import pytest
 import numpy as np
+import pandas as pd
 from iblvideo.motion_energy import motion_energy
 from iblvideo.tests import _download_me_test_data
 
@@ -12,20 +14,55 @@ def test_motion_energy():
         ctrl_me = np.load(test_data.joinpath(f'output/{cam}Camera.ROIMotionEnergy.npy'))
         ctrl_roi = np.load(test_data.joinpath(f'output/{cam}ROIMotionEnergy.position.npy'))
         dlc_pqt = test_data.joinpath(f'alf/_ibl_{cam}Camera.dlc.pqt')
+        file_mp4 = test_data.joinpath('raw_video_data', f'_iblrig_{cam}Camera.raw.mp4')
 
         # Test with all frames
-        me_file, roi_file = motion_energy(test_data, dlc_pqt, frames=None)
+        me_file, roi_file = motion_energy(file_mp4, dlc_pqt, frames=None)
         test_me = np.load(me_file)
         test_roi = np.load(roi_file)
         assert all(test_me == ctrl_me)
         assert all(test_roi == ctrl_roi)
 
+        os.remove(me_file)
+        os.remove(roi_file)
+
+
+def test_with_chunks():
+
+    test_data = _download_me_test_data()
+    for cam in ['body', 'left', 'right']:
+        print(f"Running test for {cam}")
+        ctrl_me = np.load(test_data.joinpath(f'output/{cam}Camera.ROIMotionEnergy.npy'))
+        ctrl_roi = np.load(test_data.joinpath(f'output/{cam}ROIMotionEnergy.position.npy'))
+        dlc_pqt = test_data.joinpath(f'alf/_ibl_{cam}Camera.dlc.pqt')
+        file_mp4 = test_data.joinpath('raw_video_data', f'_iblrig_{cam}Camera.raw.mp4')
+
         # Test with frame chunking
-        me_file, roi_file = motion_energy(test_data, dlc_pqt, frames=70)
+        me_file, roi_file = motion_energy(file_mp4, dlc_pqt, frames=70)
         test_me = np.load(me_file)
         test_roi = np.load(roi_file)
         assert all(test_me == ctrl_me)
         assert all(test_roi == ctrl_roi)
 
         os.remove(me_file)
         os.remove(roi_file)
+
+
+def test_with_nans():
+    test_data = _download_me_test_data()
+    for cam in ['body', 'left', 'right']:
+        print(f"Running test for {cam}")
+        dlc_pqt = test_data.joinpath(f'alf/_ibl_{cam}Camera.dlc.pqt')
+        nan_pqt = test_data.joinpath(f'alf/_ibl_{cam}Camera.nan.pqt')
+        file_mp4 = test_data.joinpath('raw_video_data', f'_iblrig_{cam}Camera.raw.mp4')
+
+        # Test that all NaN in used columns give correct error
+        df_nan = pd.read_parquet(dlc_pqt)
+        if cam == 'body':
+            df_nan['tail_start_y'] = np.nan
+        else:
+            df_nan['pupil_top_r_x'] = np.nan
+        df_nan.to_parquet(nan_pqt)
+        with pytest.raises(ValueError):
+            motion_energy(file_mp4, nan_pqt)
+        os.remove(nan_pqt)