From 5b2cc90b74fa1417e368fdb9066267fc74dce20e Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault <francis.charette.migneault@gmail.com>
Date: Fri, 8 Jan 2021 13:58:01 -0500
Subject: [PATCH 1/5] ignore files

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..d426fb02f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+.vscode/
+*.py[cod]

From 69d3a6e259ab83bc3f214658bd4a54ac1f9ee4ef Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault <francis.charette.migneault@gmail.com>
Date: Fri, 15 Jan 2021 16:15:49 -0500
Subject: [PATCH 2/5] code to only log predictions to file, no display

---
 .gitignore                                 |  3 +
 slowfast/config/defaults.py                |  3 +
 slowfast/visualization/async_predictor.py  | 60 +++++++++++++++--
 slowfast/visualization/predictor.py        |  2 +
 slowfast/visualization/video_visualizer.py | 78 ++++++++++++++++++++++
 tools/demo_net.py                          | 32 ++++++---
 6 files changed, 163 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index d426fb02f..6fea3a1b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 .idea/
 .vscode/
 *.py[cod]
+
+configs/custom
+models
diff --git a/slowfast/config/defaults.py b/slowfast/config/defaults.py
index 718801a92..6d4efc191 100644
--- a/slowfast/config/defaults.py
+++ b/slowfast/config/defaults.py
@@ -705,6 +705,9 @@
 # The number of overlapping frames cannot be larger than
 # half of the sequence length `cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE`
 _C.DEMO.BUFFER_SIZE = 0
+# Display the output prediction onto the input video
+# If disabled, output predictions will be logged instead of displaying the video
+_C.DEMO.OUTPUT_DISPLAY = True
 # If specified, the visualized outputs will be written this a video file of
 # this path. Otherwise, the visualized outputs will be displayed in a window.
 _C.DEMO.OUTPUT_FILE = ""
diff --git a/slowfast/visualization/async_predictor.py b/slowfast/visualization/async_predictor.py
index bb11ef15b..d83a39615 100644
--- a/slowfast/visualization/async_predictor.py
+++ b/slowfast/visualization/async_predictor.py
@@ -125,17 +125,19 @@ def default_buffer_size(self):
 
 class AsyncVis:
     class _VisWorker(mp.Process):
-        def __init__(self, video_vis, task_queue, result_queue):
+        def __init__(self, video_vis, task_queue, result_queue, prediction_processor):
             """
             Visualization Worker for AsyncVis.
             Args:
                 video_vis (VideoVisualizer object): object with tools for visualization.
                 task_queue (mp.Queue): a shared queue for incoming task for visualization.
                 result_queue (mp.Queue): a shared queue for visualized results.
+                prediction_processor (func): function that processes frames using (task, video_vis) inputs
             """
             self.video_vis = video_vis
             self.task_queue = task_queue
             self.result_queue = result_queue
+            self.process_predictions = prediction_processor
             super().__init__()
 
         def run(self):
@@ -147,17 +149,20 @@ def run(self):
                 if isinstance(task, _StopToken):
                     break
 
-                frames = draw_predictions(task, self.video_vis)
+                frames = self.process_predictions(task, self.video_vis)
                 task.frames = np.array(frames)
                 self.result_queue.put(task)
 
-    def __init__(self, video_vis, n_workers=None):
+    def __init__(self, video_vis, n_workers=None, prediction_processor=None):
         """
         Args:
             cfg (CfgNode): configs. Details can be found in
                 slowfast/config/defaults.py
             n_workers (Optional[int]): number of CPUs for running video visualizer.
                 If not given, use all CPUs.
+            prediction_processor (func):
+                function that processes frames using (task, video_vis) inputs
+                passed down to video visualizer.
         """
 
         num_workers = mp.cpu_count() if n_workers is None else n_workers
@@ -168,10 +173,11 @@ def __init__(self, video_vis, n_workers=None):
         self.procs = []
         self.result_data = {}
         self.put_id = -1
+        predictor = prediction_processor or draw_predictions
         for _ in range(max(num_workers, 1)):
             self.procs.append(
                 AsyncVis._VisWorker(
-                    video_vis, self.task_queue, self.result_queue
+                    video_vis, self.task_queue, self.result_queue, predictor
                 )
             )
 
@@ -317,3 +323,49 @@ def draw_predictions(task, video_vis):
     del task
 
     return buffer + frames
+
+
+def log_predictions(task, video_vis):
+    """
+    Log predictions for the given task (counterpart of `draw_predictions`).
+    Args:
+        task (TaskInfo object): task object that contains
+            the necessary information for logging. (e.g. frames, preds)
+            All attributes must lie on CPU devices.
+        video_vis (VideoVisualizer object): the video visualizer object.
+    """
+    boxes = task.bboxes
+    frames = task.frames
+    preds = task.action_preds
+    if boxes is not None:
+        img_width = task.img_width
+        img_height = task.img_height
+        if boxes.device != torch.device("cpu"):
+            boxes = boxes.cpu()
+        boxes = cv2_transform.revert_scaled_boxes(
+            task.crop_size, boxes, img_height, img_width
+        )
+
+    keyframe_idx = len(frames) // 2 - task.num_buffer_frames  # relative to frames once the buffer is stripped
+    draw_range = [
+        keyframe_idx - task.clip_vis_size,
+        keyframe_idx + task.clip_vis_size,
+    ]
+    buffer = frames[: task.num_buffer_frames]
+    frames = frames[task.num_buffer_frames :]
+    if boxes is not None:
+        if len(boxes) != 0:  # nothing is logged when boxes exist but are empty
+            frames = video_vis.draw_clip_range(
+                frames,
+                preds,
+                boxes,
+                keyframe_idx=keyframe_idx,
+                draw_range=draw_range,
+            )
+    else:
+        frames = video_vis.draw_clip_range(
+            frames, preds, keyframe_idx=keyframe_idx, draw_range=draw_range
+        )
+    del task
+
+    return buffer + frames  # buffer frames followed by the draw_clip_range output (or the untouched frames)
diff --git a/slowfast/visualization/predictor.py b/slowfast/visualization/predictor.py
index 3007aa58f..0e989e2df 100644
--- a/slowfast/visualization/predictor.py
+++ b/slowfast/visualization/predictor.py
@@ -33,6 +33,8 @@ def __init__(self, cfg, gpu_id=None):
             self.gpu_id = (
                 torch.cuda.current_device() if gpu_id is None else gpu_id
             )
+        else:
+            self.gpu_id = None
 
         # Build the video model and print model statistics.
         self.model = build_model(cfg, gpu_id=gpu_id)
diff --git a/slowfast/visualization/video_visualizer.py b/slowfast/visualization/video_visualizer.py
index faa127294..a7722b48f 100644
--- a/slowfast/visualization/video_visualizer.py
+++ b/slowfast/visualization/video_visualizer.py
@@ -675,3 +675,81 @@ def _get_thres_array(self, common_class_names=None):
         )
         thres_array[common_class_ids] = self.thres
         self.thres = torch.from_numpy(thres_array)
+
+
+class VideoLogger(VideoVisualizer):
+    """
+    Core is identical to visualizer. Override draw method to only log.
+    """
+    def __init__(self, *_, **__):
+        super(VideoLogger, self).__init__(*_, **__)
+        self.clip_index = 0
+
+    def draw_clip_range(
+        self,
+        frames,
+        preds,
+        bboxes=None,
+        text_alpha=0.5,
+        ground_truth=False,
+        keyframe_idx=None,
+        draw_range=None,
+        repeat_frame=1,
+    ):
+        self.clip_index += 1
+
+        if isinstance(preds, torch.Tensor):
+            if preds.ndim == 1:
+                preds = preds.unsqueeze(0)
+            n_instances = preds.shape[0]
+        elif isinstance(preds, list):
+            n_instances = len(preds)
+        else:
+            logger.error("Unsupported type of prediction input.")
+            return []  # same type as the normal return so callers can still concatenate frames
+
+        if ground_truth:
+            method = "ground-truth"
+            top_scores, top_classes = [None] * n_instances, preds
+        elif self.mode == "top-k":
+            method = "top-k={}".format(self.top_k)
+            top_scores, top_classes = torch.topk(preds, k=self.top_k)
+            top_scores, top_classes = top_scores.tolist(), top_classes.tolist()
+        elif self.mode == "thres":
+            method = "thres>={}".format(self.thres)
+            top_scores, top_classes = [], []
+            for pred in preds:
+                mask = pred >= self.thres
+                top_scores.append(pred[mask].tolist())
+                top_class = torch.squeeze(torch.nonzero(mask), dim=-1).tolist()
+                top_classes.append(top_class)
+
+        text_labels = []
+        for i in range(n_instances):
+            text_labels.append(
+                _create_text_labels(
+                    top_classes[i],
+                    top_scores[i],
+                    self.class_names,
+                    ground_truth=ground_truth,
+                )
+            )
+
+        if bboxes is not None:
+            assert len(preds) == len(
+                bboxes
+            ), "Encounter {} predictions and {} bounding boxes".format(
+                len(preds), len(bboxes)
+            )
+            logger.info("%04d", self.clip_index)
+            for i, box in enumerate(bboxes):
+                label = " labeled '{}'".format(text_labels[i]) if ground_truth else ""
+                text_box = "bbox: {},".format(list(box))
+                logger.info("    %s %s is predicted to class %s, %s: %s, %s",
+                            text_box, label, top_classes[i], method, list(top_classes[i]), list(top_scores[i]))
+        else:
+            label = " labeled '{}'".format(text_labels[0]) if ground_truth else ""
+            logger.info("%04d%s is predicted to class %s, %s: %s, %s",
+                        self.clip_index, label, top_classes[0], method, list(top_classes), list(top_scores))
+
+        return []
diff --git a/tools/demo_net.py b/tools/demo_net.py
index a7e98ebde..3377eaef4 100644
--- a/tools/demo_net.py
+++ b/tools/demo_net.py
@@ -11,6 +11,7 @@
 from slowfast.visualization.ava_demo_precomputed_boxes import (
     AVAVisualizerWithPrecomputedBox,
 )
+from slowfast.visualization.async_predictor import draw_predictions, log_predictions
 from slowfast.visualization.demo_loader import ThreadVideoManager, VideoManager
 from slowfast.visualization.predictor import ActionPredictor
 from slowfast.visualization.video_visualizer import VideoVisualizer
@@ -42,19 +43,28 @@ def run_demo(cfg, frame_provider):
         else None
     )
 
-    video_vis = VideoVisualizer(
-        num_classes=cfg.MODEL.NUM_CLASSES,
-        class_names_path=cfg.DEMO.LABEL_FILE_PATH,
-        top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
-        thres=cfg.DEMO.COMMON_CLASS_THRES,
-        lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
-        common_class_names=common_classes,
-        colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
-        mode=cfg.DEMO.VIS_MODE,
+    if not cfg.DEMO.OUTPUT_DISPLAY:
+        video_vis = ()
+        pred_processor = log_predictions
+    else:
+        video_vis = VideoVisualizer(
+            num_classes=cfg.MODEL.NUM_CLASSES,
+            class_names_path=cfg.DEMO.LABEL_FILE_PATH,
+            top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
+            thres=cfg.DEMO.COMMON_CLASS_THRES,
+            lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
+            common_class_names=common_classes,
+            colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
+            mode=cfg.DEMO.VIS_MODE,
+        )
+        pred_processor = draw_predictions
+
+    async_vis = AsyncVis(
+        video_vis,
+        n_workers=cfg.DEMO.NUM_VIS_INSTANCES,
+        prediction_processor=pred_processor,
     )
 
-    async_vis = AsyncVis(video_vis, n_workers=cfg.DEMO.NUM_VIS_INSTANCES)
-
     if cfg.NUM_GPUS <= 1:
         model = ActionPredictor(cfg=cfg, async_vis=async_vis)
     else:

From 04a5dd2854d69c637306b1931d0de1126787550b Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault <francis.charette.migneault@gmail.com>
Date: Fri, 15 Jan 2021 18:52:14 -0500
Subject: [PATCH 3/5] log predictions by themselves in separate file + main
 stdout log + fixes formatting of displayed results

---
 slowfast/utils/logging.py                  | 15 ++++++---
 slowfast/visualization/video_visualizer.py | 37 +++++++++++++++-------
 tools/demo_net.py                          | 28 +++++++++-------
 3 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/slowfast/utils/logging.py b/slowfast/utils/logging.py
index b26a53c02..8a27a2b29 100644
--- a/slowfast/utils/logging.py
+++ b/slowfast/utils/logging.py
@@ -64,11 +64,16 @@ def setup_logging(output_dir=None):
         logger.addHandler(ch)
 
     if output_dir is not None and du.is_master_proc(du.get_world_size()):
-        filename = os.path.join(output_dir, "stdout.log")
-        fh = logging.StreamHandler(_cached_log_stream(filename))
-        fh.setLevel(logging.DEBUG)
-        fh.setFormatter(plain_formatter)
-        logger.addHandler(fh)
+        setup_file_logger(logger, output_dir, "stdout.log", plain_formatter)
+
+
+def setup_file_logger(logger, output_dir, file_name, formatter=None):  # attach a file handler to `logger`
+    filename = os.path.join(output_dir, file_name)
+    fh = logging.StreamHandler(_cached_log_stream(filename))
+    fh.setLevel(logging.DEBUG)
+    fh.setFormatter(formatter or logging.Formatter("%(message)s"))  # default format: bare message only
+    logger.addHandler(fh)
+    logger.setLevel(logging.DEBUG)  # sets the level of the logger itself, not only of the new handler
 
 
 def get_logger(name):
diff --git a/slowfast/visualization/video_visualizer.py b/slowfast/visualization/video_visualizer.py
index a7722b48f..fd22a521a 100644
--- a/slowfast/visualization/video_visualizer.py
+++ b/slowfast/visualization/video_visualizer.py
@@ -13,6 +13,7 @@
 
 logger = logging.get_logger(__name__)
 log.getLogger("matplotlib").setLevel(log.ERROR)
+pred_log = logging.get_logger("slowfast-predictions")
 
 
 def _create_text_labels(classes, scores, class_names, ground_truth=False):
@@ -684,6 +685,7 @@ class VideoLogger(VideoVisualizer):
     def __init__(self, *_, **__):
         super(VideoLogger, self).__init__(*_, **__)
         self.clip_index = 0
+        self.frame_range = []
 
     def draw_clip_range(
         self,
@@ -697,6 +699,12 @@ def draw_clip_range(
         repeat_frame=1,
     ):
         self.clip_index += 1
+        frame_range = [0, len(frames) - 1]
+        if not self.frame_range:
+            self.frame_range = frame_range
+        else:
+            self.frame_range[0] = self.frame_range[1] + frame_range[0]
+            self.frame_range[1] = self.frame_range[1] + frame_range[1]
 
         if isinstance(preds, torch.Tensor):
             if preds.ndim == 1:
@@ -723,6 +731,9 @@ def draw_clip_range(
                 top_scores.append(pred[mask].tolist())
                 top_class = torch.squeeze(torch.nonzero(mask), dim=-1).tolist()
                 top_classes.append(top_class)
+        else:
+            logger.error("Unknown mode: %s", self.mode)
+            return []  # same type as the normal return so callers can still concatenate frames
 
         text_labels = []
         for i in range(n_instances):
@@ -735,21 +746,23 @@ def draw_clip_range(
                 )
             )
 
+        frames_info = "{:04d} [{:08d}, {:08d}]:".format(self.clip_index, self.frame_range[0], self.frame_range[1])
         if bboxes is not None:
-            assert len(preds) == len(
-                bboxes
-            ), "Encounter {} predictions and {} bounding boxes".format(
-                len(preds), len(bboxes)
-            )
-            logger.info("%04d", self.clip_index)
+            assert len(preds) == len(bboxes), \
+                "Encounter {} predictions and {} bounding boxes".format(len(preds), len(bboxes))
+            pred_log.info(frames_info)
             for i, box in enumerate(bboxes):
+                top_labels = [self.class_names[c] for c in top_classes[i]]
+                txt_scores = [float("{:.4f}".format(float(score))) for score in top_scores[i] or []]  # None if ground truth
                 label = " labeled '{}'".format(text_labels[i]) if ground_truth else ""
-                text_box = "bbox: {},".format(list(box))
-                logger.info("    %s %s is predicted to class %s, %s: %s, %s",
-                            text_box, label, top_classes[i], method, list(top_classes[i]), list(top_scores[i]))
+                text_box = "bbox: {},".format(list(float("{:04.2f}".format(float(c))) for c in list(box)))
+                pred_log.info("    %s%s is predicted to class %s, %s: %s, %s",
+                              text_box, label, text_labels[i][0], method, top_labels, txt_scores)
         else:
             label = " labeled '{}'".format(text_labels[0]) if ground_truth else ""
-            logger.info("%04d%s is predicted to class %s, %s: %s, %s",
-                        self.clip_index, label, top_classes[0], method, list(top_classes), list(top_scores))
+            top_labels = [self.class_names[c] for c in top_classes[0]]
+            txt_scores = [float("{:.4f}".format(float(score))) for score in top_scores[0] or []]  # None if ground truth
+            pred_log.info("%s%s is predicted to class %s, %s: %s, %s",
+                          frames_info, label, text_labels[0], method, top_labels, txt_scores)
 
-        return []
+        return []  # drop frames to speed up process (no writing)
diff --git a/tools/demo_net.py b/tools/demo_net.py
index 3377eaef4..405dc2517 100644
--- a/tools/demo_net.py
+++ b/tools/demo_net.py
@@ -5,6 +5,7 @@
 import time
 import torch
 import tqdm
+import os
 
 from slowfast.utils import logging
 from slowfast.visualization.async_predictor import AsyncDemo, AsyncVis
@@ -14,7 +15,7 @@
 from slowfast.visualization.async_predictor import draw_predictions, log_predictions
 from slowfast.visualization.demo_loader import ThreadVideoManager, VideoManager
 from slowfast.visualization.predictor import ActionPredictor
-from slowfast.visualization.video_visualizer import VideoVisualizer
+from slowfast.visualization.video_visualizer import VideoVisualizer, VideoLogger
 
 logger = logging.get_logger(__name__)
 
@@ -44,21 +45,24 @@ def run_demo(cfg, frame_provider):
     )
 
     if not cfg.DEMO.OUTPUT_DISPLAY:
-        video_vis = ()
+        video_vis_cls = VideoLogger
         pred_processor = log_predictions
+        pred_log = logging.get_logger("slowfast-predictions")
+        logging.setup_file_logger(pred_log, cfg.OUTPUT_DIR, "predictions.log")
     else:
-        video_vis = VideoVisualizer(
-            num_classes=cfg.MODEL.NUM_CLASSES,
-            class_names_path=cfg.DEMO.LABEL_FILE_PATH,
-            top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
-            thres=cfg.DEMO.COMMON_CLASS_THRES,
-            lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
-            common_class_names=common_classes,
-            colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
-            mode=cfg.DEMO.VIS_MODE,
-        )
+        video_vis_cls = VideoVisualizer
         pred_processor = draw_predictions
 
+    video_vis = video_vis_cls(
+        num_classes=cfg.MODEL.NUM_CLASSES,
+        class_names_path=cfg.DEMO.LABEL_FILE_PATH,
+        top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
+        thres=cfg.DEMO.COMMON_CLASS_THRES,
+        lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
+        common_class_names=common_classes,
+        colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
+        mode=cfg.DEMO.VIS_MODE,
+    )
     async_vis = AsyncVis(
         video_vis,
         n_workers=cfg.DEMO.NUM_VIS_INSTANCES,

From e74c4148e28c8161ced4f06cfc6d4812268a26d5 Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault <francis.charette.migneault@gmail.com>
Date: Mon, 18 Jan 2021 12:36:25 -0500
Subject: [PATCH 4/5] fix clip-id/frame-range considering multi-device frame
 buffers indices

---
 slowfast/visualization/async_predictor.py      | 14 ++++++++++++--
 .../ava_demo_precomputed_boxes.py              |  1 +
 slowfast/visualization/video_visualizer.py     | 18 ++++++++----------
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/slowfast/visualization/async_predictor.py b/slowfast/visualization/async_predictor.py
index d83a39615..2c6565885 100644
--- a/slowfast/visualization/async_predictor.py
+++ b/slowfast/visualization/async_predictor.py
@@ -315,10 +315,15 @@ def draw_predictions(task, video_vis):
                 boxes,
                 keyframe_idx=keyframe_idx,
                 draw_range=draw_range,
+                task=task,
             )
     else:
         frames = video_vis.draw_clip_range(
-            frames, preds, keyframe_idx=keyframe_idx, draw_range=draw_range
+            frames,
+            preds,
+            keyframe_idx=keyframe_idx,
+            draw_range=draw_range,
+            task=task,
         )
     del task
 
@@ -361,10 +366,15 @@ def log_predictions(task, video_vis):
                 boxes,
                 keyframe_idx=keyframe_idx,
                 draw_range=draw_range,
+                task=task,
             )
     else:
         frames = video_vis.draw_clip_range(
-            frames, preds, keyframe_idx=keyframe_idx, draw_range=draw_range
+            frames,
+            preds,
+            keyframe_idx=keyframe_idx,
+            draw_range=draw_range,
+            task=task,
         )
     del task
 
diff --git a/slowfast/visualization/ava_demo_precomputed_boxes.py b/slowfast/visualization/ava_demo_precomputed_boxes.py
index 1c97ad49d..ff8b1d563 100644
--- a/slowfast/visualization/ava_demo_precomputed_boxes.py
+++ b/slowfast/visualization/ava_demo_precomputed_boxes.py
@@ -294,6 +294,7 @@ def draw_video(self):
                     ground_truth=ground_truth,
                     draw_range=current_draw_range,
                     repeat_frame=repeat,
+                    task=task,
                 )
             # Store the current clip as buffer.
             prev_buffer = clip
diff --git a/slowfast/visualization/video_visualizer.py b/slowfast/visualization/video_visualizer.py
index fd22a521a..47e80ab90 100644
--- a/slowfast/visualization/video_visualizer.py
+++ b/slowfast/visualization/video_visualizer.py
@@ -522,6 +522,7 @@ def draw_clip_range(
         keyframe_idx=None,
         draw_range=None,
         repeat_frame=1,
+        task=None,
     ):
         """
         Draw predicted labels or ground truth classes to clip. Draw bouding boxes to clip
@@ -684,8 +685,7 @@ class VideoLogger(VideoVisualizer):
     """
     def __init__(self, *_, **__):
         super(VideoLogger, self).__init__(*_, **__)
-        self.clip_index = 0
-        self.frame_range = []
+        self.clip_index = -1
 
     def draw_clip_range(
         self,
@@ -697,14 +697,12 @@ def draw_clip_range(
         keyframe_idx=None,
         draw_range=None,
         repeat_frame=1,
+        task=None,
     ):
-        self.clip_index += 1
-        frame_range = [0, len(frames) - 1]
-        if not self.frame_range:
-            self.frame_range = frame_range
-        else:
-            self.frame_range[0] = self.frame_range[1] + frame_range[0]
-            self.frame_range[1] = self.frame_range[1] + frame_range[1]
+        self.clip_index = task.id if task else self.clip_index + 1
+        num_frames = len(frames)
+        start_frame = self.clip_index * num_frames
+        frame_range = [start_frame, start_frame + num_frames - 1]
 
         if isinstance(preds, torch.Tensor):
             if preds.ndim == 1:
@@ -746,7 +744,7 @@ def draw_clip_range(
                 )
             )
 
-        frames_info = "{:04d} [{:08d}, {:08d}]:".format(self.clip_index, self.frame_range[0], self.frame_range[1])
+        frames_info = "{:04d} [{:08d}, {:08d}]:".format(self.clip_index, frame_range[0], frame_range[1])
         if bboxes is not None:
             assert len(preds) == len(bboxes), \
                 "Encounter {} predictions and {} bounding boxes".format(len(preds), len(bboxes))

From b734b783681ba42ed9cbd62f0addadb18af225cb Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault <francis.charette.migneault@gmail.com>
Date: Mon, 18 Jan 2021 13:24:09 -0500
Subject: [PATCH 5/5] update task-id and add description in docstring

---
 slowfast/visualization/async_predictor.py            |  8 ++++----
 slowfast/visualization/ava_demo_precomputed_boxes.py |  1 -
 slowfast/visualization/video_visualizer.py           | 11 +++++++----
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/slowfast/visualization/async_predictor.py b/slowfast/visualization/async_predictor.py
index 2c6565885..66dd4ed4f 100644
--- a/slowfast/visualization/async_predictor.py
+++ b/slowfast/visualization/async_predictor.py
@@ -315,7 +315,7 @@ def draw_predictions(task, video_vis):
                 boxes,
                 keyframe_idx=keyframe_idx,
                 draw_range=draw_range,
-                task=task,
+                task_id=task.id,
             )
     else:
         frames = video_vis.draw_clip_range(
@@ -323,7 +323,7 @@ def draw_predictions(task, video_vis):
             preds,
             keyframe_idx=keyframe_idx,
             draw_range=draw_range,
-            task=task,
+            task_id=task.id,
         )
     del task
 
@@ -366,7 +366,7 @@ def log_predictions(task, video_vis):
                 boxes,
                 keyframe_idx=keyframe_idx,
                 draw_range=draw_range,
-                task=task,
+                task_id=task.id,
             )
     else:
         frames = video_vis.draw_clip_range(
@@ -374,7 +374,7 @@ def log_predictions(task, video_vis):
             preds,
             keyframe_idx=keyframe_idx,
             draw_range=draw_range,
-            task=task,
+            task_id=task.id,
         )
     del task
 
diff --git a/slowfast/visualization/ava_demo_precomputed_boxes.py b/slowfast/visualization/ava_demo_precomputed_boxes.py
index ff8b1d563..1c97ad49d 100644
--- a/slowfast/visualization/ava_demo_precomputed_boxes.py
+++ b/slowfast/visualization/ava_demo_precomputed_boxes.py
@@ -294,7 +294,6 @@ def draw_video(self):
                     ground_truth=ground_truth,
                     draw_range=current_draw_range,
                     repeat_frame=repeat,
-                    task=task,
                 )
             # Store the current clip as buffer.
             prev_buffer = clip
diff --git a/slowfast/visualization/video_visualizer.py b/slowfast/visualization/video_visualizer.py
index 47e80ab90..6fd29f051 100644
--- a/slowfast/visualization/video_visualizer.py
+++ b/slowfast/visualization/video_visualizer.py
@@ -522,7 +522,7 @@ def draw_clip_range(
         keyframe_idx=None,
         draw_range=None,
         repeat_frame=1,
-        task=None,
+        task_id=None,
     ):
         """
         Draw predicted labels or ground truth classes to clip. Draw bouding boxes to clip
@@ -539,6 +539,7 @@ def draw_clip_range(
             draw_range (Optional[list[ints]): only draw frames in range [start_idx, end_idx] inclusively in the clip.
                 If None, draw on the entire clip.
             repeat_frame (int): repeat each frame in draw_range for `repeat_frame` time for slow-motion effect.
+            task_id (Optional[int]): index of the task that the frames and predictions originated from.
         """
         if draw_range is None:
             draw_range = [0, len(frames) - 1]
@@ -681,7 +682,9 @@ def _get_thres_array(self, common_class_names=None):
 
 class VideoLogger(VideoVisualizer):
     """
-    Core is identical to visualizer. Override draw method to only log.
+    Log predictions to file instead of drawing onto output video frames.
+
+    Core is identical to `VideoVisualizer`. Override draw method to log.
     """
     def __init__(self, *_, **__):
         super(VideoLogger, self).__init__(*_, **__)
@@ -697,9 +700,9 @@ def draw_clip_range(
         keyframe_idx=None,
         draw_range=None,
         repeat_frame=1,
-        task=None,
+        task_id=None,
     ):
-        self.clip_index = task.id if task else self.clip_index + 1
+        self.clip_index = task_id if task_id is not None else self.clip_index + 1  # task id 0 is valid
         num_frames = len(frames)
         start_frame = self.clip_index * num_frames
         frame_range = [start_frame, start_frame + num_frames - 1]