Expose cloud and edge inference speed metrics (#173)
* Expose inference speed metrics

* Made the metrics private to the class

* Type fixes

---------

Co-authored-by: cat101 <[email protected]>
cat101 authored Feb 23, 2024
1 parent 1054555 commit 5d6a85a
Showing 2 changed files with 110 additions and 15 deletions.
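For orientation, here is a minimal sketch (not part of this commit) of how the newly exposed metrics are meant to be read after a prediction call. The endpoint ID, API key, and image path are placeholders; the metric keys mirror the latency block documented in the predict.py diff below, and get_metrics() returns an empty dict until predict() has run at least once:

    from PIL import Image
    from landingai.predict import Predictor

    predictor = Predictor("<endpoint_id>", api_key="<api_key>")
    frame = Image.open("frame.jpg")          # placeholder input image
    predictions = predictor.predict(frame)   # one inference round-trip
    metrics = predictor.get_metrics()        # e.g. {"infer_s": 3.35, "decoding_s": 0.008, ...}
    print(f"Total server-side time: {sum(metrics.values()):.3f}s")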
40 changes: 34 additions & 6 deletions examples/capture-service/run.py
@@ -2,7 +2,8 @@
from datetime import datetime

from landingai.pipeline.image_source import NetworkedCamera
-from landingai.predict import Predictor # , EdgePredictor
+from landingai.predict import Predictor, EdgePredictor
+import argparse

logging.basicConfig(
level=logging.INFO,
@@ -20,6 +21,7 @@
# Public Cloud & Sky detection segmentation model
api_key = "land_sk_aMemWbpd41yXnQ0tXvZMh59ISgRuKNRKjJEIUHnkiH32NBJAwf"
endpoint_id = "432d58f6-6cd4-4108-a01c-2f023503d838"
+model_id = "9315c71e-31af-451f-9b38-120e035e6240"

#
# Below we provide some links to public cameras. Local RTSP cameras can also be used by specifying a local URL
@@ -38,10 +40,35 @@


if __name__ == "__main__":
-    # Cloud inference model to segment clouds
-    cloud_sky_model = Predictor(endpoint_id, api_key=api_key)
-    # Local inference model example. In order to use it, you need to manually run the local inference server with the "cloud & sky" model.
-    # cloud_sky_model = EdgePredictor()
+    parser = argparse.ArgumentParser(
+        description="Capture a live traffic camera feed and run a cloud (or local) segmentation model on it"
+    )
+
+    parser.add_argument(
+        "--localinference",
+        action="store_true",
+        help="Use a local LandingLens Docker inference service",
+    )
+    args = parser.parse_args()
+    if args.localinference:
+        # Local inference model example. To use it, first launch the local inference server with the "cloud & sky" model.
+        try:
+            cloud_sky_model = EdgePredictor()
+        except ConnectionError:
+            _LOGGER.error(
+                f"""Failed to connect to the local LandingLens Docker inference service. Have you launched the LandingLens container? If not, please read the guide here (https://support.landing.ai/docs/docker-deploy)\nOnce you have installed it and obtained a license, run:
+                docker run -p 8000:8000 --rm --name landingedge \\
+                    -e LANDING_LICENSE_KEY=YOUR_LICENSE_KEY \\
+                    public.ecr.aws/landing-ai/deploy:latest \\
+                    run-model-id -name sdk_example \\
+                    -k {api_key} \\
+                    -m {model_id}
+                """
+            )
+            exit(1)
+    else:
+        # Cloud inference model to segment clouds
+        cloud_sky_model = Predictor(endpoint_id, api_key=api_key)

Camera = NetworkedCamera(
stream_url, motion_detection_threshold=1, capture_interval=_CAPTURE_INTERVAL
@@ -58,11 +85,12 @@
_LOGGER.info(
f"Inference time {(datetime.now()-start_time).total_seconds():.2f} sec"
)
+_LOGGER.debug(f"Detailed inference metrics {cloud_sky_model.get_metrics()}")
# Do some further processing on the pipeline
frame = (
frame.overlay_predictions()
# .show_image()
-.show_image(image_src="overlay")
+.show_image(include_predictions=True)
# .save_image(filename_prefix="./capture")
)
start_time = datetime.now()
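With these changes the capture example can be pointed at either backend from the command line; a quick usage sketch (run from the repository root, paths per this commit):

    python examples/capture-service/run.py                   # cloud Predictor (default)
    python examples/capture-service/run.py --localinference  # local EdgePredictor

If --localinference is passed and no inference container is reachable, the script logs the docker run instructions shown above and exits with status 1.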
85 changes: 76 additions & 9 deletions landingai/predict.py
@@ -74,6 +74,8 @@ def __init__(
}
headers = self._build_default_headers(self._api_credential, extra_x_event)
self._session = _create_session(Predictor._url, self._num_retry, headers)
+# _performance_metrics stores the performance metrics from the last call to _do_inference()
+self._performance_metrics: Dict[str, float] = {}

def _check_connectivity(
self, url: Optional[str] = None, host: Optional[Tuple[str, int]] = None
@@ -152,14 +154,32 @@ def predict(
"endpoint_id": self._endpoint_id,
}
data = {"metadata": metadata.json()} if metadata else None
-return _do_inference(
+(preds, self._performance_metrics) = _do_inference(
self._session,
Predictor._url,
files,
query_params,
_CloudExtractor,
data=data,
)
+return preds

+def get_metrics(self) -> Dict[str, float]:
+    """
+    Return the performance metrics for the last inference call.
+
+    Returns:
+        A dictionary containing the performance metrics.
+        Example:
+        {
+            "decoding_s": 0.0084266,
+            "infer_s": 3.3537345,
+            "postprocess_s": 0.0255059,
+            "preprocess_s": 0.0124037,
+            "waiting_s": 0.0001487
+        }
+    """
+    return self._performance_metrics


class OcrPredictor(Predictor):
@@ -245,7 +265,7 @@ def predict( # type: ignore
if rois := kwargs.get("regions_of_interest", []):
data["rois"] = serialize_rois(rois, mode)

-preds = _do_inference(
+(preds, self._performance_metrics) = _do_inference(
self._session,
OcrPredictor._url,
files,
@@ -342,7 +362,10 @@ def predict(
"contentType": "multipart/form-data"
}, # No retries for the inference service
)
-return _do_inference(session, self._url, files, {}, _EdgeExtractor, data=data)
+(preds, self._performance_metrics) = _do_inference(
+    session, self._url, files, {}, _EdgeExtractor, data=data
+)
+return preds


class _Extractor:
@@ -405,7 +428,7 @@ def _extract_od_prediction(
Parameters
----------
response: Response from the LandingLens prediction endpoint.
-Example example input:
+Example input:
{
"backbonetype": "ObjectDetectionPrediction",
"backbonepredictions":
@@ -631,7 +654,7 @@ def _extract_edge_od_prediction(
Parameters
----------
response: Response from the Edge prediction endpoint.
-Example example input:
+Example input:
{
"type": "ObjectDetectionPrediction",
"predictions":
@@ -726,7 +749,48 @@ def _extract_edge_seg_prediction(
Parameters
----------
response: Response from the Edge prediction endpoint.
Example input:
+    {
+        "type": "SegmentationPrediction",
+        "model_id": "9315c71e-31af-451f-9b38-120e035e6240",
+        "predictions": {
+            "bitmaps": {
+                "1855c44a-215f-40d0-b627-9c4c83641df2": {
+                    "bitmap": "84480Z",
+                    "defectId": 74026,
+                    "labelIndex": 2,
+                    "labelName": "Cloud",
+                    "score": 0
+                },
+                "c2e7372c-4d64-4078-a6ee-09bf4ef5084a": {
+                    "bitmap": "84480Z",
+                    "defectId": 74025,
+                    "labelIndex": 1,
+                    "labelName": "Sky",
+                    "score": 0
+                }
+            },
+            "encoding": {
+                "algorithm": "rle",
+                "options": {
+                    "map": {
+                        "N": 1,
+                        "Z": 0
+                    }
+                }
+            },
+            "imageHeight": 240,
+            "imageWidth": 352,
+            "numClasses": 2
+        },
+        "latency": {
+            "decoding_s": 0.0084266,
+            "infer_s": 3.3537345,
+            "postprocess_s": 0.0255059,
+            "preprocess_s": 0.0124037,
+            "waiting_s": 0.0001487
+        }
+    }
"""
encoded_predictions = response["predictions"]["bitmaps"]
encoding_map = response["predictions"]["encoding"]["options"]["map"]
@@ -819,8 +883,9 @@ def _do_inference(
extractor_class: Type[_Extractor],
*,
data: Optional[Dict[str, Any]] = None,
-) -> List[Prediction]:
+) -> Tuple[List[Prediction], Dict[str, float]]:
"""Call the inference endpoint and extract the prediction result."""
+global _performance_metrics
try:
resp = session.post(endpoint_url, files=files, params=params, data=data)
except requests.exceptions.ConnectionError as e:
@@ -830,5 +895,7 @@
response = HttpResponse.from_response(resp)
_LOGGER.debug("Response: %s", response)
response.raise_for_status()
-json_dict = response.json()
-return extractor_class.extract_prediction(json_dict)
+json_dict = cast(Dict[str, Any], response.json())
+# Save performance metrics for debugging
+performance_metrics = json_dict.get("latency", {})
+return (extractor_class.extract_prediction(json_dict), performance_metrics)
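As an aside, the _extract_edge_seg_prediction docstring above documents the edge response's RLE bitmap format: each run is a decimal count followed by a symbol, and the encoding map translates symbols to pixel values (here {"N": 1, "Z": 0}, so "84480Z" is 240 * 352 background pixels). A minimal decoder sketch under that assumption; decode_rle_bitmap is a hypothetical helper, not part of the SDK, and numpy is assumed available:

    import re
    import numpy as np

    def decode_rle_bitmap(bitmap: str, encoding_map: dict, height: int, width: int) -> np.ndarray:
        # Each run is "<count><symbol>"; the map turns symbols into pixel values.
        pixels: list = []
        for count, symbol in re.findall(r"(\d+)([A-Za-z])", bitmap):
            pixels.extend([encoding_map[symbol]] * int(count))
        return np.array(pixels, dtype=np.uint8).reshape(height, width)

    # The all-background example from the docstring: 240 * 352 == 84480 pixels
    mask = decode_rle_bitmap("84480Z", {"N": 1, "Z": 0}, 240, 352)
    assert mask.sum() == 0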
