From 69d5f53b7f262134a4b40b06c92b0cf6460bc51d Mon Sep 17 00:00:00 2001 From: Xee authors Date: Sun, 3 Dec 2023 21:40:42 -0800 Subject: [PATCH] Efficient way to convert a structured numpy array into a 3d array. This CL uses a more efficient way to convert the numpy structured array removing the need to copy the array into memory as a list. Using `.view()` doesn't seem to change the data buffer and should be more memory efficient. closes #9 PiperOrigin-RevId: 587594128 --- xee/ext.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/xee/ext.py b/xee/ext.py index 8f6399a..884ee6e 100644 --- a/xee/ext.py +++ b/xee/ext.py @@ -25,7 +25,7 @@ import math import os import sys -from typing import Any, Dict, List, Iterable, Literal, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union from urllib import parse import warnings @@ -464,9 +464,30 @@ def image_to_array( pixels_getter, params, catch=ee.ee_exception.EEException ) - # TODO(#9): Find a way to make this more efficient. This is needed because - # `raw` is a structured array of all the same dtype (i.e. number of images). - arr = np.array(raw.tolist(), dtype=dtype) + # extract out the shape information from EE response + y_size, x_size = raw.shape + n_bands = len(raw.dtype) + + # Get a view (no copy) of the data as the returned type from EE + # then reshape to the correct shape based on the request. + # This is needed because `raw` is a structured array of all the same dtype + # (i.e. number of images) and this converts it to an ndarray. 
+ arr = raw.view(raw.dtype[0]).reshape( + y_size, + x_size, + n_bands, + ) + + # try converting the data to desired dtype in place without copying + # if conversion is not allowed then just use the EE returned dtype + try: + arr = arr.astype(dtype, copy=False) + except ValueError: + warnings.warn( + f'Could not convert EE results to requested dtype {dtype} ' + f'falling back to returned dtype from EE {np.dtype(raw.dtype[0])}' + ) + data = arr.T current_mask_value = np.array(self.mask_value, dtype=data.dtype) # Sets EE nodata masked value to NaNs.