diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bbd88264..79400973 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,11 +41,15 @@ jobs:
          pip install -U pip setuptools wheel
          pip install numpy
          GDAL_VERSION=$(gdal-config --version | awk -F'[.]' '{print $1"."$2}')
-          pip install GDAL==$GDAL_VERSION --no-cache-dir
-          pip install arosics
+          echo "GDAL Version: $GDAL_VERSION"
+          pip install arosics GDAL==$GDAL_VERSION --no-cache-dir
+
      - name: Install GeoWombat
        run: |
          pip install ".[stac,web,coreg,perf,tests]"
+      - name: Print Versions
+        run: |
+          pip list
      - name: Run Unittests
        run: |
          pip install testfixtures
@@ -55,6 +59,8 @@ jobs:
          pip install testfixtures
          pip install ".[ml]"
          python -m unittest discover -p 'ml_*.py'
+
+
  # Quality:
  #   runs-on: ubuntu-latest
  #   steps:
diff --git a/.gitignore b/.gitignore
index bc47c40d..f27565d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,3 +58,5 @@ notebooks/stac*.py
 dask-worker-space/
 .vscode/
+bin/act
+act/
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index fdfe8422..0459c686 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,7 +28,8 @@ python_requires = >=3.8,<3.12
 install_requires =
     dask[array,dataframe]>=2023.1.0
     distributed>=2023.1.0
-    geopandas>=0.8.0
+    geopandas>=0.13.0
+    fiona==1.9.4; python_version < "3.9"
     joblib>=0.16.0
     matplotlib>=3.3.0
     numpy>=1.19.0
@@ -37,7 +38,7 @@ install_requires =
     rasterio>=1.3.0,<2.0.0
     retry
     requests>=2.20.0
-    scikit-learn>=0.23.0
+    scikit-learn<=1.5.2
     scipy>=1.5.0
     shapely>=1.7.0
     tqdm>=4.62.0
@@ -47,7 +48,7 @@ install_requires =
     threadpoolctl>=3.1.0
     setuptools>=59.5.0
     cython>=0.29.0,<3.0.0
-
+    h5py<3.10.0
 [options.extras_require]
 docs = numpydoc
     sphinx
@@ -76,14 +77,14 @@ coreg = earthpy
     cmocean
     dill
     geoarray>=0.15.0
-    geojson 
+    geojson
     folium>=0.6.0,!=0.12.0
     pykrige
     py_tools_ds>=0.18.0
 zarr = zarr
     numcodecs
 ml = dask-ml>=2022.5.27
-    scikit-learn==1.2.0
+    scikit-learn<=1.5.2
     lightgbm
     sklearn-xarray@git+https://github.com/mmann1123/sklearn-xarray.git
     numpy_groupies
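Note: the workflow change pins the GDAL Python bindings to the system library's major.minor release before installing them alongside AROSICS. A minimal sketch of the same check in Python (assuming `gdal-config` is on the PATH):

```python
import subprocess

# Read the system GDAL version (e.g., '3.6.4') and keep major.minor only,
# so the pip pin resolves a binding compatible with the installed library.
version = subprocess.run(
    ["gdal-config", "--version"], capture_output=True, text=True, check=True
).stdout.strip()
major_minor = ".".join(version.split(".")[:2])
print(f"pip install GDAL=={major_minor}")
```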
diff --git a/src/geowombat/backends/__init__.py b/src/geowombat/backends/__init__.py
index 8d81c29a..aaf2bc98 100644
--- a/src/geowombat/backends/__init__.py
+++ b/src/geowombat/backends/__init__.py
@@ -1,4 +1,17 @@
 from .dask_ import Cluster
-from .xarray_ import concat, mosaic, transform_crs, warp_open
+from .xarray_ import (
+    concat,
+    mosaic,
+    transform_crs,
+    warp_open,
+    _check_config_globals,
+)
 
-__all__ = ['Cluster', 'concat', 'mosaic', 'warp_open', 'transform_crs']
+__all__ = [
+    "Cluster",
+    "concat",
+    "mosaic",
+    "warp_open",
+    "transform_crs",
+    "_check_config_globals",
+]
diff --git a/src/geowombat/backends/xarray_.py b/src/geowombat/backends/xarray_.py
index ef5b4467..693720a3 100644
--- a/src/geowombat/backends/xarray_.py
+++ b/src/geowombat/backends/xarray_.py
@@ -76,91 +76,91 @@ def _check_config_globals(
         bounds_by (str)
         ref_kwargs (dict)
     """
-    if config['nodata'] is not None:
-        ref_kwargs = _update_kwarg(config['nodata'], ref_kwargs, 'nodata')
+    if config["nodata"] is not None:
+        ref_kwargs = _update_kwarg(config["nodata"], ref_kwargs, "nodata")
     # Check if there is a reference image
-    if config['ref_image']:
-        if isinstance(config['ref_image'], str) and os.path.isfile(
-            config['ref_image']
+    if config["ref_image"]:
+        if isinstance(config["ref_image"], str) and os.path.isfile(
+            config["ref_image"]
         ):
             # Get the metadata from the reference image
-            ref_meta = get_ref_image_meta(config['ref_image'])
-            ref_kwargs['bounds'] = ref_meta.bounds
-            ref_kwargs['crs'] = ref_meta.crs
-            ref_kwargs['res'] = ref_meta.res
+            ref_meta = get_ref_image_meta(config["ref_image"])
+            ref_kwargs["bounds"] = ref_meta.bounds
+            ref_kwargs["crs"] = ref_meta.crs
+            ref_kwargs["res"] = ref_meta.res
         else:
-            if not config['ignore_warnings']:
-                logger.warning(' The reference image does not exist')
+            if not config["ignore_warnings"]:
+                logger.warning(" The reference image does not exist")
     else:
-        if config['ref_bounds']:
-            if isinstance(config['ref_bounds'], str) and config[
-                'ref_bounds'
-            ].startswith('Window'):
+        if config["ref_bounds"]:
+            if isinstance(config["ref_bounds"], str) and config[
+                "ref_bounds"
+            ].startswith("Window"):
                 ref_bounds_ = window_to_bounds(
-                    filenames, unpack_window(config['ref_bounds'])
+                    filenames, unpack_window(config["ref_bounds"])
                 )
-            elif isinstance(config['ref_bounds'], str) and config[
-                'ref_bounds'
-            ].startswith('BoundingBox'):
-                ref_bounds_ = unpack_bounding_box(config['ref_bounds'])
-            elif isinstance(config['ref_bounds'], Window):
-                ref_bounds_ = window_to_bounds(filenames, config['ref_bounds'])
-            elif isinstance(config['ref_bounds'], BoundingBox):
+            elif isinstance(config["ref_bounds"], str) and config[
+                "ref_bounds"
+            ].startswith("BoundingBox"):
+                ref_bounds_ = unpack_bounding_box(config["ref_bounds"])
+            elif isinstance(config["ref_bounds"], Window):
+                ref_bounds_ = window_to_bounds(filenames, config["ref_bounds"])
+            elif isinstance(config["ref_bounds"], BoundingBox):
                 ref_bounds_ = (
-                    config['ref_bounds'].left,
-                    config['ref_bounds'].bottom,
-                    config['ref_bounds'].right,
-                    config['ref_bounds'].top,
+                    config["ref_bounds"].left,
+                    config["ref_bounds"].bottom,
+                    config["ref_bounds"].right,
+                    config["ref_bounds"].top,
                 )
             else:
-                ref_bounds_ = config['ref_bounds']
+                ref_bounds_ = config["ref_bounds"]
 
-            ref_kwargs = _update_kwarg(ref_bounds_, ref_kwargs, 'bounds')
+            ref_kwargs = _update_kwarg(ref_bounds_, ref_kwargs, "bounds")
         else:
             if isinstance(filenames, str) or isinstance(filenames, Path):
                 # Use the bounds of the image
-                ref_kwargs['bounds'] = get_file_bounds(
+                ref_kwargs["bounds"] = get_file_bounds(
                     [filenames],
-                    bounds_by='reference',
-                    crs=ref_kwargs['crs'],
-                    res=ref_kwargs['res'],
+                    bounds_by="reference",
+                    crs=ref_kwargs["crs"],
+                    res=ref_kwargs["res"],
                     return_bounds=True,
                 )
             else:
                 # Replace the bounds keyword, if needed
-                if bounds_by.lower() == 'intersection':
+                if bounds_by.lower() == "intersection":
                     # Get the intersecting bounds of all images
-                    ref_kwargs['bounds'] = get_file_bounds(
+                    ref_kwargs["bounds"] = get_file_bounds(
                         filenames,
-                        bounds_by='intersection',
-                        crs=ref_kwargs['crs'],
-                        res=ref_kwargs['res'],
+                        bounds_by="intersection",
+                        crs=ref_kwargs["crs"],
+                        res=ref_kwargs["res"],
                         return_bounds=True,
                     )
-                elif bounds_by.lower() == 'union':
+                elif bounds_by.lower() == "union":
                     # Get the union bounds of all images
-                    ref_kwargs['bounds'] = get_file_bounds(
+                    ref_kwargs["bounds"] = get_file_bounds(
                         filenames,
-                        bounds_by='union',
-                        crs=ref_kwargs['crs'],
-                        res=ref_kwargs['res'],
+                        bounds_by="union",
+                        crs=ref_kwargs["crs"],
+                        res=ref_kwargs["res"],
                         return_bounds=True,
                     )
-                elif bounds_by.lower() == 'reference':
+                elif bounds_by.lower() == "reference":
                     # Use the bounds of the first image
-                    ref_kwargs['bounds'] = get_file_bounds(
+                    ref_kwargs["bounds"] = get_file_bounds(
                         filenames,
-                        bounds_by='reference',
-                        crs=ref_kwargs['crs'],
-                        res=ref_kwargs['res'],
+                        bounds_by="reference",
+                        crs=ref_kwargs["crs"],
+                        res=ref_kwargs["res"],
                         return_bounds=True,
                     )
@@ -169,33 +169,33 @@ def _check_config_globals(
                 " Choose from 'intersection', 'union', or 'reference'."
             )
 
-    config['ref_bounds'] = ref_kwargs['bounds']
+    config["ref_bounds"] = ref_kwargs["bounds"]
 
-    if config['ref_crs'] is not None:
-        ref_kwargs = _update_kwarg(config['ref_crs'], ref_kwargs, 'crs')
+    if config["ref_crs"] is not None:
+        ref_kwargs = _update_kwarg(config["ref_crs"], ref_kwargs, "crs")
 
-    if config['ref_res'] is not None:
-        ref_kwargs = _update_kwarg(config['ref_res'], ref_kwargs, 'res')
+    if config["ref_res"] is not None:
+        ref_kwargs = _update_kwarg(config["ref_res"], ref_kwargs, "res")
 
-    if config['ref_tar'] is not None:
-        if isinstance(config['ref_tar'], str):
-            if os.path.isfile(config['ref_tar']):
+    if config["ref_tar"] is not None:
+        if isinstance(config["ref_tar"], str):
+            if os.path.isfile(config["ref_tar"]):
                 ref_kwargs = _update_kwarg(
-                    _get_raster_coords(config['ref_tar']),
+                    _get_raster_coords(config["ref_tar"]),
                     ref_kwargs,
-                    'tac',
+                    "tac",
                 )
             else:
-                if not config['ignore_warnings']:
+                if not config["ignore_warnings"]:
                     logger.warning(
-                        ' The target aligned raster does not exist.'
+                        " The target aligned raster does not exist."
                     )
         else:
-            if not config['ignore_warnings']:
+            if not config["ignore_warnings"]:
                 logger.warning(
-                    ' The target aligned raster must be an image.'
+                    " The target aligned raster must be an image."
                 )
 
     return ref_kwargs
@@ -214,7 +214,7 @@ def delayed_to_xarray(
         da.from_delayed(delayed_data, shape=shape, dtype=dtype).rechunk(
             chunks
         ),
-        dims=('band', 'y', 'x'),
+        dims=("band", "y", "x"),
         coords=coords,
         attrs=attrs,
     )
@@ -223,7 +223,7 @@
 def warp_open(
     filename: T.Union[str, Path],
     band_names: T.Optional[T.Sequence[T.Union[int, str]]] = None,
-    resampling: str = 'nearest',
+    resampling: str = "nearest",
     dtype: T.Optional[str] = None,
     netcdf_vars: T.Optional[T.Sequence[T.Union[int, str]]] = None,
     nodata: T.Optional[T.Union[int, float]] = None,
@@ -253,39 +253,39 @@ def warp_open(
         ``xarray.DataArray``
     """
     ref_kwargs = {
-        'bounds': None,
-        'crs': None,
-        'res': None,
-        'nodata': nodata,
-        'warp_mem_limit': warp_mem_limit,
-        'num_threads': num_threads,
-        'tap': tap,
-        'tac': None,
+        "bounds": None,
+        "crs": None,
+        "res": None,
+        "nodata": nodata,
+        "warp_mem_limit": warp_mem_limit,
+        "num_threads": num_threads,
+        "tap": tap,
+        "tac": None,
     }
     ref_kwargs_netcdf_stack = ref_kwargs.copy()
-    ref_kwargs_netcdf_stack['bounds_by'] = 'union'
-    del ref_kwargs_netcdf_stack['tap']
+    ref_kwargs_netcdf_stack["bounds_by"] = "union"
+    del ref_kwargs_netcdf_stack["tap"]
 
-    ref_kwargs = _check_config_globals(filename, 'reference', ref_kwargs)
+    ref_kwargs = _check_config_globals(filename, "reference", ref_kwargs)
     filenames = None
     # Create a list of variables to open
-    if filename.lower().startswith('netcdf:') and netcdf_vars:
-        filenames = (f'{filename}:' + f',{filename}:'.join(netcdf_vars)).split(
-            ','
+    if filename.lower().startswith("netcdf:") and netcdf_vars:
+        filenames = (f"{filename}:" + f",{filename}:".join(netcdf_vars)).split(
+            ","
         )
     if filenames:
         ref_kwargs_netcdf_stack = _check_config_globals(
-            filenames[0], 'reference', ref_kwargs_netcdf_stack
+            filenames[0], "reference", ref_kwargs_netcdf_stack
         )
         with rio_open(filenames[0]) as src:
             tags = src.tags()
     else:
         ref_kwargs_netcdf_stack = _check_config_globals(
-            filename, 'reference', ref_kwargs_netcdf_stack
+            filename, "reference", ref_kwargs_netcdf_stack
         )
         with rio_open(filename) as src:
@@ -301,33 +301,37 @@ def warp_netcdf_vars():
             yield xr.concat(
                 (
                     open_rasterio(
-                        wobj, nodata=ref_kwargs['nodata'], **kwargs
+                        wobj, nodata=ref_kwargs["nodata"], **kwargs
                     ).assign_coords(
                         band=[band_names[wi]]
                         if band_names
                         else [netcdf_vars[wi]]
                     )
                     for wi, wobj in enumerate(warped_objects)
                 ),
-                dim='band',
+                dim="band",
             )
 
-    with open_rasterio(
-        warp(filename, resampling=resampling, **ref_kwargs),
-        nodata=ref_kwargs['nodata'],
-        **kwargs,
-    ) if not filenames else warp_netcdf_vars() as src:
+    with (
+        open_rasterio(
+            warp(filename, resampling=resampling, **ref_kwargs),
+            nodata=ref_kwargs["nodata"],
+            **kwargs,
+        )
+        if not filenames
+        else warp_netcdf_vars()
+    ) as src:
         if band_names:
             if len(band_names) > src.gw.nbands:
-                src.coords['band'] = band_names[: src.gw.nbands]
+                src.coords["band"] = band_names[: src.gw.nbands]
             else:
-                src.coords['band'] = band_names
+                src.coords["band"] = band_names
         else:
             if src.gw.sensor:
                 if src.gw.sensor not in src.gw.avail_sensors:
-                    if not src.gw.config['ignore_warnings']:
+                    if not src.gw.config["ignore_warnings"]:
                         logger.warning(
-                            ' The {} sensor is not currently supported.\nChoose from [{}].'.format(
-                                src.gw.sensor, ', '.join(src.gw.avail_sensors)
+                            " The {} sensor is not currently supported.\nChoose from [{}].".format(
+                                src.gw.sensor, ", ".join(src.gw.avail_sensors)
                             )
                         )
@@ -337,35 +341,35 @@ def warp_netcdf_vars():
                     )
                     # Avoid nested opens within a `config` context
                     if len(new_band_names) != len(src.band.values.tolist()):
-                        if not src.gw.config['ignore_warnings']:
+                        if not src.gw.config["ignore_warnings"]:
                             logger.warning(
-                                ' The new bands, {}, do not match the sensor bands, {}.'.format(
+                                " The new bands, {}, do not match the sensor bands, {}.".format(
                                     new_band_names, src.band.values.tolist()
                                 )
                             )
                     else:
-                        src = src.assign_coords(**{'band': new_band_names})
+                        src = src.assign_coords(**{"band": new_band_names})
                         src = src.assign_attrs(
-                            **{'sensor': src.gw.sensor_names[src.gw.sensor]}
+                            **{"sensor": src.gw.sensor_names[src.gw.sensor]}
                         )
 
         if return_windows:
-            if isinstance(kwargs['chunks'], tuple):
-                chunksize = kwargs['chunks'][-1]
+            if isinstance(kwargs["chunks"], tuple):
+                chunksize = kwargs["chunks"][-1]
             else:
-                chunksize = kwargs['chunks']
+                chunksize = kwargs["chunks"]
 
-            src.attrs['block_windows'] = get_window_offsets(
+            src.attrs["block_windows"] = get_window_offsets(
                 src.shape[-2],
                 src.shape[-1],
                 chunksize,
                 chunksize,
-                return_as='list',
+                return_as="list",
             )
 
         src = src.assign_attrs(
-            **{'filename': filename, 'resampling': resampling}
+            **{"filename": filename, "resampling": resampling}
         )
 
         if tags:
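Note: the dense NetCDF expansion in `warp_open()` builds one `file:variable` string per requested variable. A standalone trace of what it produces (hypothetical path and variable names):

```python
filename = "netcdf:data.nc"  # hypothetical NetCDF path
netcdf_vars = ["blue", "green"]

# Mirrors the expression in warp_open(): prefix each variable with the file.
filenames = (f"{filename}:" + f",{filename}:".join(netcdf_vars)).split(",")
print(filenames)  # ['netcdf:data.nc:blue', 'netcdf:data.nc:green']
```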
@@ -384,9 +388,9 @@
 
 def mosaic(
     filenames: T.Sequence[T.Union[str, Path]],
-    overlap: str = 'max',
-    bounds_by: str = 'reference',
-    resampling: str = 'nearest',
+    overlap: str = "max",
+    bounds_by: str = "reference",
+    resampling: str = "nearest",
     band_names: T.Optional[T.Sequence[T.Union[int, str]]] = None,
     nodata: T.Optional[T.Union[float, int]] = None,
     dtype: T.Optional[str] = None,
@@ -419,22 +423,22 @@ def mosaic(
         ``xarray.DataArray``
     """
     if overlap not in (
-        'min',
-        'max',
-        'mean',
+        "min",
+        "max",
+        "mean",
     ):
         logger.exception(
             " The overlap argument must be one of 'min', 'max', or 'mean'."
         )
 
     ref_kwargs = {
-        'bounds': None,
-        'crs': None,
-        'res': None,
-        'nodata': nodata,
-        'warp_mem_limit': warp_mem_limit,
-        'num_threads': num_threads,
-        'tac': None,
+        "bounds": None,
+        "crs": None,
+        "res": None,
+        "nodata": nodata,
+        "warp_mem_limit": warp_mem_limit,
+        "num_threads": num_threads,
+        "tac": None,
     }
 
     ref_kwargs = _check_config_globals(filenames, bounds_by, ref_kwargs)
@@ -449,45 +453,48 @@ def mosaic(
     # Get the original bounds, unsampled
     with open_rasterio(
-        filenames[0], nodata=ref_kwargs['nodata'], **kwargs
+        filenames[0], nodata=ref_kwargs["nodata"], **kwargs
     ) as src_:
         attrs = src_.attrs.copy()
 
     geometries = []
     for fn in filenames:
-        with open_rasterio(fn, nodata=ref_kwargs['nodata'], **kwargs) as src_:
+        with open_rasterio(fn, nodata=ref_kwargs["nodata"], **kwargs) as src_:
             geometries.append(src_.gw.geometry)
 
-    if overlap == 'min':
-        reduce_func = da.minimum
+    def custom_nanmax(left, right):
+        max_data = da.nanmax(da.stack([left.data, right.data]), axis=0)
+        return xr.DataArray(max_data, dims=left.dims, coords=left.coords)
+
+    def custom_nanmin(left, right):
+        min_data = da.nanmin(da.stack([left.data, right.data]), axis=0)
+        return xr.DataArray(min_data, dims=left.dims, coords=left.coords)
+
+    def custom_nanmean(left, right):
+        mean_data = da.nanmean(da.stack([left.data, right.data]), axis=0)
+        return xr.DataArray(mean_data, dims=left.dims, coords=left.coords)
+
+    if overlap == "min":
+        reduce_func = custom_nanmin
         tmp_nodata = 1e9
-    elif overlap == 'max':
-        reduce_func = da.maximum
+    elif overlap == "max":
+        reduce_func = custom_nanmax
         tmp_nodata = -1e9
-    elif overlap == 'mean':
+    elif overlap == "mean":
         tmp_nodata = -1e9
+        reduce_func = custom_nanmean
 
-        def reduce_func(
-            left: xr.DataArray, right: xr.DataArray
-        ) -> xr.DataArray:
-            return xr.where(
-                (left != tmp_nodata) & (right != tmp_nodata),
-                (left + right) / 2.0,
-                xr.where(left != tmp_nodata, left, right),
-            )
-
-    # Open all the data pointers
     data_arrays = [
         open_rasterio(
             wo,
-            nodata=ref_kwargs['nodata'],
+            nodata=ref_kwargs["nodata"],
             **kwargs,
         )
         .gw.set_nodata(
-            src_nodata=ref_kwargs['nodata'],
+            src_nodata=ref_kwargs["nodata"],
             dst_nodata=tmp_nodata,
-            dtype='float64',
+            dtype="float64",
         )
         .gw.mask_nodata()
         for wo in warped_objects
     ]
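Note: the new `custom_nan*` helpers replace `da.minimum`/`da.maximum` so that a valid pixel always wins over NaN in the overlap. A self-contained sketch of the same pattern with made-up arrays:

```python
import dask.array as da
import numpy as np
import xarray as xr

left = xr.DataArray(
    da.from_array(np.array([[1.0, np.nan], [3.0, 4.0]]), chunks=2),
    dims=("y", "x"),
)
right = xr.DataArray(
    da.from_array(np.array([[2.0, 2.0], [np.nan, 1.0]]), chunks=2),
    dims=("y", "x"),
)

def custom_nanmax(left, right):
    # Stack on a new leading axis and take the NaN-aware maximum.
    max_data = da.nanmax(da.stack([left.data, right.data]), axis=0)
    return xr.DataArray(max_data, dims=left.dims, coords=left.coords)

print(custom_nanmax(left, right).compute())  # [[2. 2.] [3. 4.]]
```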
@@ -498,25 +505,24 @@ def reduce_func(
             lambda left, right: reduce_func(left, right),
             data_arrays,
         )
-    # Reset the 'no data' values
     darray = darray.gw.set_nodata(
         src_nodata=tmp_nodata,
-        dst_nodata=ref_kwargs['nodata'],
+        dst_nodata=ref_kwargs["nodata"],
     ).assign_attrs(**attrs)
 
     if band_names:
-        darray.coords['band'] = band_names
+        darray.coords["band"] = band_names
     else:
         if darray.gw.sensor:
             if darray.gw.sensor not in darray.gw.avail_sensors:
-                if not darray.gw.config['ignore_warnings']:
+                if not darray.gw.config["ignore_warnings"]:
                     logger.warning(
-                        ' The {} sensor is not currently supported.\nChoose from [{}].'.format(
+                        " The {} sensor is not currently supported.\nChoose from [{}].".format(
                             darray.gw.sensor,
-                            ', '.join(darray.gw.avail_sensors),
+                            ", ".join(darray.gw.avail_sensors),
                         )
                     )
@@ -528,19 +534,19 @@ def reduce_func(
                 if len(new_band_names) != len(darray.band.values.tolist()):
-                    if not darray.gw.config['ignore_warnings']:
+                    if not darray.gw.config["ignore_warnings"]:
                         logger.warning(
-                            ' The band list length does not match the sensor bands.'
+                            " The band list length does not match the sensor bands."
                         )
                 else:
-                    darray = darray.assign_coords(**{'band': new_band_names})
+                    darray = darray.assign_coords(**{"band": new_band_names})
                     darray = darray.assign_attrs(
-                        **{'sensor': darray.gw.sensor_names[darray.gw.sensor]}
+                        **{"sensor": darray.gw.sensor_names[darray.gw.sensor]}
                     )
 
     darray = darray.assign_attrs(
-        **{'resampling': resampling, 'geometries': geometries}
+        **{"resampling": resampling, "geometries": geometries}
     )
 
     if tags:
@@ -548,7 +554,6 @@ def reduce_func(
         attrs.update(tags)
 
     darray = darray.assign_attrs(**attrs)
-
     if dtype is not None:
         attrs = darray.attrs.copy()
         return darray.astype(dtype).assign_attrs(**attrs)
@@ -560,12 +565,12 @@ def reduce_func(
 def check_alignment(concat_list: T.Sequence[xr.DataArray]) -> None:
     try:
         for fidx in range(0, len(concat_list) - 1):
-            xr.align(concat_list[fidx], concat_list[fidx + 1], join='exact')
+            xr.align(concat_list[fidx], concat_list[fidx + 1], join="exact")
     except ValueError:
         warning_message = (
-            'The stacked dimensions are not aligned. If this was not intentional, '
-            'use gw.config.update to align coordinates. To suppress this message, use '
-            'with gw.config.update(ignore_warnings=True):.'
+            "The stacked dimensions are not aligned. If this was not intentional, "
+            "use gw.config.update to align coordinates. To suppress this message, use "
+            "with gw.config.update(ignore_warnings=True):."
        )
         warnings.warn(warning_message, UserWarning)
         logger.warning(warning_message)
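Note: `check_alignment()` relies on `xr.align(..., join='exact')` raising `ValueError` when coordinates differ. A minimal illustration:

```python
import numpy as np
import xarray as xr

a = xr.DataArray(np.zeros(3), dims="x", coords={"x": [0.0, 1.0, 2.0]})
b = xr.DataArray(np.zeros(3), dims="x", coords={"x": [0.0, 1.0, 2.5]})

try:
    xr.align(a, b, join="exact")
except ValueError:
    # This is the condition that triggers the stacked-dimensions warning.
    print("The stacked dimensions are not aligned.")
```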
src.assign_attrs(**{"filename": [Path(fn).name for fn in filenames]}) if tags: attrs = src.attrs.copy() @@ -721,33 +726,33 @@ def concat( src = src.assign_attrs(**attrs) - if stack_dim == 'time': + if stack_dim == "time": - if str(filenames[0]).lower().startswith('netcdf:'): + if str(filenames[0]).lower().startswith("netcdf:"): if time_names: - src.coords['time'] = time_names + src.coords["time"] = time_names else: - src.coords['time'] = parse_filename_dates(filenames) + src.coords["time"] = parse_filename_dates(filenames) try: - src = src.groupby('time').max().assign_attrs(**attrs) + src = src.groupby("time").max().assign_attrs(**attrs) except ValueError: pass else: if not time_names: - src.coords['time'] = parse_filename_dates(filenames) + src.coords["time"] = parse_filename_dates(filenames) if band_names: - src.coords['band'] = band_names + src.coords["band"] = band_names else: if src.gw.sensor: if src.gw.sensor not in src.gw.avail_sensors: - if not src.gw.config['ignore_warnings']: + if not src.gw.config["ignore_warnings"]: logger.warning( - ' The {} sensor is not currently supported.\nChoose from [{}].'.format( - src.gw.sensor, ', '.join(src.gw.avail_sensors) + " The {} sensor is not currently supported.\nChoose from [{}].".format( + src.gw.sensor, ", ".join(src.gw.avail_sensors) ) ) @@ -756,17 +761,17 @@ def concat( src.gw.wavelengths[src.gw.sensor]._fields ) if len(new_band_names) != len(src.band.values.tolist()): - if not src.gw.config['ignore_warnings']: + if not src.gw.config["ignore_warnings"]: logger.warning( - ' The new bands, {}, do not match the sensor bands, {}.'.format( + " The new bands, {}, do not match the sensor bands, {}.".format( new_band_names, src.band.values.tolist() ) ) else: - src = src.assign_coords(**{'band': new_band_names}) + src = src.assign_coords(**{"band": new_band_names}) src = src.assign_attrs( - **{'sensor': src.gw.sensor_names[src.gw.sensor]} + **{"sensor": src.gw.sensor_names[src.gw.sensor]} ) if dtype: @@ -787,7 +792,7 @@ def transform_crs( src_nodata=None, dst_nodata=None, coords_only=False, - resampling='nearest', + resampling="nearest", warp_mem_limit=512, num_threads=1, ): @@ -834,10 +839,10 @@ def transform_crs( return_as_dict=True, delayed_array=True, ) - dst_transform = data_dict['transform'] - dst_crs = data_dict['crs'] - dst_height = data_dict['height'] - dst_width = data_dict['width'] + dst_transform = data_dict["transform"] + dst_crs = data_dict["crs"] + dst_height = data_dict["height"] + dst_width = data_dict["width"] # Get the transformed coordinates left = dst_transform[2] @@ -856,9 +861,9 @@ def transform_crs( if coords_only: attrs.update( { - 'crs': dst_crs, - 'transform': dst_transform[:6], - 'res': (cellx, celly), + "crs": dst_crs, + "transform": dst_transform[:6], + "res": (cellx, celly), } ) @@ -866,17 +871,17 @@ def transform_crs( else: # The transformed array is a dask Delayed object - data_dst = data_dict['array'] + data_dst = data_dict["array"] if not dst_res: dst_res = (abs(x[1] - x[0]), abs(y[0] - y[1])) attrs.update( { - 'transform': tuple(dst_transform)[:6], - 'crs': dst_crs, - 'res': dst_res, - 'resampling': resampling, + "transform": tuple(dst_transform)[:6], + "crs": dst_crs, + "res": dst_res, + "resampling": resampling, } ) # Get the DataArray from the delayed object @@ -885,7 +890,7 @@ def transform_crs( shape=(data_src.gw.nbands, dst_height, dst_width), dtype=data_src.dtype, chunks=data_src.data.chunksize, - coords={'band': data_src.band.values.tolist(), 'y': y, 'x': x}, + coords={"band": 
@@ -787,7 +792,7 @@ def transform_crs(
     src_nodata=None,
     dst_nodata=None,
     coords_only=False,
-    resampling='nearest',
+    resampling="nearest",
     warp_mem_limit=512,
     num_threads=1,
 ):
@@ -834,10 +839,10 @@ def transform_crs(
         return_as_dict=True,
         delayed_array=True,
     )
-    dst_transform = data_dict['transform']
-    dst_crs = data_dict['crs']
-    dst_height = data_dict['height']
-    dst_width = data_dict['width']
+    dst_transform = data_dict["transform"]
+    dst_crs = data_dict["crs"]
+    dst_height = data_dict["height"]
+    dst_width = data_dict["width"]
 
     # Get the transformed coordinates
     left = dst_transform[2]
@@ -856,9 +861,9 @@ def transform_crs(
     if coords_only:
         attrs.update(
             {
-                'crs': dst_crs,
-                'transform': dst_transform[:6],
-                'res': (cellx, celly),
+                "crs": dst_crs,
+                "transform": dst_transform[:6],
+                "res": (cellx, celly),
             }
         )
@@ -866,17 +871,17 @@ def transform_crs(
     else:
         # The transformed array is a dask Delayed object
-        data_dst = data_dict['array']
+        data_dst = data_dict["array"]
 
         if not dst_res:
             dst_res = (abs(x[1] - x[0]), abs(y[0] - y[1]))
 
         attrs.update(
             {
-                'transform': tuple(dst_transform)[:6],
-                'crs': dst_crs,
-                'res': dst_res,
-                'resampling': resampling,
+                "transform": tuple(dst_transform)[:6],
+                "crs": dst_crs,
+                "res": dst_res,
+                "resampling": resampling,
             }
         )
         # Get the DataArray from the delayed object
@@ -885,7 +890,7 @@ def transform_crs(
         shape=(data_src.gw.nbands, dst_height, dst_width),
         dtype=data_src.dtype,
         chunks=data_src.data.chunksize,
-        coords={'band': data_src.band.values.tolist(), 'y': y, 'x': x},
+        coords={"band": data_src.band.values.tolist(), "y": y, "x": x},
         attrs=attrs,
     )
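Note: `transform_crs()` derives the output coordinates from the destination affine transform (`dst_transform[2]` is the left edge, `dst_transform[5]` the top). A sketch of that arithmetic with hypothetical values (assuming the `affine` package, a rasterio dependency):

```python
import numpy as np
from affine import Affine

# Hypothetical 30 m destination grid: index [0] is the cell width,
# [2] the left edge, [4] the negative cell height, and [5] the top edge.
dst_transform = Affine(30.0, 0.0, 300000.0, 0.0, -30.0, 7300000.0)
dst_width, dst_height = 4, 3

cellx, celly = abs(dst_transform[0]), abs(dst_transform[4])
left, top = dst_transform[2], dst_transform[5]
# Cell-center coordinate vectors for the transformed grid.
x = left + cellx / 2.0 + np.arange(dst_width) * cellx
y = top - celly / 2.0 - np.arange(dst_height) * celly
print(x, y)
```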
diff --git a/src/geowombat/core/api.py b/src/geowombat/core/api.py
index c332fd4e..152f0992 100644
--- a/src/geowombat/core/api.py
+++ b/src/geowombat/core/api.py
@@ -28,7 +28,7 @@
 
 from ..backends import concat as gw_concat
 from ..backends import mosaic as gw_mosaic
-from ..backends import warp_open
+from ..backends import warp_open, _check_config_globals
 from ..backends.rasterio_ import check_src_crs
 from ..config import _set_defaults, config
 from ..handler import add_handler
@@ -513,6 +513,18 @@ def __init__(
                 w = src.block_window(1, 0, 0)
                 kwargs["chunks"] = (band_chunks, w.height, w.width)
 
+        # kw = {
+        #     "crs": None,
+        #     "res": None,
+        # }
+
+        # kw = _check_config_globals(filename, "reference", kw)
+
+        # if all(value is not None for value in kw.values()):
+        #     logger.exception(
+        #         """ Changes to the reference image, CRS, or resolution are not allowed when opening multiple images.\nPlease write changes to disk before opening multiple images."""
+        #     )
+
         if mosaic:
             # Mosaic images over space
             self.data = gw_mosaic(
@@ -770,12 +782,12 @@ def load(
         with open(
             image_list[0],
             time_names=time_names[0],
-            band_names=band_names
-            if not str(image_list[0]).endswith(".nc")
-            else None,
-            netcdf_vars=band_names
-            if str(image_list[0]).endswith(".nc")
-            else None,
+            band_names=(
+                band_names if not str(image_list[0]).endswith(".nc") else None
+            ),
+            netcdf_vars=(
+                band_names if str(image_list[0]).endswith(".nc") else None
+            ),
             chunks=chunks,
         ) as src:
             pass
@@ -1230,19 +1242,24 @@ def apply(
             with pool(num_workers) as executor:
                 data_gen = (
                     (
-                        w,
-                        self.read(
-                            bands, window=w[1], gain=gain, offset=offset
-                        ),
-                        self.band_dict,
-                    )
-                    if self.padding
-                    else (
-                        w,
-                        self.read(
-                            bands, window=w, gain=gain, offset=offset
-                        ),
-                        self.band_dict,
+                        (
+                            w,
+                            self.read(
+                                bands,
+                                window=w[1],
+                                gain=gain,
+                                offset=offset,
+                            ),
+                            self.band_dict,
+                        )
+                        if self.padding
+                        else (
+                            w,
+                            self.read(
+                                bands, window=w, gain=gain, offset=offset
+                            ),
+                            self.band_dict,
+                        )
                     )
                     for w in self.windows_
                 )
diff --git a/src/geowombat/core/geoxarray.py b/src/geowombat/core/geoxarray.py
index 38767019..4034038d 100644
--- a/src/geowombat/core/geoxarray.py
+++ b/src/geowombat/core/geoxarray.py
@@ -62,7 +62,7 @@ def _update_kwargs(self, **kwargs):
         return kwargs
 
 
-@xr.register_dataarray_accessor('gw')
+@xr.register_dataarray_accessor("gw")
 class GeoWombatAccessor(_UpdateConfig, _DataProperties):
     """A method to access a ``xarray.DataArray``. This class is typically not
     accessed directly, but rather through a call to ``geowombat.open``.
@@ -84,8 +84,8 @@ def filenames(self) -> T.Sequence[T.Union[str, _Path]]:
             ``list``
         """
         return (
-            self._obj.attrs['_filenames']
-            if '_filenames' in self._obj.attrs
+            self._obj.attrs["_filenames"]
+            if "_filenames" in self._obj.attrs
             else []
         )
@@ -97,8 +97,8 @@ def data_are_separate(self) -> bool:
             ``bool``
         """
         return (
-            bool(self._obj.attrs['_data_are_separate'])
-            if '_data_are_separate' in self._obj.attrs
+            bool(self._obj.attrs["_data_are_separate"])
+            if "_data_are_separate" in self._obj.attrs
             else False
         )
@@ -110,8 +110,8 @@ def data_are_stacked(self) -> bool:
             ``bool``
         """
         return (
-            bool(self._obj.attrs['_data_are_stacked'])
-            if '_data_are_stacked' in self._obj.attrs
+            bool(self._obj.attrs["_data_are_stacked"])
+            if "_data_are_stacked" in self._obj.attrs
             else False
         )
@@ -169,7 +169,7 @@ def mask(
         self,
         df: T.Union[str, _Path, gpd.GeoDataFrame],
         query: T.Optional[str] = None,
-        keep: T.Optional[str] = 'in',
+        keep: T.Optional[str] = "in",
     ) -> xr.DataArray:
         """Masks a DataArray.
 
@@ -191,6 +191,11 @@ def mask_nodata(self) -> xr.DataArray:
             ``xarray.DataArray``
         """
         nodata_value = self._obj.gw.nodataval
+
+        if nodata_value is not None and np.isnan(nodata_value):
+            print("skipping nodata mask")
+            return self._obj
+
         if nodata_value is None:
             warnings.warn(
                 "The 'no data' value is None, so masking cannot be applied."
@@ -201,17 +206,17 @@ def mask_nodata(self) -> xr.DataArray:
         if not np.issubdtype(self._obj.gw.dtype, np.floating):
             if isinstance(nodata_value, float):
                 if not np.issubdtype(self._obj.gw.dtype, np.floating):
-                    self._obj = self._obj.astype('float64')
+                    self._obj = self._obj.astype("float64")
             else:
                 if nodata_value > abs(np.iinfo(self._obj.gw.dtype).max):
                     for dtype_ in [
-                        'uint8',
-                        'int16',
-                        'uint16',
-                        'int32',
-                        'uint32',
-                        'int64',
-                        'uint64',
+                        "uint8",
+                        "int16",
+                        "uint16",
+                        "int32",
+                        "uint32",
+                        "int64",
+                        "uint64",
                     ]:
                         if nodata_value <= abs(np.iinfo(dtype_).max):
                             if self._obj.gw.dtype != dtype_:
@@ -236,8 +241,8 @@ def assign_nodata_attrs(self, nodata: T.Union[float, int]) -> xr.DataArray:
         """
         return self._obj.assign_attrs(
             **{
-                'nodatavals': (nodata,) * self._obj.gw.nbands,
-                '_FillValue': nodata,
+                "nodatavals": (nodata,) * self._obj.gw.nbands,
+                "_FillValue": nodata,
             }
         )
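Note: the early return added to `mask_nodata()` handles the NaN case explicitly (guarded against a `None` nodata value, which `np.isnan` would reject), since an equality test can never match NaN. In two lines:

```python
import numpy as np

print(np.nan == np.nan)  # False: comparison-based masking cannot match NaN
print(np.isnan(np.nan))  # True: so a NaN 'no data' value is detected and skipped
```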
@@ -299,20 +304,20 @@ def compare(
             >>>     # Mask all values greater than 10
             >>>     thresh = src.gw.compare(op='lt', b=10)
         """
-        if op not in ['lt', 'le', 'gt', 'ge', 'eq', 'ne']:
-            raise NameError('The comparison operation is not supported.')
+        if op not in ["lt", "le", "gt", "ge", "eq", "ne"]:
+            raise NameError("The comparison operation is not supported.")
 
-        if op == 'lt':
+        if op == "lt":
             out = self._obj.where(self._obj < b)
-        elif op == 'le':
+        elif op == "le":
             out = self._obj.where(self._obj <= b)
-        elif op == 'gt':
+        elif op == "gt":
             out = self._obj.where(self._obj > b)
-        elif op == 'ge':
+        elif op == "ge":
             out = self._obj.where(self._obj >= b)
-        elif op == 'eq':
+        elif op == "eq":
             out = self._obj.where(self._obj == b)
-        elif op == 'ne':
+        elif op == "ne":
             out = self._obj.where(self._obj != b)
 
         if return_binary:
@@ -385,7 +390,7 @@ def recode(
     def bounds_overlay(
         self,
         bounds: T.Union[tuple, _BoundingBox],
-        how: T.Optional[str] = 'intersects',
+        how: T.Optional[str] = "intersects",
     ) -> bool:
         """Checks whether the bounds overlay the image bounds.
 
@@ -451,7 +456,7 @@ def windows(
         self,
         row_chunks: int = None,
         col_chunks: int = None,
-        return_type: T.Optional[str] = 'window',
+        return_type: T.Optional[str] = "window",
         ndim: T.Optional[int] = 2,
     ):
         """Generates windows for a row/column iteration.
@@ -465,7 +470,7 @@ def windows(
         Returns:
             ``yields`` ``xarray.DataArray``, ``tuple``, or ``rasterio.windows.Window``
         """
-        if return_type not in ['data', 'slice', 'window']:
+        if return_type not in ["data", "slice", "window"]:
             raise NameError(
                 "The return type must be one of 'data', 'slice', or 'window'."
             )
@@ -486,7 +491,7 @@ def windows(
             for col_off in range(0, self._obj.gw.ncols, cchunks):
                 width = n_rows_cols(col_off, cchunks, self._obj.gw.ncols)
 
-                if return_type == 'data':
+                if return_type == "data":
                     if ndim == 2:
                         yield self._obj[
                             row_off : row_off + height,
@@ -499,7 +504,7 @@ def windows(
                         )
                         yield self._obj[slicer]
 
-                elif return_type == 'slice':
+                elif return_type == "slice":
                     if ndim == 2:
                         yield (
                             slice(row_off, row_off + height),
@@ -511,7 +516,7 @@ def windows(
                             slice(col_off, col_off + width),
                         )
 
-                elif return_type == 'window':
+                elif return_type == "window":
                     yield _Window(
                         row_off=row_off,
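Note: a hedged usage sketch of the `windows()` generator with its `return_type` modes, using the packaged Landsat scene:

```python
import geowombat as gw
from geowombat.data import l8_224078_20200518

with gw.open(l8_224078_20200518) as src:
    # 'window' yields rasterio.windows.Window objects; 'data' and 'slice'
    # yield DataArray chunks and slice tuples, respectively.
    for w in src.gw.windows(row_chunks=512, col_chunks=512, return_type="window"):
        print(w.row_off, w.col_off, w.height, w.width)
        break
```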
@@ -525,7 +530,7 @@ def imshow(
         mask: T.Optional[bool] = False,
         nodata: T.Optional[int] = 0,
         flip: T.Optional[bool] = False,
-        text_color: T.Optional[str] = 'black',
+        text_color: T.Optional[str] = "black",
         rot: T.Optional[int] = 30,
         **kwargs,
     ) -> None:
@@ -622,7 +627,7 @@ def transform_crs(
         src_nodata=None,
         dst_nodata=None,
         coords_only=False,
-        resampling='nearest',
+        resampling="nearest",
         warp_mem_limit=512,
         num_threads=1,
     ) -> xr.DataArray:
@@ -728,13 +733,13 @@ def to_netcdf(
     def save(
         self,
         filename: T.Union[str, _Path],
-        mode: T.Optional[str] = 'w',
+        mode: T.Optional[str] = "w",
         nodata: T.Optional[T.Union[float, int]] = None,
         overwrite: bool = False,
         client: T.Optional[_Client] = None,
         compute: T.Optional[bool] = True,
         tags: T.Optional[dict] = None,
-        compress: T.Optional[str] = 'none',
+        compress: T.Optional[str] = "none",
         compression: T.Optional[str] = None,
         num_workers: T.Optional[int] = 1,
         log_progress: T.Optional[bool] = True,
@@ -807,19 +812,19 @@ def to_raster(
         readxsize=None,
         readysize=None,
         separate=False,
-        out_block_type='gtiff',
+        out_block_type="gtiff",
         keep_blocks=False,
         verbose=0,
         overwrite=False,
         gdal_cache=512,
-        scheduler='processes',
+        scheduler="processes",
         n_jobs=1,
         n_workers=None,
         n_threads=None,
         n_chunks=None,
         overviews=False,
-        resampling='nearest',
-        driver='GTiff',
+        resampling="nearest",
+        driver="GTiff",
         nodata=None,
         blockxsize=512,
         blockysize=512,
@@ -877,14 +882,14 @@ def to_raster(
             >>>     ds.gw.to_raster('output.tif', n_jobs=8, compress='lzw')
         """
-        if not hasattr(self._obj, 'crs'):
+        if not hasattr(self._obj, "crs"):
             raise AttributeError(
-                'The DataArray does not have a `crs` attribute.'
+                "The DataArray does not have a `crs` attribute."
             )
 
-        if not hasattr(self._obj, 'transform'):
+        if not hasattr(self._obj, "transform"):
             raise AttributeError(
-                'The DataArray does not have a `transform` attribute.'
+                "The DataArray does not have a `transform` attribute."
             )
 
         kwargs = self._update_kwargs(
@@ -896,23 +901,23 @@ def to_raster(
         )
 
         # Keywords for rasterio profile
-        if 'crs' not in kwargs:
-            kwargs['crs'] = self._obj.crs
+        if "crs" not in kwargs:
+            kwargs["crs"] = self._obj.crs
 
-        if 'transform' not in kwargs:
-            kwargs['transform'] = self._obj.transform
+        if "transform" not in kwargs:
+            kwargs["transform"] = self._obj.transform
 
-        if 'width' not in kwargs:
-            kwargs['width'] = self._obj.gw.ncols
+        if "width" not in kwargs:
+            kwargs["width"] = self._obj.gw.ncols
 
-        if 'height' not in kwargs:
-            kwargs['height'] = self._obj.gw.nrows
+        if "height" not in kwargs:
+            kwargs["height"] = self._obj.gw.nrows
 
-        if 'count' not in kwargs:
-            kwargs['count'] = self._obj.gw.nbands
+        if "count" not in kwargs:
+            kwargs["count"] = self._obj.gw.nbands
 
-        if 'dtype' not in kwargs:
-            kwargs['dtype'] = self._obj.data.dtype.name
+        if "dtype" not in kwargs:
+            kwargs["dtype"] = self._obj.data.dtype.name
 
         to_raster(
             self._obj,
@@ -1029,7 +1034,7 @@ def apply(
 
         cluster.start()
 
-        with joblib.parallel_backend('loky', n_jobs=n_jobs):
+        with joblib.parallel_backend("loky", n_jobs=n_jobs):
             ds_sub = user_func(self._obj)
             ds_sub.attrs = self._obj.attrs
@@ -1091,7 +1096,7 @@ def clip(
             ``xarray.DataArray``
         """
         warnings.warn(
-            'The method clip() will be deprecated in >=2.2.0. Use clip_by_polygon() instead.',
+            "The method clip() will be deprecated in >=2.2.0. Use clip_by_polygon() instead.",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -1157,16 +1162,15 @@ def subset(
     def calc_area(
         self,
         values: T.Sequence[T.Union[float, int]],
-        op: str = 'eq',
-        units: str = 'km2',
+        op: str = "eq",
+        units: str = "km2",
         row_chunks: int = None,
         col_chunks: int = None,
         n_workers: int = 1,
         n_threads: int = 1,
-        scheduler: str = 'threads',
+        scheduler: str = "threads",
         n_chunks: int = 100,
     ) -> pd.DataFrame:
-
         """Calculates the area of data values.
 
         Args:
@@ -1217,7 +1221,7 @@ def calc_area(
 
     def sample(
         self,
-        method: str = 'random',
+        method: str = "random",
         band: T.Union[int, str] = None,
         n: int = None,
         strata: T.Optional[T.Dict[str, T.Union[float, int]]] = None,
@@ -1228,7 +1232,6 @@ def sample(
         verbose: T.Optional[int] = 1,
         **kwargs,
     ) -> gpd.GeoDataFrame:
-
         """Generates samples from a raster.
 
         Args:
@@ -1300,8 +1303,8 @@ def extract(
         frac: float = 1.0,
         min_frac_area: T.Optional[T.Union[float, int]] = None,
         all_touched: T.Optional[bool] = False,
-        id_column: T.Optional[str] = 'id',
-        time_format: T.Optional[str] = '%Y%m%d',
+        id_column: T.Optional[str] = "id",
+        time_format: T.Optional[str] = "%Y%m%d",
         mask: T.Optional[T.Union[_Polygon, gpd.GeoDataFrame]] = None,
         n_jobs: T.Optional[int] = 8,
         verbose: T.Optional[int] = 0,
@@ -1418,16 +1421,16 @@ def band_mask(
         """
         mask = (
             self._obj.where(self._obj.sel(band=valid_bands) > 0)
-            .count(dim='band')
-            .expand_dims(dim='band')
-            .assign_coords(band=['mask'])
-            .astype('uint8')
+            .count(dim="band")
+            .expand_dims(dim="band")
+            .assign_coords(band=["mask"])
+            .astype("uint8")
         )
 
         if isinstance(src_nodata, (float, int)):
             return xr.where(
                 (mask < len(valid_bands))
-                | (self._obj.sel(band='blue') == src_nodata),
+                | (self._obj.sel(band="blue") == src_nodata),
                 dst_mask_val,
                 dst_clear_val,
             ).assign_attrs(**self._obj.attrs)
@@ -1466,21 +1469,21 @@ def set_nodata(
             >>> with gw.open('image.tif') as src:
             >>>     src = src.gw.set_nodata(0, 65535, out_range=(0, 10000), dtype='uint16')
         """
-        if self.config['nodata'] is not None:
-            src_nodata = self.config['nodata']
+        if self.config["nodata"] is not None:
+            src_nodata = self.config["nodata"]
         elif src_nodata is None:
             src_nodata = self._obj.gw.nodataval
 
         if dst_nodata is None:
             dst_nodata = np.nan
 
-        if self.config['scale_factor'] is not None:
-            scale_factor = self.config['scale_factor']
+        if self.config["scale_factor"] is not None:
+            scale_factor = self.config["scale_factor"]
         elif scale_factor is None:
             scale_factor = self._obj.gw.scaleval
 
-        if self.config['offset'] is not None:
-            offset = self.config['offset']
+        if self.config["offset"] is not None:
+            offset = self.config["offset"]
         elif offset is None:
             offset = self._obj.gw.offsetval
@@ -1503,19 +1506,19 @@ def set_nodata(
             data = data.clip(out_range[0], out_range[1])
 
         if self._obj.gw.has_time_coord:
-            data = data.transpose('time', 'band', 'y', 'x').astype(dtype)
+            data = data.transpose("time", "band", "y", "x").astype(dtype)
         else:
-            data = data.transpose('band', 'y', 'x').astype(dtype)
+            data = data.transpose("band", "y", "x").astype(dtype)
 
         # These now refer to the new, scaled data
-        attrs['scales'] = (1.0,) * self._obj.gw.nbands
-        attrs['offsets'] = (0,) * self._obj.gw.nbands
+        attrs["scales"] = (1.0,) * self._obj.gw.nbands
+        attrs["offsets"] = (0,) * self._obj.gw.nbands
 
         return data.assign_attrs(**attrs).gw.assign_nodata_attrs(dst_nodata)
 
     def moving(
         self,
-        stat: str = 'mean',
+        stat: str = "mean",
         perc: T.Union[float, int] = 50,
         w: int = 3,
         nodata: T.Optional[T.Union[float, int]] = None,
@@ -1974,14 +1977,14 @@ def norm_brdf(
             central_lat = project_coords(
                 np.array(
                     [self._obj.x.values[int(self._obj.x.shape[0] / 2)]],
-                    dtype='float64',
+                    dtype="float64",
                 ),
                 np.array(
                     [self._obj.y.values[int(self._obj.y.shape[0] / 2)]],
-                    dtype='float64',
+                    dtype="float64",
                 ),
                 self._obj.crs,
-                {'init': 'epsg:4326'},
+                {"init": "epsg:4326"},
             )[1][0]
 
         return _BRDF().norm_brdf(
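Note: `norm_brdf()` looks up the scene's central latitude by projecting the center x/y to geographic coordinates. An equivalent sketch with pyproj and hypothetical UTM values:

```python
from pyproj import Transformer

# Project a hypothetical center coordinate from UTM 21S to EPSG:4326;
# only the latitude is used by the BRDF normalization.
transformer = Transformer.from_crs("EPSG:32721", "EPSG:4326", always_xy=True)
lon, central_lat = transformer.transform(500000.0, 7300000.0)
print(central_lat)
```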
diff --git a/src/geowombat/data/__init__.py b/src/geowombat/data/__init__.py
index 8cecdfb2..5746ab40 100644
--- a/src/geowombat/data/__init__.py
+++ b/src/geowombat/data/__init__.py
@@ -18,59 +18,62 @@
 
 p = Path(__file__).absolute().parent
 
-rgbn = str(p / 'rgbn.tif')
-rgbn_suba = str(p / 'rgbn_suba.tif')
-rgbn_subb = str(p / 'rgbn_subb.tif')
-rgbn_20160101 = str(p / 'rgbn_20160101.tif')
-rgbn_20160401 = str(p / 'rgbn_20160401.tif')
-rgbn_20160517 = str(p / 'rgbn_20160517.tif')
-rgbn_20170203 = str(p / 'rgbn_20170203.tif')
+rgbn = str(p / "rgbn.tif")
+rgbn_suba = str(p / "rgbn_suba.tif")
+rgbn_subb = str(p / "rgbn_subb.tif")
+rgbn_20160101 = str(p / "rgbn_20160101.tif")
+rgbn_20160401 = str(p / "rgbn_20160401.tif")
+rgbn_20160517 = str(p / "rgbn_20160517.tif")
+rgbn_20170203 = str(p / "rgbn_20170203.tif")
 rgbn_time_list = [rgbn_20160101, rgbn_20160401, rgbn_20160517, rgbn_20170203]
 
 l8_224077_20200518_B2_60m = str(
-    p / 'LC08_L1TP_224077_20200518_20200518_01_RT_B2_60m.TIF'
+    p / "LC08_L1TP_224077_20200518_20200518_01_RT_B2_60m.TIF"
 )
 l8_224077_20200518_B2 = str(
-    p / 'LC08_L1TP_224077_20200518_20200518_01_RT_B2.TIF'
+    p / "LC08_L1TP_224077_20200518_20200518_01_RT_B2.TIF"
 )
 l8_224077_20200518_B3 = str(
-    p / 'LC08_L1TP_224077_20200518_20200518_01_RT_B3.TIF'
+    p / "LC08_L1TP_224077_20200518_20200518_01_RT_B3.TIF"
 )
 l8_224077_20200518_B4 = str(
-    p / 'LC08_L1TP_224077_20200518_20200518_01_RT_B4.TIF'
+    p / "LC08_L1TP_224077_20200518_20200518_01_RT_B4.TIF"
 )
 l8_224078_20200518_B2 = str(
-    p / 'LC08_L1TP_224078_20200518_20200518_01_RT_B2.TIF'
+    p / "LC08_L1TP_224078_20200518_20200518_01_RT_B2.TIF"
 )
 l8_224078_20200518_B3 = str(
-    p / 'LC08_L1TP_224078_20200518_20200518_01_RT_B3.TIF'
+    p / "LC08_L1TP_224078_20200518_20200518_01_RT_B3.TIF"
 )
 l8_224078_20200518_B4 = str(
-    p / 'LC08_L1TP_224078_20200518_20200518_01_RT_B4.TIF'
+    p / "LC08_L1TP_224078_20200518_20200518_01_RT_B4.TIF"
 )
-l8_224078_20200518 = str(p / 'LC08_L1TP_224078_20200518_20200518_01_RT.TIF')
+l8_224078_20200518 = str(p / "LC08_L1TP_224078_20200518_20200518_01_RT.TIF")
 l8_224078_20200518_points = str(
-    p / 'LC08_L1TP_224078_20200518_20200518_01_RT_points.gpkg'
+    p / "LC08_L1TP_224078_20200518_20200518_01_RT_points.gpkg"
 )
 l8_224078_20200518_polygons = str(
-    p / 'LC08_L1TP_224078_20200518_20200518_01_RT_polygons.gpkg'
+    p / "LC08_L1TP_224078_20200518_20200518_01_RT_polygons.gpkg"
 )
 l3b_s2b_00390821jxn0l2a_20210319_20220730_c01 = str(
-    p / 'L3B_S2B_00390821JXN0L2A_20210319_20220730_C01.nc'
+    p / "L3B_S2B_00390821JXN0L2A_20210319_20220730_C01.nc"
 )
 l8_224078_20200127_meta = str(
     p
-    / 'LC08_L2SP_224078_20200127_02_T1_LC08_L2SP_224078_20200127_20200823_02_T1_MTL.txt'
+    / "LC08_L2SP_224078_20200127_02_T1_LC08_L2SP_224078_20200127_20200823_02_T1_MTL.txt"
 )
 l7_225078_20110306_ang = str(
     p
-    / 'LE07_L2SP_225078_20110306_02_T1_LE07_L2SP_225078_20110306_20200910_02_T1_ANG.txt'
+    / "LE07_L2SP_225078_20110306_02_T1_LE07_L2SP_225078_20110306_20200910_02_T1_ANG.txt"
 )
-l7_225078_20110306_SZA = str(p / 'LE07_L2SP_225078_20110306_02_T1_SZA.tif')
-l7_225078_20110306_B1 = str(p / 'LE07_L2SP_225078_20110306_02_T1_B1.tif')
-srtm30m_bounding_boxes = str(p / 'srtm30m_bounding_boxes.gpkg')
-wrs2 = str(p / 'wrs2.tar.gz')
+l7_225078_20110306_SZA = str(p / "LE07_L2SP_225078_20110306_02_T1_SZA.tif")
+l7_225078_20110306_B1 = str(p / "LE07_L2SP_225078_20110306_02_T1_B1.tif")
+srtm30m_bounding_boxes = str(p / "srtm30m_bounding_boxes.gpkg")
+wrs2 = str(p / "wrs2.tar.gz")
+
+l8_224077_20200518_B2_nan = str(p / "l8_224077_20200518_B2_nan.tif")
+l8_224078_20200518_B2_nan = str(p / "l8_224078_20200518_B2_nan.tif")
 
 
 class PassKey(object):
@@ -79,12 +82,11 @@ def create_key(key_file):
 
         key = Fernet.generate_key()
 
-        with open(key_file, mode='w') as pf:
-            yaml.dump({'key': key}, pf, default_flow_style=False)
+        with open(key_file, mode="w") as pf:
+            yaml.dump({"key": key}, pf, default_flow_style=False)
 
     @staticmethod
     def create_passcode(key_file, passcode_file):
-
         """
         Args:
             key_file (str)
@@ -93,30 +95,30 @@ def create_passcode(key_file, passcode_file):
 
         passcode = getpass()
 
-        with open(key_file, mode='r') as pf:
+        with open(key_file, mode="r") as pf:
             key = yaml.load(pf, Loader=yaml.FullLoader)
 
-        cipher_suite = Fernet(key['key'])
+        cipher_suite = Fernet(key["key"])
         ciphered_text = cipher_suite.encrypt(passcode.encode())
 
-        with open(passcode_file, mode='w') as pf:
+        with open(passcode_file, mode="w") as pf:
             yaml.dump(
-                {'passcode': ciphered_text}, pf, default_flow_style=False
+                {"passcode": ciphered_text}, pf, default_flow_style=False
             )
 
     @staticmethod
     def load_passcode(key_file, passcode_file):
-        with open(key_file, mode='r') as pf:
+        with open(key_file, mode="r") as pf:
             key = yaml.load(pf, Loader=yaml.FullLoader)
 
-        cipher_suite = Fernet(key['key'])
+        cipher_suite = Fernet(key["key"])
 
-        with open(passcode_file, mode='r') as pf:
+        with open(passcode_file, mode="r") as pf:
             ciphered_text = yaml.load(pf, Loader=yaml.FullLoader)
 
-        return cipher_suite.decrypt(ciphered_text['passcode'])
+        return cipher_suite.decrypt(ciphered_text["passcode"])
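Note: a self-contained round trip of the Fernet scheme used by `PassKey` (key generation, encrypt, decrypt):

```python
from cryptography.fernet import Fernet

key = Fernet.generate_key()
cipher_suite = Fernet(key)
ciphered_text = cipher_suite.encrypt(b"secret-passcode")  # placeholder passcode
assert cipher_suite.decrypt(ciphered_text) == b"secret-passcode"
```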
 
 
 class BaseDownloader(object):
@@ -140,7 +142,7 @@ def download(self, url, outfile, safe_download=True):
             session.auth = (self.username, base64_password)
 
         # Open
-        req = session.request('get', url)
+        req = session.request("get", url)
 
         if safe_download:
             response = session.get(
@@ -150,12 +152,12 @@ def download(self, url, outfile, safe_download=True):
             response = session.get(req.url)
 
         if not response.ok:
-            logger.exception(' Could not retrieve the page.')
+            logger.exception(" Could not retrieve the page.")
             raise NameError
 
-        if 'Content-Length' in response.headers:
+        if "Content-Length" in response.headers:
 
-            content_length = float(response.headers['Content-Length'])
+            content_length = float(response.headers["Content-Length"])
             content_iters = int(math.ceil(content_length / chunk_size))
             chunk_size_ = chunk_size * 1
 
@@ -164,7 +166,7 @@ def download(self, url, outfile, safe_download=True):
             content_iters = 1
             chunk_size_ = chunk_size * 1000
 
-        with open(str(outfile), 'wb') as ofn:
+        with open(str(outfile), "wb") as ofn:
 
             for data in tqdm(
                 response.iter_content(chunk_size=chunk_size_),
@@ -178,7 +180,6 @@ class LUTDownloader(BaseDownloader):
 
 
 class NASAEarthdataDownloader(PassKey, BaseDownloader):
-
     """A class to handle NASA Earthdata downloads.
 
     Args:
@@ -244,7 +245,7 @@ def stream_zipfile():
 
     def download_aerosol(self, year, doy, outfile=None):
         if not outfile:
-            outfile = f'{year}{int(doy):03d}'
+            outfile = f"{year}{int(doy):03d}"
 
         self.download(
             f"https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD04_3K/{year}/{doy}/",
diff --git a/src/geowombat/data/l8_224077_20200518_B2_nan.tif b/src/geowombat/data/l8_224077_20200518_B2_nan.tif
new file mode 100644
index 00000000..18defcd6
Binary files /dev/null and b/src/geowombat/data/l8_224077_20200518_B2_nan.tif differ
diff --git a/src/geowombat/data/l8_224078_20200518_B2_nan.tif b/src/geowombat/data/l8_224078_20200518_B2_nan.tif
new file mode 100644
index 00000000..9d99f5c9
Binary files /dev/null and b/src/geowombat/data/l8_224078_20200518_B2_nan.tif differ
diff --git a/tests/test_open.py b/tests/test_open.py
index fce07234..7765c7e8 100755
--- a/tests/test_open.py
+++ b/tests/test_open.py
@@ -16,6 +16,8 @@
     l8_224077_20200518_B2_60m,
     l8_224078_20200518,
     l8_224078_20200518_B2,
+    l8_224077_20200518_B2_nan,
+    l8_224078_20200518_B2_nan,
 )
 
 
@@ -23,14 +25,14 @@
 class TestOpen(unittest.TestCase):
     def test_open_netcdf(self):
         with gw.open(
             l3b_s2b_00390821jxn0l2a_20210319_20220730_c01,
-            chunks={'band': -1, 'y': 256, 'x': 256},
-            engine='h5netcdf',
+            chunks={"band": -1, "y": 256, "x": 256},
+            engine="h5netcdf",
         ) as src:
             self.assertEqual(src.shape, (6, 668, 668))
             with xr.open_dataset(
                 l3b_s2b_00390821jxn0l2a_20210319_20220730_c01,
-                chunks={'band': -1, 'y': 256, 'x': 256},
-                engine='h5netcdf',
+                chunks={"band": -1, "y": 256, "x": 256},
+                engine="h5netcdf",
             ) as ds:
                 self.assertTrue(np.allclose(src.y.values, ds.y.values))
                 self.assertTrue(np.allclose(src.x.values, ds.x.values))
@@ -43,12 +45,12 @@ def test_open_incompat_res(self):
         with gw.open(l8_224077_20200518_B2) as src30m:
             with gw.open(l8_224077_20200518_B2_60m) as src60m:
                 with self.assertRaises(ValueError):
-                    res = xr.align(src30m, src60m, join='exact')
+                    res = xr.align(src30m, src60m, join="exact")
 
         with self.assertWarns(UserWarning):
             with gw.open(
                 [l8_224077_20200518_B2, l8_224077_20200518_B2_60m],
-                stack_dim='band',
+                stack_dim="band",
                 band_names=[1, 2],
             ) as src:
                 pass
@@ -67,7 +69,7 @@ def test_has_band(self):
 
     def test_has_no_band_coord(self):
         with gw.open(l8_224078_20200518) as src:
-            self.assertFalse(src.drop_vars('band').gw.has_band_coord)
+            self.assertFalse(src.drop_vars("band").gw.has_band_coord)
 
     def test_nodata(self):
         with gw.open(l8_224078_20200518) as src:
@@ -78,7 +80,7 @@ def test_nodata(self):
     def test_open_multiple(self):
         with gw.open(
             [l8_224078_20200518, l8_224078_20200518],
-            stack_dim='time',
+            stack_dim="time",
         ) as src:
             self.assertEqual(src.gw.ntime, 2),
             self.assertTrue(src.gw.has_time_dim)
@@ -86,7 +88,7 @@ def test_open_multiple(self):
 
         with gw.open(
             [l8_224078_20200518_B2, l8_224078_20200518_B2],
-            stack_dim='band',
+            stack_dim="band",
         ) as src:
             self.assertEqual(src.gw.nbands, 2)
             self.assertTrue(src.gw.has_band_dim)
@@ -95,8 +97,8 @@ def test_open_multiple(self):
     def test_open_multiple_same(self):
         with gw.open(
             [l8_224078_20200518, l8_224078_20200518],
-            time_names=['20200518', '20200518'],
-            stack_dim='time',
+            time_names=["20200518", "20200518"],
+            stack_dim="time",
         ) as src:
             self.assertEqual(src.gw.ntime, 1)
             self.assertTrue(src.gw.has_time_dim)
@@ -105,9 +107,9 @@ def test_open_multiple_same(self):
     def test_open_multiple_same_max(self):
         with gw.open(
             [l8_224078_20200518, l8_224078_20200518],
-            time_names=['20200518', '20200518'],
-            stack_dim='time',
-            overlap='max',
+            time_names=["20200518", "20200518"],
+            stack_dim="time",
+            overlap="max",
         ) as src:
             self.assertEqual(src.gw.ntime, 1)
             self.assertTrue(src.gw.has_time_dim)
@@ -116,9 +118,9 @@ def test_open_multiple_same_max(self):
     def test_open_multiple_same_min(self):
         with gw.open(
             [l8_224078_20200518, l8_224078_20200518],
-            time_names=['20200518', '20200518'],
-            stack_dim='time',
-            overlap='min',
+            time_names=["20200518", "20200518"],
+            stack_dim="time",
+            overlap="min",
         ) as src:
             self.assertEqual(src.gw.ntime, 1)
             self.assertTrue(src.gw.has_time_dim)
@@ -127,9 +129,9 @@ def test_open_multiple_same_min(self):
     def test_open_multiple_same_mean(self):
         with gw.open(
             [l8_224078_20200518, l8_224078_20200518],
-            time_names=['20200518', '20200518'],
-            stack_dim='time',
-            overlap='mean',
+            time_names=["20200518", "20200518"],
+            stack_dim="time",
+            overlap="mean",
         ) as src:
             self.assertEqual(src.gw.ntime, 1)
             self.assertTrue(src.gw.has_time_dim)
@@ -139,9 +141,9 @@ def test_union_values(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            bounds_by='union',
+            bounds_by="union",
         ) as src:
             values = src.values[
                 0,
@@ -172,9 +174,9 @@ def test_bounds_union(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            bounds_by='union',
+            bounds_by="union",
         ) as src:
             bounds = src.gw.bounds
         self.assertEqual(
@@ -185,9 +187,9 @@ def test_bounds_intersection(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            bounds_by='intersection',
+            bounds_by="intersection",
         ) as src:
             bounds = src.gw.bounds
         self.assertEqual(
@@ -198,10 +200,10 @@ def test_mosaic_max_bands(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            overlap='max',
-            bounds_by='intersection',
+            overlap="max",
+            bounds_by="intersection",
             nodata=0,
         ) as src:
             self.assertTrue(src.gw.has_band_dim)
@@ -212,10 +214,10 @@ def test_mosaic_max(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            overlap='max',
-            bounds_by='intersection',
+            overlap="max",
+            bounds_by="intersection",
             nodata=0,
         ) as src:
             x, y = lonlat_to_xy(-54.78604601, -25.23023330, dst_crs=src)
@@ -230,19 +232,94 @@ def test_mosaic_max(self):
                         [7938, 7869, 7889],
                         [7862, 7828, 7721],
                     ],
-                    dtype='float32',
+                    dtype="float32",
                 ),
             )
         )
 
+    def test_mosaic_max_nan(self):
+        filenames = [l8_224077_20200518_B2_nan, l8_224078_20200518_B2_nan]
+        with gw.open(
+            filenames,
+            band_names=["blue"],
+            mosaic=True,
+            overlap="max",
+            bounds_by="union",
+            nodata=0,
+        ) as src:
+            start_values = src.values[
+                0,
+                0,
+                0:10,
+            ]
+            end_values = src.values[
+                0,
+                -2,
+                -10:,
+            ]
+            x, y = lonlat_to_xy(-54.78604601, -25.23023330, dst_crs=src)
+            j, i = coords_to_indices(x, y, src)
+            mid_values = src[0, i : i + 3, j : j + 3].values
+            self.assertTrue(
+                np.allclose(
+                    start_values,
+                    np.array(
+                        [
+                            8482.0,
+                            8489.0,
+                            8483.0,
+                            8547.0,
+                            8561.0,
+                            8574.0,
+                            8616.0,
+                            8530.0,
+                            8396.0,
+                            8125.0,
+                        ]
+                    ),
+                ),
+            )
+            self.assertTrue(
+                np.allclose(
+                    mid_values,
+                    np.array(
+                        [
+                            [8387.0, 8183.0, 8050.0],
+                            [7938.0, 7869.0, 7889.0],
+                            [7862.0, 7828.0, 7721.0],
+                        ]
+                    ),
+                )
+            )
+            self.assertTrue(
+                np.allclose(
+                    end_values,
+                    np.array(
+                        [
+                            7409.0,
+                            7427.0,
+                            7490.0,
+                            7444.0,
+                            7502.0,
+                            7472.0,
+                            7464.0,
+                            7443.0,
+                            7406.0,
+                            np.nan,
+                        ]
+                    ),
+                    equal_nan=True,
+                ),
+            )
+
     def test_mosaic_min(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            overlap='min',
-            bounds_by='intersection',
+            overlap="min",
+            bounds_by="intersection",
             nodata=0,
         ) as src:
             x, y = lonlat_to_xy(-54.78604601, -25.23023330, dst_crs=src)
@@ -257,19 +334,95 @@ def test_mosaic_min(self):
                         [7934, 7867, 7885],
                         [7861, 7826, 7721],
                     ],
-                    dtype='float32',
+                    dtype="float32",
                 ),
             )
         )
 
+    def test_mosaic_min_nan(self):
+        filenames = [l8_224077_20200518_B2_nan, l8_224078_20200518_B2_nan]
+        with gw.open(
+            filenames,
+            band_names=["blue"],
+            mosaic=True,
+            overlap="min",
+            bounds_by="union",
+            nodata=0,
+        ) as src:
+            start_values = src.values[
+                0,
+                0,
+                0:10,
+            ]
+            end_values = src.values[
+                0,
+                -2,
+                -10:,
+            ]
+            x, y = lonlat_to_xy(-54.78604601, -25.23023330, dst_crs=src)
+            j, i = coords_to_indices(x, y, src)
+            mid_values = src[0, i : i + 3, j : j + 3].values
+            self.assertTrue(
+                np.allclose(
+                    start_values,
+                    np.array(
+                        [
+                            8482.0,
+                            8489.0,
+                            8483.0,
+                            8547.0,
+                            8561.0,
+                            8574.0,
+                            8616.0,
+                            8530.0,
+                            8396.0,
+                            8125.0,
+                        ]
+                    ),
+                ),
+            )
+            self.assertTrue(
+                np.allclose(
+                    mid_values,
+                    np.array(
+                        [
+                            [8384.0, 8183.0, 8049.0],
+                            [7934.0, 7867.0, 7885.0],
+                            [7861.0, 7826.0, 7721.0],
+                        ]
+                    ),
+                )
+            )
+            self.assertTrue(
+                np.allclose(
+                    end_values,
+                    np.array(
+                        [
+                            7409.0,
+                            7427.0,
+                            7490.0,
+                            7444.0,
+                            7502.0,
+                            7472.0,
+                            7464.0,
+                            7443.0,
+                            7406.0,
+                            np.nan,
+                        ]
+                    ),
+                    equal_nan=True,
+                ),
+            )
+
     def test_mosaic_mean(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            overlap='mean',
-            bounds_by='intersection',
+            overlap="mean",
+            bounds_by="intersection",
             nodata=0,
         ) as src:
             x, y = lonlat_to_xy(-54.78604601, -25.23023330, dst_crs=src)
@@ -284,19 +437,95 @@ def test_mosaic_mean(self):
                         [7936, 7868, 7887],
                         [7861.5, 7827, 7721],
                     ],
-                    dtype='float32',
+                    dtype="float32",
                 ),
             )
         )
 
+    def test_mosaic_mean_nan(self):
+        filenames = [l8_224077_20200518_B2_nan, l8_224078_20200518_B2_nan]
+        with gw.open(
+            filenames,
+            band_names=["blue"],
+            mosaic=True,
+            overlap="mean",
+            bounds_by="union",
+            nodata=0,
+        ) as src:
+            start_values = src.values[
+                0,
+                0,
+                0:10,
+            ]
+            end_values = src.values[
+                0,
+                -2,
+                -10:,
+            ]
+            x, y = lonlat_to_xy(-54.78604601, -25.23023330, dst_crs=src)
+            j, i = coords_to_indices(x, y, src)
+            mid_values = src[0, i : i + 3, j : j + 3].values
+            self.assertTrue(
+                np.allclose(
+                    start_values,
+                    np.array(
+                        [
+                            8482.0,
+                            8489.0,
+                            8483.0,
+                            8547.0,
+                            8561.0,
+                            8574.0,
+                            8616.0,
+                            8530.0,
+                            8396.0,
+                            8125.0,
+                        ]
+                    ),
+                ),
+            )
+            self.assertTrue(
+                np.allclose(
+                    mid_values,
+                    np.array(
+                        [
+                            [8385.5, 8183.0, 8049.5],
+                            [7936.0, 7868.0, 7887.0],
+                            [7861.5, 7827.0, 7721.0],
+                        ]
+                    ),
+                )
+            )
+            self.assertTrue(
+                np.allclose(
+                    end_values,
+                    np.array(
+                        [
+                            7409.0,
+                            7427.0,
+                            7490.0,
+                            7444.0,
+                            7502.0,
+                            7472.0,
+                            7464.0,
+                            7443.0,
+                            7406.0,
+                            np.nan,
+                        ]
+                    ),
+                    equal_nan=True,
+                ),
+            )
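Note: the `_nan` tests compare expected NaNs with `equal_nan=True`, since NaN never compares equal to itself. A short illustration:

```python
import numpy as np

a = np.array([7406.0, np.nan])
print(np.allclose(a, a))                  # False: NaN != NaN
print(np.allclose(a, a, equal_nan=True))  # True: NaNs treated as equal
```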
 
     def test_footprint_grid(self):
         filenames = [l8_224077_20200518_B2, l8_224078_20200518_B2]
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            overlap='mean',
-            bounds_by='intersection',
+            overlap="mean",
+            bounds_by="intersection",
             nodata=0,
             persist_filenames=True,
         ) as src:
@@ -308,10 +537,10 @@ def test_footprint_grid(self):
 
         with gw.open(
             filenames,
-            band_names=['blue'],
+            band_names=["blue"],
             mosaic=True,
-            overlap='mean',
-            bounds_by='intersection',
+            overlap="mean",
+            bounds_by="intersection",
             nodata=0,
             chunks=128,
             persist_filenames=False,
@@ -322,27 +551,27 @@ def test_footprint_grid(self):
 
     def test_has_time_dim(self):
         with gw.open(
-            [l8_224078_20200518, l8_224078_20200518], stack_dim='time'
+            [l8_224078_20200518, l8_224078_20200518], stack_dim="time"
         ) as src:
             self.assertTrue(src.gw.has_time_dim)
 
     def test_has_time_coord(self):
         with gw.open(
-            [l8_224078_20200518, l8_224078_20200518], stack_dim='time'
+            [l8_224078_20200518, l8_224078_20200518], stack_dim="time"
         ) as src:
             self.assertTrue(src.gw.has_time_coord)
 
     def test_has_time(self):
         with gw.open(
-            [l8_224078_20200518, l8_224078_20200518], stack_dim='time'
+            [l8_224078_20200518, l8_224078_20200518], stack_dim="time"
        ) as src:
             self.assertTrue(src.gw.has_time)
 
     def test_has_no_time_coord(self):
         with gw.open(
-            [l8_224078_20200518, l8_224078_20200518], stack_dim='time'
+            [l8_224078_20200518, l8_224078_20200518], stack_dim="time"
         ) as src:
-            self.assertFalse(src.drop_vars('time').gw.has_time_coord)
+            self.assertFalse(src.drop_vars("time").gw.has_time_coord)
 
     def test_open_path(self):
         with gw.open(Path(l8_224078_20200518)) as src:
@@ -382,8 +611,8 @@ def test_col_chunks_set(self):
             self.assertEqual(src.gw.col_chunks, 64)
 
     def test_dtype(self):
-        with gw.open(l8_224078_20200518, dtype='float64') as src:
-            self.assertEqual(src.dtype, 'float64')
+        with gw.open(l8_224078_20200518, dtype="float64") as src:
+            self.assertEqual(src.dtype, "float64")
 
     def test_count(self):
         with gw.open(l8_224078_20200518) as src, rio.open(
@@ -404,7 +633,7 @@ def test_height(self):
             self.assertEqual(src.gw.ncols, rsrc.width)
 
     def test_transform(self):
-        test_crs = CRS.from_user_input('epsg:4326')
+        test_crs = CRS.from_user_input("epsg:4326")
         with gw.open(l8_224078_20200518) as src:
             result = src.gw.transform_crs(
                 dst_crs=4326,
@@ -416,5 +645,5 @@ def test_transform(self):
             self.assertEqual(test_crs, result.gw.crs_to_pyproj)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()