Source code for eodag_cube.utils.xarray

# -*- coding: utf-8 -*-
# Copyright 2024, CS GROUP - France, http://www.c-s.fr
#
# This file is part of EODAG project
#     https://www.github.com/CS-SI/EODAG
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Xarray-related utilities"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any

import rioxarray
import xarray as xr

from eodag_cube.utils import fsspec_file_extension
from eodag_cube.utils.exceptions import DatasetCreationError

if TYPE_CHECKING:
    from fsspec.core import OpenFile

logger = logging.getLogger("eodag-cube.utils.xarray")


def guess_engines(file: OpenFile) -> list[str]:
    """Guess matching ``xarray`` engines for fsspec :class:`fsspec.core.OpenFile`

    An engine is kept when its backend reports, through ``guess_can_open``, that it
    can open a placeholder path carrying the same extension as ``file``.

    :param file: fsspec https OpenFile
    :returns: engines list
    """
    # xarray backends check path file extension, so a dummy file name is enough
    probe_path = f"foo{fsspec_file_extension(file)}"
    return [
        engine_name
        for engine_name, backend in xr.backends.list_engines().items()
        if backend.guess_can_open(probe_path)
    ]
def _open_rasterio_dataset(file: Any, **xarray_kwargs: Any) -> xr.Dataset:
    """Open a file with :func:`rioxarray.open_rasterio` and return it as a dataset

    :param file: fsspec file object exposing ``fs``, ``path`` and optionally ``full_name``
    :param xarray_kwargs: (optional) keyword arguments passed to :func:`rioxarray.open_rasterio`
    :returns: opened xarray dataset
    """
    # prevents to read all file in memory since rasterio 1.4.0
    # https://github.com/rasterio/rasterio/issues/3232
    opener = file.fs.open if not any(p in file.fs.protocol for p in ["local", "s3"]) else None
    # fix messy protocol with zip+s3
    clean_url = getattr(file, "full_name", file.path).replace("s3://zip+s3://", "zip+s3://")
    # mask_and_scale=True is the default value from RasterioBackend; it is merged into
    # the kwargs (instead of being passed next to them) so that a caller-supplied
    # ``mask_and_scale`` overrides it rather than raising a duplicate-keyword TypeError
    rio_kwargs = {"mask_and_scale": True, **xarray_kwargs}
    opened = rioxarray.open_rasterio(clean_url, opener=opener, **rio_kwargs)

    if isinstance(opened, xr.DataArray):
        return opened.to_dataset(name="band_data")
    if isinstance(opened, list):
        # several datasets were returned: only the first one is kept
        logger.warning(f"Only 1/{len(opened)} datasets list was kept for {file.path}")
        return opened[0]
    return opened


def try_open_dataset(file: OpenFile, **xarray_kwargs: Any) -> xr.Dataset:
    """Try opening xarray dataset from fsspec OpenFile

    If ``engine`` is given in ``xarray_kwargs`` only this engine is tried, otherwise
    candidates come from :func:`guess_engines` (or every available engine if none
    matched). Local data with several candidates is opened once, letting xarray guess
    the engine. In every other case the candidate engines are tried one after the
    other and the first dataset that opens is returned.

    :param file: fsspec https OpenFile
    :param xarray_kwargs: (optional) keyword arguments passed to :func:`xarray.open_dataset`
    :returns: opened xarray dataset
    :raises DatasetCreationError: if the dataset could not be opened
    """
    LOCALFILE_ONLY_ENGINES = ["netcdf4", "cfgrib"]

    if engine := xarray_kwargs.pop("engine", None):
        all_engines = [engine]
    else:
        all_engines = guess_engines(file) or [*xr.backends.list_engines()]

    is_local = "file" in file.fs.protocol
    if is_local:
        engines = all_engines

        # use path str as cfgrib does not support fsspec OpenFile as input
        file_or_path = file.path

        # if no engine was passed, let xarray guess it for local data
        if len(engines) > 1:
            try:
                ds = xr.open_dataset(file_or_path, **xarray_kwargs)
            except Exception as e:
                raise DatasetCreationError(f"Cannot open local dataset {file.path}: {str(e)}") from e
            logger.debug(f"{file.path} opened using {file.fs.protocol} + guessed engine")
            return ds
    else:
        # remove engines that do not support remote access
        # https://tutorial.xarray.dev/intermediate/remote_data/remote-data.html#supported-format-read-from-buffers-remote-access
        engines = [eng for eng in all_engines if eng not in LOCALFILE_ONLY_ENGINES]

        file_or_path = file

    # loop for engines on remote data, as xarray does not always guess it right
    for engine in engines:
        # re-open file to prevent I/O operation on closed file
        # (and `closed` attr does not seem up-to-date)
        try:
            file = file.fs.open(path=file.path)
        except Exception as e:
            logger.debug(f"Could not re-open file: {str(e)}")
        else:
            if not is_local:
                # hand the fresh handle to xarray, otherwise the re-open above would
                # have no effect on what ``xr.open_dataset`` actually reads from
                file_or_path = file

        try:
            if engine == "rasterio":
                ds = _open_rasterio_dataset(file, **xarray_kwargs)
            else:
                ds = xr.open_dataset(file_or_path, engine=engine, **xarray_kwargs)
        except Exception as e:
            # best effort: a failing engine is only logged, the next one is tried
            logger.debug(f"Cannot open {file.path} with {file.fs.protocol} + {engine}: {str(e)}")
        else:
            logger.debug(f"{file.path} opened using {file.fs.protocol} + {engine}")
            return ds

    raise DatasetCreationError(f"None of the engines {engines} could open the dataset at {file.path}.")