Comparison of ROMS subsetting with CO-OPs Subsetting Tool

Open In Colab

# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import fsspec
import numpy as np
import xarray as xr

import xarray_subset_grid.accessor  # noqa: F401

# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
    "reference",
    fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.forecast.20240321.t03z.nc.zarr",
    remote_protocol="s3",
    remote_options={"anon": True},
    target_protocol="s3",
    target_options={"anon": True},
)
m = fs.get_mapper("")

ds = xr.open_dataset(
    m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
)
ds
---------------------------------------------------------------------------
NoSuchKey                                 Traceback (most recent call last)
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
    112 try:
--> 113     return await func(*args, **kwargs)
    114 except S3_RETRYABLE_ERRORS as e:

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/aiobotocore/client.py:411, in AioBaseClient._make_api_call(self, operation_name, api_params)
    410     error_class = self.exceptions.from_code(error_code)
--> 411     raise error_class(parsed_response, operation_name)
    412 else:

NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.

The above exception was the direct cause of the following exception:

FileNotFoundError                         Traceback (most recent call last)
Cell In[2], line 18
      8 fs = fsspec.filesystem(
      9     "reference",
     10     fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.forecast.20240321.t03z.nc.zarr",
   (...)
     14     target_options={"anon": True},
     15 )
     16 m = fs.get_mapper("")
---> 18 ds = xr.open_dataset(
     19     m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
     20 )
     21 ds

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/api.py:571, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    559 decoders = _resolve_decoders_kwargs(
    560     decode_cf,
    561     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    567     decode_coords=decode_coords,
    568 )
    570 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 571 backend_ds = backend.open_dataset(
    572     filename_or_obj,
    573     drop_variables=drop_variables,
    574     **decoders,
    575     **kwargs,
    576 )
    577 ds = _dataset_from_backend_dataset(
    578     backend_ds,
    579     filename_or_obj,
   (...)
    589     **kwargs,
    590 )
    591 return ds

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:1170, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel, zarr_version)
   1149 def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
   1150     self,
   1151     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
   (...)
   1167     zarr_version=None,
   1168 ) -> Dataset:
   1169     filename_or_obj = _normalize_path(filename_or_obj)
-> 1170     store = ZarrStore.open_group(
   1171         filename_or_obj,
   1172         group=group,
   1173         mode=mode,
   1174         synchronizer=synchronizer,
   1175         consolidated=consolidated,
   1176         consolidate_on_close=False,
   1177         chunk_store=chunk_store,
   1178         storage_options=storage_options,
   1179         stacklevel=stacklevel + 1,
   1180         zarr_version=zarr_version,
   1181     )
   1183     store_entrypoint = StoreBackendEntrypoint()
   1184     with close_on_error(store):

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:500, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, stacklevel, zarr_version, write_empty)
    498     zarr_group = zarr.open_consolidated(store, **open_kwargs)
    499 else:
--> 500     zarr_group = zarr.open_group(store, **open_kwargs)
    501 close_store_on_close = zarr_group.store is not store
    502 return cls(
    503     zarr_group,
    504     mode,
   (...)
    510     close_store_on_close,
    511 )

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/hierarchy.py:1575, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, meta_array)
   1572 # ensure store is initialized
   1574 if mode in ["r", "r+"]:
-> 1575     if not contains_group(store, path=path):
   1576         if contains_array(store, path=path):
   1577             raise ContainsArrayError(path)

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:131, in contains_group(store, path, explicit_only)
    129 store_version = getattr(store, "_store_version", 2)
    130 if store_version == 2 or explicit_only:
--> 131     return key in store
    132 else:
    133     if key in store:

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:1496, in FSStore.__contains__(self, key)
   1494 def __contains__(self, key):
   1495     key = self._normalize_key(key)
-> 1496     return key in self.map

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/mapping.py:193, in FSMap.__contains__(self, key)
    191 """Does key exist in mapping?"""
    192 path = self._key_to_str(key)
--> 193 return self.fs.isfile(path)

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:1085, in ReferenceFileSystem.isfile(self, path)
   1084 def isfile(self, path):  # overwrite auto-sync version
-> 1085     return path in self.references

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:541, in LazyReferenceMapper.__contains__(self, item)
    539 def __contains__(self, item):
    540     try:
--> 541         self._load_one_key(item)
    542         return True
    543     except KeyError:

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:274, in LazyReferenceMapper._load_one_key(self, key)
    269 def _load_one_key(self, key):
    270     """Get the reference for one key
    271 
    272     Returns bytes, one-element list or three-element list.
    273     """
--> 274     if key in self._items:
    275         return self._items[key]
    276     elif key in self.zmetadata:

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:141, in LazyReferenceMapper.__getattr__(self, item)
    139 def __getattr__(self, item):
    140     if item in ("_items", "record_size", "zmetadata"):
--> 141         self.setup()
    142         # avoid possible recursion if setup fails somehow
    143         return self.__dict__[item]

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:148, in LazyReferenceMapper.setup(self)
    146 def setup(self):
    147     self._items = {}
--> 148     self._items[".zmetadata"] = self.fs.cat_file(
    149         "/".join([self.root, ".zmetadata"])
    150     )
    151     met = json.loads(self._items[".zmetadata"])
    152     self.record_size = met["record_size"]

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:118, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
    115 @functools.wraps(func)
    116 def wrapper(*args, **kwargs):
    117     self = obj or args[0]
--> 118     return sync(self.loop, func, *args, **kwargs)

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:103, in sync(loop, func, timeout, *args, **kwargs)
    101     raise FSTimeoutError from return_result
    102 elif isinstance(return_result, BaseException):
--> 103     raise return_result
    104 else:
    105     return return_result

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:56, in _runner(event, coro, result, timeout)
     54     coro = asyncio.wait_for(coro, timeout=timeout)
     55 try:
---> 56     result[0] = await coro
     57 except Exception as ex:
     58     result[0] = ex

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1128, in S3FileSystem._cat_file(self, path, version_id, start, end)
   1125     finally:
   1126         resp["Body"].close()
-> 1128 return await _error_wrapper(_call_and_read, retries=self.retries)

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
    143         err = e
    144 err = translate_boto_error(err)
--> 145 raise err

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
    111 for i in range(retries):
    112     try:
--> 113         return await func(*args, **kwargs)
    114     except S3_RETRYABLE_ERRORS as e:
    115         err = e

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1115, in S3FileSystem._cat_file.<locals>._call_and_read()
   1114 async def _call_and_read():
-> 1115     resp = await self._call_s3(
   1116         "get_object",
   1117         Bucket=bucket,
   1118         Key=key,
   1119         **version_id_kw(version_id or vers),
   1120         **head,
   1121         **self.req_kw,
   1122     )
   1123     try:
   1124         return await resp["Body"].read()

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:365, in S3FileSystem._call_s3(self, method, *akwarglist, **kwargs)
    363 logger.debug("CALL: %s - %s - %s", method.__name__, akwarglist, kw2)
    364 additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist, **kwargs)
--> 365 return await _error_wrapper(
    366     method, kwargs=additional_kwargs, retries=self.retries
    367 )

File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
    143         err = e
    144 err = translate_boto_error(err)
--> 145 raise err

FileNotFoundError: The specified key does not exist.
f"Dataset size: {ds.nbytes * 1.0e-9} Gb"

Example Polygon

Drawn with: https://geojson.io

image.png

polygon = np.array(
    [
        [-122.38488806417945, 34.98888604471138],
        [-122.02425311530737, 33.300351211467074],
        [-120.60402628930146, 32.723214427630836],
        [-116.63789131284673, 32.54346959375448],
        [-116.39346090873218, 33.8541384965596],
        [-118.83845767505964, 35.257586401855164],
        [-121.34541503969862, 35.50073821008141],
        [-122.38488806417945, 34.98888604471138],
    ]
)

polygon
# get the subset of the grid that is within the polygon
ds_subset = ds.xsg.grid.subset_polygon(ds[['temp_sur', 'grid']], polygon)
ds_subset
ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")
f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"

Compare against CO-OPs Subsetting Tool

Using the same vertices and only subsetting for temperature:

image.png

ds_subset_control = xr.open_dataset('./wcofs-subset-control.nc',
                                    chunks={},
                                    backend_kwargs={'decode_times': False})
ds_subset_control
ds_subset_control.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")