ROMS

NOTE: currently broken – I think the url needs updating.

Open In Colab

# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import fsspec
import numpy as np
import xarray as xr

import xarray_subset_grid.accessor  # noqa: F401
from xarray_subset_grid.utils import format_bytes

# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
    "reference",
    fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.best.nc.zarr",
    remote_protocol="s3",
    remote_options={"anon": True},
    target_protocol="s3",
    target_options={"anon": True},
)
m = fs.get_mapper("")

ds = xr.open_dataset(
    m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
)
ds
---------------------------------------------------------------------------
ClientError                               Traceback (most recent call last)
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
    112 try:
--> 113     return await func(*args, **kwargs)
    114 except S3_RETRYABLE_ERRORS as e:

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/aiobotocore/client.py:412, in AioBaseClient._make_api_call(self, operation_name, api_params)
    411     error_class = self.exceptions.from_code(error_code)
--> 412     raise error_class(parsed_response, operation_name)
    413 else:

ClientError: An error occurred (AccessDenied) when calling the GetObject operation: Access Denied

The above exception was the direct cause of the following exception:

PermissionError                           Traceback (most recent call last)
Cell In[2], line 19
      9 fs = fsspec.filesystem(
     10     "reference",
     11     fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.best.nc.zarr",
   (...)
     15     target_options={"anon": True},
     16 )
     17 m = fs.get_mapper("")
---> 19 ds = xr.open_dataset(
     20     m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
     21 )
     22 ds

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/api.py:611, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    599 decoders = _resolve_decoders_kwargs(
    600     decode_cf,
    601     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    607     decode_coords=decode_coords,
    608 )
    610 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 611 backend_ds = backend.open_dataset(
    612     filename_or_obj,
    613     drop_variables=drop_variables,
    614     **decoders,
    615     **kwargs,
    616 )
    617 ds = _dataset_from_backend_dataset(
    618     backend_ds,
    619     filename_or_obj,
   (...)
    629     **kwargs,
    630 )
    631 return ds

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:1173, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel, zarr_version, store, engine)
   1171 filename_or_obj = _normalize_path(filename_or_obj)
   1172 if not store:
-> 1173     store = ZarrStore.open_group(
   1174         filename_or_obj,
   1175         group=group,
   1176         mode=mode,
   1177         synchronizer=synchronizer,
   1178         consolidated=consolidated,
   1179         consolidate_on_close=False,
   1180         chunk_store=chunk_store,
   1181         storage_options=storage_options,
   1182         stacklevel=stacklevel + 1,
   1183         zarr_version=zarr_version,
   1184     )
   1186 store_entrypoint = StoreBackendEntrypoint()
   1187 with close_on_error(store):

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:483, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, stacklevel, zarr_version, write_empty)
    464 @classmethod
    465 def open_group(
    466     cls,
   (...)
    480     write_empty: bool | None = None,
    481 ):
--> 483     zarr_group, consolidate_on_close, close_store_on_close = _get_open_params(
    484         store=store,
    485         mode=mode,
    486         synchronizer=synchronizer,
    487         group=group,
    488         consolidated=consolidated,
    489         consolidate_on_close=consolidate_on_close,
    490         chunk_store=chunk_store,
    491         storage_options=storage_options,
    492         stacklevel=stacklevel,
    493         zarr_version=zarr_version,
    494     )
    496     return cls(
    497         zarr_group,
    498         mode,
   (...)
    504         close_store_on_close,
    505     )

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:1360, in _get_open_params(store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, stacklevel, zarr_version)
   1358     zarr_group = zarr.open_consolidated(store, **open_kwargs)
   1359 else:
-> 1360     zarr_group = zarr.open_group(store, **open_kwargs)
   1361 close_store_on_close = zarr_group.store is not store
   1362 return zarr_group, consolidate_on_close, close_store_on_close

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/hierarchy.py:1575, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, meta_array)
   1572 # ensure store is initialized
   1574 if mode in ["r", "r+"]:
-> 1575     if not contains_group(store, path=path):
   1576         if contains_array(store, path=path):
   1577             raise ContainsArrayError(path)

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:131, in contains_group(store, path, explicit_only)
    129 store_version = getattr(store, "_store_version", 2)
    130 if store_version == 2 or explicit_only:
--> 131     return key in store
    132 else:
    133     if key in store:

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:1496, in FSStore.__contains__(self, key)
   1494 def __contains__(self, key):
   1495     key = self._normalize_key(key)
-> 1496     return key in self.map

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/mapping.py:193, in FSMap.__contains__(self, key)
    191 """Does key exist in mapping?"""
    192 path = self._key_to_str(key)
--> 193 return self.fs.isfile(path)

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:1098, in ReferenceFileSystem.isfile(self, path)
   1097 def isfile(self, path):  # overwrite auto-sync version
-> 1098     return path in self.references

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:543, in LazyReferenceMapper.__contains__(self, item)
    541 def __contains__(self, item):
    542     try:
--> 543         self._load_one_key(item)
    544         return True
    545     except KeyError:

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:275, in LazyReferenceMapper._load_one_key(self, key)
    270 def _load_one_key(self, key):
    271     """Get the reference for one key
    272 
    273     Returns bytes, one-element list or three-element list.
    274     """
--> 275     if key in self._items:
    276         return self._items[key]
    277     elif key in self.zmetadata:

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:141, in LazyReferenceMapper.__getattr__(self, item)
    139 def __getattr__(self, item):
    140     if item in ("_items", "record_size", "zmetadata"):
--> 141         self.setup()
    142         # avoid possible recursion if setup fails somehow
    143         return self.__dict__[item]

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:148, in LazyReferenceMapper.setup(self)
    146 def setup(self):
    147     self._items = {}
--> 148     self._items[".zmetadata"] = self.fs.cat_file(
    149         "/".join([self.root, ".zmetadata"])
    150     )
    151     met = json.loads(self._items[".zmetadata"])
    152     self.record_size = met["record_size"]

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:118, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
    115 @functools.wraps(func)
    116 def wrapper(*args, **kwargs):
    117     self = obj or args[0]
--> 118     return sync(self.loop, func, *args, **kwargs)

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:103, in sync(loop, func, timeout, *args, **kwargs)
    101     raise FSTimeoutError from return_result
    102 elif isinstance(return_result, BaseException):
--> 103     raise return_result
    104 else:
    105     return return_result

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:56, in _runner(event, coro, result, timeout)
     54     coro = asyncio.wait_for(coro, timeout=timeout)
     55 try:
---> 56     result[0] = await coro
     57 except Exception as ex:
     58     result[0] = ex

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1128, in S3FileSystem._cat_file(self, path, version_id, start, end)
   1125     finally:
   1126         resp["Body"].close()
-> 1128 return await _error_wrapper(_call_and_read, retries=self.retries)

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
    143         err = e
    144 err = translate_boto_error(err)
--> 145 raise err

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
    111 for i in range(retries):
    112     try:
--> 113         return await func(*args, **kwargs)
    114     except S3_RETRYABLE_ERRORS as e:
    115         err = e

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1115, in S3FileSystem._cat_file.<locals>._call_and_read()
   1114 async def _call_and_read():
-> 1115     resp = await self._call_s3(
   1116         "get_object",
   1117         Bucket=bucket,
   1118         Key=key,
   1119         **version_id_kw(version_id or vers),
   1120         **head,
   1121         **self.req_kw,
   1122     )
   1123     try:
   1124         return await resp["Body"].read()

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:365, in S3FileSystem._call_s3(self, method, *akwarglist, **kwargs)
    363 logger.debug("CALL: %s - %s - %s", method.__name__, akwarglist, kw2)
    364 additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist, **kwargs)
--> 365 return await _error_wrapper(
    366     method, kwargs=additional_kwargs, retries=self.retries
    367 )

File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
    143         err = e
    144 err = translate_boto_error(err)
--> 145 raise err

PermissionError: Access Denied
f"Dataset size: {format_bytes}"

Example Polygon

Drawn with: https://geojson.io

image.png

polygon = np.array(
    [
        [-122.38488806417945, 34.98888604471138],
        [-122.02425311530737, 33.300351211467074],
        [-120.60402628930146, 32.723214427630836],
        [-116.63789131284673, 32.54346959375448],
        [-116.39346090873218, 33.8541384965596],
        [-118.83845767505964, 35.257586401855164],
        [-121.34541503969862, 35.50073821008141],
        [-122.38488806417945, 34.98888604471138],
    ]
)

polygon

We can subset down to the variables we care about, while keeping the grid information for later analysis

ds_temp = ds.xsg.subset_vars(['temp_sur'])
ds_temp
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 ds_temp = ds.xsg.subset_vars(['temp_sur'])
      2 ds_temp

NameError: name 'ds' is not defined

Then we can subset the grid down to the target area

ds_subset = ds_temp.xsg.subset_polygon(polygon)
assert(ds_subset is not None)
ds_subset
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 ds_subset = ds_temp.xsg.subset_polygon(polygon)
      2 assert(ds_subset is not None)
      3 ds_subset

NameError: name 'ds_temp' is not defined
ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")

NameError: name 'ds_subset' is not defined
f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"

NameError: name 'ds_subset' is not defined