ROMS¶
NOTE: currently broken – I think the url needs updating.
# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import fsspec
import numpy as np
import xarray as xr
import xarray_subset_grid.accessor # noqa: F401
from xarray_subset_grid.utils import format_bytes
# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
"reference",
fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.best.nc.zarr",
remote_protocol="s3",
remote_options={"anon": True},
target_protocol="s3",
target_options={"anon": True},
)
m = fs.get_mapper("")
ds = xr.open_dataset(
m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
)
ds
---------------------------------------------------------------------------
ClientError Traceback (most recent call last)
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
112 try:
--> 113 return await func(*args, **kwargs)
114 except S3_RETRYABLE_ERRORS as e:
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/aiobotocore/client.py:412, in AioBaseClient._make_api_call(self, operation_name, api_params)
411 error_class = self.exceptions.from_code(error_code)
--> 412 raise error_class(parsed_response, operation_name)
413 else:
ClientError: An error occurred (AccessDenied) when calling the GetObject operation: Access Denied
The above exception was the direct cause of the following exception:
PermissionError Traceback (most recent call last)
Cell In[2], line 19
9 fs = fsspec.filesystem(
10 "reference",
11 fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.best.nc.zarr",
(...)
15 target_options={"anon": True},
16 )
17 m = fs.get_mapper("")
---> 19 ds = xr.open_dataset(
20 m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
21 )
22 ds
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/api.py:611, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
599 decoders = _resolve_decoders_kwargs(
600 decode_cf,
601 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
607 decode_coords=decode_coords,
608 )
610 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 611 backend_ds = backend.open_dataset(
612 filename_or_obj,
613 drop_variables=drop_variables,
614 **decoders,
615 **kwargs,
616 )
617 ds = _dataset_from_backend_dataset(
618 backend_ds,
619 filename_or_obj,
(...)
629 **kwargs,
630 )
631 return ds
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:1173, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel, zarr_version, store, engine)
1171 filename_or_obj = _normalize_path(filename_or_obj)
1172 if not store:
-> 1173 store = ZarrStore.open_group(
1174 filename_or_obj,
1175 group=group,
1176 mode=mode,
1177 synchronizer=synchronizer,
1178 consolidated=consolidated,
1179 consolidate_on_close=False,
1180 chunk_store=chunk_store,
1181 storage_options=storage_options,
1182 stacklevel=stacklevel + 1,
1183 zarr_version=zarr_version,
1184 )
1186 store_entrypoint = StoreBackendEntrypoint()
1187 with close_on_error(store):
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:483, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, stacklevel, zarr_version, write_empty)
464 @classmethod
465 def open_group(
466 cls,
(...)
480 write_empty: bool | None = None,
481 ):
--> 483 zarr_group, consolidate_on_close, close_store_on_close = _get_open_params(
484 store=store,
485 mode=mode,
486 synchronizer=synchronizer,
487 group=group,
488 consolidated=consolidated,
489 consolidate_on_close=consolidate_on_close,
490 chunk_store=chunk_store,
491 storage_options=storage_options,
492 stacklevel=stacklevel,
493 zarr_version=zarr_version,
494 )
496 return cls(
497 zarr_group,
498 mode,
(...)
504 close_store_on_close,
505 )
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:1360, in _get_open_params(store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, stacklevel, zarr_version)
1358 zarr_group = zarr.open_consolidated(store, **open_kwargs)
1359 else:
-> 1360 zarr_group = zarr.open_group(store, **open_kwargs)
1361 close_store_on_close = zarr_group.store is not store
1362 return zarr_group, consolidate_on_close, close_store_on_close
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/hierarchy.py:1575, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, meta_array)
1572 # ensure store is initialized
1574 if mode in ["r", "r+"]:
-> 1575 if not contains_group(store, path=path):
1576 if contains_array(store, path=path):
1577 raise ContainsArrayError(path)
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:131, in contains_group(store, path, explicit_only)
129 store_version = getattr(store, "_store_version", 2)
130 if store_version == 2 or explicit_only:
--> 131 return key in store
132 else:
133 if key in store:
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:1496, in FSStore.__contains__(self, key)
1494 def __contains__(self, key):
1495 key = self._normalize_key(key)
-> 1496 return key in self.map
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/mapping.py:193, in FSMap.__contains__(self, key)
191 """Does key exist in mapping?"""
192 path = self._key_to_str(key)
--> 193 return self.fs.isfile(path)
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:1098, in ReferenceFileSystem.isfile(self, path)
1097 def isfile(self, path): # overwrite auto-sync version
-> 1098 return path in self.references
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:543, in LazyReferenceMapper.__contains__(self, item)
541 def __contains__(self, item):
542 try:
--> 543 self._load_one_key(item)
544 return True
545 except KeyError:
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:275, in LazyReferenceMapper._load_one_key(self, key)
270 def _load_one_key(self, key):
271 """Get the reference for one key
272
273 Returns bytes, one-element list or three-element list.
274 """
--> 275 if key in self._items:
276 return self._items[key]
277 elif key in self.zmetadata:
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:141, in LazyReferenceMapper.__getattr__(self, item)
139 def __getattr__(self, item):
140 if item in ("_items", "record_size", "zmetadata"):
--> 141 self.setup()
142 # avoid possible recursion if setup fails somehow
143 return self.__dict__[item]
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:148, in LazyReferenceMapper.setup(self)
146 def setup(self):
147 self._items = {}
--> 148 self._items[".zmetadata"] = self.fs.cat_file(
149 "/".join([self.root, ".zmetadata"])
150 )
151 met = json.loads(self._items[".zmetadata"])
152 self.record_size = met["record_size"]
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:118, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
115 @functools.wraps(func)
116 def wrapper(*args, **kwargs):
117 self = obj or args[0]
--> 118 return sync(self.loop, func, *args, **kwargs)
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:103, in sync(loop, func, timeout, *args, **kwargs)
101 raise FSTimeoutError from return_result
102 elif isinstance(return_result, BaseException):
--> 103 raise return_result
104 else:
105 return return_result
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:56, in _runner(event, coro, result, timeout)
54 coro = asyncio.wait_for(coro, timeout=timeout)
55 try:
---> 56 result[0] = await coro
57 except Exception as ex:
58 result[0] = ex
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1128, in S3FileSystem._cat_file(self, path, version_id, start, end)
1125 finally:
1126 resp["Body"].close()
-> 1128 return await _error_wrapper(_call_and_read, retries=self.retries)
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
143 err = e
144 err = translate_boto_error(err)
--> 145 raise err
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
111 for i in range(retries):
112 try:
--> 113 return await func(*args, **kwargs)
114 except S3_RETRYABLE_ERRORS as e:
115 err = e
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1115, in S3FileSystem._cat_file.<locals>._call_and_read()
1114 async def _call_and_read():
-> 1115 resp = await self._call_s3(
1116 "get_object",
1117 Bucket=bucket,
1118 Key=key,
1119 **version_id_kw(version_id or vers),
1120 **head,
1121 **self.req_kw,
1122 )
1123 try:
1124 return await resp["Body"].read()
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:365, in S3FileSystem._call_s3(self, method, *akwarglist, **kwargs)
363 logger.debug("CALL: %s - %s - %s", method.__name__, akwarglist, kw2)
364 additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist, **kwargs)
--> 365 return await _error_wrapper(
366 method, kwargs=additional_kwargs, retries=self.retries
367 )
File ~/Hazmat/ThirdPartyPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
143 err = e
144 err = translate_boto_error(err)
--> 145 raise err
PermissionError: Access Denied
f"Dataset size: {format_bytes}"
Example Polygon¶
Drawn with: https://geojson.io

polygon = np.array(
[
[-122.38488806417945, 34.98888604471138],
[-122.02425311530737, 33.300351211467074],
[-120.60402628930146, 32.723214427630836],
[-116.63789131284673, 32.54346959375448],
[-116.39346090873218, 33.8541384965596],
[-118.83845767505964, 35.257586401855164],
[-121.34541503969862, 35.50073821008141],
[-122.38488806417945, 34.98888604471138],
]
)
polygon
We can subset down to the variables we care about, while keeping the grid information for later analysis
ds_temp = ds.xsg.subset_vars(['temp_sur'])
ds_temp
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[3], line 1
----> 1 ds_temp = ds.xsg.subset_vars(['temp_sur'])
2 ds_temp
NameError: name 'ds' is not defined
Then we can subset the grid down to the target area
ds_subset = ds_temp.xsg.subset_polygon(polygon)
assert(ds_subset is not None)
ds_subset
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 1
----> 1 ds_subset = ds_temp.xsg.subset_polygon(polygon)
2 assert(ds_subset is not None)
3 ds_subset
NameError: name 'ds_temp' is not defined
ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")
NameError: name 'ds_subset' is not defined
f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 1
----> 1 f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
NameError: name 'ds_subset' is not defined