Comparison of ROMS subsetting with CO-OPs Subsetting Tool¶
# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import fsspec
import numpy as np
import xarray as xr
import xarray_subset_grid.accessor # noqa: F401
# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
"reference",
fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.forecast.20240321.t03z.nc.zarr",
remote_protocol="s3",
remote_options={"anon": True},
target_protocol="s3",
target_options={"anon": True},
)
m = fs.get_mapper("")
ds = xr.open_dataset(
m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
)
ds
---------------------------------------------------------------------------
NoSuchKey Traceback (most recent call last)
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
112 try:
--> 113 return await func(*args, **kwargs)
114 except S3_RETRYABLE_ERRORS as e:
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/aiobotocore/client.py:411, in AioBaseClient._make_api_call(self, operation_name, api_params)
410 error_class = self.exceptions.from_code(error_code)
--> 411 raise error_class(parsed_response, operation_name)
412 else:
NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
The above exception was the direct cause of the following exception:
FileNotFoundError Traceback (most recent call last)
Cell In[2], line 18
8 fs = fsspec.filesystem(
9 "reference",
10 fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.2ds.forecast.20240321.t03z.nc.zarr",
(...)
14 target_options={"anon": True},
15 )
16 m = fs.get_mapper("")
---> 18 ds = xr.open_dataset(
19 m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
20 )
21 ds
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/api.py:571, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
559 decoders = _resolve_decoders_kwargs(
560 decode_cf,
561 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
567 decode_coords=decode_coords,
568 )
570 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 571 backend_ds = backend.open_dataset(
572 filename_or_obj,
573 drop_variables=drop_variables,
574 **decoders,
575 **kwargs,
576 )
577 ds = _dataset_from_backend_dataset(
578 backend_ds,
579 filename_or_obj,
(...)
589 **kwargs,
590 )
591 return ds
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:1170, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel, zarr_version)
1149 def open_dataset( # type: ignore[override] # allow LSP violation, not supporting **kwargs
1150 self,
1151 filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
(...)
1167 zarr_version=None,
1168 ) -> Dataset:
1169 filename_or_obj = _normalize_path(filename_or_obj)
-> 1170 store = ZarrStore.open_group(
1171 filename_or_obj,
1172 group=group,
1173 mode=mode,
1174 synchronizer=synchronizer,
1175 consolidated=consolidated,
1176 consolidate_on_close=False,
1177 chunk_store=chunk_store,
1178 storage_options=storage_options,
1179 stacklevel=stacklevel + 1,
1180 zarr_version=zarr_version,
1181 )
1183 store_entrypoint = StoreBackendEntrypoint()
1184 with close_on_error(store):
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/xarray/backends/zarr.py:500, in ZarrStore.open_group(cls, store, mode, synchronizer, group, consolidated, consolidate_on_close, chunk_store, storage_options, append_dim, write_region, safe_chunks, stacklevel, zarr_version, write_empty)
498 zarr_group = zarr.open_consolidated(store, **open_kwargs)
499 else:
--> 500 zarr_group = zarr.open_group(store, **open_kwargs)
501 close_store_on_close = zarr_group.store is not store
502 return cls(
503 zarr_group,
504 mode,
(...)
510 close_store_on_close,
511 )
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/hierarchy.py:1575, in open_group(store, mode, cache_attrs, synchronizer, path, chunk_store, storage_options, zarr_version, meta_array)
1572 # ensure store is initialized
1574 if mode in ["r", "r+"]:
-> 1575 if not contains_group(store, path=path):
1576 if contains_array(store, path=path):
1577 raise ContainsArrayError(path)
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:131, in contains_group(store, path, explicit_only)
129 store_version = getattr(store, "_store_version", 2)
130 if store_version == 2 or explicit_only:
--> 131 return key in store
132 else:
133 if key in store:
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/zarr/storage.py:1496, in FSStore.__contains__(self, key)
1494 def __contains__(self, key):
1495 key = self._normalize_key(key)
-> 1496 return key in self.map
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/mapping.py:193, in FSMap.__contains__(self, key)
191 """Does key exist in mapping?"""
192 path = self._key_to_str(key)
--> 193 return self.fs.isfile(path)
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:1085, in ReferenceFileSystem.isfile(self, path)
1084 def isfile(self, path): # overwrite auto-sync version
-> 1085 return path in self.references
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:541, in LazyReferenceMapper.__contains__(self, item)
539 def __contains__(self, item):
540 try:
--> 541 self._load_one_key(item)
542 return True
543 except KeyError:
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:274, in LazyReferenceMapper._load_one_key(self, key)
269 def _load_one_key(self, key):
270 """Get the reference for one key
271
272 Returns bytes, one-element list or three-element list.
273 """
--> 274 if key in self._items:
275 return self._items[key]
276 elif key in self.zmetadata:
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:141, in LazyReferenceMapper.__getattr__(self, item)
139 def __getattr__(self, item):
140 if item in ("_items", "record_size", "zmetadata"):
--> 141 self.setup()
142 # avoid possible recursion if setup fails somehow
143 return self.__dict__[item]
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/implementations/reference.py:148, in LazyReferenceMapper.setup(self)
146 def setup(self):
147 self._items = {}
--> 148 self._items[".zmetadata"] = self.fs.cat_file(
149 "/".join([self.root, ".zmetadata"])
150 )
151 met = json.loads(self._items[".zmetadata"])
152 self.record_size = met["record_size"]
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:118, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
115 @functools.wraps(func)
116 def wrapper(*args, **kwargs):
117 self = obj or args[0]
--> 118 return sync(self.loop, func, *args, **kwargs)
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:103, in sync(loop, func, timeout, *args, **kwargs)
101 raise FSTimeoutError from return_result
102 elif isinstance(return_result, BaseException):
--> 103 raise return_result
104 else:
105 return return_result
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/fsspec/asyn.py:56, in _runner(event, coro, result, timeout)
54 coro = asyncio.wait_for(coro, timeout=timeout)
55 try:
---> 56 result[0] = await coro
57 except Exception as ex:
58 result[0] = ex
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1128, in S3FileSystem._cat_file(self, path, version_id, start, end)
1125 finally:
1126 resp["Body"].close()
-> 1128 return await _error_wrapper(_call_and_read, retries=self.retries)
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
143 err = e
144 err = translate_boto_error(err)
--> 145 raise err
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:113, in _error_wrapper(func, args, kwargs, retries)
111 for i in range(retries):
112 try:
--> 113 return await func(*args, **kwargs)
114 except S3_RETRYABLE_ERRORS as e:
115 err = e
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:1115, in S3FileSystem._cat_file.<locals>._call_and_read()
1114 async def _call_and_read():
-> 1115 resp = await self._call_s3(
1116 "get_object",
1117 Bucket=bucket,
1118 Key=key,
1119 **version_id_kw(version_id or vers),
1120 **head,
1121 **self.req_kw,
1122 )
1123 try:
1124 return await resp["Body"].read()
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:365, in S3FileSystem._call_s3(self, method, *akwarglist, **kwargs)
363 logger.debug("CALL: %s - %s - %s", method.__name__, akwarglist, kw2)
364 additional_kwargs = self._get_s3_method_kwargs(method, *akwarglist, **kwargs)
--> 365 return await _error_wrapper(
366 method, kwargs=additional_kwargs, retries=self.retries
367 )
File ~/Hazmat/ERD-PythonPackages/xarray-subset-grid/.pixi/envs/all/lib/python3.12/site-packages/s3fs/core.py:145, in _error_wrapper(func, args, kwargs, retries)
143 err = e
144 err = translate_boto_error(err)
--> 145 raise err
FileNotFoundError: The specified key does not exist.
f"Dataset size: {ds.nbytes * 1.0e-9} Gb"
Example Polygon¶
Drawn with: https://geojson.io

polygon = np.array(
[
[-122.38488806417945, 34.98888604471138],
[-122.02425311530737, 33.300351211467074],
[-120.60402628930146, 32.723214427630836],
[-116.63789131284673, 32.54346959375448],
[-116.39346090873218, 33.8541384965596],
[-118.83845767505964, 35.257586401855164],
[-121.34541503969862, 35.50073821008141],
[-122.38488806417945, 34.98888604471138],
]
)
polygon
# get the subset of the grid that is within the polygon
ds_subset = ds.xsg.grid.subset_polygon(ds[['temp_sur', 'grid']], polygon)
ds_subset
ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")
f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
Compare against CO-OPs Subsetting Tool¶
Using the same vertices and only subsetting for temperature:

ds_subset_control = xr.open_dataset('./wcofs-subset-control.nc',
chunks={},
backend_kwargs={'decode_times': False})
ds_subset_control
ds_subset_control.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")