RTOFS¶
# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import cf_xarray # noqa
import datetime
import fsspec
import xarray as xr
import xarray_subset_grid.accessor # noqa: F401
# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
"reference",
fo="s3://noaa-nodd-kerchunk-pds/rtofs/rtofs.rtofs_glo_2ds_diag.best.nc.zarr",
remote_protocol="s3",
remote_options={"anon": True},
target_protocol="s3",
target_options={"anon": True},
)
m = fs.get_mapper("")
ds = xr.open_dataset(
m,
engine="zarr",
backend_kwargs=dict(consolidated=False),
chunks={},
)
ds
<xarray.Dataset> Size: 618GB
Dimensions: (MT: 2081, Y: 3298, X: 4500)
Coordinates:
Date (MT) float64 17kB dask.array<chunksize=(512,), meta=np.ndarray>
Latitude (Y, X) float32 59MB dask.array<chunksize=(825, 1125), meta=np.ndarray>
Longitude (Y, X) float32 59MB dask.array<chunksize=(825, 1125), meta=np.ndarray>
* MT (MT) datetime64[ns] 17kB 2024-05-09 ......
* X (X) int32 18kB 1 2 3 4 ... 4498 4499 4500
* Y (Y) int32 13kB 1 2 3 4 ... 3296 3297 3298
Data variables:
mixed_layer_thickness (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
ssh (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
surface_boundary_layer_thickness (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
u_barotropic_velocity (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
v_barotropic_velocity (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
Attributes:
Conventions: CF-1.0
experiment: 92.8
history: archv2ncdf2d
institution: National Centers for Environmental Prediction
source: HYCOM archive file
title: HYCOM ATLb2.00Reprojected HYCOM has some garbage data in the last X column, so we just get rid of it for this example
ds = ds.isel(X=slice(0, -1))
ds
<xarray.Dataset> Size: 618GB
Dimensions: (MT: 2081, Y: 3298, X: 4499)
Coordinates:
Date (MT) float64 17kB dask.array<chunksize=(512,), meta=np.ndarray>
Latitude (Y, X) float32 59MB dask.array<chunksize=(825, 1125), meta=np.ndarray>
Longitude (Y, X) float32 59MB dask.array<chunksize=(825, 1125), meta=np.ndarray>
* MT (MT) datetime64[ns] 17kB 2024-05-09 ......
* X (X) int32 18kB 1 2 3 4 ... 4497 4498 4499
* Y (Y) int32 13kB 1 2 3 4 ... 3296 3297 3298
Data variables:
mixed_layer_thickness (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
ssh (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
surface_boundary_layer_thickness (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
u_barotropic_velocity (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
v_barotropic_velocity (MT, Y, X) float32 124GB dask.array<chunksize=(1, 825, 1125), meta=np.ndarray>
Attributes:
Conventions: CF-1.0
experiment: 92.8
history: archv2ncdf2d
institution: National Centers for Environmental Prediction
source: HYCOM archive file
title: HYCOM ATLb2.00f"Dataset size: {ds.nbytes * 1.0e-9} Gb"
'Dataset size: 617.66392334 Gb'
Check the grid type that is recognized
ds.xsg.grid.name
'regular_grid_2d'
Create our region of interest and subset

bbox = [-93.63283364104035, 16.18222316056857, -67.60864242620244, 34.02542167172069]
ds_subset = ds.xsg.subset_bbox(bbox)
ds_subset
<xarray.Dataset> Size: 3GB
Dimensions: (MT: 2081, Y: 247, X: 325)
Coordinates:
Date (MT) float64 17kB dask.array<chunksize=(512,), meta=np.ndarray>
Latitude (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
Longitude (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
* MT (MT) datetime64[ns] 17kB 2024-05-09 ......
* X (X) int32 1kB 2404 2405 2406 ... 2727 2728
* Y (Y) int32 988B 1711 1712 ... 1956 1957
Data variables:
mixed_layer_thickness (MT, Y, X) float32 668MB dask.array<chunksize=(1, 247, 325), meta=np.ndarray>
ssh (MT, Y, X) float32 668MB dask.array<chunksize=(1, 247, 325), meta=np.ndarray>
surface_boundary_layer_thickness (MT, Y, X) float32 668MB dask.array<chunksize=(1, 247, 325), meta=np.ndarray>
u_barotropic_velocity (MT, Y, X) float32 668MB dask.array<chunksize=(1, 247, 325), meta=np.ndarray>
v_barotropic_velocity (MT, Y, X) float32 668MB dask.array<chunksize=(1, 247, 325), meta=np.ndarray>
subset_mask (Y, X) float64 642kB 1.0 1.0 ... 1.0 1.0
Attributes:
Conventions: CF-1.0
experiment: 92.8
history: archv2ncdf2d
institution: National Centers for Environmental Prediction
source: HYCOM archive file
title: HYCOM ATLb2.00f"Dataset size: {ds_subset.nbytes * 1.0e-9} Gb"
'Dataset size: 3.342365484 Gb'
Lets just get the first timestep
now = datetime.datetime.now()
ds_subset = ds_subset.cf.sel(time=now, method="nearest")
ds_subset
<xarray.Dataset> Size: 3MB
Dimensions: (Y: 247, X: 325)
Coordinates:
Date float64 8B dask.array<chunksize=(), meta=np.ndarray>
Latitude (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
Longitude (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
MT datetime64[ns] 8B 2024-07-30T16:00:00
* X (X) int32 1kB 2404 2405 2406 ... 2727 2728
* Y (Y) int32 988B 1711 1712 ... 1956 1957
Data variables:
mixed_layer_thickness (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
ssh (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
surface_boundary_layer_thickness (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
u_barotropic_velocity (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
v_barotropic_velocity (Y, X) float32 321kB dask.array<chunksize=(247, 325), meta=np.ndarray>
subset_mask (Y, X) float64 642kB 1.0 1.0 ... 1.0 1.0
Attributes:
Conventions: CF-1.0
experiment: 92.8
history: archv2ncdf2d
institution: National Centers for Environmental Prediction
source: HYCOM archive file
title: HYCOM ATLb2.00f"Dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
'Dataset size: 2.892204 Mb'
Lets plot the sea surface height
ds_subset.ssh.cf.plot(x='longitude', y='latitude', cmap='viridis')
<matplotlib.collections.QuadMesh at 0x36247d4e0>