ROMS 3D

Open In Colab

# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import fsspec
import numpy as np
import xarray as xr

import xarray_subset_grid.accessor  # noqa: F401

# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
    "reference",
    fo="s3://nextgen-dmac-cloud-ingest/nos/wcofs/nos.wcofs.fields.best.nc.zarr",
    remote_protocol="s3",
    remote_options={"anon": True},
    target_protocol="s3",
    target_options={"anon": True},
)
m = fs.get_mapper("")

ds = xr.open_dataset(
    m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
)
ds
<xarray.Dataset> Size: 76GB
Dimensions:        (tracer: 2, s_rho: 40, s_w: 41, boundary: 4,
                    ocean_time: 264, eta_rho: 1016, xi_rho: 348, eta_psi: 1015,
                    xi_psi: 347, eta_u: 1016, xi_u: 347, eta_v: 1015, xi_v: 348)
Coordinates:
    lat_psi        (eta_psi, xi_psi) float64 3MB dask.array<chunksize=(1015, 347), meta=np.ndarray>
    lat_rho        (eta_rho, xi_rho) float64 3MB dask.array<chunksize=(1016, 348), meta=np.ndarray>
    lat_u          (eta_u, xi_u) float64 3MB dask.array<chunksize=(1016, 347), meta=np.ndarray>
    lat_v          (eta_v, xi_v) float64 3MB dask.array<chunksize=(1015, 348), meta=np.ndarray>
    lon_psi        (eta_psi, xi_psi) float64 3MB dask.array<chunksize=(1015, 347), meta=np.ndarray>
    lon_rho        (eta_rho, xi_rho) float64 3MB dask.array<chunksize=(1016, 348), meta=np.ndarray>
    lon_u          (eta_u, xi_u) float64 3MB dask.array<chunksize=(1016, 347), meta=np.ndarray>
    lon_v          (eta_v, xi_v) float64 3MB dask.array<chunksize=(1015, 348), meta=np.ndarray>
  * ocean_time     (ocean_time) datetime64[ns] 2kB 2024-06-18T06:00:00 ... 20...
  * s_rho          (s_rho) float64 320B -0.9875 -0.9625 ... -0.0375 -0.0125
  * s_w            (s_w) float64 328B -1.0 -0.975 -0.95 ... -0.05 -0.025 0.0
Dimensions without coordinates: tracer, boundary, eta_rho, xi_rho, eta_psi,
                                xi_psi, eta_u, xi_u, eta_v, xi_v
Data variables: (12/80)
    Akk_bak        float64 8B ...
    Akp_bak        float64 8B ...
    Akt_bak        (tracer) float64 16B dask.array<chunksize=(2,), meta=np.ndarray>
    Akv_bak        float64 8B ...
    Cs_r           (s_rho) float64 320B dask.array<chunksize=(40,), meta=np.ndarray>
    Cs_w           (s_w) float64 328B dask.array<chunksize=(41,), meta=np.ndarray>
    ...             ...
    ubar           (ocean_time, eta_u, xi_u) float32 372MB dask.array<chunksize=(1, 1016, 347), meta=np.ndarray>
    v              (ocean_time, s_rho, eta_v, xi_v) float32 15GB dask.array<chunksize=(1, 40, 1015, 348), meta=np.ndarray>
    vbar           (ocean_time, eta_v, xi_v) float32 373MB dask.array<chunksize=(1, 1015, 348), meta=np.ndarray>
    w              (ocean_time, s_w, eta_rho, xi_rho) float32 15GB dask.array<chunksize=(1, 41, 1016, 348), meta=np.ndarray>
    xl             float64 8B ...
    zeta           (ocean_time, eta_rho, xi_rho) float32 373MB dask.array<chunksize=(1, 1016, 348), meta=np.ndarray>
Attributes: (12/36)
    CPP_options:       mode, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, AS...
    Conventions:       CF-1.4, SGRID-0.3
    NLM_LBC:           \nEDGE:  WEST   SOUTH  EAST   NORTH  \nzeta:  Cha    C...
    NLM_TADV:          \nADVECTION:   HORIZONTAL   VERTICAL     \ntemp:      ...
    ana_file:          ROMS/Functionals/ana_btflux.h
    avg_file:          nos.wcofs.avg.nc
    ...                ...
    svn_url:           
    tide_file:         nos.wcofs.roms.tides.nc
    tiling:            008x060
    title:             wcofs forecast RUN in operational mode
    type:              ROMS/TOMS history file
    var_info:          varinfo.yaml
f"Dataset size: {ds.nbytes * 1.0e-9} Gb"
'Dataset size: 76.15602202000001 Gb'

Example Polygon

Drawn with: https://geojson.io

image.png

polygon = np.array(
    [
        [-122.38488806417945, 34.98888604471138],
        [-122.02425311530737, 33.300351211467074],
        [-120.60402628930146, 32.723214427630836],
        [-116.63789131284673, 32.54346959375448],
        [-116.39346090873218, 33.8541384965596],
        [-118.83845767505964, 35.257586401855164],
        [-121.34541503969862, 35.50073821008141],
        [-122.38488806417945, 34.98888604471138],
    ]
)

polygon
array([[-122.38488806,   34.98888604],
       [-122.02425312,   33.30035121],
       [-120.60402629,   32.72321443],
       [-116.63789131,   32.54346959],
       [-116.39346091,   33.8541385 ],
       [-118.83845768,   35.2575864 ],
       [-121.34541504,   35.50073821],
       [-122.38488806,   34.98888604]])

We can subset down to the variables we care about, while keeping the grid information for later analysis

ds_temp = ds.xsg.subset_vars(['temp'])
ds_temp
<xarray.Dataset> Size: 15GB
Dimensions:     (eta_rho: 1016, xi_rho: 348, eta_u: 1016, xi_u: 347,
                 eta_psi: 1015, xi_psi: 347, eta_v: 1015, xi_v: 348,
                 ocean_time: 264, s_rho: 40)
Coordinates:
    lat_rho     (eta_rho, xi_rho) float64 3MB dask.array<chunksize=(1016, 348), meta=np.ndarray>
    lat_u       (eta_u, xi_u) float64 3MB dask.array<chunksize=(1016, 347), meta=np.ndarray>
    lat_psi     (eta_psi, xi_psi) float64 3MB dask.array<chunksize=(1015, 347), meta=np.ndarray>
    lon_psi     (eta_psi, xi_psi) float64 3MB dask.array<chunksize=(1015, 347), meta=np.ndarray>
    lon_v       (eta_v, xi_v) float64 3MB dask.array<chunksize=(1015, 348), meta=np.ndarray>
    lat_v       (eta_v, xi_v) float64 3MB dask.array<chunksize=(1015, 348), meta=np.ndarray>
    lon_u       (eta_u, xi_u) float64 3MB dask.array<chunksize=(1016, 347), meta=np.ndarray>
    lon_rho     (eta_rho, xi_rho) float64 3MB dask.array<chunksize=(1016, 348), meta=np.ndarray>
  * ocean_time  (ocean_time) datetime64[ns] 2kB 2024-06-18T06:00:00 ... 2024-...
  * s_rho       (s_rho) float64 320B -0.9875 -0.9625 -0.9375 ... -0.0375 -0.0125
Dimensions without coordinates: eta_rho, xi_rho, eta_u, xi_u, eta_psi, xi_psi,
                                eta_v, xi_v
Data variables:
    grid        int32 4B ...
    temp        (ocean_time, s_rho, eta_rho, xi_rho) float32 15GB dask.array<chunksize=(1, 40, 1016, 348), meta=np.ndarray>
Attributes: (12/36)
    CPP_options:       mode, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, AS...
    Conventions:       CF-1.4, SGRID-0.3
    NLM_LBC:           \nEDGE:  WEST   SOUTH  EAST   NORTH  \nzeta:  Cha    C...
    NLM_TADV:          \nADVECTION:   HORIZONTAL   VERTICAL     \ntemp:      ...
    ana_file:          ROMS/Functionals/ana_btflux.h
    avg_file:          nos.wcofs.avg.nc
    ...                ...
    svn_url:           
    tide_file:         nos.wcofs.roms.tides.nc
    tiling:            008x060
    title:             wcofs forecast RUN in operational mode
    type:              ROMS/TOMS history file
    var_info:          varinfo.yaml

Then we can subset the grid so we only have the surface data

ds_temp_surface = ds_temp.xsg.subset_vertical_level(level=0, method='nearest')
ds_temp_surface
<xarray.Dataset> Size: 396MB
Dimensions:     (eta_rho: 1016, xi_rho: 348, eta_u: 1016, xi_u: 347,
                 eta_psi: 1015, xi_psi: 347, eta_v: 1015, xi_v: 348,
                 ocean_time: 264)
Coordinates:
    lat_rho     (eta_rho, xi_rho) float64 3MB dask.array<chunksize=(1016, 348), meta=np.ndarray>
    lat_u       (eta_u, xi_u) float64 3MB dask.array<chunksize=(1016, 347), meta=np.ndarray>
    lat_psi     (eta_psi, xi_psi) float64 3MB dask.array<chunksize=(1015, 347), meta=np.ndarray>
    lon_psi     (eta_psi, xi_psi) float64 3MB dask.array<chunksize=(1015, 347), meta=np.ndarray>
    lon_v       (eta_v, xi_v) float64 3MB dask.array<chunksize=(1015, 348), meta=np.ndarray>
    lat_v       (eta_v, xi_v) float64 3MB dask.array<chunksize=(1015, 348), meta=np.ndarray>
    lon_u       (eta_u, xi_u) float64 3MB dask.array<chunksize=(1016, 347), meta=np.ndarray>
    lon_rho     (eta_rho, xi_rho) float64 3MB dask.array<chunksize=(1016, 348), meta=np.ndarray>
  * ocean_time  (ocean_time) datetime64[ns] 2kB 2024-06-18T06:00:00 ... 2024-...
    s_rho       float64 8B -0.0125
Dimensions without coordinates: eta_rho, xi_rho, eta_u, xi_u, eta_psi, xi_psi,
                                eta_v, xi_v
Data variables:
    grid        int32 4B ...
    temp        (ocean_time, eta_rho, xi_rho) float32 373MB dask.array<chunksize=(1, 1016, 348), meta=np.ndarray>
Attributes: (12/36)
    CPP_options:       mode, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, AS...
    Conventions:       CF-1.4, SGRID-0.3
    NLM_LBC:           \nEDGE:  WEST   SOUTH  EAST   NORTH  \nzeta:  Cha    C...
    NLM_TADV:          \nADVECTION:   HORIZONTAL   VERTICAL     \ntemp:      ...
    ana_file:          ROMS/Functionals/ana_btflux.h
    avg_file:          nos.wcofs.avg.nc
    ...                ...
    svn_url:           
    tide_file:         nos.wcofs.roms.tides.nc
    tiling:            008x060
    title:             wcofs forecast RUN in operational mode
    type:              ROMS/TOMS history file
    var_info:          varinfo.yaml

Then we can subset the grid down to the target area

ds_subset = ds_temp_surface.xsg.subset_polygon(polygon)
assert(ds_subset is not None)
ds_subset
<xarray.Dataset> Size: 18MB
Dimensions:     (eta_u: 125, xi_u: 132, eta_v: 125, xi_v: 133, ocean_time: 264,
                 eta_rho: 125, xi_rho: 132, eta_psi: 125, xi_psi: 132)
Coordinates:
    lat_u       (eta_u, xi_u) float64 132kB dask.array<chunksize=(125, 132), meta=np.ndarray>
    lon_u       (eta_u, xi_u) float64 132kB dask.array<chunksize=(125, 132), meta=np.ndarray>
    s_rho       float64 8B -0.0125
    lon_v       (eta_v, xi_v) float64 133kB dask.array<chunksize=(125, 133), meta=np.ndarray>
    lat_v       (eta_v, xi_v) float64 133kB dask.array<chunksize=(125, 133), meta=np.ndarray>
    lat_rho     (eta_rho, xi_rho) float64 132kB dask.array<chunksize=(125, 132), meta=np.ndarray>
    lon_rho     (eta_rho, xi_rho) float64 132kB dask.array<chunksize=(125, 132), meta=np.ndarray>
  * ocean_time  (ocean_time) datetime64[ns] 2kB 2024-06-18T06:00:00 ... 2024-...
    lat_psi     (eta_psi, xi_psi) float64 132kB dask.array<chunksize=(125, 132), meta=np.ndarray>
    lon_psi     (eta_psi, xi_psi) float64 132kB dask.array<chunksize=(125, 132), meta=np.ndarray>
Dimensions without coordinates: eta_u, xi_u, eta_v, xi_v, eta_rho, xi_rho,
                                eta_psi, xi_psi
Data variables:
    temp        (ocean_time, eta_rho, xi_rho) float32 17MB dask.array<chunksize=(1, 125, 132), meta=np.ndarray>
    grid        int32 4B ...
Attributes: (12/36)
    CPP_options:       mode, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, AS...
    Conventions:       CF-1.4, SGRID-0.3
    NLM_LBC:           \nEDGE:  WEST   SOUTH  EAST   NORTH  \nzeta:  Cha    C...
    NLM_TADV:          \nADVECTION:   HORIZONTAL   VERTICAL     \ntemp:      ...
    ana_file:          ROMS/Functionals/ana_btflux.h
    avg_file:          nos.wcofs.avg.nc
    ...                ...
    svn_url:           
    tide_file:         nos.wcofs.roms.tides.nc
    tiling:            008x060
    title:             wcofs forecast RUN in operational mode
    type:              ROMS/TOMS history file
    var_info:          varinfo.yaml
ds_subset.temp.cf.isel(time=0).plot(x="lon_rho", y="lat_rho")
<matplotlib.collections.QuadMesh at 0x3415e46d0>
../_images/a5d25376a71e50fc32921baadb75669a82cee1825d52b5a3cdb21697d3646cd6.png
f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
'Subset dataset size: 18.484123999999998 Mb'