FVCOM

Open In Colab

# Install required libraries
# !pip install -q xarray_subset_grid@git+https://github.com/asascience-open/xarray-subset-grid.git
# !pip install -q s3fs cftime xarray cf-xarray fsspec dask h5netcdf
import cf_xarray # noqa
import fsspec
import numpy as np
import xarray as xr

import xarray_subset_grid.accessor  # noqa: F401

# open dataset as zarr object using fsspec reference file system and xarray
fs = fsspec.filesystem(
    "reference",
    fo="s3://nextgen-dmac-cloud-ingest/nos/ngofs2/nos.ngofs2.2ds.best.nc.zarr",
    remote_protocol="s3",
    remote_options={"anon": True},
    target_protocol="s3",
    target_options={"anon": True},
)
m = fs.get_mapper("")

ds = xr.open_dataset(
    m,
    engine="zarr",
    backend_kwargs=dict(consolidated=False),
    chunks={'time': 1},
    drop_variables=["Itime2"],
)
ds
<xarray.Dataset> Size: 20GB
Dimensions:             (time: 763, four: 4, nele: 569405, node: 303714,
                         three: 3, maxnode: 10, maxelem: 8)
Coordinates:
    lat                 (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    latc                (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    lon                 (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    lonc                (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
  * time                (time) datetime64[ns] 6kB 2024-06-18T21:00:00 ... 202...
Dimensions without coordinates: four, nele, node, three, maxnode, maxelem
Data variables: (12/39)
    Itime               (time) datetime64[ns] 6kB dask.array<chunksize=(1,), meta=np.ndarray>
    Times               (time) |S26 20kB dask.array<chunksize=(1,), meta=np.ndarray>
    a1u                 (four, nele) float32 9MB dask.array<chunksize=(4, 569405), meta=np.ndarray>
    a2u                 (four, nele) float32 9MB dask.array<chunksize=(4, 569405), meta=np.ndarray>
    art1                (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    art2                (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    ...                  ...
    wet_nodes_prev_int  (time, node) int32 927MB dask.array<chunksize=(1, 303714), meta=np.ndarray>
    x                   (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    xc                  (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    y                   (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    yc                  (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    zeta                (time, node) float32 927MB dask.array<chunksize=(1, 303714), meta=np.ndarray>
Attributes: (12/14)
    Conventions:                 CF-1.0
    CoordinateProjection:        init=nad83:4205
    CoordinateSystem:            GeoReferenced
    GroundWater_Forcing:         GROUND WATER FORCING IS OFF!
    River_Forcing:               THERE ARE 63 RIVERS IN THIS MODEL.\nRIVER IN...
    Surface_Heat_Forcing:        FVCOM variable surface heat forcing file:\nF...
    ...                          ...
    Tidal_Forcing:               TIDAL ELEVATION FORCING IS OFF!
    history:                     model started at: 18/06/2024   21:15
    institution:                 School for Marine Science and Technology
    references:                  http://fvcom.smast.umassd.edu, http://codfis...
    source:                      FVCOM_4.3
    title:                       NGOFS2

Fixup UGRID mesh topology to be CF Compliant

ds = xarray_subset_grid.grids.ugrid.assign_ugrid_topology(ds)
ds
<xarray.Dataset> Size: 20GB
Dimensions:             (time: 763, four: 4, nele: 569405, node: 303714,
                         three: 3, maxnode: 10, maxelem: 8)
Coordinates:
    lat                 (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    latc                (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    lon                 (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    lonc                (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
  * time                (time) datetime64[ns] 6kB 2024-06-18T21:00:00 ... 202...
Dimensions without coordinates: four, nele, node, three, maxnode, maxelem
Data variables: (12/39)
    Itime               (time) datetime64[ns] 6kB dask.array<chunksize=(1,), meta=np.ndarray>
    Times               (time) |S26 20kB dask.array<chunksize=(1,), meta=np.ndarray>
    a1u                 (four, nele) float32 9MB dask.array<chunksize=(4, 569405), meta=np.ndarray>
    a2u                 (four, nele) float32 9MB dask.array<chunksize=(4, 569405), meta=np.ndarray>
    art1                (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    art2                (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    ...                  ...
    wet_nodes_prev_int  (time, node) int32 927MB dask.array<chunksize=(1, 303714), meta=np.ndarray>
    x                   (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    xc                  (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    y                   (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    yc                  (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    zeta                (time, node) float32 927MB dask.array<chunksize=(1, 303714), meta=np.ndarray>
Attributes: (12/14)
    Conventions:                 CF-1.0
    CoordinateProjection:        init=nad83:4205
    CoordinateSystem:            GeoReferenced
    GroundWater_Forcing:         GROUND WATER FORCING IS OFF!
    River_Forcing:               THERE ARE 63 RIVERS IN THIS MODEL.\nRIVER IN...
    Surface_Heat_Forcing:        FVCOM variable surface heat forcing file:\nF...
    ...                          ...
    Tidal_Forcing:               TIDAL ELEVATION FORCING IS OFF!
    history:                     model started at: 18/06/2024   21:15
    institution:                 School for Marine Science and Technology
    references:                  http://fvcom.smast.umassd.edu, http://codfis...
    source:                      FVCOM_4.3
    title:                       NGOFS2
f"Dataset size: {ds.nbytes * 1.0e-9} Gb"
'Dataset size: 19.676859022000002 Gb'

Example Polygon

Drawn with: https://geojson.io

image.png

polygon = np.array(
    [
        [-88.90580394001934, 30.29241252023394],
        [-89.19516286474344, 30.472177580997183],
        [-89.68564387438431, 30.353800554455674],
        [-90.10008710994322, 30.07406870382799],
        [-90.02651730481438, 29.72115942943863],
        [-89.61697872293067, 29.347768286358786],
        [-89.48700540053677, 28.869972870160396],
        [-89.03822958925102, 28.876415308359796],
        [-88.6164293731791, 29.163769289372496],
        [-88.54531189488806, 29.682817158850725],
        [-88.31234084531368, 29.836098340764792],
        [-88.3491257478779, 29.967902324410517],
        [-88.90580394001934, 30.29241252023394],
    ]
)

polygon
array([[-88.90580394,  30.29241252],
       [-89.19516286,  30.47217758],
       [-89.68564387,  30.35380055],
       [-90.10008711,  30.0740687 ],
       [-90.0265173 ,  29.72115943],
       [-89.61697872,  29.34776829],
       [-89.4870054 ,  28.86997287],
       [-89.03822959,  28.87641531],
       [-88.61642937,  29.16376929],
       [-88.54531189,  29.68281716],
       [-88.31234085,  29.83609834],
       [-88.34912575,  29.96790232],
       [-88.90580394,  30.29241252]])

We can subset down to the variables we care about, while keeping the grid information for later analysis

ds_zeta = ds.xsg.subset_vars(['zeta'])
ds_zeta
<xarray.Dataset> Size: 948MB
Dimensions:     (node: 303714, nele: 569405, three: 3, time: 763)
Coordinates:
    lon         (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    lat         (node) float32 1MB dask.array<chunksize=(303714,), meta=np.ndarray>
    lonc        (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
    latc        (nele) float32 2MB dask.array<chunksize=(569405,), meta=np.ndarray>
  * time        (time) datetime64[ns] 6kB 2024-06-18T21:00:00 ... 2024-07-20T...
Dimensions without coordinates: node, nele, three
Data variables:
    fvcom_mesh  int32 4B ...
    nv          (three, nele) int32 7MB dask.array<chunksize=(3, 569405), meta=np.ndarray>
    nbe         (three, nele) int32 7MB dask.array<chunksize=(3, 569405), meta=np.ndarray>
    zeta        (time, node) float32 927MB dask.array<chunksize=(1, 303714), meta=np.ndarray>
Attributes: (12/14)
    Conventions:                 CF-1.0
    CoordinateProjection:        init=nad83:4205
    CoordinateSystem:            GeoReferenced
    GroundWater_Forcing:         GROUND WATER FORCING IS OFF!
    River_Forcing:               THERE ARE 63 RIVERS IN THIS MODEL.\nRIVER IN...
    Surface_Heat_Forcing:        FVCOM variable surface heat forcing file:\nF...
    ...                          ...
    Tidal_Forcing:               TIDAL ELEVATION FORCING IS OFF!
    history:                     model started at: 18/06/2024   21:15
    institution:                 School for Marine Science and Technology
    references:                  http://fvcom.smast.umassd.edu, http://codfis...
    source:                      FVCOM_4.3
    title:                       NGOFS2

Then we can subset the grid down to the target area

ds_subset = ds_zeta.xsg.subset_polygon(polygon)
assert(ds_subset is not None)
ds_subset
<xarray.Dataset> Size: 168MB
Dimensions:     (node: 54024, nele: 99246, three: 3, time: 763)
Coordinates:
    lon         (node) float32 216kB dask.array<chunksize=(54024,), meta=np.ndarray>
    lat         (node) float32 216kB dask.array<chunksize=(54024,), meta=np.ndarray>
    lonc        (nele) float32 397kB dask.array<chunksize=(99246,), meta=np.ndarray>
    latc        (nele) float32 397kB dask.array<chunksize=(99246,), meta=np.ndarray>
  * time        (time) datetime64[ns] 6kB 2024-06-18T21:00:00 ... 2024-07-20T...
Dimensions without coordinates: node, nele, three
Data variables:
    fvcom_mesh  int32 4B ...
    nv          (three, nele) int32 1MB dask.array<chunksize=(3, 99246), meta=np.ndarray>
    nbe         (three, nele) int32 1MB dask.array<chunksize=(3, 99246), meta=np.ndarray>
    zeta        (time, node) float32 165MB dask.array<chunksize=(1, 54024), meta=np.ndarray>
Attributes: (12/14)
    Conventions:                 CF-1.0
    CoordinateProjection:        init=nad83:4205
    CoordinateSystem:            GeoReferenced
    GroundWater_Forcing:         GROUND WATER FORCING IS OFF!
    River_Forcing:               THERE ARE 63 RIVERS IN THIS MODEL.\nRIVER IN...
    Surface_Heat_Forcing:        FVCOM variable surface heat forcing file:\nF...
    ...                          ...
    Tidal_Forcing:               TIDAL ELEVATION FORCING IS OFF!
    history:                     model started at: 18/06/2024   21:15
    institution:                 School for Marine Science and Technology
    references:                  http://fvcom.smast.umassd.edu, http://codfis...
    source:                      FVCOM_4.3
    title:                       NGOFS2
import matplotlib.pyplot as plt
import matplotlib.tri as tri

zeta = ds_subset.zeta.isel(time=0)
tris = tri.Triangulation(
    zeta.cf["longitude"],
    zeta.cf["latitude"],
    ds_subset[ds_subset.fvcom_mesh.face_node_connectivity].T - 1,
)
plt.tripcolor(tris, zeta, shading="flat")
<matplotlib.collections.PolyCollection at 0x334dc3970>
../_images/92f2f0479d44ff5f452c9cc5c6b5317e788880f1c68fefe6931ed03413e66087.png
f"Subset dataset size: {ds_subset.nbytes * 1.0e-6} Mb"
'Subset dataset size: 168.49542 Mb'