Subsetting an Unstructured Grid: Analysis Over Chicago

Subsetting an Unstructured Grid: Analysis Over Chicago#

Authors: Philip Chmielowiec

This usage example showcases various ways of subsetting an unstructured grid using UXarray, focussing on analyzing a region around Chicago, Illinois.

import uxarray as ux
import geoviews.feature as gf
import cartopy.crs as ccrs
import holoviews as hv

import warnings

import geocat.datafiles as geodf


plot_opts = {"width": 700, "height": 350}

hv.extension("bokeh")


warnings.filterwarnings("ignore")
Downloading file 'registry.txt' from 'https://github.com/NCAR/GeoCAT-datafiles/raw/main/registry.txt' to '/home/docs/.cache/geocat'.

Setup#

In this example, we will be using the geocat-datafiles package to obtain our grid and data files.

The dataset used in this example is a 30km global MPAS meshes. We will be investigating the relative humidity vertically interpolated to 200hPa (relhum200hPa) data variable.

datafiles = (
    geodf.get(
        "netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/diag.2016-08-20_00.00.00_subset.nc"
    ),
    geodf.get("netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/x1.655362.grid_subset.nc"),
)
Downloading file 'netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/diag.2016-08-20_00.00.00_subset.nc' from 'https://github.com/NCAR/GeoCAT-datafiles/raw/main/netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/diag.2016-08-20_00.00.00_subset.nc' to '/home/docs/.cache/geocat'.
Downloading file 'netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/x1.655362.grid_subset.nc' from 'https://github.com/NCAR/GeoCAT-datafiles/raw/main/netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/x1.655362.grid_subset.nc' to '/home/docs/.cache/geocat'.
---------------------------------------------------------------------------
TimeoutError                              Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/connectionpool.py:537, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
    536 try:
--> 537     response = conn.getresponse()
    538 except (BaseSSLError, OSError) as e:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/connection.py:466, in HTTPConnection.getresponse(self)
    465 # Get the response from http.client.HTTPConnection
--> 466 httplib_response = super().getresponse()
    468 try:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/http/client.py:1395, in HTTPConnection.getresponse(self)
   1394 try:
-> 1395     response.begin()
   1396 except ConnectionError:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/http/client.py:325, in HTTPResponse.begin(self)
    324 while True:
--> 325     version, status, reason = self._read_status()
    326     if status != CONTINUE:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/http/client.py:286, in HTTPResponse._read_status(self)
    285 def _read_status(self):
--> 286     line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    287     if len(line) > _MAXLINE:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/socket.py:706, in SocketIO.readinto(self, b)
    705 try:
--> 706     return self._sock.recv_into(b)
    707 except timeout:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/ssl.py:1314, in SSLSocket.recv_into(self, buffer, nbytes, flags)
   1311         raise ValueError(
   1312           "non-zero flags not allowed in calls to recv_into() on %s" %
   1313           self.__class__)
-> 1314     return self.read(nbytes, buffer)
   1315 else:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/ssl.py:1166, in SSLSocket.read(self, len, buffer)
   1165 if buffer is not None:
-> 1166     return self._sslobj.read(len, buffer)
   1167 else:

TimeoutError: The read operation timed out

The above exception was the direct cause of the following exception:

ReadTimeoutError                          Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    485 try:
--> 486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
    489         body=request.body,
    490         headers=request.headers,
    491         redirect=False,
    492         assert_same_host=False,
    493         preload_content=False,
    494         decode_content=False,
    495         retries=self.max_retries,
    496         timeout=timeout,
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/connectionpool.py:847, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    845     new_e = ProtocolError("Connection aborted.", new_e)
--> 847 retries = retries.increment(
    848     method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
    849 )
    850 retries.sleep()

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/util/retry.py:470, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    469 if read is False or method is None or not self._is_method_retryable(method):
--> 470     raise reraise(type(error), error, _stacktrace)
    471 elif read is not None:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/util/util.py:39, in reraise(tp, value, tb)
     38         raise value.with_traceback(tb)
---> 39     raise value
     40 finally:

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/connectionpool.py:793, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    792 # Make the request on the HTTPConnection object
--> 793 response = self._make_request(
    794     conn,
    795     method,
    796     url,
    797     timeout=timeout_obj,
    798     body=body,
    799     headers=headers,
    800     chunked=chunked,
    801     retries=retries,
    802     response_conn=response_conn,
    803     preload_content=preload_content,
    804     decode_content=decode_content,
    805     **response_kw,
    806 )
    808 # Everything went great!

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/connectionpool.py:539, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
    538 except (BaseSSLError, OSError) as e:
--> 539     self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
    540     raise

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/urllib3/connectionpool.py:370, in HTTPConnectionPool._raise_timeout(self, err, url, timeout_value)
    369 if isinstance(err, SocketTimeout):
--> 370     raise ReadTimeoutError(
    371         self, url, f"Read timed out. (read timeout={timeout_value})"
    372     ) from err
    374 # See the above comment about EAGAIN in Python 3.

ReadTimeoutError: HTTPSConnectionPool(host='raw.githubusercontent.com', port=443): Read timed out. (read timeout=5)

During handling of the above exception, another exception occurred:

ReadTimeout                               Traceback (most recent call last)
Cell In[2], line 5
      1 datafiles = (
      2     geodf.get(
      3         "netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/diag.2016-08-20_00.00.00_subset.nc"
      4     ),
----> 5     geodf.get("netcdf_files/MPAS/FalkoJudt/dyamond_1/30km/x1.655362.grid_subset.nc"),
      6 )

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/geocat/datafiles/__init__.py:31, in get(fname)
     30 def get(fname):
---> 31     return POOCH.fetch(fname)

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
    586     if downloader is None:
    587         downloader = choose_downloader(url, progressbar=progressbar)
--> 589     stream_download(
    590         url,
    591         full_path,
    592         known_hash,
    593         downloader,
    594         pooch=self,
    595         retry_if_failed=self.retry_if_failed,
    596     )
    598 if processor is not None:
    599     return processor(str(full_path), action, self)

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
    803 try:
    804     # Stream the file to a temporary so that we can safely check its
    805     # hash before overwriting the original.
    806     with temporary_file(path=str(fname.parent)) as tmp:
--> 807         downloader(url, tmp, pooch)
    808         hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
    809         shutil.move(tmp, str(fname))

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/pooch/downloaders.py:214, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
    212     # pylint: enable=consider-using-with
    213 try:
--> 214     response = requests.get(url, timeout=timeout, **kwargs)
    215     response.raise_for_status()
    216     content = response.iter_content(chunk_size=self.chunk_size)

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/api.py:73, in get(url, params, **kwargs)
     62 def get(url, params=None, **kwargs):
     63     r"""Sends a GET request.
     64 
     65     :param url: URL for the new :class:`Request` object.
   (...)
     70     :rtype: requests.Response
     71     """
---> 73     return request("get", url, params=params, **kwargs)

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/sessions.py:725, in Session.send(self, request, **kwargs)
    722 if allow_redirects:
    723     # Redirect resolving generator.
    724     gen = self.resolve_redirects(r, request, **kwargs)
--> 725     history = [resp for resp in gen]
    726 else:
    727     history = []

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/sessions.py:725, in <listcomp>(.0)
    722 if allow_redirects:
    723     # Redirect resolving generator.
    724     gen = self.resolve_redirects(r, request, **kwargs)
--> 725     history = [resp for resp in gen]
    726 else:
    727     history = []

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/sessions.py:266, in SessionRedirectMixin.resolve_redirects(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs)
    263     yield req
    264 else:
--> 266     resp = self.send(
    267         req,
    268         stream=stream,
    269         timeout=timeout,
    270         verify=verify,
    271         cert=cert,
    272         proxies=proxies,
    273         allow_redirects=False,
    274         **adapter_kwargs,
    275     )
    277     extract_cookies_to_jar(self.cookies, prepared_request, resp.raw)
    279     # extract redirect url, if any, for the next loop

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File ~/checkouts/readthedocs.org/user_builds/uxarray-jain/conda/latest/lib/python3.11/site-packages/requests/adapters.py:532, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    530     raise SSLError(e, request=request)
    531 elif isinstance(e, ReadTimeoutError):
--> 532     raise ReadTimeout(e, request=request)
    533 elif isinstance(e, _InvalidHeader):
    534     raise InvalidHeader(e, request=request)

ReadTimeout: HTTPSConnectionPool(host='raw.githubusercontent.com', port=443): Read timed out. (read timeout=5)
uxds = ux.open_dataset(datafiles[1], datafiles[0])
clim = (uxds["relhum_200hPa"][0].values.min(), uxds["relhum_200hPa"][0].values.max())
features = gf.coastline(
    projection=ccrs.PlateCarree(), line_width=1, scale="50m"
) * gf.states(projection=ccrs.PlateCarree(), line_width=1, scale="50m")

Global Grid#

Many unstructured grids, such as those from global climate models, span the entire surface of a sphere (both with or without masks, such as continents).

UXarray supports working with these global grids, handling cases that arise with the spherical geometry of the earth (wrap around at the antimeridian, pole points, etc.)

uxds["relhum_200hPa"][0].plot.rasterize(
    method="polygon", exclude_antimeridian=True, title="Global Grid", **plot_opts
) * features

In addition to plotting global grids, we can perform analysis operations.

uxds["relhum_200hPa"][0].values.mean()

Regional Subsets#

UXarray supports taking subsets of a grid, which allows us to select a region and perform analysis directly on that area, as opposed to the global grid.

There are currently three supported subsetting methods, both for the Grid and UxDataArray data structures.

uxds["relhum_200hPa"].subset
uxds["relhum_200hPa"].uxgrid.subset

Bounding Box#

We can declare a bounding box centered about the Chicago area by specifying the minimum and maximum longitude and latitude bounds.

lon_bounds = (-87.6298 - 2, -87.6298 + 2)
lat_bounds = (41.8781 - 2, 41.8781 + 2)

Our bounding box ensures that the coordinates of our select element (nodes, edge_centers, or face_centers) are within the defined bounding box range.

Below is an example using the corner nodes for our subset.

bbox_subset_nodes = uxds["relhum_200hPa"][0].subset.bounding_box(
    lon_bounds, lat_bounds, element="nodes"
)

bbox_subset_nodes.plot.rasterize(
    method="polygon",
    exclude_antimeridian=True,
    clim=clim,
    title="Bounding Box Subset",
    **plot_opts,
) * features

And similarly using face centers.

bbox_subset_faces = uxds["relhum_200hPa"][0].subset.bounding_box(
    lon_bounds, lat_bounds, element="face centers"
)

bbox_subset_faces.plot.rasterize(
    method="polygon",
    exclude_antimeridian=True,
    clim=clim,
    title="Bounding Box Subset (Face Center Query)",
    **plot_opts,
) * features

While the bounding box is generally the same, you will notice differences along the border depending on which element is used to query.

:::{note} Specifying which element to query (i.e. nodes, edgecenters, or face centers) is supported by all subsetting methods. :::

Bounding Circle#

A bounding circle is defined using a center coordinate (lon, lat) and a radius (in degrees). The resulting subset will contain all elements within the radius of that circle.

center_coord = [-87.6298, 41.8781]

r = 2
bcircle_subset = uxds["relhum_200hPa"][0].subset.bounding_circle(center_coord, r)

bcircle_subset.plot.rasterize(
    method="polygon",
    exclude_antimeridian=True,
    clim=clim,
    title="Bounding Circle Subset",
    **plot_opts,
) * features

Nearest Neighbor#

Similar to the bounding circle, we can perform a nearest neighbor subset at some center coordinate (lon, lat) and query for some number of elements k

center_coord = [-87.6298, 41.8781]
nn_subset = uxds["relhum_200hPa"][0].subset.nearest_neighbor(
    center_coord, k=30, element="nodes"
)

nn_subset.plot.rasterize(
    method="polygon",
    exclude_antimeridian=True,
    clim=clim,
    title="Nearest Neighbor Subset",
    **plot_opts,
) * features
nn_subset_120 = uxds["relhum_200hPa"][0].subset.nearest_neighbor(
    center_coord, k=120, element="face centers"
)

nn_subset_120.plot.rasterize(
    method="polygon",
    exclude_antimeridian=True,
    clim=clim,
    title="Nearest Neighbor Subset (120 Faces)",
    **plot_opts,
) * features
nn_subset_1 = uxds["relhum_200hPa"][0].subset.nearest_neighbor(
    center_coord, k=1, element="face centers"
)

nn_subset_1.plot.rasterize(
    method="polygon",
    exclude_antimeridian=True,
    clim=clim,
    title="Nearest Neighbor Subset (Closest Face)",
    **plot_opts,
) * features

Analysis Operators#

Since each subset is a newly initialized UxDataArray, paired also with a newly initialized Grid, we can perform analysis operators directly on these new objects.

Below is a few examples of basic statical operations on the subset data arrays.

(
    bbox_subset_nodes.values.mean(),
    bbox_subset_faces.values.mean(),
    bcircle_subset.values.mean(),
)
(
    bbox_subset_nodes.values.std(),
    bbox_subset_faces.values.std(),
    bcircle_subset.values.std(),
)
(
    bbox_subset_nodes.values.min(),
    bbox_subset_faces.values.min(),
    bcircle_subset.values.min(),
)
(
    bbox_subset_nodes.values.max(),
    bbox_subset_faces.values.max(),
    bcircle_subset.values.max(),
)