Skip to content

NGI Utils

C M pynasonde.vipir.ngi.utils — timezone conversion, outlier removal, smoothing, grid helpers.

pynasonde.vipir.ngi.utils

Shared utilities for VIPIR NGI processing (time conversion, smoothing, etc.).

TimeZoneConversion

Convert timestamps between UTC and a station's local timezone.

Source code in pynasonde/vipir/ngi/utils.py
class TimeZoneConversion:
    """Convert timestamps between UTC and a station's local timezone.

    The local zone is either given by name or looked up from the station
    coordinates with ``TimezoneFinder``. Both zones are stored as pytz
    timezone objects in ``self.utc_zone`` and ``self.local_zone``.
    """

    def __init__(
        self,
        local_tz: str = None,
        lat: float = 37.8815,
        long: float = -75.4374,
    ):
        """Initialize the converter with either a timezone name or coordinates.

        Args:
            local_tz: Explicit timezone name. When given it takes precedence
                over the coordinates; when None the name is inferred from
                ``lat``/``long``.
            lat: Station latitude used when deriving the timezone.
            long: Station longitude used when deriving the timezone.
        """
        self.local_tz = local_tz
        # Only fall back to the coordinate lookup when no explicit name was
        # supplied; previously the (non-None) default coordinates silently
        # overrode any explicit ``local_tz``.
        if self.local_tz is None and (long is not None) and (lat is not None):
            self.local_tz = TimezoneFinder().timezone_at(lng=long, lat=lat)
        logger.info(f"Local Time: {self.local_tz}")
        # NOTE(review): if no name could be resolved, ``self.local_tz`` is
        # still None here and is passed to pytz as-is -- confirm that the
        # resulting pytz error is the desired failure mode.
        self.utc_zone = pytz.timezone("UTC")
        self.local_zone = pytz.timezone(self.local_tz)

    def utc_to_local_time(self, dates):
        """Shift a sequence of UTC datetimes to the configured local zone.

        The UTC-to-local offset is evaluated once, at ``dates[0]``, and the
        same shift is applied to every element, so a DST change inside the
        sequence is not reflected.

        Args:
            dates: Indexable sequence of datetime objects expressed in UTC.

        Returns:
            List with each input shifted by the local-zone offset; an empty
            list when ``dates`` is empty.
        """
        if len(dates) == 0:
            return []
        date = pd.to_datetime(dates[0])
        offset = self.local_zone.localize(date) - self.utc_zone.localize(
            date
        ).astimezone(self.local_zone)
        # total_seconds() keeps the sign of the offset. ``.seconds`` is always
        # in [0, 86400), which turned e.g. -5 h into +19 h for zones east of UTC.
        tdiff_hr = offset.total_seconds() / 3600
        return [d - dt.timedelta(hours=tdiff_hr) for d in dates]

__init__(local_tz=None, lat=37.8815, long=-75.4374)

Initialize the converter with either a timezone name or coordinates.

Parameters:

Name Type Description Default
local_tz str

Explicit timezone name; inferred from lat/lon when None.

None
lat float

Station latitude used when deriving the timezone.

37.8815
long float

Station longitude used when deriving the timezone.

-75.4374
Source code in pynasonde/vipir/ngi/utils.py
def __init__(
    self,
    local_tz: str = None,
    lat: float = 37.8815,
    long: float = -75.4374,
):
    """Initialize the converter with either a timezone name or coordinates.

    Args:
        local_tz: Explicit timezone name. When given it takes precedence over
            the coordinates; when None the name is inferred from
            ``lat``/``long``.
        lat: Station latitude used when deriving the timezone.
        long: Station longitude used when deriving the timezone.
    """
    self.local_tz = local_tz
    # Only fall back to the coordinate lookup when no explicit name was
    # supplied; previously the (non-None) default coordinates silently
    # overrode any explicit ``local_tz``.
    if self.local_tz is None and (long is not None) and (lat is not None):
        self.local_tz = TimezoneFinder().timezone_at(lng=long, lat=lat)
    logger.info(f"Local Time: {self.local_tz}")
    # NOTE(review): if no name could be resolved, ``self.local_tz`` is still
    # None here and is passed to pytz as-is -- confirm that the resulting
    # pytz error is the desired failure mode.
    self.utc_zone = pytz.timezone("UTC")
    self.local_zone = pytz.timezone(self.local_tz)

utc_to_local_time(dates)

Translate an iterable of UTC datetimes to the configured local zone.

Parameters:

Name Type Description Default
dates

Indexable sequence of datetime objects; the UTC-to-local offset is computed once from the first element and applied to every element.

required

Returns:

Type Description

List of localized datetimes adjusted to self.local_zone.

Source code in pynasonde/vipir/ngi/utils.py
def utc_to_local_time(self, dates):
    """Shift a sequence of UTC datetimes to the configured local zone.

    The UTC-to-local offset is evaluated once, at ``dates[0]``, and the same
    shift is applied to every element, so a DST change inside the sequence
    is not reflected.

    Args:
        dates: Indexable sequence of datetime objects expressed in UTC.

    Returns:
        List with each input shifted by the local-zone offset; an empty list
        when ``dates`` is empty.
    """
    if len(dates) == 0:
        return []
    date = pd.to_datetime(dates[0])
    offset = self.local_zone.localize(date) - self.utc_zone.localize(
        date
    ).astimezone(self.local_zone)
    # total_seconds() keeps the sign of the offset. ``.seconds`` is always in
    # [0, 86400), which turned e.g. -5 h into +19 h for zones east of UTC.
    tdiff_hr = offset.total_seconds() / 3600
    return [d - dt.timedelta(hours=tdiff_hr) for d in dates]

to_local_time(dates, tz1, tz2)

Adjust naive datetimes from timezone tz1 into tz2.

Parameters:

Name Type Description Default
dates list

Iterable of datetime objects.

required
tz1

Origin timezone (pytz timezone).

required
tz2

Destination timezone (pytz timezone).

required

Returns:

Type Description

List of datetime objects converted to tz2.

Source code in pynasonde/vipir/ngi/utils.py
def to_local_time(dates: list, tz1, tz2):
    """Shift naive datetimes from timezone `tz1` into `tz2`.

    The offset between the two zones is evaluated once, at ``dates[0]``, and
    the same shift is applied to every element.

    Args:
        dates: Indexable sequence of datetime objects expressed in `tz1`.
        tz1: Origin timezone (pytz timezone; must provide ``localize``).
        tz2: Destination timezone (pytz timezone; must provide ``localize``).

    Returns:
        List of datetime objects shifted to `tz2`; an empty list when
        ``dates`` is empty.
    """
    if len(dates) == 0:
        return []
    date = pd.to_datetime(dates[0])
    offset = tz2.localize(date) - tz1.localize(date).astimezone(tz2)
    # total_seconds() keeps the sign of the offset. ``.seconds`` is always in
    # [0, 86400), so a negative offset (tz2 east of tz1) came out as
    # 24 h minus the real value.
    tdiff_hr = offset.total_seconds() / 3600
    return [d - dt.timedelta(hours=tdiff_hr) for d in dates]

remove_outliers(o, pname, quantiles=[0.05, 0.95])

Trim rows where pname falls outside the provided quantile window.

Parameters:

Name Type Description Default
o pd.DataFrame

Input dataframe.

required
pname str

Column name used to evaluate quantiles.

required
quantiles

Lower and upper quantile thresholds.

[0.05, 0.95]

Returns:

Type Description

Filtered dataframe restricted to the quantile window.

Source code in pynasonde/vipir/ngi/utils.py
def remove_outliers(o: pd.DataFrame, pname: str, quantiles=[0.05, 0.95]):
    """Keep only the rows whose `pname` value lies inside a quantile window.

    Args:
        o: Input dataframe.
        pname: Name of the column whose quantiles define the window.
        quantiles: Two-element sequence with the lower and upper quantile
            (both bounds are inclusive).

    Returns:
        Dataframe containing the rows of `o` inside the window; the original
        index labels are preserved.
    """
    column = o[pname]
    low_q, high_q = quantiles[0], quantiles[1]
    floor = column.quantile(low_q)
    ceiling = column.quantile(high_q)
    # ``between`` is inclusive on both ends by default.
    inside = column.between(floor, ceiling)
    return o[inside]

running_median(arr, window=21)

Compute a moving median with the given window size.

Parameters:

Name Type Description Default
arr

Sequence of values.

required
window

Sliding window length.

21

Returns:

Type Description

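List of median values, one per input element; each is the median of the trailing window ending at that element, and early entries use however many samples are available.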

Source code in pynasonde/vipir/ngi/utils.py
def running_median(arr, window=21):
    """Return the trailing moving median of a sequence.

    Args:
        arr: Sequence of values.
        window: Number of samples in the sliding window.

    Returns:
        List with one median per input element. Windows are not centred, and
        the first entries are computed from the samples seen so far
        (``min_periods=1``) rather than being NaN.
    """
    series = pd.Series(arr)
    roller = series.rolling(window=window, min_periods=1)
    medians = roller.median()
    return medians.tolist()

smooth(x, window_len=11, window='hanning')

Apply a windowed smoothing convolution to a 1-D array.

Parameters:

Name Type Description Default
x

Input NumPy array (1-D).

required
window_len

Length of the smoothing window.

11
window

Window function name (flat, hanning, hamming, bartlett, blackman).

'hanning'

Returns:

Type Description

Smoothed NumPy array with edge handling.

Source code in pynasonde/vipir/ngi/utils.py
def smooth(x, window_len=11, window="hanning"):
    """Apply a windowed smoothing convolution to a 1-D array.

    The signal is extended at both ends with mirrored copies of itself so
    the convolution does not introduce edge transients, then convolved with
    the normalized window and trimmed back.

    Args:
        x: 1-D input; any array-like accepted by ``np.asarray``.
        window_len: Length of the smoothing window. Values below 3 return the
            input unchanged.
        window: Window function name (flat, hanning, hamming, bartlett,
            blackman). ``flat`` is a plain moving average.

    Returns:
        Smoothed NumPy array. For odd `window_len` it has the same length as
        `x`; for even `window_len` the trimming leaves ``len(x) + 1`` samples.

    Raises:
        ValueError: If `x` is not 1-D, is shorter than `window_len`, or
            `window` is not one of the supported names.
    """
    x = np.asarray(x)
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len < 3:
        return x
    if window not in ("flat", "hanning", "hamming", "bartlett", "blackman"):
        raise ValueError(
            "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
        )
    # Pad with reflected copies of the signal at both ends.
    s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]]
    if window == "flat":
        w = np.ones(window_len, "d")
    else:
        # Look the window function up by name instead of building a string
        # for eval(); the name was validated against the whitelist above.
        w = getattr(np, window)(window_len)
    y = np.convolve(w / w.sum(), s, mode="valid")
    # Drop the samples that correspond to the mirrored padding.
    half = (window_len - 1) // 2
    return y[half:-half]

setsize(size=8)

Configure matplotlib/scienceplots defaults for a consistent style.

Parameters:

Name Type Description Default
size

Base font size applied to the plot configuration.

8
Source code in pynasonde/vipir/ngi/utils.py
def setsize(size=8):
    """Apply the project's matplotlib style and font defaults.

    Activates the ``science`` and ``ieee`` style sheets, turns LaTeX text
    rendering off, selects a sans-serif font stack and sets the tick-label
    and base font sizes. All changes go to matplotlib's global rcParams.

    Args:
        size: Font size applied to the x/y tick labels and the base font.
    """

    import matplotlib as mpl
    import matplotlib.pyplot as plt

    plt.style.use(["science", "ieee"])

    # Text and font settings, applied in the same order as before.
    sans_serif_stack = ["Tahoma", "DejaVu Sans", "Lucida Grande", "Verdana"]
    plt.rcParams.update({"text.usetex": False})
    plt.rcParams["font.family"] = "sans-serif"
    plt.rcParams["font.sans-serif"] = sans_serif_stack

    size_keys = ("xtick.labelsize", "ytick.labelsize", "font.size")
    mpl.rcParams.update({key: size for key in size_keys})

to_namespace(d)

Recursively convert dicts/lists into SimpleNamespace instances.

Parameters:

Name Type Description Default
d object

Arbitrary nested structure of dicts/lists/primitives.

required

Returns:

Type Description
SimpleNamespace

Equivalent structure with dicts converted to SimpleNamespace.

Source code in pynasonde/vipir/ngi/utils.py
def to_namespace(d: object) -> SimpleNamespace:
    """Recursively turn nested dicts into `SimpleNamespace` objects.

    Args:
        d: Arbitrary nested structure of dicts/lists/primitives.

    Returns:
        For a dict, a `SimpleNamespace` whose attributes are the converted
        values; for a list, a list of converted items; any other value is
        returned unchanged.
    """
    if isinstance(d, list):
        return [to_namespace(item) for item in d]
    if not isinstance(d, dict):
        # Leaf value: nothing to convert.
        return d
    attributes = {key: to_namespace(value) for key, value in d.items()}
    return SimpleNamespace(**attributes)

load_toml(fpath=None)

Load a TOML configuration file into nested SimpleNamespace objects.

Parameters:

Name Type Description Default
fpath str

Optional explicit path to a TOML file; defaults to bundled config.

None

Returns:

Type Description
SimpleNamespace

SimpleNamespace representation of the parsed TOML.

Source code in pynasonde/vipir/ngi/utils.py
def load_toml(fpath: str = None) -> SimpleNamespace:
    """Parse a TOML configuration file into nested `SimpleNamespace` objects.

    Args:
        fpath: Path to a TOML file. When empty or None, the ``config.toml``
            resource of the ``pynasonde`` package is loaded instead.

    Returns:
        SimpleNamespace representation of the parsed TOML.
    """
    if not fpath:
        # No explicit file: fall back to the config shipped with the package.
        with importlib.resources.path("pynasonde", "config.toml") as config_path:
            logger.info(f"Loading from {config_path}")
            return to_namespace(toml.load(config_path))

    logger.info(f"Loading from {fpath}")
    parsed = toml.load(fpath)
    return to_namespace(parsed)

get_color_by_index(index, total_indices, cmap_name='viridis')

Pick a color from a colormap using an index within [0, total).

Parameters:

Name Type Description Default
index

Position within the available color slots.

required
total_indices

Total number of available slots.

required
cmap_name

Matplotlib colormap name.

'viridis'

Returns:

Type Description

RGBA tuple sampled from the requested colormap.

Source code in pynasonde/vipir/ngi/utils.py
def get_color_by_index(index, total_indices, cmap_name="viridis"):
    """Sample a colormap at the fractional position ``index / total_indices``.

    Args:
        index: Position within the available color slots.
        total_indices: Total number of available slots (used as the divisor).
        cmap_name: Matplotlib colormap name.

    Returns:
        The color the colormap returns for the normalized position.
    """
    import matplotlib.pyplot as plt

    colormap = plt.get_cmap(cmap_name)
    # Map the slot number onto the colormap's 0..1 domain.
    fraction = index / total_indices
    return colormap(fraction)

get_gridded_parameters(q, xparam, yparam, zparam, r=1, rounding=True)

Reshape scattered parameter samples onto an evenly-spaced grid.

Parameters:

Name Type Description Default
q

Dataframe containing the source columns.

required
xparam

Column name used for the X dimension.

required
yparam

Column name used for the Y dimension.

required
zparam

Column name containing the values to grid.

required
r

Rounding precision applied before grouping.

1
rounding

Whether to round values prior to pivoting.

True

Returns:

Type Description

Tuple of (X, Y, Z) NumPy arrays suitable for contour/mesh plots.

Source code in pynasonde/vipir/ngi/utils.py
def get_gridded_parameters(q, xparam, yparam, zparam, r=1, rounding=True):
    """Pivot scattered samples into grid arrays for contour/mesh plots.

    Samples are optionally rounded, duplicates sharing an (x, y) pair are
    averaged, and the result is pivoted into a table with one row per
    distinct x and one column per distinct y.

    Args:
        q: Dataframe containing the source columns. It is not modified.
        xparam: Column name used for the X dimension. A column named
            ``"time"`` is never rounded.
        yparam: Column name used for the Y dimension.
        zparam: Column name containing the values to grid.
        r: Number of decimals passed to ``np.round``.
        rounding: Whether to round x/y values prior to grouping.

    Returns:
        Tuple ``(X, Y, Z)``. ``X`` and ``Y`` come from ``np.meshgrid(x, y)``
        and have shape ``(len(y), len(x))``; ``Z`` is a masked array of shape
        ``(len(x), len(y))`` with NaN cells (x/y pairs without samples)
        masked. NOTE(review): ``Z`` is the transpose of the ``X``/``Y``
        layout -- presumably callers transpose it before plotting; confirm.
    """
    import numpy as np

    # Work on an explicit copy: assigning columns into a slice of ``q``
    # triggers pandas' SettingWithCopyWarning on versions without
    # copy-on-write.
    samples = q[[xparam, yparam, zparam]].copy()
    if rounding:
        if xparam != "time":
            samples[xparam] = np.round(samples[xparam], r)
        samples[yparam] = np.round(samples[yparam], r)
    # Average every group of samples that landed on the same grid node.
    samples = samples.groupby([xparam, yparam]).mean().reset_index()
    table = samples[[xparam, yparam, zparam]].pivot(index=xparam, columns=yparam)
    x = table.index.values
    y = table.columns.levels[1].values
    X, Y = np.meshgrid(x, y)
    # Mask the nan values! pcolormesh can't handle them well!
    z_values = table[zparam].values
    Z = np.ma.masked_where(np.isnan(z_values), z_values)
    return X, Y, Z