# Source code for publiplots.plot.kdeplot

"""
Kernel density estimate (KDE) plot functions for publiplots.

This module provides publication-ready kernel density estimates in both
univariate (1D curve / filled density) and bivariate (2D contour) modes,
with optional hue grouping, continuous-hue colorbar legend, and the
publiplots double-layer transparent-fill styling shared with
:func:`publiplots.histplot`.

Dispatch rule
-------------
- Univariate (1D): exactly one of ``x``/``y`` is provided.
- Bivariate (2D): both ``x`` and ``y`` are provided (contour plot).

Legend stashing
---------------
- 1D + categorical hue: one ``LegendEntry`` with rectangle handles when
  ``fill=True``, line handles when ``fill=False``.
- 2D + categorical hue: one ``LegendEntry`` with line handles (one per
  hue level, colored from the palette).
- 2D + ``cbar=True`` (no hue): one continuous-hue ``LegendEntry`` built
  from the QuadContourSet's cmap + data-level-derived normalization.
  Seaborn's own colorbar is suppressed; the band is rendered by the
  publiplots legend reactor.
"""

import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.axes import Axes
from matplotlib.collections import PolyCollection
from matplotlib.colors import Normalize, to_rgba
from matplotlib.lines import Line2D

from publiplots.themes.colors import resolve_palette_map
from publiplots.themes.rcparams import resolve_param
from publiplots.utils import create_legend_handles
from publiplots.utils.legend_entries import (
    LegendEntry,
    resolve_legend_flags,
    stash_entry,
)
from publiplots.utils.plot_legend import render_entries, stash_continuous_hue
from publiplots.utils.transparency import ArtistTracker


def kdeplot(
    data: Optional[pd.DataFrame] = None,
    *,
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    palette: Optional[Union[str, Dict, List]] = None,
    color: Optional[str] = None,
    alpha: Optional[float] = None,
    fill: Optional[bool] = None,
    multiple: str = "layer",
    common_norm: bool = True,
    common_grid: bool = False,
    cumulative: bool = False,
    weights: Optional[str] = None,
    bw_method: Union[str, float] = "scott",
    bw_adjust: float = 1,
    gridsize: int = 200,
    cut: float = 3,
    clip: Optional[Tuple[float, float]] = None,
    log_scale: Optional[Union[bool, float, Tuple]] = None,
    warn_singular: bool = True,
    levels: Union[int, List[float]] = 10,
    thresh: float = 0.05,
    cbar: bool = False,
    cbar_ax: Optional[Axes] = None,
    cbar_kws: Optional[Dict] = None,
    cmap: Optional[str] = None,
    linewidth: Optional[float] = None,
    edgecolor: Optional[str] = None,
    ax: Optional[Axes] = None,
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    legend: Union[bool, Dict] = True,
    legend_kws: Optional[Dict] = None,
    **kwargs,
) -> Axes:
    """
    Create a publication-ready kernel density estimate (KDE) plot.

    Draws a univariate density curve when exactly one of ``x`` / ``y`` is
    provided, or a bivariate contour density when both are. Supports hue
    grouping, the full seaborn ``kdeplot`` estimator API (bandwidth, cut,
    clip, cumulative, log-scale), and publiplots' double-layer
    transparent-fill styling + standard legend reactor.

    Parameters
    ----------
    data : DataFrame, optional
        Long-form dataframe with ``x`` / ``y`` / ``hue`` columns.
    x : str, optional
        Column for the x-axis variable. Provide alone for a 1D density
        curve; provide with ``y`` for a 2D contour plot.
    y : str, optional
        Column for the y-axis variable. Provide alone for a horizontal 1D
        density curve; provide with ``x`` for a 2D contour plot.
    hue : str, optional
        Column name for categorical color grouping. Missing (NaN) hue
        values are excluded from the resolved palette and the legend.
    palette : str, dict, or list, optional
        Color palette. Accepts a palette name (publiplots or seaborn), an
        explicit list of colors, or a ``{hue_level: color}`` dict.
    color : str, optional
        Fixed color used when ``hue`` is None.
    alpha : float, optional
        Transparency for filled regions (0-1). Falls back to
        ``publiplots.rcParams["alpha"]``.
    fill : bool, optional
        If True, fill area under curves (1D) or color-fill contour bands
        (2D). Pass ``None`` to use seaborn's derived default (which is
        ``False`` for ``multiple='layer'`` and ``True`` for
        ``multiple='stack'`` / ``'fill'``).
    multiple : {'layer', 'stack', 'fill'}, default='layer'
        Handling of overlapping hue levels for 1D densities.
    common_norm : bool, default=True
        When normalizing, normalize across all hue levels jointly (True)
        or per-level (False).
    common_grid : bool, default=False
        Use a shared evaluation grid across hue levels (useful when
        ``multiple='stack'`` or ``'fill'``).
    cumulative : bool, default=False
        Plot the cumulative distribution instead of the density.
    weights : str, optional
        Column name with per-observation weights.
    bw_method : str or float, default='scott'
        Bandwidth rule (``'scott'`` / ``'silverman'``) or an explicit
        scalar.
    bw_adjust : float, default=1
        Multiplicative factor on the automatic bandwidth.
    gridsize : int, default=200
        Number of evaluation points for the KDE grid.
    cut : float, default=3
        Extend KDE beyond data range by ``cut * bandwidth``.
    clip : (min, max), optional
        Hard limits for the KDE evaluation range.
    log_scale : bool, number, or pair, optional
        Log-scale the value axis; forwarded to seaborn.
    warn_singular : bool, default=True
        Emit a warning when a hue level has near-zero variance (KDE
        cannot be computed).
    levels : int or list of float, default=10
        Number of contour levels (int) or explicit probability-mass
        cutoffs (list). Used in 2D mode only.
    thresh : float, default=0.05
        Lowest iso-density level as a fraction of peak density. Used in
        2D mode only.
    cbar : bool, default=False
        2D only. If True, draws filled contour bands colored by density
        and stashes a continuous-hue colorbar entry on the axes. The band
        is rendered by publiplots' legend reactor, not by seaborn. When
        ``hue`` is None and ``fill`` is unset, ``cbar=True`` implicitly
        switches to ``fill=True`` so the contour fill encodes the density
        value the colorbar advertises.
    cbar_ax : Axes, optional
        Accepted for signature compatibility. Emits a UserWarning — the
        colorbar is placed via ``stash_continuous_hue``, not onto
        ``cbar_ax`` directly.
    cbar_kws : dict, optional
        Forwarded into ``legend_kws`` (e.g. ``{'hue_label': 'density'}``).
        Keys already present in ``legend_kws`` take precedence.
    cmap : str or Colormap, optional
        2D only. Colormap used to color the contours and the colorbar
        gradient. An explicitly passed ``cmap`` is forwarded to seaborn
        whether or not ``cbar`` is set. When None (the default) and
        ``cbar=True``, falls back to ``matplotlib.rcParams["image.cmap"]``
        — same convention as :func:`pp.hexbinplot`. Only applies in 2D
        mode without ``hue``.
    linewidth : float, optional
        Stroke width for curves / contour lines. Falls back to rcParams.
    edgecolor : str, optional
        Override for curve / contour edge color. Falls back to rcParams.
    ax : Axes, optional
        Target axes. When None, a new figure is created via
        :func:`publiplots.subplots`.
    title : str, optional
        Plot title. When None, no title is set.
    xlabel : str, optional
        X-axis label. When None (the default), seaborn's inferred label
        (the column name or ``'Density'``) is preserved.
    ylabel : str, optional
        Y-axis label. Same semantics as ``xlabel``.
    legend : bool or dict, default=True
        Legend control. ``True`` stashes and renders all legend kinds.
        ``False`` hides everything. A dict enables per-kind
        (e.g. ``{"hue": False}``).
    legend_kws : dict, optional
        Extra kwargs forwarded to the legend builder
        (e.g. ``{'inside': True}``).
    **kwargs
        Extra kwargs passed to :func:`seaborn.kdeplot`. ``figsize`` is
        rejected — use ``pp.subplots(axes_size=...)`` and pass ``ax=``.

    Returns
    -------
    Axes
        The axes the plot was drawn on.

    Raises
    ------
    ValueError
        If neither ``x`` nor ``y`` is given, or if any of ``x`` / ``y`` /
        ``hue`` / ``weights`` names a column missing from ``data``.

    Examples
    --------
    >>> ax = pp.kdeplot(data=df, x="value")
    >>> ax = pp.kdeplot(data=df, x="value", hue="group", fill=True)
    >>> ax = pp.kdeplot(data=df, x="x", y="y", cbar=True)

    See Also
    --------
    publiplots.histplot : Binned counterpart with optional KDE overlay.
    publiplots.hexbinplot : Bivariate hex binning for dense point clouds.
    seaborn.kdeplot : Underlying rendering primitive.
    """
    # Function-scope import, kept exactly where the original placed it.
    from publiplots.layout.subplots import reject_figsize, subplots as _pp_subplots

    reject_figsize(kwargs)

    linewidth = resolve_param("lines.linewidth", linewidth)
    alpha = resolve_param("alpha", alpha)
    color = resolve_param("color", color)
    edgecolor = resolve_param("edgecolor", edgecolor)

    # Dispatch: exactly one of (x alone), (y alone), or (x and y) is valid.
    if x is None and y is None:
        raise ValueError(
            "pp.kdeplot requires at least one of `x` or `y` to be provided."
        )
    is_2d = x is not None and y is not None

    # Column-presence validation (mimics sns/pp.hexbinplot semantics).
    if data is not None:
        required_cols: List[str] = [
            name for name in (x, y, hue, weights) if name is not None
        ]
        missing = [c for c in required_cols if c not in data.columns]
        if missing:
            raise ValueError(f"Missing columns in data: {missing}")

    if ax is None:
        _, ax = _pp_subplots()

    # Signature-compat: cbar_ax is accepted but not honored; we route
    # colorbars through stash_continuous_hue, not manual placement.
    if cbar_ax is not None:
        warnings.warn(
            "pp.kdeplot: cbar_ax= is ignored; colorbar placement is "
            "managed by publiplots' legend reactor. Use pp.legend(side=...) "
            "or legend_kws={'inside': True} to customize placement.",
            UserWarning,
            stacklevel=2,
        )

    # Resolve categorical palette map once up-front so seaborn + legend
    # agree on per-level colors. Null hue values are dropped first:
    # seaborn does not draw a group for them, so keeping NaN here would
    # add a legend entry with nothing behind it.
    palette_map: Optional[Dict] = None
    if hue is not None and data is not None:
        palette_map = resolve_palette_map(
            values=list(data[hue].dropna().unique()),
            palette=palette,
        )

    tracker = ArtistTracker(ax)

    sns_kwargs: Dict = {
        "data": data,
        "x": x,
        "y": y,
        "hue": hue,
        "weights": weights,
        "multiple": multiple,
        "common_norm": common_norm,
        "common_grid": common_grid,
        "cumulative": cumulative,
        "bw_method": bw_method,
        "bw_adjust": bw_adjust,
        "gridsize": gridsize,
        "cut": cut,
        "clip": clip,
        "log_scale": log_scale,
        "warn_singular": warn_singular,
        "levels": levels,
        "thresh": thresh,
        # IMPORTANT: always suppress seaborn's colorbar; publiplots manages it.
        "cbar": False,
        "ax": ax,
        "legend": False,
    }

    # Linewidth is only forwarded in 1D; in 2D it is applied to the
    # resulting collections after drawing (see below).
    if not is_2d:
        sns_kwargs["linewidth"] = linewidth

    # 2D + no hue is the only mode where a continuous cmap applies.
    #  * cbar=True: a colorbar is meaningful only when the contours are
    #    actually colored by density, so default fill=True (when the user
    #    didn't explicitly set fill) and always resolve a cmap.
    #  * cbar=False: honor an explicitly passed cmap instead of silently
    #    dropping it; with cmap=None seaborn's color-based default stays.
    effective_fill = fill
    cmap_resolved: Optional[Any] = None
    if is_2d and hue is None:
        if cbar:
            if effective_fill is None:
                effective_fill = True
            cmap_resolved = (
                cmap if cmap is not None else plt.rcParams["image.cmap"]
            )
        elif cmap is not None:
            cmap_resolved = cmap
        if cmap_resolved is not None:
            sns_kwargs["cmap"] = cmap_resolved

    if effective_fill is not None:
        sns_kwargs["fill"] = effective_fill

    if hue is None:
        # Don't pass `color` when we're letting `cmap` drive the bands.
        if color is not None and cmap_resolved is None:
            sns_kwargs["color"] = color
    else:
        sns_kwargs["palette"] = palette_map if palette_map else palette

    sns_kwargs.update(kwargs)
    sns.kdeplot(**sns_kwargs)

    # ---- 1D face/edge double-layer paint ----------------------------------
    # Lines carry the opaque edge color, filled PolyCollections get alpha
    # on face and 1.0 on edge. For 1D only — 2D contour collections carry
    # per-level colormap array data that we must not overwrite.
    if not is_2d:
        _paint_1d(
            new_lines=tracker.get_new_lines(),
            new_collections=tracker.get_new_collections(),
            palette=palette_map,
            color=color,
            edgecolor=edgecolor,
            linewidth=linewidth,
            alpha=alpha,
        )
    else:
        # 2D contour sets get linewidth applied post-hoc. Face/edge colors
        # are driven by the contour set's cmap so we leave those alone.
        for collection in tracker.get_new_collections():
            try:
                collection.set_linewidth(linewidth)
            except AttributeError:
                # Best-effort: skip artists without a linewidth setter.
                pass

    # ---- Labels ----------------------------------------------------------
    # None → preserve seaborn's inferred label (column name / 'Density').
    if title is not None:
        ax.set_title(title)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    # ---- Legend stash ----------------------------------------------------
    # Merge cbar_kws into legend_kws: cbar_kws is the seaborn-compat name for
    # colorbar-specific options; legend_kws is our canonical kwarg bucket
    # and wins on key collisions.
    merged_legend_kws: Dict = dict(legend_kws or {})
    if cbar_kws:
        for k, v in cbar_kws.items():
            merged_legend_kws.setdefault(k, v)

    _legend(
        ax=ax,
        hue=hue,
        palette=palette_map,
        is_2d=is_2d,
        fill=fill,
        multiple=multiple,
        cbar=cbar,
        cbar_cmap=cmap_resolved,
        new_collections=tracker.get_new_collections(),
        alpha=alpha,
        linewidth=linewidth,
        edgecolor=edgecolor,
        color=color,
        legend=legend,
        legend_kws=merged_legend_kws,
    )

    return ax


# =============================================================================
# Helper Functions
# =============================================================================


def _nearest_palette_level(sample, palette: Dict) -> Optional[str]:
    """Return the palette key whose RGB color is closest to ``sample``.

    Closeness is the squared Euclidean distance over the first three
    (RGB) channels; alpha is ignored. Ties keep the first key in
    ``palette`` iteration order. Returns None for an empty palette.
    """
    best = None
    best_dist = float("inf")
    for level, col in palette.items():
        target = to_rgba(col)[:3]
        dist = sum((a - b) ** 2 for a, b in zip(sample, target))
        if dist < best_dist:
            best_dist = dist
            best = level
    return best


def _match_line_to_level(line: Line2D, palette: Dict) -> Optional[str]:
    """Identify the hue level whose palette entry matches the line color."""
    return _nearest_palette_level(to_rgba(line.get_color())[:3], palette)


def _match_collection_to_level(
    collection: PolyCollection,
    palette: Dict,
) -> Optional[str]:
    """Identify the hue level whose palette entry matches the face color.

    Falls back to the first edge color when the collection reports no
    face colors, and returns None when it reports neither.
    """
    fc = collection.get_facecolor()
    if len(fc) == 0:
        ec = collection.get_edgecolor()
        if len(ec) == 0:
            return None
        sample = ec[0][:3]
    else:
        sample = fc[0][:3]
    return _nearest_palette_level(sample, palette)


def _paint_1d(
    new_lines: List[Line2D],
    new_collections: List,
    palette: Optional[Dict],
    color: Optional[str],
    edgecolor: Optional[str],
    linewidth: float,
    alpha: float,
) -> None:
    """Apply publiplots' transparent-fill / opaque-edge split to 1D KDE.

    ``Line2D`` artists are recolored fully opaque; ``PolyCollection``
    artists get ``alpha`` on the face and an opaque edge. When a palette
    is given, each artist is assigned the palette entry nearest to its
    current color; otherwise ``color`` is used. Artists for which no
    color can be resolved keep their colors (lines still get
    ``linewidth``). Collections that are not ``PolyCollection`` instances
    are left untouched.
    """
    # Line2D outlines — always opaque, full palette color.
    for line in new_lines:
        if palette:
            level = _match_line_to_level(line, palette)
            line_color = palette.get(level, color) if level is not None else color
        else:
            line_color = color
        if line_color is not None:
            line.set_color(to_rgba(line_color, alpha=1.0))
        line.set_linewidth(linewidth)

    # Filled density regions — alpha on face, opaque on edge.
    for collection in new_collections:
        if not isinstance(collection, PolyCollection):
            continue
        if palette:
            level = _match_collection_to_level(collection, palette)
            face_color = palette.get(level, color) if level is not None else color
        else:
            face_color = color
        if face_color is None:
            continue
        edge = edgecolor if edgecolor is not None else face_color
        collection.set_facecolor(to_rgba(face_color, alpha=alpha))
        collection.set_edgecolor(to_rgba(edge, alpha=1.0))
        collection.set_linewidth(linewidth)


def _find_quadcontour(new_collections: List):
    """Return the first QuadContourSet among new_collections, or None.

    The contour set supplies the cmap and the ``levels`` array from which
    the colorbar normalization is derived.
    """
    from matplotlib.contour import QuadContourSet

    for c in new_collections:
        if isinstance(c, QuadContourSet):
            return c
    return None


def _derive_cbar_norm(contour_set) -> Normalize:
    """Build a Normalize spanning the contour set's density levels.

    The range is taken from the finite entries of ``contour_set.levels``.
    When the attribute is missing, empty, or holds no finite value, a
    default ``[0, 1]`` normalization is returned.
    """
    levels = np.asarray(getattr(contour_set, "levels", []), dtype=float)
    # Ignore NaN/inf so they cannot poison the min/max of the range.
    levels = levels[np.isfinite(levels)]
    if levels.size == 0:
        return Normalize(vmin=0.0, vmax=1.0)
    return Normalize(vmin=float(levels.min()), vmax=float(levels.max()))


def _legend(
    ax: Axes,
    hue: Optional[str],
    palette: Optional[Dict],
    is_2d: bool,
    fill: Optional[bool],
    multiple: str,
    cbar: bool,
    cbar_cmap: Optional[Any],
    new_collections: List,
    alpha: float,
    linewidth: float,
    edgecolor: Optional[str],
    color: Optional[str],
    legend: Union[bool, Dict] = True,
    legend_kws: Optional[Dict] = None,
) -> None:
    """Stash the mode-specific LegendEntry and render per-axis.

    Stashing logic:

    - ``legend is False``: return immediately; nothing is stashed or
      rendered.
    - 1D, hue=None: nothing stashed.
    - 1D + categorical hue: one ``"hue"`` entry. Rectangle handles when
      the fill is active, line handles otherwise. When ``fill`` is None
      the fill state is derived from ``multiple`` (``'stack'`` / ``'fill'``
      count as filled).
    - 2D, hue=None, cbar=True: one continuous-hue entry built from
      ``cbar_cmap`` (or the contour set's cmap) and the contour levels.
    - 2D, hue=None, cbar=False: nothing stashed.
    - 2D + categorical hue: one ``"hue"`` entry with line handles.

    ``hue_label`` is always removed from ``legend_kws`` before the
    remaining kwargs are forwarded to ``render_entries``. The ``color``
    parameter is accepted for call-site symmetry and is not used here.
    """
    if legend is False:
        return

    flags = resolve_legend_flags(legend)
    legend_kws = dict(legend_kws or {})

    # Strip `hue_label` on every path so it is never forwarded to
    # render_entries; remember whether the caller supplied it so an
    # explicit value (even None) still overrides the per-mode default.
    label_given = "hue_label" in legend_kws
    user_label = legend_kws.pop("hue_label", None)

    # Nothing hue-like to stash → render any existing entries and exit.
    if hue is None:
        if is_2d and cbar and flags["hue"]:
            contour_set = _find_quadcontour(new_collections)
            if contour_set is not None:
                # Prefer the explicitly resolved cmap from the call site;
                # fall back to the contour set's own cmap, then rcParams.
                cmap = cbar_cmap
                if cmap is None:
                    cmap = contour_set.get_cmap() or plt.rcParams["image.cmap"]
                stash_continuous_hue(
                    ax,
                    name=user_label if label_given else "density",
                    palette=cmap,
                    hue_norm=_derive_cbar_norm(contour_set),
                )
        render_entries(ax, flags=flags, legend_kws=legend_kws)
        return

    # hue is set — need a palette dict to build categorical entries.
    if not palette:
        render_entries(ax, flags=flags, legend_kws=legend_kws)
        return

    hue_label = user_label if label_given else hue

    if flags["hue"]:
        # Derive effective fill for legend-handle style: an explicit
        # `fill` wins, otherwise 'stack' / 'fill' count as filled.
        if fill is None:
            effective_fill = multiple in ("stack", "fill")
        else:
            effective_fill = bool(fill)

        labels = list(palette.keys())
        handle_kws: Dict = {
            "labels": labels,
            "colors": [palette[v] for v in labels],
            "edgecolors": [edgecolor] * len(labels) if edgecolor else None,
            "alpha": alpha,
            "linewidth": linewidth,
        }
        if effective_fill and not is_2d:
            # Filled 1D densities → rectangle swatches.
            handle_kws["style"] = "rectangle"
        else:
            # 2D contours and unfilled 1D curves → solid line handles.
            handle_kws["linestyles"] = ["-"] * len(labels)
        handles = create_legend_handles(**handle_kws)

        stash_entry(
            ax,
            LegendEntry.build(
                name=hue_label,
                kind="hue",
                handles=handles,
                labels=labels,
            ),
        )

    render_entries(ax, flags=flags, legend_kws=legend_kws)