# Source code for dcor._partial_dcor

"""Functions for computing partial distance covariance and correlation."""
from __future__ import annotations

from typing import TypeVar

import numpy as np

from ._dcor_internals import (
    _u_distance_matrix,
    u_complementary_projection,
    u_product,
)
from ._utils import ArrayType, _sqrt

# Type variable bounded by ``ArrayType`` (imported from ``._utils``).
Array = TypeVar("Array", bound=ArrayType)


def partial_distance_covariance(
    x: ArrayType,
    y: ArrayType,
    z: ArrayType,
) -> ArrayType:
    r"""
    Partial distance covariance estimator.

    Compute the estimator for the partial distance covariance of the
    random vectors corresponding to :math:`x` and :math:`y` with respect
    to the random vector corresponding to :math:`z`.

    Warning:
        Partial distance covariance should be used carefully as it presents
        some undesirable or counterintuitive properties. In particular, the
        reader cannot assume that :math:`\mathcal{V}^{*}` characterizes
        independence, i.e., :math:`\mathcal{V}^{*}(X, Y; Z)=0` does not always
        imply that :math:`X` and :math:`Y` are conditionally independent
        given :math:`Z`, and vice versa. A more detailed discussion and some
        counterexamples can be found in Sec. 4.2 of
        :footcite:t:`partial_distance_correlation`.

    Parameters:
        x: First random vector. The columns correspond with the individual
            random variables while the rows are individual instances of the
            random vector.
        y: Second random vector. The columns correspond with the individual
            random variables while the rows are individual instances of the
            random vector.
        z: Random vector with respect to which the partial distance covariance
            is computed. The columns correspond with the individual random
            variables while the rows are individual instances of the random
            vector.

    Returns:
        Value of the estimator of the partial distance covariance.

    See Also:
        partial_distance_correlation

    Examples:
        >>> import numpy as np
        >>> import dcor
        >>> a = np.array([[1, 2, 3, 4],
        ...               [5, 6, 7, 8],
        ...               [9, 10, 11, 12],
        ...               [13, 14, 15, 16]])
        >>> b = np.array([[1], [0], [0], [1]])
        >>> c = np.array([[1, 3, 4],
        ...               [5, 7, 8],
        ...               [9, 11, 15],
        ...               [13, 15, 16]])
        >>> dcor.partial_distance_covariance(a, a, c) # doctest: +ELLIPSIS
        0.0024298...
        >>> dcor.partial_distance_covariance(a, b, c) # doctest: +ELLIPSIS
        0.0347030...
        >>> dcor.partial_distance_covariance(b, b, c) # doctest: +ELLIPSIS
        0.4956241...

    References:
        .. footbibliography::

    """
    # One matrix per sample, built in argument order by the shared helper.
    a = _u_distance_matrix(x)
    b = _u_distance_matrix(y)
    c = _u_distance_matrix(z)

    # ``u_complementary_projection(c)`` returns a callable. It is applied to
    # both ``a`` and ``b`` so that (going by the helper's name) the part of
    # each matrix explained by ``c`` is removed before taking the product.
    proj = u_complementary_projection(c)

    return u_product(proj(a), proj(b))


def _clipped_u_correlation(
    numerator: ArrayType,
    denom_sqr: ArrayType,
) -> ArrayType:
    """Return ``numerator / sqrt(denom_sqr)`` clipped to ``[-1, 1]``.

    When ``denom_sqr`` is zero the division is skipped and that zero value
    is used as the ratio instead.
    """
    ratio = numerator / _sqrt(denom_sqr) if denom_sqr != 0 else denom_sqr
    return np.clip(ratio, -1, 1)


def partial_distance_correlation(
    x: ArrayType,
    y: ArrayType,
    z: ArrayType,
) -> ArrayType:  # pylint:disable=too-many-locals
    r"""
    Partial distance correlation estimator.

    Compute the estimator for the partial distance correlation of the
    random vectors corresponding to :math:`x` and :math:`y` with respect
    to the random vector corresponding to :math:`z`.

    Warning:
        Partial distance correlation should be used carefully as it presents
        some undesirable or counterintuitive properties. In particular, the
        reader cannot assume that :math:`\mathcal{R}^{*}` characterizes
        independence, i.e., :math:`\mathcal{R}^{*}(X, Y; Z)=0` does not always
        imply that :math:`X` and :math:`Y` are conditionally independent
        given :math:`Z`, and vice versa. A more detailed discussion and some
        counterexamples can be found in Sec. 4.2 of
        :footcite:t:`partial_distance_correlation`.

    Parameters:
        x: First random vector. The columns correspond with the individual
            random variables while the rows are individual instances of the
            random vector.
        y: Second random vector. The columns correspond with the individual
            random variables while the rows are individual instances of the
            random vector.
        z: Random vector with respect to which the partial distance correlation
            is computed. The columns correspond with the individual random
            variables while the rows are individual instances of the random
            vector.

    Returns:
        Value of the estimator of the partial distance correlation. The
        value is zero when the normalizing denominator is zero.

    See Also:
        partial_distance_covariance

    Examples:
        >>> import numpy as np
        >>> import dcor
        >>> a = np.array([[1], [1], [2], [2], [3]])
        >>> b = np.array([[1], [2], [1], [2], [1]])
        >>> c = np.array([[1], [2], [2], [1], [2]])
        >>> dcor.partial_distance_correlation(a, a, c)
        1.0
        >>> dcor.partial_distance_correlation(a, b, c) # doctest: +ELLIPSIS
        -0.5...
        >>> dcor.partial_distance_correlation(b, b, c)
        1.0
        >>> dcor.partial_distance_correlation(a, c, c)
        0.0

    References:
        .. footbibliography::

    """
    # One matrix per sample, built in argument order by the shared helper.
    a = _u_distance_matrix(x)
    b = _u_distance_matrix(y)
    c = _u_distance_matrix(z)

    # Every pairwise product needed below, computed once up front.
    aa = u_product(a, a)
    bb = u_product(b, b)
    cc = u_product(c, c)
    ab = u_product(a, b)
    ac = u_product(a, c)
    bc = u_product(b, c)

    # Pairwise correlations, each normalized and clipped to [-1, 1].
    r_xy = _clipped_u_correlation(ab, aa * bb)
    r_xz = _clipped_u_correlation(ac, aa * cc)
    r_yz = _clipped_u_correlation(bc, bb * cc)

    # The denominator vanishes when ``r_xz`` or ``r_yz`` is +/-1; in that
    # case the (zero) denominator itself is returned instead of dividing.
    denom = _sqrt(1 - r_xz ** 2) * _sqrt(1 - r_yz ** 2)

    return (r_xy - r_xz * r_yz) / denom if denom != 0 else denom