Source code for dcor._dcor

"""
Distance correlation and covariance.

This module contains functions to compute statistics related to the
distance covariance and distance correlation
:cite:`b-distance_correlation`.

References
----------
.. bibliography:: ../refs.bib
   :labelprefix: B
   :keyprefix: b-

"""
from __future__ import annotations

from dataclasses import astuple, dataclass
from enum import Enum
from typing import (
    Generic,
    Iterator,
    Literal,
    Protocol,
    Tuple,
    TypeVar,
    Union,
    overload,
)

import numpy as np

from dcor._dcor_internals import _af_inv_scaled

from ._dcor_internals import _dcov_from_terms, _dcov_terms_naive
from ._fast_dcov_avl import _distance_covariance_sqr_terms_avl
from ._fast_dcov_mergesort import _distance_covariance_sqr_terms_mergesort
from ._utils import (
    ArrayType,
    CompileMode,
    _sqrt,
    array_namespace,
    numpy_namespace,
)

Array = TypeVar("Array", bound=ArrayType)


[docs]@dataclass(frozen=True) class Stats(Generic[Array]): """Distance covariance related stats.""" covariance_xy: Array correlation_xy: Array variance_x: Array variance_y: Array def __iter__(self) -> Iterator[Array]: return iter(astuple(self))
class DCovFunction(Protocol): """Callback protocol for dcov method.""" def __call__( self, __x: Array, __y: Array, *, compile_mode: CompileMode, ) -> Array: ... class DCovTermsFunction(Protocol): """Callback protocol for dcov terms method.""" @overload def __call__( self, __x: Array, __y: Array, *, exponent: float, compile_mode: CompileMode = CompileMode.AUTO, return_var_terms: Literal[False] = False, ) -> Tuple[ Array, Array, Array, Array, Array, None, None, ]: ... @overload def __call__( self, __x: Array, __y: Array, *, exponent: float, compile_mode: CompileMode = CompileMode.AUTO, return_var_terms: Literal[True], ) -> Tuple[ Array, Array, Array, Array, Array, Array, Array, ]: ... def __call__( self, __x: Array, __y: Array, *, exponent: float, compile_mode: CompileMode = CompileMode.AUTO, return_var_terms: bool = False, ) -> Tuple[ Array, Array, Array, Array, Array, Array | None, Array | None, ]: ... @overload def _dcov_terms_auto( __x: Array, __y: Array, *, exponent: float, compile_mode: CompileMode = CompileMode.AUTO, return_var_terms: Literal[False] = False, ) -> Tuple[ Array, Array, Array, Array, Array, None, None, ]: ... @overload def _dcov_terms_auto( __x: Array, __y: Array, *, exponent: float, compile_mode: CompileMode = CompileMode.AUTO, return_var_terms: Literal[True], ) -> Tuple[ Array, Array, Array, Array, Array, Array, Array, ]: ... def _dcov_terms_auto( x: Array, y: Array, *, exponent: float, compile_mode: CompileMode = CompileMode.AUTO, return_var_terms: bool = False, ) -> Tuple[ Array, Array, Array, Array, Array, Array | None, Array | None, ]: xp = array_namespace(x, y) dcov_terms = _dcov_terms_naive if xp == numpy_namespace and _can_use_fast_algorithm(x, y, exponent): dcov_terms = _distance_covariance_sqr_terms_avl return dcov_terms( x, y, exponent=exponent, compile_mode=compile_mode, return_var_terms=return_var_terms, ) class _DcovAlgorithmInternals(): def __init__( self, *, terms: DCovTermsFunction | None = None, ): self.terms = terms def dcov_sqr( self, x: Array, y: Array, *, exponent: float = 1, compile_mode: CompileMode = CompileMode.AUTO, bias_corrected=False, ) -> Array: """Generic estimator for distance covariance.""" terms = self.terms( x, y, exponent=exponent, compile_mode=compile_mode, ) return _dcov_from_terms( *terms[:-2], n_samples=x.shape[0], bias_corrected=bias_corrected, ) def stats_sqr( self, x: Array, y: Array, *, bias_corrected: bool = False, exponent: float = 1, compile_mode: CompileMode = CompileMode.AUTO, ) -> Stats[Array]: """Compute generic squared stats.""" n_samples = x.shape[0] ( mean_prod, a_axis_sum, a_total_sum, b_axis_sum, b_total_sum, a_mean_prod, b_mean_prod, ) = self.terms( x, y, exponent=exponent, compile_mode=compile_mode, return_var_terms=True, ) covariance_xy_sqr = _dcov_from_terms( mean_prod=mean_prod, a_axis_sum=a_axis_sum, a_total_sum=a_total_sum, b_axis_sum=b_axis_sum, b_total_sum=b_total_sum, n_samples=n_samples, bias_corrected=bias_corrected, ) variance_x_sqr = _dcov_from_terms( mean_prod=a_mean_prod, a_axis_sum=a_axis_sum, a_total_sum=a_total_sum, b_axis_sum=a_axis_sum, b_total_sum=a_total_sum, n_samples=n_samples, bias_corrected=bias_corrected, ) variance_y_sqr = _dcov_from_terms( mean_prod=b_mean_prod, a_axis_sum=b_axis_sum, a_total_sum=b_total_sum, b_axis_sum=b_axis_sum, b_total_sum=b_total_sum, n_samples=n_samples, bias_corrected=bias_corrected, ) xp = array_namespace(x, y) denominator_sqr = xp.abs(variance_x_sqr * variance_y_sqr) denominator = _sqrt(xp.asarray(denominator_sqr)) # Comparisons using a tolerance can change results if the # covariance has a similar order of magnitude if denominator == 0.0: correlation_xy_sqr = xp.zeros_like(covariance_xy_sqr) else: correlation_xy_sqr = covariance_xy_sqr / denominator return Stats( covariance_xy=covariance_xy_sqr, correlation_xy=correlation_xy_sqr, variance_x=variance_x_sqr, variance_y=variance_y_sqr, ) def _is_random_variable(x: Array) -> bool: """ Check if the matrix x correspond to a random variable. The matrix is considered a random variable if it is a vector or a matrix corresponding to a column vector. Otherwise, the matrix correspond to a random vector. """ return len(x.shape) == 1 or x.shape[1] == 1 def _can_use_fast_algorithm(x: Array, y: Array, exponent: float = 1) -> bool: """ Check if the fast algorithm for distance stats can be used. The fast algorithm has complexity :math:`O(NlogN)`, better than the complexity of the naive algorithm (:math:`O(N^2)`). The algorithm can only be used for random variables (not vectors) where the number of instances is greater than 3. Also, the exponent must be 1. """ return ( _is_random_variable(x) and _is_random_variable(y) and x.shape[0] > 3 and y.shape[0] > 3 and exponent == 1 ) def _distance_stats_sqr_generic( x: Array, y: Array, *, exponent: float = 1, dcov_function: DCovFunction, compile_mode: CompileMode = CompileMode.AUTO, ) -> Stats[Array]: """Compute the distance stats using a dcov algorithm.""" if exponent != 1: raise ValueError(f"Exponent should be 1 but is {exponent} instead.") xp = array_namespace(x, y) covariance_xy_sqr = dcov_function(x, y, compile_mode=compile_mode) variance_x_sqr = dcov_function(x, x, compile_mode=compile_mode) variance_y_sqr = dcov_function(y, y, compile_mode=compile_mode) denominator_sqr_signed = variance_x_sqr * variance_y_sqr denominator_sqr = xp.abs(denominator_sqr_signed) denominator = _sqrt(denominator_sqr) # Comparisons using a tolerance can change results if the # covariance has a similar order of magnitude if denominator == 0.0: correlation_xy_sqr = xp.zeros_like(covariance_xy_sqr) else: correlation_xy_sqr = covariance_xy_sqr / denominator return Stats( covariance_xy=covariance_xy_sqr, correlation_xy=correlation_xy_sqr, variance_x=variance_x_sqr, variance_y=variance_y_sqr, )
[docs]class DistanceCovarianceMethod(Enum): """Method used for computing the distance covariance.""" AUTO = _DcovAlgorithmInternals( terms=_dcov_terms_auto, ) """ Try to select the best algorithm. It will try to use a fast algorithm if possible. Otherwise it will use the naive implementation. """ NAIVE = _DcovAlgorithmInternals( terms=_dcov_terms_naive, ) r"""Usual estimator of the distance covariance, which is :math:`O(n^2)`""" AVL = _DcovAlgorithmInternals( terms=_distance_covariance_sqr_terms_avl, ) r""" Use the AVL fast implementation. This is the implementation described in :cite:`b-fast_distance_correlation_avl` which is :math:`O(n\log n)` """ MERGESORT = _DcovAlgorithmInternals( terms=_distance_covariance_sqr_terms_mergesort, ) r""" Use the mergesort fast implementation. This is the implementation described in :cite:`b-fast_distance_correlation_mergesort` which is :math:`O(n\log n)` """ def __repr__(self) -> str: return '%s.%s' % (self.__class__.__name__, self.name)
_DistanceCovarianceMethodName = Literal["auto", "naive", "avl", "mergesort"] DistanceCovarianceMethodLike = Union[ DistanceCovarianceMethod, _DistanceCovarianceMethodName, ] def _to_algorithm( algorithm: DistanceCovarianceMethodLike, ) -> DistanceCovarianceMethod: """Convert to algorithm if string.""" if isinstance(algorithm, DistanceCovarianceMethod): return algorithm return DistanceCovarianceMethod[algorithm.upper()]
[docs]def distance_covariance_sqr( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Usual (biased) estimator for the squared distance covariance. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Biased estimator of the squared distance covariance. See Also: distance_covariance u_distance_covariance_sqr Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_covariance_sqr(a, a) 52.0 >>> dcor.distance_covariance_sqr(a, b) 1.0 >>> dcor.distance_covariance_sqr(b, b) 0.25 >>> dcor.distance_covariance_sqr(a, b, exponent=0.5) # doctest: +ELLIPSIS 0.3705904... """ method = _to_algorithm(method) return method.value.dcov_sqr( x, y, exponent=exponent, compile_mode=compile_mode, )
[docs]def u_distance_covariance_sqr( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Unbiased estimator for the squared distance covariance. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Value of the unbiased estimator of the squared distance covariance. See Also: distance_covariance distance_covariance_sqr Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.u_distance_covariance_sqr(a, a) # doctest: +ELLIPSIS 42.6666666... >>> dcor.u_distance_covariance_sqr(a, b) # doctest: +ELLIPSIS -2.6666666... >>> dcor.u_distance_covariance_sqr(b, b) # doctest: +ELLIPSIS 0.6666666... >>> dcor.u_distance_covariance_sqr(a, b, exponent=0.5) # doctest: +ELLIPSIS -0.2996598... """ method = _to_algorithm(method) return method.value.dcov_sqr( x, y, exponent=exponent, compile_mode=compile_mode, bias_corrected=True, )
[docs]def distance_covariance( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Usual (biased) estimator for the distance covariance. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Biased estimator of the distance covariance. See Also: distance_covariance_sqr u_distance_covariance_sqr Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_covariance(a, a) # doctest: +ELLIPSIS 7.2111025... >>> dcor.distance_covariance(a, b) 1.0 >>> dcor.distance_covariance(b, b) 0.5 >>> dcor.distance_covariance(a, b, exponent=0.5) 0.6087614... """ return _sqrt( distance_covariance_sqr( x, y, exponent=exponent, method=method, compile_mode=compile_mode, ), )
[docs]def distance_stats_sqr( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Stats[Array]: """ Usual (biased) statistics related with the squared distance covariance. Computes the usual (biased) estimators for the squared distance covariance and squared distance correlation between two random vectors, and the individual squared distance variances. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Stats object containing squared distance covariance, squared distance correlation, squared distance variance of the first random vector and squared distance variance of the second random vector. See Also: distance_covariance_sqr distance_correlation_sqr Notes: It is less efficient to compute the statistics separately, rather than using this function, because some computations can be shared. Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_stats_sqr(a, a) # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=52.0, correlation_xy=1.0, variance_x=52.0, variance_y=52.0) >>> dcor.distance_stats_sqr(a, b) # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=1.0, correlation_xy=0.2773500..., variance_x=52.0, variance_y=0.25) >>> dcor.distance_stats_sqr(b, b) # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=0.25, correlation_xy=1.0, variance_x=0.25, variance_y=0.25) >>> dcor.distance_stats_sqr(a, b, exponent=0.5) # doctest: +ELLIPSIS ... # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=0.3705904..., correlation_xy=0.4493308..., variance_x=2.7209220..., variance_y=0.25) """ method = _to_algorithm(method) return method.value.stats_sqr( x, y, exponent=exponent, compile_mode=compile_mode, )
[docs]def u_distance_stats_sqr( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Stats[Array]: """ Unbiased statistics related with the squared distance covariance. Computes the unbiased estimators for the squared distance covariance and squared distance correlation between two random vectors, and the individual squared distance variances. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Stats object containing squared distance covariance, squared distance correlation, squared distance variance of the first random vector and squared distance variance of the second random vector. See Also: u_distance_covariance_sqr u_distance_correlation_sqr Notes: It is less efficient to compute the statistics separately, rather than using this function, because some computations can be shared. Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.u_distance_stats_sqr(a, a) # doctest: +ELLIPSIS ... # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=42.6666666..., correlation_xy=1.0, variance_x=42.6666666..., variance_y=42.6666666...) >>> dcor.u_distance_stats_sqr(a, b) # doctest: +ELLIPSIS ... # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=-2.6666666..., correlation_xy=-0.4999999..., variance_x=42.6666666..., variance_y=0.6666666...) >>> dcor.u_distance_stats_sqr(b, b) # doctest: +ELLIPSIS ... # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=0.6666666..., correlation_xy=1.0, variance_x=0.6666666..., variance_y=0.6666666...) >>> dcor.u_distance_stats_sqr(a, b, exponent=0.5) # doctest: +ELLIPSIS ... # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=-0.2996598..., correlation_xy=-0.4050479..., variance_x=0.8209855..., variance_y=0.6666666...) """ method = _to_algorithm(method) return method.value.stats_sqr( x, y, exponent=exponent, compile_mode=compile_mode, bias_corrected=True, )
[docs]def distance_stats( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Stats[Array]: """ Usual (biased) statistics related with the distance covariance. Computes the usual (biased) estimators for the distance covariance and distance correlation between two random vectors, and the individual distance variances. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Stats object containing distance covariance, distance correlation, distance variance of the first random vector and distance variance of the second random vector. See Also: distance_covariance distance_correlation Notes: It is less efficient to compute the statistics separately, rather than using this function, because some computations can be shared. Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_stats(a, a) # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=7.2111025..., correlation_xy=1.0, variance_x=7.2111025..., variance_y=7.2111025...) >>> dcor.distance_stats(a, b) # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=1.0, correlation_xy=0.5266403..., variance_x=7.2111025..., variance_y=0.5) >>> dcor.distance_stats(b, b) # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=0.5, correlation_xy=1.0, variance_x=0.5, variance_y=0.5) >>> dcor.distance_stats(a, b, exponent=0.5) # doctest: +ELLIPSIS ... # doctest: +NORMALIZE_WHITESPACE Stats(covariance_xy=0.6087614..., correlation_xy=0.6703214..., variance_x=1.6495217..., variance_y=0.5) """ return Stats( *[ _sqrt(s) for s in astuple( distance_stats_sqr( x, y, exponent=exponent, method=method, compile_mode=compile_mode, ), ) ], )
[docs]def distance_correlation_sqr( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Usual (biased) estimator for the squared distance correlation. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Value of the biased estimator of the squared distance correlation. See Also: distance_correlation u_distance_correlation_sqr Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_correlation_sqr(a, a) 1.0 >>> dcor.distance_correlation_sqr(a, b) # doctest: +ELLIPSIS 0.2773500... >>> dcor.distance_correlation_sqr(b, b) 1.0 >>> dcor.distance_correlation_sqr(a, b, exponent=0.5) # doctest: +ELLIPSIS 0.4493308... """ method = _to_algorithm(method) return method.value.stats_sqr( x, y, exponent=exponent, compile_mode=compile_mode, ).correlation_xy
[docs]def u_distance_correlation_sqr( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Bias-corrected estimator for the squared distance correlation. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Value of the bias-corrected estimator of the squared distance correlation. See Also: distance_correlation distance_correlation_sqr Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.u_distance_correlation_sqr(a, a) 1.0 >>> dcor.u_distance_correlation_sqr(a, b) -0.4999999... >>> dcor.u_distance_correlation_sqr(b, b) 1.0 >>> dcor.u_distance_correlation_sqr(a, b, exponent=0.5) ... # doctest: +ELLIPSIS -0.4050479... """ method = _to_algorithm(method) return method.value.stats_sqr( x, y, exponent=exponent, compile_mode=compile_mode, bias_corrected=True, ).correlation_xy
[docs]def distance_correlation( x: Array, y: Array, *, exponent: float = 1, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Usual (biased) estimator for the distance correlation. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. exponent: Exponent of the Euclidean distance, in the range :math:`(0, 2)`. Equivalently, it is twice the Hurst parameter of fractional Brownian motion. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Value of the biased estimator of the distance correlation. See Also: distance_correlation_sqr u_distance_correlation_sqr Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 2., 3., 4.], ... [5., 6., 7., 8.], ... [9., 10., 11., 12.], ... [13., 14., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_correlation(a, a) 1.0 >>> dcor.distance_correlation(a, b) # doctest: +ELLIPSIS 0.5266403... >>> dcor.distance_correlation(b, b) 1.0 >>> dcor.distance_correlation(a, b, exponent=0.5) # doctest: +ELLIPSIS 0.6703214... """ return _sqrt( distance_correlation_sqr( x, y, exponent=exponent, method=method, compile_mode=compile_mode, ), )
[docs]def distance_correlation_af_inv_sqr( x: Array, y: Array, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Square of the affinely invariant distance correlation. Computes the estimator for the square of the affinely invariant distance correlation between two random vectors. Warning: The return value of this function is undefined when the covariance matrix of :math:`x` or :math:`y` is singular. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Value of the estimator of the squared affinely invariant distance correlation. See Also: distance_correlation u_distance_correlation Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 3., 2., 5.], ... [5., 7., 6., 8.], ... [9., 10., 11., 12.], ... [13., 15., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_correlation_af_inv_sqr(a, a) 1.0 >>> dcor.distance_correlation_af_inv_sqr(a, b) # doctest: +ELLIPSIS 0.5773502... >>> dcor.distance_correlation_af_inv_sqr(b, b) 1.0 """ x = _af_inv_scaled(x) y = _af_inv_scaled(y) correlation = distance_correlation_sqr( x, y, method=method, compile_mode=compile_mode, ) xp = array_namespace(x, y) return ( xp.zeros_like(correlation) if xp.isnan(correlation) else correlation )
[docs]def distance_correlation_af_inv( x: Array, y: Array, method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO, compile_mode: CompileMode = CompileMode.AUTO, ) -> Array: """ Affinely invariant distance correlation. Computes the estimator for the affinely invariant distance correlation between two random vectors. Warning: The return value of this function is undefined when the covariance matrix of :math:`x` or :math:`y` is singular. Args: x: First random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. y: Second random vector. The columns correspond with the individual random variables while the rows are individual instances of the random vector. method: Method to use internally to compute the distance covariance. compile_mode: Compilation mode used. By default it tries to use the fastest available type of compilation. Returns: Value of the estimator of the squared affinely invariant distance correlation. See Also: distance_correlation u_distance_correlation Examples: >>> import numpy as np >>> import dcor >>> a = np.array([[1., 3., 2., 5.], ... [5., 7., 6., 8.], ... [9., 10., 11., 12.], ... [13., 15., 15., 16.]]) >>> b = np.array([[1.], [0.], [0.], [1.]]) >>> dcor.distance_correlation_af_inv(a, a) 1.0 >>> dcor.distance_correlation_af_inv(a, b) # doctest: +ELLIPSIS 0.7598356... >>> dcor.distance_correlation_af_inv(b, b) 1.0 """ return _sqrt( distance_correlation_af_inv_sqr( x, y, method=method, compile_mode=compile_mode, ), )