# -*- coding: utf-8 -*-
"""
Gromov-Wasserstein and Fused-Gromov-Wasserstein utils.
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
# Nicolas Courty <ncourty@irisa.fr>
# Rémi Flamary <remi.flamary@unice.fr>
# Titouan Vayer <titouan.vayer@irisa.fr>
# Cédric Vincent-Cuaz <cedvincentcuaz@gmail.com>
#
# License: MIT License
from ..utils import list_to_array
from ..backend import get_backend
def init_matrix(C1, C2, p, q, loss_fun='square_loss', nx=None):
r"""Return loss matrices and tensors for Gromov-Wasserstein fast computation
Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the
selected loss function as the loss function of Gromov-Wasserstein discrepancy.
The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix>`
Where:
- :math:`\mathbf{C_1}`: Metric cost matrix in the source space
- :math:`\mathbf{C_2}`: Metric cost matrix in the target space
- :math:`\mathbf{T}`: A coupling between those two spaces
The square-loss function :math:`L(a, b) = |a - b|^2` is read as:
.. math::
L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)
\mathrm{with} \ f_1(a) &= a^2
f_2(b) &= b^2
h_1(a) &= a
h_2(b) &= 2b
The kl-loss function :math:`L(a, b) = a \log\left(\frac{a}{b}\right) - a + b` is read as:
.. math::
L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)
\mathrm{with} \ f_1(a) &= a \log(a) - a
f_2(b) &= b
h_1(a) &= a
h_2(b) &= \log(b)
Parameters
----------
C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
C2 : array-like, shape (nt, nt)
Metric cost matrix in the target space
p : array-like, shape (ns,)
Probability distribution in the source space
q : array-like, shape (nt,)
Probability distribution in the target space
loss_fun : str, optional
Name of loss function to use: either 'square_loss' or 'kl_loss' (default='square_loss')
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
constC : array-like, shape (ns, nt)
Constant :math:`\mathbf{C}` matrix in Eq. (6)
hC1 : array-like, shape (ns, ns)
:math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
hC2 : array-like, shape (nt, nt)
:math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
.. _references-init-matrix:
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
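Examples
--------
A minimal numerical sketch with the NumPy backend (illustrative values,
not taken from [12]); for the square loss, :math:`h_2` doubles :math:`\mathbf{C_2}`:

>>> import numpy as np
>>> C1 = np.array([[0., 1.], [1., 0.]])
>>> C2 = np.array([[0., 2.], [2., 0.]])
>>> p = q = np.array([0.5, 0.5])
>>> constC, hC1, hC2 = init_matrix(C1, C2, p, q)
>>> constC
array([[2.5, 2.5],
       [2.5, 2.5]])
>>> hC2
array([[0., 4.],
       [4., 0.]])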
"""
if nx is None:
C1, C2, p, q = list_to_array(C1, C2, p, q)
nx = get_backend(C1, C2, p, q)
if loss_fun == 'square_loss':
def f1(a):
return (a**2)
def f2(b):
return (b**2)
def h1(a):
return a
def h2(b):
return 2 * b
elif loss_fun == 'kl_loss':
def f1(a):
return a * nx.log(a + 1e-15) - a
def f2(b):
return b
def h1(a):
return a
def h2(b):
return nx.log(b + 1e-15)
else:
raise ValueError(f"Unknown loss_fun='{loss_fun}'. Use one of: 'square_loss', 'kl_loss'.")
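# constC = f1(C1) p 1_nt^T + 1_ns q^T f2(C2)^T : the coupling-independent term of Eq. (6)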
constC1 = nx.dot(
nx.dot(f1(C1), nx.reshape(p, (-1, 1))),
nx.ones((1, len(q)), type_as=q)
)
constC2 = nx.dot(
nx.ones((len(p), 1), type_as=p),
nx.dot(nx.reshape(q, (1, -1)), f2(C2).T)
)
constC = constC1 + constC2
hC1 = h1(C1)
hC2 = h2(C2)
return constC, hC1, hC2
def tensor_product(constC, hC1, hC2, T, nx=None):
r"""Return the tensor for Gromov-Wasserstein fast computation
The tensor is computed as described in Proposition 1 Eq. (6) in :ref:`[12] <references-tensor-product>`
Parameters
----------
constC : array-like, shape (ns, nt)
Constant :math:`\mathbf{C}` matrix in Eq. (6)
hC1 : array-like, shape (ns, ns)
:math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
hC2 : array-like, shape (nt, nt)
:math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
tens : array-like, shape (`ns`, `nt`)
:math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` tensor-matrix multiplication result
.. _references-tensor-product:
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
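Examples
--------
A hedged sanity check (illustrative values): for the square loss, the factorized
result should match the explicit contraction
:math:`\sum_{k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{k,l}`
written out with `np.einsum` (the identity uses that :math:`\mathbf{T}` has
marginals :math:`\mathbf{p}` and :math:`\mathbf{q}`):

>>> import numpy as np
>>> C1 = np.array([[0., 1.], [1., 0.]])
>>> C2 = np.array([[0., 2.], [2., 0.]])
>>> p = q = np.array([0.5, 0.5])
>>> T = np.outer(p, q)
>>> constC, hC1, hC2 = init_matrix(C1, C2, p, q)
>>> tens = tensor_product(constC, hC1, hC2, T)
>>> direct = (np.einsum('ik,jl,kl->ij', C1**2, np.ones_like(C2), T)
...           + np.einsum('ik,jl,kl->ij', np.ones_like(C1), C2**2, T)
...           - 2 * np.einsum('ik,jl,kl->ij', C1, C2, T))
>>> bool(np.allclose(tens, direct))
True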
"""
if nx is None:
constC, hC1, hC2, T = list_to_array(constC, hC1, hC2, T)
nx = get_backend(constC, hC1, hC2, T)
A = - nx.dot(
nx.dot(hC1, T), hC2.T
)
tens = constC + A
# tens -= tens.min()
return tens
def gwloss(constC, hC1, hC2, T, nx=None):
r"""Return the Loss for Gromov-Wasserstein
The loss is computed as described in Proposition 1 Eq. (6) in :ref:`[12] <references-gwloss>`
Parameters
----------
constC : array-like, shape (ns, nt)
Constant :math:`\mathbf{C}` matrix in Eq. (6)
hC1 : array-like, shape (ns, ns)
:math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
hC2 : array-like, shape (nt, nt)
:math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
T : array-like, shape (ns, nt)
Current value of transport matrix :math:`\mathbf{T}`
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
loss : float
Gromov-Wasserstein loss
.. _references-gwloss:
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
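Examples
--------
A hedged sketch (illustrative values): for the product coupling
:math:`\mathbf{T} = \mathbf{p} \mathbf{q}^\top`, the fast formula agrees with the
direct four-index sum of the GW energy:

>>> import numpy as np
>>> C1 = np.array([[0., 1.], [1., 0.]])
>>> C2 = np.array([[0., 2.], [2., 0.]])
>>> p = q = np.array([0.5, 0.5])
>>> T = np.outer(p, q)
>>> constC, hC1, hC2 = init_matrix(C1, C2, p, q)
>>> float(gwloss(constC, hC1, hC2, T))
1.5
>>> direct = sum((C1[i, k] - C2[j, l]) ** 2 * T[i, j] * T[k, l]
...              for i in range(2) for j in range(2)
...              for k in range(2) for l in range(2))
>>> float(direct)
1.5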
"""
tens = tensor_product(constC, hC1, hC2, T, nx)
if nx is None:
tens, T = list_to_array(tens, T)
nx = get_backend(tens, T)
return nx.sum(tens * T)
def gwggrad(constC, hC1, hC2, T, nx=None):
r"""Return the gradient for Gromov-Wasserstein
The gradient is computed as described in Proposition 2 in :ref:`[12] <references-gwggrad>`
Parameters
----------
constC : array-like, shape (ns, nt)
Constant :math:`\mathbf{C}` matrix in Eq. (6)
hC1 : array-like, shape (ns, ns)
:math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
hC2 : array-like, shape (nt, nt)
:math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
T : array-like, shape (ns, nt)
Current value of transport matrix :math:`\mathbf{T}`
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
grad : array-like, shape (`ns`, `nt`)
Gromov-Wasserstein gradient
.. _references-gwggrad:
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
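Examples
--------
A hedged finite-difference check of the gradient of the GW energy
(illustrative values; ``energy`` is a local helper written here for the
check, not a library function):

>>> import numpy as np
>>> C1 = np.array([[0., 1.], [1., 0.]])
>>> C2 = np.array([[0., 2.], [2., 0.]])
>>> p = q = np.array([0.5, 0.5])
>>> T = np.outer(p, q)
>>> constC, hC1, hC2 = init_matrix(C1, C2, p, q)
>>> G = gwggrad(constC, hC1, hC2, T)
>>> def energy(T):
...     return sum((C1[i, k] - C2[j, l]) ** 2 * T[i, j] * T[k, l]
...                for i in range(2) for j in range(2)
...                for k in range(2) for l in range(2))
>>> D = np.zeros((2, 2)); D[0, 1] = 1.0
>>> eps = 1e-6
>>> num = (energy(T + eps * D) - energy(T - eps * D)) / (2 * eps)
>>> bool(np.isclose(G[0, 1], num))
True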
"""
return 2 * tensor_product(constC, hC1, hC2, T, nx)  # the factor 2 is missing in Prop. 2 of [12]
def update_square_loss(p, lambdas, T, Cs, nx=None):
r"""
Updates :math:`\mathbf{C}` according to the L2 Loss kernel with the `S`
:math:`\mathbf{T}_s` couplings calculated at each iteration of the GW
barycenter problem in :ref:`[12]`:
.. math::
\mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
Where :
- :math:`\mathbf{C}_s`: metric cost matrix
- :math:`\mathbf{p}_s`: distribution
Parameters
----------
p : array-like, shape (N,)
Masses in the targeted barycenter.
lambdas : list of float
List of the `S` spaces' weights.
T : list of S array-like of shape (N, ns)
The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration.
Cs : list of S array-like, shape (ns, ns)
Metric cost matrices.
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
C : array-like, shape (`N`, `N`)
Updated :math:`\mathbf{C}` matrix.
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
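Examples
--------
A minimal sketch with a single input space (`S = 1`, illustrative values);
with the product coupling, every entry of the update is the p-weighted
average of the entries of `Cs[0]`:

>>> import numpy as np
>>> p = np.array([0.5, 0.5])
>>> Cs = [np.array([[0., 1.], [1., 0.]])]
>>> T = [np.outer(p, p)]
>>> update_square_loss(p, [1.], T, Cs)
array([[0.5, 0.5],
       [0.5, 0.5]])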
"""
if nx is None:
nx = get_backend(p, *T, *Cs)
# corrects the matrix-product order mistake in Eq. (14) of [12]
tmpsum = sum([
lambdas[s] * nx.dot(
nx.dot(T[s], Cs[s]),
T[s].T
) for s in range(len(T))
])
ppt = nx.outer(p, p)
return tmpsum / ppt
def update_kl_loss(p, lambdas, T, Cs, nx=None):
r"""
Updates :math:`\mathbf{C}` according to the KL Loss kernel with the `S`
:math:`\mathbf{T}_s` couplings calculated at each iteration of the GW
barycenter problem in :ref:`[12]`:
.. math::
\mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
Where :
- :math:`\mathbf{C}_s`: metric cost matrix
- :math:`\mathbf{p}_s`: distribution
Parameters
----------
p : array-like, shape (N,)
Weights in the targeted barycenter.
lambdas : list of float
List of the `S` spaces' weights
T : list of S array-like of shape (N, ns)
The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration.
Cs : list of S array-like, shape (ns, ns)
Metric cost matrices.
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
C : array-like, shape (`N`, `N`)
Updated :math:`\mathbf{C}` matrix.
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
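Examples
--------
A minimal sketch with a single input space (illustrative values); with the
uniform product coupling, the update reduces to the geometric mean of the
entries of `Cs[0]`:

>>> import numpy as np
>>> p = np.array([0.5, 0.5])
>>> Cs = [np.array([[1., 2.], [2., 1.]])]
>>> T = [np.outer(p, p)]
>>> C = update_kl_loss(p, [1.], T, Cs)
>>> bool(np.allclose(C, np.exp(np.mean(np.log(Cs[0])))))
True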
"""
if nx is None:
nx = get_backend(p, *T, *Cs)
# corrects the matrix-product order mistake in Eq. (15) of [12]
tmpsum = sum([
lambdas[s] * nx.dot(
nx.dot(T[s], nx.log(nx.maximum(Cs[s], 1e-15))),
T[s].T
) for s in range(len(T))
])
ppt = nx.outer(p, p)
return nx.exp(tmpsum / ppt)
def update_feature_matrix(lambdas, Ys, Ts, p, nx=None):
r"""Updates the feature with respect to the `S` :math:`\mathbf{T}_s` couplings.
See "Solving the barycenter problem with Block Coordinate Descent (BCD)"
in :ref:`[24] <references-update-feature-matrix>` calculated at each iteration
Parameters
----------
lambdas : list of float
List of the `S` spaces' weights.
Ys : list of S array-like, shape (d, ns)
The features.
Ts : list of S array-like, shape (N, ns)
The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration.
p : array-like, shape (N,)
Masses in the targeted barycenter.
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
X : array-like, shape (`d`, `N`)
Updated feature matrix.
.. _references-update-feature-matrix:
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
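Examples
--------
A minimal sketch with a single space and `d = 2` features (illustrative values):

>>> import numpy as np
>>> p = np.array([0.5, 0.5])
>>> Ys = [np.array([[1., 3.], [2., 4.]])]
>>> Ts = [np.outer(p, p)]
>>> update_feature_matrix([1.], Ys, Ts, p)
array([[2., 2.],
       [3., 3.]])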
"""
if nx is None:
nx = get_backend(*Ys, *Ts, p)
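# rescale by the inverse barycenter masses: X = sum_s lambda_s Y_s T_s^T diag(1 / p)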
p = 1. / p
tmpsum = sum([
lambdas[s] * nx.dot(Ys[s], Ts[s].T) * p[None, :]
for s in range(len(Ts))
])
return tmpsum
def init_matrix_semirelaxed(C1, C2, p, loss_fun='square_loss', nx=None):
r"""Return loss matrices and tensors for semi-relaxed Gromov-Wasserstein fast computation
Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the
selected loss function as the loss function of semi-relaxed Gromov-Wasserstein discrepancy.
The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix-semirelaxed>`
and adapted to the semi-relaxed problem where the second marginal is not a constant anymore.
Where:
- :math:`\mathbf{C_1}`: Metric cost matrix in the source space
- :math:`\mathbf{C_2}`: Metric cost matrix in the target space
- :math:`\mathbf{T}`: A coupling between those two spaces
The square-loss function :math:`L(a, b) = |a - b|^2` is read as:
.. math::
L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)
\mathrm{with} \ f_1(a) &= a^2
f_2(b) &= b^2
h_1(a) &= a
h_2(b) &= 2b
The kl-loss function :math:`L(a, b) = a \log\left(\frac{a}{b}\right) - a + b` is read as:
.. math::
L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)
\mathrm{with} \ f_1(a) &= a \log(a) - a
f_2(b) &= b
h_1(a) &= a
h_2(b) &= \log(b)
Parameters
----------
C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
C2 : array-like, shape (nt, nt)
Metric cost matrix in the target space
p : array-like, shape (ns,)
Probability distribution in the source space
loss_fun : str, optional
Name of loss function to use: either 'square_loss' or 'kl_loss' (default='square_loss')
nx : backend, optional
If left to its default value None, the backend is inferred from the input arrays.
Returns
-------
constC : array-like, shape (ns, nt)
Constant :math:`\mathbf{C}` matrix in Eq. (6) adapted to srGW
hC1 : array-like, shape (ns, ns)
:math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
hC2 : array-like, shape (nt, nt)
:math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
fC2t : array-like, shape (nt, nt)
:math:`\mathbf{f2}(\mathbf{C2})^\top` matrix in Eq. (6)
.. _references-init-matrix-semirelaxed:
References
----------
.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
.. [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty.
"Semi-relaxed Gromov-Wasserstein divergence and applications on graphs"
International Conference on Learning Representations (ICLR), 2022.
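Examples
--------
A minimal sketch (illustrative values); compared to `init_matrix`, `constC`
keeps only the source-marginal term, and `fC2t` is returned so that the
target-marginal term can be recomputed from :math:`\mathbf{T}` during iterations:

>>> import numpy as np
>>> C1 = np.array([[0., 1.], [1., 0.]])
>>> C2 = np.array([[0., 2.], [2., 0.]])
>>> p = np.array([0.5, 0.5])
>>> constC, hC1, hC2, fC2t = init_matrix_semirelaxed(C1, C2, p)
>>> constC
array([[0.5, 0.5],
       [0.5, 0.5]])
>>> fC2t
array([[0., 4.],
       [4., 0.]])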
"""
if nx is None:
C1, C2, p = list_to_array(C1, C2, p)
nx = get_backend(C1, C2, p)
if loss_fun == 'square_loss':
def f1(a):
return (a**2)
def f2(b):
return (b**2)
def h1(a):
return a
def h2(b):
return 2 * b
elif loss_fun == 'kl_loss':
def f1(a):
return a * nx.log(a + 1e-15) - a
def f2(b):
return b
def h1(a):
return a
def h2(b):
return nx.log(b + 1e-15)
else:
raise ValueError(f"Unknown loss_fun='{loss_fun}'. Use one of: 'square_loss', 'kl_loss'.")
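# only the f1(C1) term is constant here: the second marginal of T is free in srGW,
# so the f2(C2) part is recomputed from T via fC2t during iterations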
constC = nx.dot(nx.dot(f1(C1), nx.reshape(p, (-1, 1))),
nx.ones((1, C2.shape[0]), type_as=p))
hC1 = h1(C1)
hC2 = h2(C2)
fC2t = f2(C2).T
return constC, hC1, hC2, fC2t