Source code for ot.smooth

#Copyright (c) 2018, Mathieu Blondel
#All rights reserved.
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
#list of conditions and the following disclaimer.
#
#2. Redistributions in binary form must reproduce the above copyright notice,
#this list of conditions and the following disclaimer in the documentation and/or
#other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
#INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
#NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
#OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
#OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
#THE POSSIBILITY OF SUCH DAMAGE.

# Author: Mathieu Blondel
#         Remi Flamary <remi.flamary@unice.fr>
#         Tianlin Liu <t.liu@unibas.ch>

"""
Smooth and Sparse (KL and L2 reg.) and sparsity-constrained OT solvers.

Implementation of :
Smooth and Sparse Optimal Transport.
Mathieu Blondel, Vivien Seguy, Antoine Rolet.
In Proc. of AISTATS 2018.
https://arxiv.org/abs/1710.06276

(Original code from https://github.com/mblondel/smooth-ot/)

Sparsity-Constrained Optimal Transport.
Liu, T., Puigcerver, J., & Blondel, M. (2023).
Sparsity-constrained optimal transport.
Proceedings of the Eleventh International Conference on
Learning Representations (ICLR).
https://arxiv.org/abs/2209.15466


[17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal
Transport. Proceedings of the Twenty-First International Conference on
Artificial Intelligence and Statistics (AISTATS).

[50] Liu, T., Puigcerver, J., & Blondel, M. (2023).
Sparsity-constrained optimal transport.
Proceedings of the Eleventh International Conference on
Learning Representations (ICLR).

"""

import numpy as np
from scipy.optimize import minimize
from .backend import get_backend
import ot


def projection_simplex(V, z=1, axis=None):
    r"""Projection of :math:`\mathbf{V}` onto the simplex, scaled by `z`

    .. math::
        P\left(\mathbf{V}, z\right) = \mathop{\arg \min}_{\substack{\mathbf{y} >= 0 \\ \sum_i \mathbf{y}_i = z}} \quad \|\mathbf{y} - \mathbf{V}\|^2

    Parameters
    ----------
    V: ndarray, rank 2
    z: float or array
        If array, len(z) must be compatible with :math:`\mathbf{V}`
    axis: None or int
        - axis=None: project :math:`\mathbf{V}` by :math:`P(\mathbf{V}.\mathrm{ravel}(), z)`
        - axis=1: project each :math:`\mathbf{V}_i` by :math:`P(\mathbf{V}_i, z_i)`
        - axis=0: project each :math:`\mathbf{V}_{:, j}` by :math:`P(\mathbf{V}_{:, j}, z_j)`

    Returns
    -------
    projection: ndarray, shape :math:`\mathbf{V}`.shape
    """
    if axis == 1:
        n_features = V.shape[1]
        U = np.sort(V, axis=1)[:, ::-1]
        z = np.ones(len(V)) * z
        cssv = np.cumsum(U, axis=1) - z[:, np.newaxis]
        ind = np.arange(n_features) + 1
        cond = U - cssv / ind > 0
        rho = np.count_nonzero(cond, axis=1)
        theta = cssv[np.arange(len(V)), rho - 1] / rho
        return np.maximum(V - theta[:, np.newaxis], 0)

    elif axis == 0:
        return projection_simplex(V.T, z, axis=1).T

    else:
        V = V.ravel().reshape(1, -1)
        return projection_simplex(V, z, axis=1).ravel()
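
# Illustrative sketch (added for exposition, not part of the original module):
# project the columns of a random matrix onto the probability simplex with
# `projection_simplex` and check the defining properties of the result.
def _example_projection_simplex():
    rng = np.random.RandomState(0)
    V = rng.randn(4, 3)
    P = projection_simplex(V, z=1, axis=0)
    # Each column is a valid probability vector: non-negative, summing to one.
    assert np.all(P >= 0)
    assert np.allclose(P.sum(axis=0), 1.0)
    return P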


class Regularization(object):
    r"""Base class for Regularization objects

    Notes
    -----
    This class is not intended for direct use but as a parent class for
    concrete regularization implementations.
    """

    def __init__(self, gamma=1.0):
        """
        Parameters
        ----------
        gamma: float
            Regularization parameter.
            We recover unregularized OT when gamma -> 0.
        """
        self.gamma = gamma

    def delta_Omega(self, X):
        r"""
        Compute :math:`\delta_\Omega(\mathbf{X}_{:, j})` for each :math:`\mathbf{X}_{:, j}`.

        .. math::
            \delta_\Omega(\mathbf{x}) = \sup_{\mathbf{y} >= 0} \ \mathbf{y}^T \mathbf{x} - \Omega(\mathbf{y})

        Parameters
        ----------
        X: array, shape = (len(a), len(b))
            Input array.

        Returns
        -------
        v: array, (len(b), )
            Values: :math:`\mathbf{v}_j = \delta_\Omega(\mathbf{X}_{:, j})`
        G: array, (len(a), len(b))
            Gradients: :math:`\mathbf{G}_{:, j} = \nabla \delta_\Omega(\mathbf{X}_{:, j})`
        """
        raise NotImplementedError

    def max_Omega(self, X, b):
        r"""
        Compute :math:`\mathrm{max}_{\Omega, j}(\mathbf{X}_{:, j})` for each :math:`\mathbf{X}_{:, j}`.

        .. math::
            \mathrm{max}_{\Omega, j}(\mathbf{x}) =
            \sup_{\substack{\mathbf{y} >= 0 \\ \sum_i \mathbf{y}_i = 1}}
            \mathbf{y}^T \mathbf{x} - \frac{1}{\mathbf{b}_j} \Omega(\mathbf{b}_j \mathbf{y})

        Parameters
        ----------
        X: array, shape = (len(a), len(b))
            Input array.
        b: array, shape = (len(b), )

        Returns
        -------
        v: array, (len(b), )
            Values: :math:`\mathbf{v}_j = \mathrm{max}_{\Omega, j}(\mathbf{X}_{:, j})`
        G: array, (len(a), len(b))
            Gradients: :math:`\mathbf{G}_{:, j} = \nabla \mathrm{max}_{\Omega, j}(\mathbf{X}_{:, j})`
        """
        raise NotImplementedError

    def Omega(self, T):
        """
        Compute regularization term.

        Parameters
        ----------
        T: array, shape = len(a) x len(b)
            Input array.

        Returns
        -------
        value: float
            Regularization term.
        """
        raise NotImplementedError


class NegEntropy(Regularization):
    """ NegEntropy regularization """

    def delta_Omega(self, X):
        G = np.exp(X / self.gamma - 1)
        val = self.gamma * np.sum(G, axis=0)
        return val, G

    def max_Omega(self, X, b):
        max_X = np.max(X, axis=0) / self.gamma
        exp_X = np.exp(X / self.gamma - max_X)
        val = self.gamma * (np.log(np.sum(exp_X, axis=0)) + max_X)
        val -= self.gamma * np.log(b)
        G = exp_X / np.sum(exp_X, axis=0)
        return val, G

    def Omega(self, T):
        return self.gamma * np.sum(T * np.log(T))


class SquaredL2(Regularization):
    """ Squared L2 regularization """

    def delta_Omega(self, X):
        max_X = np.maximum(X, 0)
        val = np.sum(max_X ** 2, axis=0) / (2 * self.gamma)
        G = max_X / self.gamma
        return val, G

    def max_Omega(self, X, b):
        G = projection_simplex(X / (b * self.gamma), axis=0)
        val = np.sum(X * G, axis=0)
        val -= 0.5 * self.gamma * b * np.sum(G * G, axis=0)
        return val, G

    def Omega(self, T):
        return 0.5 * self.gamma * np.sum(T ** 2)
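
# Illustrative sketch (added for exposition, not part of the original module):
# for the squared L2 regularizer, `delta_Omega` returns, column by column, the
# conjugate value and its gradient; the gradient is simply the positive part
# of X divided by gamma.
def _example_squared_l2():
    rng = np.random.RandomState(0)
    X = rng.randn(4, 3)
    regul = SquaredL2(gamma=2.0)
    val, G = regul.delta_Omega(X)
    assert val.shape == (3,) and G.shape == (4, 3)
    assert np.allclose(G, np.maximum(X, 0) / 2.0)
    return val, G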


class SparsityConstrained(Regularization):
    """ Squared L2 regularization with sparsity constraints """

    def __init__(self, max_nz, gamma=1.0):
        self.max_nz = max_nz
        self.gamma = gamma

    def delta_Omega(self, X):
        # For each column of X, find the entries that are not among the
        # top max_nz largest ones.
        non_top_indices = np.argpartition(
            -X, self.max_nz, axis=0)[self.max_nz:]
        # Zero them out (equivalent to setting them to -inf here, since only
        # the positive part of X is kept below).
        if X.ndim == 1:
            X[non_top_indices] = 0.0
        else:
            X[non_top_indices, np.arange(X.shape[1])] = 0.0
        max_X = np.maximum(X, 0)
        val = np.sum(max_X ** 2, axis=0) / (2 * self.gamma)
        G = max_X / self.gamma
        return val, G

    def max_Omega(self, X, b):
        # Project the scaled X onto the simplex with a sparsity constraint.
        G = ot.utils.projection_sparse_simplex(
            X / (b * self.gamma), self.max_nz, axis=0)
        val = np.sum(X * G, axis=0)
        val -= 0.5 * self.gamma * b * np.sum(G * G, axis=0)
        return val, G

    def Omega(self, T):
        return 0.5 * self.gamma * np.sum(T ** 2)
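
# Illustrative sketch (added for exposition, not part of the original module):
# the sparsity-constrained regularizer keeps at most `max_nz` nonzero entries
# per column of the gradient returned by `delta_Omega`, and hence per column
# of the reconstructed plan.
def _example_sparsity_constrained():
    rng = np.random.RandomState(0)
    X = rng.randn(5, 3)
    regul = SparsityConstrained(max_nz=2, gamma=1.0)
    # Pass a copy: `delta_Omega` modifies its input array in place.
    val, G = regul.delta_Omega(X.copy())
    assert np.all((G > 0).sum(axis=0) <= 2)
    return val, G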


def dual_obj_grad(alpha, beta, a, b, C, regul):
    r"""
    Compute objective value and gradients of dual objective.

    Parameters
    ----------
    alpha: array, shape = len(a)
    beta: array, shape = len(b)
        Current iterate of dual potentials.
    a: array, shape = len(a)
    b: array, shape = len(b)
        Input histograms (should be non-negative and sum to 1).
    C: array, shape = (len(a), len(b))
        Ground cost matrix.
    regul: Regularization object
        Should implement a `delta_Omega(X)` method.

    Returns
    -------
    obj: float
        Objective value (higher is better).
    grad_alpha: array, shape = len(a)
        Gradient w.r.t. `alpha`.
    grad_beta: array, shape = len(b)
        Gradient w.r.t. `beta`.
    """
    obj = np.dot(alpha, a) + np.dot(beta, b)
    grad_alpha = a.copy()
    grad_beta = b.copy()

    # X[:, j] = alpha + beta[j] - C[:, j]
    X = alpha[:, np.newaxis] + beta - C

    # val.shape = len(b)
    # G.shape = len(a) x len(b)
    val, G = regul.delta_Omega(X)

    obj -= np.sum(val)
    grad_alpha -= G.sum(axis=1)
    grad_beta -= G.sum(axis=0)

    return obj, grad_alpha, grad_beta
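
# Illustrative sketch (added for exposition, not part of the original module):
# finite-difference check of the gradient returned by `dual_obj_grad` w.r.t.
# alpha (with beta held fixed), using scipy.optimize.approx_fprime.
def _example_check_dual_grad():
    from scipy.optimize import approx_fprime
    rng = np.random.RandomState(0)
    n, m = 4, 3
    a = np.full(n, 1.0 / n)
    b = np.full(m, 1.0 / m)
    C = rng.rand(n, m)
    regul = SquaredL2(gamma=1.0)
    beta = np.zeros(m)
    alpha0 = rng.randn(n)

    def obj(alpha):
        return dual_obj_grad(alpha, beta, a, b, C, regul)[0]

    grad_analytic = dual_obj_grad(alpha0, beta, a, b, C, regul)[1]
    grad_numeric = approx_fprime(alpha0, obj, 1e-6)
    assert np.allclose(grad_analytic, grad_numeric, atol=1e-4)
    return grad_analytic, grad_numeric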


def solve_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
               verbose=False):
    """
    Solve the "smoothed" dual objective.

    Parameters
    ----------
    a: array, shape = (len(a), )
    b: array, shape = (len(b), )
        Input histograms (should be non-negative and sum to 1).
    C: array, shape = (len(a), len(b))
        Ground cost matrix.
    regul: Regularization object
        Should implement a `delta_Omega(X)` method.
    method: str
        Solver to be used (passed to `scipy.optimize.minimize`).
    tol: float
        Tolerance parameter.
    max_iter: int
        Maximum number of iterations.

    Returns
    -------
    alpha: array, shape = (len(a), )
    beta: array, shape = (len(b), )
        Dual potentials.
    """

    def _func(params):
        # Unpack alpha and beta.
        alpha = params[:len(a)]
        beta = params[len(a):]

        obj, grad_alpha, grad_beta = dual_obj_grad(alpha, beta, a, b, C,
                                                   regul)

        # Pack grad_alpha and grad_beta.
        grad = np.concatenate((grad_alpha, grad_beta))

        # We need to maximize the dual.
        return -obj, -grad

    # Unfortunately, `minimize` only supports functions whose argument is a
    # vector. So, we need to concatenate alpha and beta.
    alpha_init = np.zeros(len(a))
    beta_init = np.zeros(len(b))
    params_init = np.concatenate((alpha_init, beta_init))

    res = minimize(_func, params_init, method=method, jac=True,
                   tol=tol, options=dict(maxiter=max_iter, disp=verbose))

    alpha = res.x[:len(a)]
    beta = res.x[len(a):]

    return alpha, beta, res


def semi_dual_obj_grad(alpha, a, b, C, regul):
    """
    Compute objective value and gradient of semi-dual objective.

    Parameters
    ----------
    alpha: array, shape = len(a)
        Current iterate of semi-dual potentials.
    a: array, shape = len(a)
    b: array, shape = len(b)
        Input histograms (should be non-negative and sum to 1).
    C: array, shape = (len(a), len(b))
        Ground cost matrix.
    regul: Regularization object
        Should implement a `max_Omega(X)` method.

    Returns
    -------
    obj: float
        Objective value (higher is better).
    grad: array, shape = len(a)
        Gradient w.r.t. alpha.
    """
    obj = np.dot(alpha, a)
    grad = a.copy()

    # X[:, j] = alpha - C[:, j]
    X = alpha[:, np.newaxis] - C

    # val.shape = len(b)
    # G.shape = len(a) x len(b)
    val, G = regul.max_Omega(X, b)

    obj -= np.dot(b, val)
    grad -= np.dot(G, b)

    return obj, grad


def solve_semi_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
                    verbose=False):
    """
    Solve the "smoothed" semi-dual objective.

    Parameters
    ----------
    a: array, shape = (len(a), )
    b: array, shape = (len(b), )
        Input histograms (should be non-negative and sum to 1).
    C: array, shape = (len(a), len(b))
        Ground cost matrix.
    regul: Regularization object
        Should implement a `max_Omega(X)` method.
    method: str
        Solver to be used (passed to `scipy.optimize.minimize`).
    tol: float
        Tolerance parameter.
    max_iter: int
        Maximum number of iterations.

    Returns
    -------
    alpha: array, shape = (len(a), )
        Semi-dual potentials.
    """

    def _func(alpha):
        obj, grad = semi_dual_obj_grad(alpha, a, b, C, regul)
        # We need to maximize the semi-dual.
        return -obj, -grad

    alpha_init = np.zeros(len(a))

    res = minimize(_func, alpha_init, method=method, jac=True,
                   tol=tol, options=dict(maxiter=max_iter, disp=verbose))

    return res.x, res


def get_plan_from_dual(alpha, beta, C, regul):
    r"""
    Retrieve optimal transportation plan from optimal dual potentials.

    Parameters
    ----------
    alpha: array, shape = len(a)
    beta: array, shape = len(b)
        Optimal dual potentials.
    C: array, shape = (len(a), len(b))
        Ground cost matrix.
    regul: Regularization object
        Should implement a `delta_Omega(X)` method.

    Returns
    -------
    T: array, shape = (len(a), len(b))
        Optimal transportation plan.
    """
    X = alpha[:, np.newaxis] + beta - C
    return regul.delta_Omega(X)[1]
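
# Illustrative sketch (added for exposition, not part of the original module):
# the full dual pipeline on a small random problem. Solve the smoothed dual
# with `solve_dual`, then recover the plan with `get_plan_from_dual`; at the
# optimum the plan marginals match a and b up to the solver tolerance (a loose
# tolerance is used here).
def _example_dual_pipeline():
    rng = np.random.RandomState(0)
    n, m = 5, 4
    a = np.full(n, 1.0 / n)
    b = np.full(m, 1.0 / m)
    C = rng.rand(n, m)
    regul = SquaredL2(gamma=1.0)
    alpha, beta, _ = solve_dual(a, b, C, regul, tol=1e-9, max_iter=1000)
    T = get_plan_from_dual(alpha, beta, C, regul)
    assert np.allclose(T.sum(axis=1), a, atol=1e-2)
    assert np.allclose(T.sum(axis=0), b, atol=1e-2)
    return T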


def get_plan_from_semi_dual(alpha, b, C, regul):
    r"""
    Retrieve optimal transportation plan from optimal semi-dual potentials.

    Parameters
    ----------
    alpha: array, shape = len(a)
        Optimal semi-dual potentials.
    b: array, shape = len(b)
        Second input histogram (should be non-negative and sum to 1).
    C: array, shape = (len(a), len(b))
        Ground cost matrix.
    regul: Regularization object
        Should implement a `max_Omega(X)` method.

    Returns
    -------
    T: array, shape = (len(a), len(b))
        Optimal transportation plan.
    """
    X = alpha[:, np.newaxis] - C
    return regul.max_Omega(X, b)[1] * b
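
# Illustrative sketch (added for exposition, not part of the original module):
# the semi-dual pipeline. By construction the recovered plan has columns
# summing exactly to b, while the row sums match a only at the semi-dual
# optimum (checked here with a loose tolerance).
def _example_semi_dual_pipeline():
    rng = np.random.RandomState(0)
    n, m = 5, 4
    a = np.full(n, 1.0 / n)
    b = np.full(m, 1.0 / m)
    C = rng.rand(n, m)
    regul = NegEntropy(gamma=1.0)
    alpha, _ = solve_semi_dual(a, b, C, regul, tol=1e-9, max_iter=1000)
    T = get_plan_from_semi_dual(alpha, b, C, regul)
    assert np.allclose(T.sum(axis=0), b)
    assert np.allclose(T.sum(axis=1), a, atol=1e-2)
    return T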


def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B",
                   stopThr=1e-9, numItermax=500, verbose=False, log=False,
                   max_nz=None):
    r"""
    Solve the regularized OT problem in the dual and return the OT matrix

    The function solves the smooth relaxed dual formulation (7) in
    :ref:`[17] <references-smooth-ot-dual>`:

    .. math::
        \max_{\alpha,\beta}\quad \mathbf{a}^T\alpha + \mathbf{b}^T\beta -
        \sum_j \delta_\Omega \left(\alpha + \beta_j - \mathbf{m}_j \right)

    where :

    - :math:`\mathbf{m}_j` is the j-th column of the cost matrix
    - :math:`\delta_\Omega` is the convex conjugate of the regularization term :math:`\Omega`
    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)

    The OT matrix is reconstructed from the gradient of :math:`\delta_\Omega`
    (see :ref:`[17] <references-smooth-ot-dual>` Proposition 1).
    The optimization algorithm uses gradient descent (L-BFGS by default).


    Parameters
    ----------
    a : np.ndarray (ns,)
        samples weights in the source domain
    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
        samples in the target domain, compute sinkhorn with multiple targets
        and fixed :math:`\mathbf{M}` if :math:`\mathbf{b}` is a matrix
        (return OT loss + dual variables in log)
    M : np.ndarray (ns,nt)
        loss matrix
    reg : float
        Regularization term >0
    reg_type : str
        Regularization type, can be the following (default ='l2'):

        - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn
          :ref:`[2] <references-smooth-ot-dual>`)
        - 'l2' : Squared Euclidean regularization
        - 'sparsity_constrained' : Sparsity-constrained regularization [50]
    max_nz : int or None, optional
        Used only in the case of reg_type = 'sparsity_constrained' to specify
        the maximum number of nonzeros per column of the optimal plan;
        not used for other regularization types.
    method : str
        Solver to use for scipy.optimize.minimize
    numItermax : int, optional
        Max number of iterations
    stopThr : float, optional
        Stop threshold on error (>0)
    verbose : bool, optional
        Print information along iterations
    log : bool, optional
        record log if True

    Returns
    -------
    gamma : (ns, nt) ndarray
        Optimal transportation matrix for the given parameters
    log : dict
        log dictionary returned only if log==True in parameters


    .. _references-smooth-ot-dual:
    References
    ----------
    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
        Transport, Advances in Neural Information Processing Systems (NIPS)
        26, 2013

    .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse
        Optimal Transport. Proceedings of the Twenty-First International
        Conference on Artificial Intelligence and Statistics (AISTATS).

    .. [50] Liu, T., Puigcerver, J., & Blondel, M. (2023).
        Sparsity-constrained optimal transport. Proceedings of the Eleventh
        International Conference on Learning Representations (ICLR).

    See Also
    --------
    ot.lp.emd : Unregularized OT
    ot.sinkhorn : Entropic regularized OT
    ot.optim.cg : General regularized OT
    """

    nx = get_backend(a, b, M)

    if reg_type.lower() in ['l2', 'squaredl2']:
        regul = SquaredL2(gamma=reg)
    elif reg_type.lower() in ['entropic', 'negentropy', 'kl']:
        regul = NegEntropy(gamma=reg)
    elif reg_type.lower() in ['sparsity_constrained', 'sparsity-constrained']:
        if not isinstance(max_nz, int):
            raise ValueError(
                f'max_nz {max_nz} must be an integer')
        regul = SparsityConstrained(gamma=reg, max_nz=max_nz)
    else:
        raise NotImplementedError('Unknown regularization')

    a0, b0, M0 = a, b, M

    # convert to numpy
    a, b, M = nx.to_numpy(a, b, M)

    # solve dual
    alpha, beta, res = solve_dual(a, b, M, regul, max_iter=numItermax,
                                  tol=stopThr, verbose=verbose)

    # reconstruct transport matrix
    G = nx.from_numpy(get_plan_from_dual(alpha, beta, M, regul), type_as=M0)

    if log:
        log = {'alpha': nx.from_numpy(alpha, type_as=a0),
               'beta': nx.from_numpy(beta, type_as=b0), 'res': res}
        return G, log
    else:
        return G
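
# Illustrative usage sketch (added for exposition, not part of the original
# module): the high-level dual solver on a toy problem, with the L2 and the
# sparsity-constrained regularizers.
def _example_smooth_ot_dual():
    rng = np.random.RandomState(0)
    n = 5
    a = np.full(n, 1.0 / n)
    b = np.full(n, 1.0 / n)
    M = rng.rand(n, n)
    # Smooth (L2-regularized) plan.
    G_l2 = smooth_ot_dual(a, b, M, reg=0.1, reg_type='l2')
    # Sparsity-constrained plan: at most 2 nonzeros per column.
    G_sc = smooth_ot_dual(a, b, M, reg=0.1,
                          reg_type='sparsity_constrained', max_nz=2)
    assert np.all((G_sc > 0).sum(axis=0) <= 2)
    return G_l2, G_sc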


def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', max_nz=None,
                        method="L-BFGS-B", stopThr=1e-9, numItermax=500,
                        verbose=False, log=False):
    r"""
    Solve the regularized OT problem in the semi-dual and return the OT matrix

    The function solves the smooth relaxed dual formulation (10) in
    :ref:`[17] <references-smooth-ot-semi-dual>`:

    .. math::
        \max_{\alpha}\quad \mathbf{a}^T\alpha - \mathrm{OT}_\Omega^*(\alpha, \mathbf{b})

    where :

    .. math::
        \mathrm{OT}_\Omega^*(\alpha, \mathbf{b}) = \sum_j \mathbf{b}_j \,
        \mathrm{max}_{\Omega, j}\left(\alpha - \mathbf{m}_j\right)

    - :math:`\mathbf{m}_j` is the j-th column of the cost matrix
    - :math:`\mathrm{OT}_\Omega^*(\alpha, \mathbf{b})` is defined in Eq. (9) in
      :ref:`[17] <references-smooth-ot-semi-dual>`
    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)

    The OT matrix is reconstructed using
    :ref:`[17] <references-smooth-ot-semi-dual>` Proposition 2.
    The optimization algorithm uses gradient descent (L-BFGS by default).


    Parameters
    ----------
    a : np.ndarray (ns,)
        samples weights in the source domain
    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
        samples in the target domain, compute sinkhorn with multiple targets
        and fixed :math:`\mathbf{M}` if :math:`\mathbf{b}` is a matrix
        (return OT loss + dual variables in log)
    M : np.ndarray (ns,nt)
        loss matrix
    reg : float
        Regularization term >0
    reg_type : str
        Regularization type, can be the following (default ='l2'):

        - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn
          :ref:`[2] <references-smooth-ot-semi-dual>`)
        - 'l2' : Squared Euclidean regularization
        - 'sparsity_constrained' : Sparsity-constrained regularization [50]
    max_nz : int or None, optional
        Used only in the case of reg_type = 'sparsity_constrained' to specify
        the maximum number of nonzeros per column of the optimal plan;
        not used for other regularization types.
    method : str
        Solver to use for scipy.optimize.minimize
    numItermax : int, optional
        Max number of iterations
    stopThr : float, optional
        Stop threshold on error (>0)
    verbose : bool, optional
        Print information along iterations
    log : bool, optional
        record log if True

    Returns
    -------
    gamma : (ns, nt) ndarray
        Optimal transportation matrix for the given parameters
    log : dict
        log dictionary returned only if log==True in parameters


    .. _references-smooth-ot-semi-dual:
    References
    ----------
    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
        Transport, Advances in Neural Information Processing Systems (NIPS)
        26, 2013

    .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse
        Optimal Transport. Proceedings of the Twenty-First International
        Conference on Artificial Intelligence and Statistics (AISTATS).

    .. [50] Liu, T., Puigcerver, J., & Blondel, M. (2023).
        Sparsity-constrained optimal transport. Proceedings of the Eleventh
        International Conference on Learning Representations (ICLR).

    See Also
    --------
    ot.lp.emd : Unregularized OT
    ot.sinkhorn : Entropic regularized OT
    ot.optim.cg : General regularized OT
    """

    if reg_type.lower() in ['l2', 'squaredl2']:
        regul = SquaredL2(gamma=reg)
    elif reg_type.lower() in ['entropic', 'negentropy', 'kl']:
        regul = NegEntropy(gamma=reg)
    elif reg_type.lower() in ['sparsity_constrained', 'sparsity-constrained']:
        if not isinstance(max_nz, int):
            raise ValueError(
                f'max_nz {max_nz} must be an integer')
        regul = SparsityConstrained(gamma=reg, max_nz=max_nz)
    else:
        raise NotImplementedError('Unknown regularization')

    # solve semi-dual
    alpha, res = solve_semi_dual(a, b, M, regul, max_iter=numItermax,
                                 tol=stopThr, verbose=verbose)

    # reconstruct transport matrix
    G = get_plan_from_semi_dual(alpha, b, M, regul)

    if log:
        log = {'alpha': alpha, 'res': res}
        return G, log
    else:
        return G
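
# Illustrative usage sketch (added for exposition, not part of the original
# module): the semi-dual solver with KL regularization, which plays the same
# smoothing role as the entropic (Sinkhorn-like) regularizer.
def _example_smooth_ot_semi_dual():
    rng = np.random.RandomState(0)
    n = 5
    a = np.full(n, 1.0 / n)
    b = np.full(n, 1.0 / n)
    M = rng.rand(n, n)
    G = smooth_ot_semi_dual(a, b, M, reg=0.1, reg_type='kl')
    # Column marginals hold exactly by construction of the semi-dual plan.
    assert np.allclose(G.sum(axis=0), b)
    return G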