From 7573e4c5501699b317ed504450b699e0bef040b2 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Thu, 7 May 2026 13:25:07 +0200 Subject: [PATCH 01/15] add proxsuite --- pyproject.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e13772b3..7dab2866 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,8 @@ authors = [ requires-python = ">=3.10" dependencies = [ "torch>=2.3.0", # Problems before 2.4.0, especially with autogram. - "quadprog>=0.1.9, != 0.1.10", # Doesn't work before 0.1.9, 0.1.10 is yanked "numpy>=1.21.2", # Does not work before 1.21. No python 3.10 wheel before 1.21.2. - "qpsolvers>=1.0.1", # Does not work before 1.0.1 + "proxsuite>=0.7.2", ] classifiers = [ "Development Status :: 4 - Beta", @@ -101,8 +100,7 @@ plot = [ lower_bounds = [ "torch==2.3.0", "numpy==1.21.2", - "quadprog==0.1.9", - "qpsolvers==1.0.1", + "proxsuite==0.7.2", ] [project.optional-dependencies] From 69e42a3dd3f62a2c578f434955ab9f5690dec6ac Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Thu, 7 May 2026 13:47:40 +0200 Subject: [PATCH 02/15] Translate from qpsolvers to proxsuite (done by opencode) --- src/torchjd/aggregation/_dualproj.py | 4 +- src/torchjd/aggregation/_upgrad.py | 4 +- src/torchjd/aggregation/_utils/dual_cone.py | 79 +++++++++++-------- .../unit/aggregation/_utils/test_dual_cone.py | 12 +-- tests/unit/aggregation/test_dualproj.py | 8 +- tests/unit/aggregation/test_pcgrad.py | 2 +- tests/unit/aggregation/test_upgrad.py | 10 ++- 7 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/torchjd/aggregation/_dualproj.py b/src/torchjd/aggregation/_dualproj.py index acb87d2f..62f1f894 100644 --- a/src/torchjd/aggregation/_dualproj.py +++ b/src/torchjd/aggregation/_dualproj.py @@ -30,7 +30,7 @@ def __init__( pref_vector: Tensor | None = None, norm_eps: float = 0.0001, reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "quadprog", + solver: SUPPORTED_SOLVER = "proxsuite", ) -> None: 
super().__init__() self.pref_vector = pref_vector @@ -100,7 +100,7 @@ def __init__( pref_vector: Tensor | None = None, norm_eps: float = 0.0001, reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "quadprog", + solver: SUPPORTED_SOLVER = "proxsuite", ) -> None: self._solver: SUPPORTED_SOLVER = solver diff --git a/src/torchjd/aggregation/_upgrad.py b/src/torchjd/aggregation/_upgrad.py index 68689829..914a8aa9 100644 --- a/src/torchjd/aggregation/_upgrad.py +++ b/src/torchjd/aggregation/_upgrad.py @@ -31,7 +31,7 @@ def __init__( pref_vector: Tensor | None = None, norm_eps: float = 0.0001, reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "quadprog", + solver: SUPPORTED_SOLVER = "proxsuite", ) -> None: super().__init__() self.pref_vector = pref_vector @@ -103,7 +103,7 @@ def __init__( pref_vector: Tensor | None = None, norm_eps: float = 0.0001, reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "quadprog", + solver: SUPPORTED_SOLVER = "proxsuite", ) -> None: self._solver: SUPPORTED_SOLVER = solver diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py index b076366b..53756b05 100644 --- a/src/torchjd/aggregation/_utils/dual_cone.py +++ b/src/torchjd/aggregation/_utils/dual_cone.py @@ -1,11 +1,10 @@ from typing import Literal, TypeAlias -import numpy as np import torch -from qpsolvers import solve_qp +from proxsuite.torch.qplayer import QPFunction from torch import Tensor -SUPPORTED_SOLVER: TypeAlias = Literal["quadprog"] +SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"] def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor: @@ -19,44 +18,62 @@ def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor: :return: A tensor of projection weights with the same shape as `U`. 
""" - G_ = _to_array(G) - U_ = _to_array(U) + original_shape = U.shape + m = G.shape[0] + U_flat = U.reshape(-1, m) # [nBatch, m] - W = np.apply_along_axis(lambda u: _project_weight_vector(u, G_, solver), axis=-1, arr=U_) + W = _project_weight_vector_batch(U_flat, G, solver) - return torch.as_tensor(W, device=G.device, dtype=G.dtype) + return W.reshape(original_shape) -def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray: - r""" - Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`, - given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies - `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1]. +# TODO: should merge docstrings appropriately - By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program: - minimize v^T G v - subject to u \preceq v +# def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray: +# r""" +# Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`, +# given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies +# `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1]. - Reference: - [1] `Jacobian Descent For Multi-Objective Optimization `_. +# By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program: +# minimize v^T G v +# subject to u \preceq v - :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to - project. - :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be - symmetric and positive definite. - :param solver: The quadratic programming solver to use. - """ +# Reference: +# [1] `Jacobian Descent For Multi-Objective Optimization `_. + +# :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to +# project. 
+# :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be +# symmetric and positive definite. +# :param solver: The quadratic programming solver to use. +# """ +# ... - m = G.shape[0] - w = solve_qp(G, np.zeros(m), -np.eye(m), -u, solver=solver) - if w is None: # This may happen when G has large values. - raise ValueError("Failed to solve the quadratic programming problem.") +@torch.no_grad() +def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER) -> Tensor: + r""" + Solves the batch of quadratic programs minimizing `v^T G v` subject to `u_i \preceq v_i` for + each row `u_i` of `U`. + + :param U: The tensor of vectors `u_i` of shape `[n, m]`. + :param G: The Gramian matrix of shape `[m, m]`. It must be symmetric and positive definite. + :param solver: The quadratic programming solver to use. + :return: A tensor of projection weights of shape `[n, m]`. + """ - return w + _, m = U.shape + device = U.device + dtype = U.dtype + Q = G.cpu().to(dtype=torch.float64) + p = torch.zeros(m, dtype=torch.float64) + C = -torch.eye(m, dtype=torch.float64) + lb = torch.full((m,), -1e20, dtype=torch.float64) + ub = -U.cpu().to(dtype=torch.float64) -def _to_array(tensor: Tensor) -> np.ndarray: - """Transforms a tensor into a numpy array with float64 dtype.""" + solver_fn = QPFunction(structural_feasibility=True) + zhats, _, _ = solver_fn(Q, p, torch.Tensor(), torch.Tensor(), C, lb, ub) - return tensor.cpu().detach().numpy().astype(np.float64) + return zhats.to(device=device, dtype=dtype) diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py index 68a8a75d..2ada2ded 100644 --- a/tests/unit/aggregation/_utils/test_dual_cone.py +++ b/tests/unit/aggregation/_utils/test_dual_cone.py @@ -34,7 +34,7 @@ def test_solution_weights(shape: tuple[int, int]) -> None: G = J @ J.T u = rand_(shape[0]) - w = project_weights(u, G, "quadprog") + w = project_weights(u, G, "proxsuite") 
dual_gap = w - u # Dual feasibility @@ -63,8 +63,8 @@ def test_scale_invariant(shape: tuple[int, int], scaling: float) -> None: G = J @ J.T u = rand_(shape[0]) - w = project_weights(u, G, "quadprog") - w_scaled = project_weights(u, scaling * G, "quadprog") + w = project_weights(u, G, "proxsuite") + w_scaled = project_weights(u, scaling * G, "proxsuite") assert_close(w_scaled, w) @@ -82,8 +82,8 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None: G = matrix @ matrix.T - W_tensor = project_weights(U_tensor, G, "quadprog") - W_matrix = project_weights(U_matrix, G, "quadprog") + W_tensor = project_weights(U_tensor, G, "proxsuite") + W_matrix = project_weights(U_matrix, G, "proxsuite") assert_close(W_matrix.reshape(shape), W_tensor) @@ -94,4 +94,4 @@ def test_project_weight_vector_failure() -> None: large_J = np.random.randn(10, 100) * 1e5 large_G = large_J @ large_J.T with raises(ValueError): - _project_weight_vector(np.ones(10), large_G, "quadprog") + _project_weight_vector(np.ones(10), large_G, "proxsuite") diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py index 34fe8d46..c8bea16e 100644 --- a/tests/unit/aggregation/test_dualproj.py +++ b/tests/unit/aggregation/test_dualproj.py @@ -47,9 +47,9 @@ def test_non_differentiable(aggregator: DualProj, matrix: Tensor) -> None: def test_representations() -> None: - A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog") + A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite") assert ( - repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')" + repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')" ) assert str(A) == "DualProj" @@ -57,11 +57,11 @@ def test_representations() -> None: pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"), norm_eps=0.0001, reg_eps=0.0001, - solver="quadprog", + solver="proxsuite", ) assert ( repr(A) == 
"DualProj(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, " - "solver='quadprog')" + "solver='proxsuite')" ) assert str(A) == "DualProj([1., 2., 3.])" diff --git a/tests/unit/aggregation/test_pcgrad.py b/tests/unit/aggregation/test_pcgrad.py index b776071d..b9c4cf63 100644 --- a/tests/unit/aggregation/test_pcgrad.py +++ b/tests/unit/aggregation/test_pcgrad.py @@ -55,7 +55,7 @@ def test_equivalence_upgrad_sum_two_rows(shape: tuple[int, int]) -> None: ones_((2,)), norm_eps=0.0, reg_eps=0.0, - solver="quadprog", + solver="proxsuite", ) result = pc_grad_weighting(gramian) diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py index 075680a0..b2f7df4e 100644 --- a/tests/unit/aggregation/test_upgrad.py +++ b/tests/unit/aggregation/test_upgrad.py @@ -53,19 +53,21 @@ def test_non_differentiable(aggregator: UPGrad, matrix: Tensor) -> None: def test_representations() -> None: - A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog") - assert repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')" + A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite") + assert ( + repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')" + ) assert str(A) == "UPGrad" A = UPGrad( pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"), norm_eps=0.0001, reg_eps=0.0001, - solver="quadprog", + solver="proxsuite", ) assert ( repr(A) == "UPGrad(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, " - "solver='quadprog')" + "solver='proxsuite')" ) assert str(A) == "UPGrad([1., 2., 3.])" From 952e6171224507dd8f102b721cc3efc3a6d24b0d Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Thu, 7 May 2026 14:04:35 +0200 Subject: [PATCH 03/15] remove `project_weights_vector` --- tests/unit/aggregation/_utils/test_dual_cone.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git 
a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py index 2ada2ded..0fa3e8a0 100644 --- a/tests/unit/aggregation/_utils/test_dual_cone.py +++ b/tests/unit/aggregation/_utils/test_dual_cone.py @@ -1,10 +1,9 @@ -import numpy as np import torch -from pytest import mark, raises +from pytest import mark from torch.testing import assert_close from utils.tensors import rand_, randn_ -from torchjd.aggregation._utils.dual_cone import _project_weight_vector, project_weights +from torchjd.aggregation._utils.dual_cone import project_weights @mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)]) @@ -88,10 +87,10 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None: assert_close(W_matrix.reshape(shape), W_tensor) -def test_project_weight_vector_failure() -> None: - """Tests that `_project_weight_vector` raises an error when the input G has too large values.""" +# def test_project_weight_vector_failure() -> None: +# """Tests that `_project_weight_vector` raises an error when the input G has too large values.""" - large_J = np.random.randn(10, 100) * 1e5 - large_G = large_J @ large_J.T - with raises(ValueError): - _project_weight_vector(np.ones(10), large_G, "proxsuite") +# large_J = np.random.randn(10, 100) * 1e5 +# large_G = large_J @ large_J.T +# with raises(ValueError): +# _project_weight_vector(np.ones(10), large_G, "proxsuite") From b1b8c8dbef16433e6aa74dfb02024cbf0ee3305e Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 8 May 2026 09:24:30 +0200 Subject: [PATCH 04/15] Change from qplayer to BatchQP (lower level) --- src/torchjd/aggregation/_utils/dual_cone.py | 38 +++++++++++++++------ 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py index 53756b05..2f4d2693 100644 --- a/src/torchjd/aggregation/_utils/dual_cone.py +++ b/src/torchjd/aggregation/_utils/dual_cone.py @@ -1,7 +1,8 @@ from 
typing import Literal, TypeAlias +import numpy as np import torch -from proxsuite.torch.qplayer import QPFunction +from proxsuite import proxqp from torch import Tensor SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"] @@ -63,17 +64,34 @@ def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER :return: A tensor of projection weights of shape `[n, m]`. """ - _, m = U.shape + n, m = U.shape device = U.device dtype = U.dtype - Q = G.cpu().to(dtype=torch.float64) - p = torch.zeros(m, dtype=torch.float64) - C = -torch.eye(m, dtype=torch.float64) - lb = torch.full((m,), -1e20, dtype=torch.float64) - ub = -U.cpu().to(dtype=torch.float64) - - solver_fn = QPFunction(structural_feasibility=True) - zhats, _, _ = solver_fn(Q, p, torch.Tensor(), torch.Tensor(), C, lb, ub) + Q_np = G.cpu().to(dtype=torch.float64).numpy() + p_np = np.zeros(m, dtype=np.float64) + C_np = -np.eye(m, dtype=np.float64) + lb_np = np.full(m, -1e20, dtype=np.float64) + ub_np = (-U.cpu().to(dtype=torch.float64)).numpy() + + batch_qps = proxqp.dense.BatchQP() + default_rho = 5.0e-5 + + for i in range(n): + qp = batch_qps.init_qp_in_place(m, 0, m) + qp.settings.primal_infeasibility_solving = False + qp.settings.max_iter = 1000 + qp.settings.max_iter_in = 100 + qp.settings.default_rho = default_rho + qp.settings.refactor_rho_threshold = default_rho + qp.settings.eps_abs = 1e-9 + qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho) + + for i in range(n): + batch_qps.get(i).solve() + + zhats = torch.empty((n, m), dtype=torch.float64) + for i in range(n): + zhats[i] = torch.from_numpy(batch_qps.get(i).results.x) return zhats.to(device=device, dtype=dtype) From 8968715a44e46ab3422a1899c2347bd1481beafa Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Sun, 10 May 2026 11:41:28 +0200 Subject: [PATCH 05/15] Make proxsuite run QPs in parallel, put the cast of result to tensor at the end. 
--- src/torchjd/aggregation/_utils/dual_cone.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py index 2f4d2693..ae25bdbd 100644 --- a/src/torchjd/aggregation/_utils/dual_cone.py +++ b/src/torchjd/aggregation/_utils/dual_cone.py @@ -1,3 +1,4 @@ +import os from typing import Literal, TypeAlias import numpy as np @@ -87,11 +88,11 @@ def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER qp.settings.eps_abs = 1e-9 qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho) - for i in range(n): - batch_qps.get(i).solve() + num_threads = max(1, (os.cpu_count() or 2) // 2) + proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps) - zhats = torch.empty((n, m), dtype=torch.float64) + zhats_np = np.empty((n, m), dtype=np.float64) for i in range(n): - zhats[i] = torch.from_numpy(batch_qps.get(i).results.x) + zhats_np[i] = batch_qps.get(i).results.x - return zhats.to(device=device, dtype=dtype) + return torch.from_numpy(zhats_np).to(device=device, dtype=dtype) From fcc19d3aeed936682c6bc91b0a266664f7535545 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Thu, 14 May 2026 11:34:15 +0200 Subject: [PATCH 06/15] refactor!: Add `DualConeProjector` (#678) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `DualConeProjector` and `QuadprogProjector` to represent the method to solve QPs in UPGrad and DualProj. QP related parameters (`reg_eps` and `norm_eps`) responsibilities are now delegated to subclasses of `DualConeProjector`. 
Co-authored-by: Valérian Rey --- CHANGELOG.md | 18 +++ docs/source/docs/linalg/dual_cone.rst | 9 ++ docs/source/docs/linalg/index.rst | 1 + src/torchjd/_linalg/__init__.py | 4 + src/torchjd/_linalg/_dual_cone.py | 121 ++++++++++++++++++ src/torchjd/aggregation/_dualproj.py | 82 +++--------- src/torchjd/aggregation/_upgrad.py | 84 +++--------- src/torchjd/aggregation/_utils/dual_cone.py | 98 -------------- src/torchjd/linalg/__init__.py | 18 ++- tests/plots/interactive_plotter.py | 5 +- tests/unit/aggregation/test_dualproj.py | 56 +++----- tests/unit/aggregation/test_pcgrad.py | 6 +- tests/unit/aggregation/test_upgrad.py | 54 +++----- .../_utils => linalg}/test_dual_cone.py | 84 +++++++++--- 14 files changed, 308 insertions(+), 332 deletions(-) create mode 100644 docs/source/docs/linalg/dual_cone.rst create mode 100644 src/torchjd/_linalg/_dual_cone.py delete mode 100644 src/torchjd/aggregation/_utils/dual_cone.py rename tests/unit/{aggregation/_utils => linalg}/test_dual_cone.py (53%) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebeeb67d..e2bd218f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,21 @@ changelog does not include internal changes that do not affect the user. ### Changed +- **BREAKING**: Removed `norm_eps`, `reg_eps` and `solver` parameters from the `__init__` of + `UPGrad`, `UPGradWeighting`, `DualProj` and `DualProjWeighting` in favor of a `projector` + parameter of type `DualConeProjector`. To update: + ```python + # Before + from torchjd.aggregation import UPGrad + aggregator = UPGrad(norm_eps=1e-6, reg_eps=1e-6, solver="quadprog") + + # After + from torchjd.aggregation import UPGrad + from torchjd.linalg import QuadprogProjector + aggregator = UPGrad(projector=QuadprogProjector(norm_eps=1e-6, reg_eps=1e-6)) + ``` + If you used the default `norm_eps`, `reg_eps` and `solver`, you don't have to change anything and + you will get the same results. 
- `CAGrad`, `CAGradWeighting`, and `NashMTL` are now always importable from `torchjd.aggregation`, even when their optional dependencies are not installed. Attempting to instantiate them without the required dependencies now raises an `ImportError` with installation instructions, instead of @@ -21,6 +36,9 @@ changelog does not include internal changes that do not affect the user. ### Added +- Added a new abstraction: the `DualConeProjector` abstract base class and its concrete + `QuadprogProjector` implementation, to do the projection of the gradients onto the dual cone, as + required in `UPGrad`, and `DualProj`. These classes can be found in `torchjd.linalg`. - Made `WeightedAggregator` and `GramianWeightedAggregator` public. These abstract base classes are now importable from `torchjd.aggregation` and documented. They can be extended to easily implement custom `Aggregator`s. diff --git a/docs/source/docs/linalg/dual_cone.rst b/docs/source/docs/linalg/dual_cone.rst new file mode 100644 index 00000000..f7db87ad --- /dev/null +++ b/docs/source/docs/linalg/dual_cone.rst @@ -0,0 +1,9 @@ +:hide-toc: + +Dual Cone Projectors +==================== + +.. autoclass:: torchjd.linalg.DualConeProjector + :members: __call__ + +.. 
autoclass:: torchjd.linalg.QuadprogProjector diff --git a/docs/source/docs/linalg/index.rst b/docs/source/docs/linalg/index.rst index 4446ccea..94fcce20 100644 --- a/docs/source/docs/linalg/index.rst +++ b/docs/source/docs/linalg/index.rst @@ -10,3 +10,4 @@ linalg matrix.rst psd_matrix.rst + dual_cone.rst diff --git a/src/torchjd/_linalg/__init__.py b/src/torchjd/_linalg/__init__.py index 29b8cd0b..fce72e8e 100644 --- a/src/torchjd/_linalg/__init__.py +++ b/src/torchjd/_linalg/__init__.py @@ -1,3 +1,4 @@ +from ._dual_cone import DualConeProjector, QuadprogProjector, projector_or_default from ._generalized_gramian import flatten, movedim, reshape from ._gramian import compute_gramian, normalize, regularize from ._matrix import Matrix, PSDMatrix, PSDTensor, is_matrix, is_psd_matrix, is_psd_tensor @@ -15,4 +16,7 @@ "flatten", "reshape", "movedim", + "DualConeProjector", + "QuadprogProjector", + "projector_or_default", ] diff --git a/src/torchjd/_linalg/_dual_cone.py b/src/torchjd/_linalg/_dual_cone.py new file mode 100644 index 00000000..d3d8fe25 --- /dev/null +++ b/src/torchjd/_linalg/_dual_cone.py @@ -0,0 +1,121 @@ +from abc import ABC, abstractmethod + +import numpy as np +import torch +from qpsolvers import solve_qp +from torch import Tensor + +from ._gramian import normalize, regularize +from ._matrix import PSDMatrix + + +class DualConeProjector(ABC): + """ + Abstract class whose instances are responsible for projecting vectors onto the dual cone of the + rows of a matrix, or rather the dual form of this problem. + """ + + @abstractmethod + def __call__(self, U: Tensor, G: PSDMatrix) -> Tensor: + r""" + Computes for each vector :math:`u` in the provided tensor ``U`` + the weights :math:`w` of the projection of :math:`J^\top u` onto the dual cone of + the rows of :math:`J`, provided :math:`G = J J^\top` and :math:`u`. In other words, this + computes the :math:`w` that satisfies :math:`\pi_J(J^\top u) = J^\top w`, with + :math:`\pi_J` defined in Equation 3 of [1]. 
+ + By Proposition 1 of [1], this is equivalent to solving for :math:`v` the following + quadratic program: + + .. math:: + + \min_{v} \quad & v^\top G v \\ + \text{subject to} \quad & u \preceq v + + Reference: + [1] `Jacobian Descent For Multi-Objective Optimization `_. + + :param U: The tensor of weights corresponding to the vectors to project, of shape + ``[..., m]``. + :param G: The Gramian matrix of shape ``[m, m]``. It must be symmetric and positive + semi-definite. + :return: A tensor of projection weights with the same shape as ``U``. + """ + + +def projector_or_default(projector: DualConeProjector | None) -> DualConeProjector: + if projector is None: + return QuadprogProjector() + return projector + + +class QuadprogProjector(DualConeProjector): + r""" + Solves the quadratic program defined in :meth:`DualConeProjector.__call__` using the + `quadprog `_ QP solver. + + :param norm_eps: A small value to avoid division by zero when normalizing. + :param reg_eps: A small value to add to the diagonal of the gramian of the matrix. Due to + numerical errors when computing the gramian, it might not exactly be positive definite. + This issue can make the optimization fail. Adding ``reg_eps`` to the diagonal of the gramian + ensures that it is positive definite. 
+ """ + + def __init__( + self, + *, + norm_eps: float = 0.0001, + reg_eps: float = 0.0001, + ) -> None: + self._norm_eps = norm_eps + self._reg_eps = reg_eps + + @property + def norm_eps(self) -> float: + return self._norm_eps + + @norm_eps.setter + def norm_eps(self, value: float) -> None: + if value < 0.0: + raise ValueError(f"norm_eps must be non-negative, but got {value}.") + self._norm_eps = value + + @property + def reg_eps(self) -> float: + return self._reg_eps + + @reg_eps.setter + def reg_eps(self, value: float) -> None: + if value < 0.0: + raise ValueError(f"reg_eps must be non-negative, but got {value}.") + self._reg_eps = value + + def __repr__(self) -> str: + return f"QuadprogProjector(norm_eps={self._norm_eps}, reg_eps={self._reg_eps})" + + def __call__(self, U: Tensor, G: PSDMatrix) -> Tensor: + + G = regularize(normalize(G, self._norm_eps), self._reg_eps) + + G_ = _to_array(G) + U_ = _to_array(U) + + W = np.apply_along_axis(lambda u: self._project_weight_vector(u, G_), axis=-1, arr=U_) + + return torch.as_tensor(W, device=G.device, dtype=G.dtype) + + def _project_weight_vector(self, u: np.ndarray, G: np.ndarray) -> np.ndarray: + + m = G.shape[0] + w = solve_qp(G, np.zeros(m), -np.eye(m), -u, solver="quadprog") + + if w is None: # This may happen when G has large values. 
+ raise ValueError("Failed to solve the quadratic programming problem.") + + return w + + +def _to_array(tensor: Tensor) -> np.ndarray: + """Transforms a tensor into a numpy array with float64 dtype.""" + + return tensor.cpu().detach().numpy().astype(np.float64) diff --git a/src/torchjd/aggregation/_dualproj.py b/src/torchjd/aggregation/_dualproj.py index f8bc973e..e839d6e8 100644 --- a/src/torchjd/aggregation/_dualproj.py +++ b/src/torchjd/aggregation/_dualproj.py @@ -1,12 +1,11 @@ from torch import Tensor -from torchjd._linalg import normalize, regularize +from torchjd._linalg import DualConeProjector, projector_or_default from torchjd.linalg import PSDMatrix from ._aggregator_bases import GramianWeightedAggregator from ._mean import MeanWeighting from ._mixins import _NonDifferentiable -from ._utils.dual_cone import SUPPORTED_SOLVER, project_weights from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting from ._weighting_bases import _GramianWeighting @@ -19,31 +18,21 @@ class DualProjWeighting(_NonDifferentiable, _GramianWeighting): :param pref_vector: The preference vector to use. If not provided, defaults to :math:`\begin{bmatrix} \frac{1}{m} & \dots & \frac{1}{m} \end{bmatrix}^T \in \mathbb{R}^m`. - :param norm_eps: A small value to avoid division by zero when normalizing. - :param reg_eps: A small value to add to the diagonal of the gramian of the matrix. Due to - numerical errors when computing the gramian, it might not exactly be positive definite. - This issue can make the optimization fail. Adding ``reg_eps`` to the diagonal of the gramian - ensures that it is positive definite. - :param solver: The solver used to optimize the underlying optimization problem. + :param projector: The :class:`~torchjd.linalg.DualConeProjector` used to compute the projection. 
""" def __init__( self, pref_vector: Tensor | None = None, - norm_eps: float = 0.0001, - reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "proxsuite", + projector: DualConeProjector | None = None, ) -> None: super().__init__() self.pref_vector = pref_vector - self.norm_eps = norm_eps - self.reg_eps = reg_eps - self.solver: SUPPORTED_SOLVER = solver + self.projector = projector_or_default(projector) def forward(self, gramian: PSDMatrix, /) -> Tensor: u = self.weighting(gramian) - G = regularize(normalize(gramian, self.norm_eps), self.reg_eps) - w = project_weights(u, G, self.solver) + w = self.projector(u, gramian) return w @property @@ -56,26 +45,12 @@ def pref_vector(self, value: Tensor | None) -> None: self._pref_vector = value @property - def norm_eps(self) -> float: - return self._norm_eps + def projector(self) -> DualConeProjector: + return self._projector - @norm_eps.setter - def norm_eps(self, value: float) -> None: - if value < 0: - raise ValueError(f"norm_eps must be non-negative, but got {value}.") - - self._norm_eps = value - - @property - def reg_eps(self) -> float: - return self._reg_eps - - @reg_eps.setter - def reg_eps(self, value: float) -> None: - if value < 0: - raise ValueError(f"reg_eps must be non-negative, but got {value}.") - - self._reg_eps = value + @projector.setter + def projector(self, value: DualConeProjector | None) -> None: + self._projector = projector_or_default(value) class DualProj(_NonDifferentiable, GramianWeightedAggregator): @@ -87,12 +62,7 @@ class DualProj(_NonDifferentiable, GramianWeightedAggregator): :param pref_vector: The preference vector used to combine the rows. If not provided, defaults to :math:`\begin{bmatrix} \frac{1}{m} & \dots & \frac{1}{m} \end{bmatrix}^T \in \mathbb{R}^m`. - :param norm_eps: A small value to avoid division by zero when normalizing. - :param reg_eps: A small value to add to the diagonal of the gramian of the matrix. 
Due to - numerical errors when computing the gramian, it might not exactly be positive definite. - This issue can make the optimization fail. Adding ``reg_eps`` to the diagonal of the gramian - ensures that it is positive definite. - :param solver: The solver used to optimize the underlying optimization problem. + :param projector: The :class:`~torchjd.linalg.DualConeProjector` used to compute the projection. """ gramian_weighting: DualProjWeighting @@ -100,14 +70,10 @@ class DualProj(_NonDifferentiable, GramianWeightedAggregator): def __init__( self, pref_vector: Tensor | None = None, - norm_eps: float = 0.0001, - reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "proxsuite", + projector: DualConeProjector | None = None, ) -> None: - self._solver: SUPPORTED_SOLVER = solver - super().__init__( - DualProjWeighting(pref_vector, norm_eps=norm_eps, reg_eps=reg_eps, solver=solver), + DualProjWeighting(pref_vector, projector=projector), ) @property @@ -119,25 +85,17 @@ def pref_vector(self, value: Tensor | None) -> None: self.gramian_weighting.pref_vector = value @property - def norm_eps(self) -> float: - return self.gramian_weighting.norm_eps - - @norm_eps.setter - def norm_eps(self, value: float) -> None: - self.gramian_weighting.norm_eps = value - - @property - def reg_eps(self) -> float: - return self.gramian_weighting.reg_eps + def projector(self) -> DualConeProjector: + return self.gramian_weighting.projector - @reg_eps.setter - def reg_eps(self, value: float) -> None: - self.gramian_weighting.reg_eps = value + @projector.setter + def projector(self, value: DualConeProjector | None) -> None: + self.gramian_weighting.projector = value def __repr__(self) -> str: return ( - f"{self.__class__.__name__}(pref_vector={repr(self.pref_vector)}, norm_eps=" - f"{self.norm_eps}, reg_eps={self.reg_eps}, solver={repr(self._solver)})" + f"{self.__class__.__name__}(pref_vector={repr(self.pref_vector)}, projector=" + f"{repr(self.projector)})" ) def __str__(self) -> str: diff 
--git a/src/torchjd/aggregation/_upgrad.py b/src/torchjd/aggregation/_upgrad.py index 5e11d320..691232eb 100644 --- a/src/torchjd/aggregation/_upgrad.py +++ b/src/torchjd/aggregation/_upgrad.py @@ -1,13 +1,12 @@ import torch from torch import Tensor -from torchjd._linalg import normalize, regularize +from torchjd._linalg import DualConeProjector, projector_or_default from torchjd.linalg import PSDMatrix from ._aggregator_bases import GramianWeightedAggregator from ._mean import MeanWeighting from ._mixins import _NonDifferentiable -from ._utils.dual_cone import SUPPORTED_SOLVER, project_weights from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting from ._weighting_bases import _GramianWeighting @@ -20,31 +19,21 @@ class UPGradWeighting(_NonDifferentiable, _GramianWeighting): :param pref_vector: The preference vector to use. If not provided, defaults to :math:`\begin{bmatrix} \frac{1}{m} & \dots & \frac{1}{m} \end{bmatrix}^T \in \mathbb{R}^m`. - :param norm_eps: A small value to avoid division by zero when normalizing. - :param reg_eps: A small value to add to the diagonal of the gramian of the matrix. Due to - numerical errors when computing the gramian, it might not exactly be positive definite. - This issue can make the optimization fail. Adding ``reg_eps`` to the diagonal of the gramian - ensures that it is positive definite. - :param solver: The solver used to optimize the underlying optimization problem. + :param projector: The :class:`~torchjd.linalg.DualConeProjector` used to compute the projection. 
""" def __init__( self, pref_vector: Tensor | None = None, - norm_eps: float = 0.0001, - reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "proxsuite", + projector: DualConeProjector | None = None, ) -> None: super().__init__() self.pref_vector = pref_vector - self.norm_eps = norm_eps - self.reg_eps = reg_eps - self.solver: SUPPORTED_SOLVER = solver + self.projector = projector_or_default(projector) def forward(self, gramian: PSDMatrix, /) -> Tensor: U = torch.diag(self.weighting(gramian)) - G = regularize(normalize(gramian, self.norm_eps), self.reg_eps) - W = project_weights(U, G, self.solver) + W = self.projector(U, gramian) return torch.sum(W, dim=0) @property @@ -57,28 +46,12 @@ def pref_vector(self, value: Tensor | None) -> None: self._pref_vector = value @property - def norm_eps(self) -> float: - return self._norm_eps + def projector(self) -> DualConeProjector: + return self._projector - @norm_eps.setter - def norm_eps(self, value: float) -> None: - - if value < 0: - raise ValueError(f"norm_eps must be non-negative, but got {value}.") - - self._norm_eps = value - - @property - def reg_eps(self) -> float: - return self._reg_eps - - @reg_eps.setter - def reg_eps(self, value: float) -> None: - - if value < 0: - raise ValueError(f"reg_eps must be non-negative, but got {value}.") - - self._reg_eps = value + @projector.setter + def projector(self, value: DualConeProjector | None) -> None: + self._projector = projector_or_default(value) class UPGrad(_NonDifferentiable, GramianWeightedAggregator): @@ -90,12 +63,7 @@ class UPGrad(_NonDifferentiable, GramianWeightedAggregator): :param pref_vector: The preference vector used to combine the projected rows. If not provided, defaults to :math:`\begin{bmatrix} \frac{1}{m} & \dots & \frac{1}{m} \end{bmatrix}^T \in \mathbb{R}^m`. - :param norm_eps: A small value to avoid division by zero when normalizing. - :param reg_eps: A small value to add to the diagonal of the gramian of the matrix. 
Due to - numerical errors when computing the gramian, it might not exactly be positive definite. - This issue can make the optimization fail. Adding ``reg_eps`` to the diagonal of the gramian - ensures that it is positive definite. - :param solver: The solver used to optimize the underlying optimization problem. + :param projector: The :class:`~torchjd.linalg.DualConeProjector` used to compute the projection. """ gramian_weighting: UPGradWeighting @@ -103,14 +71,10 @@ class UPGrad(_NonDifferentiable, GramianWeightedAggregator): def __init__( self, pref_vector: Tensor | None = None, - norm_eps: float = 0.0001, - reg_eps: float = 0.0001, - solver: SUPPORTED_SOLVER = "proxsuite", + projector: DualConeProjector | None = None, ) -> None: - self._solver: SUPPORTED_SOLVER = solver - super().__init__( - UPGradWeighting(pref_vector, norm_eps=norm_eps, reg_eps=reg_eps, solver=solver), + UPGradWeighting(pref_vector, projector=projector), ) @property @@ -122,25 +86,17 @@ def pref_vector(self, value: Tensor | None) -> None: self.gramian_weighting.pref_vector = value @property - def norm_eps(self) -> float: - return self.gramian_weighting.norm_eps - - @norm_eps.setter - def norm_eps(self, value: float) -> None: - self.gramian_weighting.norm_eps = value - - @property - def reg_eps(self) -> float: - return self.gramian_weighting.reg_eps + def projector(self) -> DualConeProjector: + return self.gramian_weighting.projector - @reg_eps.setter - def reg_eps(self, value: float) -> None: - self.gramian_weighting.reg_eps = value + @projector.setter + def projector(self, value: DualConeProjector | None) -> None: + self.gramian_weighting.projector = value def __repr__(self) -> str: return ( - f"{self.__class__.__name__}(pref_vector={repr(self.pref_vector)}, norm_eps=" - f"{self.norm_eps}, reg_eps={self.reg_eps}, solver={repr(self._solver)})" + f"{self.__class__.__name__}(pref_vector={repr(self.pref_vector)}, projector=" + f"{repr(self.projector)})" ) def __str__(self) -> str: diff --git 
a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py deleted file mode 100644 index ae25bdbd..00000000 --- a/src/torchjd/aggregation/_utils/dual_cone.py +++ /dev/null @@ -1,98 +0,0 @@ -import os -from typing import Literal, TypeAlias - -import numpy as np -import torch -from proxsuite import proxqp -from torch import Tensor - -SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"] - - -def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor: - """ - Computes the tensor of weights corresponding to the projection of the vectors in `U` onto the - rows of a matrix whose Gramian is provided. - - :param U: The tensor of weights corresponding to the vectors to project, of shape `[..., m]`. - :param G: The Gramian matrix of shape `[m, m]`. It must be symmetric and positive definite. - :param solver: The quadratic programming solver to use. - :return: A tensor of projection weights with the same shape as `U`. - """ - - original_shape = U.shape - m = G.shape[0] - U_flat = U.reshape(-1, m) # [nBatch, m] - - W = _project_weight_vector_batch(U_flat, G, solver) - - return W.reshape(original_shape) - - -# TODO: should merge docstrings appropriately - -# def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray: -# r""" -# Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`, -# given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies -# `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1]. - -# By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program: -# minimize v^T G v -# subject to u \preceq v - -# Reference: -# [1] `Jacobian Descent For Multi-Objective Optimization `_. - -# :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to -# project. -# :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. 
It must be -# symmetric and positive definite. -# :param solver: The quadratic programming solver to use. -# """ -# ... - - -@torch.no_grad() -def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER) -> Tensor: - r""" - Solves the batch of quadratic programs minimizing `v^T G v` subject to `u_i \preceq v_i` for - each row `u_i` of `U`. - - :param U: The tensor of vectors `u_i` of shape `[n, m]`. - :param G: The Gramian matrix of shape `[m, m]`. It must be symmetric and positive definite. - :param solver: The quadratic programming solver to use. - :return: A tensor of projection weights of shape `[n, m]`. - """ - - n, m = U.shape - device = U.device - dtype = U.dtype - - Q_np = G.cpu().to(dtype=torch.float64).numpy() - p_np = np.zeros(m, dtype=np.float64) - C_np = -np.eye(m, dtype=np.float64) - lb_np = np.full(m, -1e20, dtype=np.float64) - ub_np = (-U.cpu().to(dtype=torch.float64)).numpy() - - batch_qps = proxqp.dense.BatchQP() - default_rho = 5.0e-5 - - for i in range(n): - qp = batch_qps.init_qp_in_place(m, 0, m) - qp.settings.primal_infeasibility_solving = False - qp.settings.max_iter = 1000 - qp.settings.max_iter_in = 100 - qp.settings.default_rho = default_rho - qp.settings.refactor_rho_threshold = default_rho - qp.settings.eps_abs = 1e-9 - qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho) - - num_threads = max(1, (os.cpu_count() or 2) // 2) - proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps) - - zhats_np = np.empty((n, m), dtype=np.float64) - for i in range(n): - zhats_np[i] = batch_qps.get(i).results.x - - return torch.from_numpy(zhats_np).to(device=device, dtype=dtype) diff --git a/src/torchjd/linalg/__init__.py b/src/torchjd/linalg/__init__.py index f8238104..15476b73 100644 --- a/src/torchjd/linalg/__init__.py +++ b/src/torchjd/linalg/__init__.py @@ -1,8 +1,18 @@ """ -This module provides type annotation classes representing tensors with specific structural -properties. 
+This module provides linear algebra utilities as well as types to represent specific +structural properties. """ -from torchjd._linalg._matrix import Matrix, PSDMatrix +from torchjd._linalg import ( + DualConeProjector, + Matrix, + PSDMatrix, + QuadprogProjector, +) -__all__ = ["Matrix", "PSDMatrix"] +__all__ = [ + "DualConeProjector", + "Matrix", + "PSDMatrix", + "QuadprogProjector", +] diff --git a/tests/plots/interactive_plotter.py b/tests/plots/interactive_plotter.py index 1c2b6240..c8b3871a 100644 --- a/tests/plots/interactive_plotter.py +++ b/tests/plots/interactive_plotter.py @@ -11,6 +11,7 @@ from typing_extensions import Unpack from plots._utils import Plotter, angle_to_coord, coord_to_angle +from torchjd._linalg import QuadprogProjector from torchjd.aggregation import ( IMTLG, MGDA, @@ -61,7 +62,7 @@ def main() -> None: "AlignedMTL-RMSE": lambda: AlignedMTL(scale_mode="rmse"), str(CAGrad(c=0.5)): lambda: CAGrad(c=0.5), str(ConFIG()): lambda: ConFIG(), - str(DualProj()): lambda: DualProj(reg_eps=1e-7), + str(DualProj()): lambda: DualProj(projector=QuadprogProjector(reg_eps=1e-7)), str(GradDrop()): lambda: GradDrop(), str(GradVac()): lambda: GradVac(), str(IMTLG()): lambda: IMTLG(), @@ -72,7 +73,7 @@ def main() -> None: str(Random()): lambda: Random(), str(Sum()): lambda: Sum(), str(TrimmedMean(trim_number=1)): lambda: TrimmedMean(trim_number=1), - str(UPGrad()): lambda: UPGrad(reg_eps=1e-7), + str(UPGrad()): lambda: UPGrad(projector=QuadprogProjector(reg_eps=1e-7)), } aggregator_strings = list(aggregator_factories.keys()) diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py index c8bea16e..190eaaeb 100644 --- a/tests/unit/aggregation/test_dualproj.py +++ b/tests/unit/aggregation/test_dualproj.py @@ -1,10 +1,10 @@ import torch -from pytest import mark, raises +from pytest import mark from torch import Tensor from utils.tensors import ones_ +from torchjd._linalg import QuadprogProjector from
torchjd.aggregation import ConstantWeighting, DualProj -from torchjd.aggregation._dualproj import DualProjWeighting from ._asserts import ( assert_expected_structure, @@ -47,21 +47,20 @@ def test_non_differentiable(aggregator: DualProj, matrix: Tensor) -> None: def test_representations() -> None: - A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite") + A = DualProj(pref_vector=None, projector=QuadprogProjector(norm_eps=0.001, reg_eps=0.01)) assert ( - repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')" + repr(A) == "DualProj(pref_vector=None, projector=QuadprogProjector(norm_eps=0.001, " + "reg_eps=0.01))" ) assert str(A) == "DualProj" A = DualProj( pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"), - norm_eps=0.0001, - reg_eps=0.0001, - solver="proxsuite", + projector=QuadprogProjector(norm_eps=0.001, reg_eps=0.01), ) assert ( - repr(A) == "DualProj(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, " - "solver='proxsuite')" + repr(A) == "DualProj(pref_vector=tensor([1., 2., 3.]), projector=QuadprogProjector(" + "norm_eps=0.001, reg_eps=0.01))" ) assert str(A) == "DualProj([1., 2., 3.])" @@ -75,39 +74,14 @@ def test_pref_vector_setter_updates_value() -> None: assert A.gramian_weighting.weighting.weights is new_pref -def test_norm_eps_setter_updates_value() -> None: +def test_projector_getter_returns_default() -> None: A = DualProj() - A.norm_eps = 0.25 - assert A.norm_eps == 0.25 - assert A.gramian_weighting.norm_eps == 0.25 + assert isinstance(A.projector, QuadprogProjector) -def test_reg_eps_setter_updates_value() -> None: +def test_projector_setter_updates_value() -> None: A = DualProj() - A.reg_eps = 0.25 - assert A.reg_eps == 0.25 - assert A.gramian_weighting.reg_eps == 0.25 - - -def test_norm_eps_setter_rejects_negative() -> None: - A = DualProj() - with raises(ValueError, match="norm_eps"): - A.norm_eps = -1e-9 - - -def test_reg_eps_setter_rejects_negative() -> 
None: - A = DualProj() - with raises(ValueError, match="reg_eps"): - A.reg_eps = -1e-9 - - -def test_weighting_norm_eps_setter_rejects_negative() -> None: - W = DualProjWeighting() - with raises(ValueError, match="norm_eps"): - W.norm_eps = -1e-9 - - -def test_weighting_reg_eps_setter_rejects_negative() -> None: - W = DualProjWeighting() - with raises(ValueError, match="reg_eps"): - W.reg_eps = -1e-9 + new_projector = QuadprogProjector(norm_eps=0.001, reg_eps=0.01) + A.projector = new_projector + assert A.projector is new_projector + assert A.gramian_weighting.projector is new_projector diff --git a/tests/unit/aggregation/test_pcgrad.py b/tests/unit/aggregation/test_pcgrad.py index b9c4cf63..6d22359f 100644 --- a/tests/unit/aggregation/test_pcgrad.py +++ b/tests/unit/aggregation/test_pcgrad.py @@ -3,7 +3,7 @@ from torch.testing import assert_close from utils.tensors import ones_, randn_ -from torchjd._linalg import compute_gramian +from torchjd._linalg import QuadprogProjector, compute_gramian from torchjd.aggregation import PCGrad from torchjd.aggregation._pcgrad import PCGradWeighting from torchjd.aggregation._upgrad import UPGradWeighting @@ -53,9 +53,7 @@ def test_equivalence_upgrad_sum_two_rows(shape: tuple[int, int]) -> None: pc_grad_weighting = PCGradWeighting() upgrad_sum_weighting = UPGradWeighting( ones_((2,)), - norm_eps=0.0, - reg_eps=0.0, - solver="proxsuite", + projector=QuadprogProjector(norm_eps=0.0, reg_eps=0.0), ) result = pc_grad_weighting(gramian) diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py index b2f7df4e..b639763c 100644 --- a/tests/unit/aggregation/test_upgrad.py +++ b/tests/unit/aggregation/test_upgrad.py @@ -1,10 +1,10 @@ import torch -from pytest import mark, raises +from pytest import mark from torch import Tensor from utils.tensors import ones_ +from torchjd._linalg import QuadprogProjector from torchjd.aggregation import ConstantWeighting, UPGrad -from torchjd.aggregation._upgrad import 
UPGradWeighting from ._asserts import ( assert_expected_structure, @@ -53,21 +53,20 @@ def test_non_differentiable(aggregator: UPGrad, matrix: Tensor) -> None: def test_representations() -> None: - A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite") + A = UPGrad(pref_vector=None, projector=QuadprogProjector(norm_eps=0.001, reg_eps=0.01)) assert ( - repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')" + repr(A) == "UPGrad(pref_vector=None, projector=QuadprogProjector(norm_eps=0.001, " + "reg_eps=0.01))" ) assert str(A) == "UPGrad" A = UPGrad( pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"), - norm_eps=0.0001, - reg_eps=0.0001, - solver="proxsuite", + projector=QuadprogProjector(norm_eps=0.001, reg_eps=0.01), ) assert ( - repr(A) == "UPGrad(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, " - "solver='proxsuite')" + repr(A) == "UPGrad(pref_vector=tensor([1., 2., 3.]), projector=QuadprogProjector(" + "norm_eps=0.001, reg_eps=0.01))" ) assert str(A) == "UPGrad([1., 2., 3.])" @@ -81,37 +80,14 @@ def test_pref_vector_setter_updates_value() -> None: assert A.gramian_weighting.weighting.weights is new_pref -def test_norm_eps_setter_updates_value() -> None: +def test_projector_getter_returns_default() -> None: A = UPGrad() - A.norm_eps = 0.25 - assert A.norm_eps == 0.25 + assert isinstance(A.projector, QuadprogProjector) -def test_reg_eps_setter_updates_value() -> None: +def test_projector_setter_updates_value() -> None: A = UPGrad() - A.reg_eps = 0.25 - assert A.reg_eps == 0.25 - - -def test_norm_eps_setter_rejects_negative() -> None: - A = UPGrad() - with raises(ValueError, match="norm_eps"): - A.norm_eps = -1e-9 - - -def test_reg_eps_setter_rejects_negative() -> None: - A = UPGrad() - with raises(ValueError, match="reg_eps"): - A.reg_eps = -1e-9 - - -def test_weighting_norm_eps_setter_rejects_negative() -> None: - W = UPGradWeighting() - with raises(ValueError, 
match="norm_eps"): - W.norm_eps = -1e-9 - - -def test_weighting_reg_eps_setter_rejects_negative() -> None: - W = UPGradWeighting() - with raises(ValueError, match="reg_eps"): - W.reg_eps = -1e-9 + new_projector = QuadprogProjector(norm_eps=0.001, reg_eps=0.01) + A.projector = new_projector + assert A.projector is new_projector + assert A.gramian_weighting.projector is new_projector diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/linalg/test_dual_cone.py similarity index 53% rename from tests/unit/aggregation/_utils/test_dual_cone.py rename to tests/unit/linalg/test_dual_cone.py index 0fa3e8a0..35d91ed0 100644 --- a/tests/unit/aggregation/_utils/test_dual_cone.py +++ b/tests/unit/linalg/test_dual_cone.py @@ -1,13 +1,17 @@ +from typing import cast + +import numpy as np import torch from pytest import mark from torch.testing import assert_close from utils.tensors import rand_, randn_ -from torchjd.aggregation._utils.dual_cone import project_weights +from torchjd._linalg import DualConeProjector, PSDMatrix, QuadprogProjector, compute_gramian +@mark.parametrize("projector", [QuadprogProjector(reg_eps=0.0, norm_eps=0.0)]) @mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)]) -def test_solution_weights(shape: tuple[int, int]) -> None: +def test_solution_weights(projector: DualConeProjector, shape: tuple[int, int]) -> None: r""" Tests that `_project_weights` returns valid weights corresponding to the projection onto the dual cone of a matrix with the specified shape. 
@@ -30,10 +34,10 @@ def test_solution_weights(shape: tuple[int, int]) -> None: """ J = randn_(shape) - G = J @ J.T + G = compute_gramian(J) u = rand_(shape[0]) - w = project_weights(u, G, "proxsuite") + w = projector(u, G) dual_gap = w - u # Dual feasibility @@ -51,25 +55,30 @@ def test_solution_weights(shape: tuple[int, int]) -> None: assert_close(slackness, torch.zeros_like(slackness), atol=3e-03, rtol=0) +@mark.parametrize("projector", [QuadprogProjector(reg_eps=0.0, norm_eps=0.0)]) @mark.parametrize("shape", [(5, 7), (9, 37), (32, 114)]) @mark.parametrize("scaling", [2 ** (-4), 2 ** (-2), 2**2, 2**4]) -def test_scale_invariant(shape: tuple[int, int], scaling: float) -> None: +def test_scale_invariant( + projector: DualConeProjector, shape: tuple[int, int], scaling: float +) -> None: """ Tests that `_project_weights` is invariant under scaling. """ J = randn_(shape) - G = J @ J.T + G = compute_gramian(J) + scaled_G = cast(PSDMatrix, scaling * G) u = rand_(shape[0]) - w = project_weights(u, G, "proxsuite") - w_scaled = project_weights(u, scaling * G, "proxsuite") + w = projector(u, G) + w_scaled = projector(u, scaled_G) assert_close(w_scaled, w) +@mark.parametrize("projector", [QuadprogProjector(reg_eps=0.0, norm_eps=0.0)]) @mark.parametrize("shape", [(5, 2, 3), (1, 3, 6, 9), (2, 1, 1, 5, 8), (3, 1)]) -def test_tensorization_shape(shape: tuple[int, ...]) -> None: +def test_tensorization_shape(projector: DualConeProjector, shape: tuple[int, ...]) -> None: """ Tests that applying `_project_weights` on a tensor is equivalent to applying it on the tensor reshaped as matrix and to reshape the result back to the original tensor's shape. 
@@ -79,18 +88,57 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None: U_tensor = randn_(shape) U_matrix = U_tensor.reshape([-1, shape[-1]]) - G = matrix @ matrix.T + G = compute_gramian(matrix) - W_tensor = project_weights(U_tensor, G, "proxsuite") - W_matrix = project_weights(U_matrix, G, "proxsuite") + W_tensor = projector(U_tensor, G) + W_matrix = projector(U_matrix, G) assert_close(W_matrix.reshape(shape), W_tensor) -# def test_project_weight_vector_failure() -> None: -# """Tests that `_project_weight_vector` raises an error when the input G has too large values.""" +def test_norm_eps_default() -> None: + projector = QuadprogProjector() + assert projector.norm_eps == 0.0001 + + +def test_norm_eps_setter_updates_value() -> None: + projector = QuadprogProjector() + projector.norm_eps = 0.25 + assert projector.norm_eps == 0.25 + + +def test_norm_eps_setter_rejects_negative() -> None: + projector = QuadprogProjector() + with raises(ValueError, match="norm_eps"): + projector.norm_eps = -1e-9 + + +def test_reg_eps_default() -> None: + projector = QuadprogProjector() + assert projector.reg_eps == 0.0001 + + +def test_reg_eps_setter_updates_value() -> None: + projector = QuadprogProjector() + projector.reg_eps = 0.25 + assert projector.reg_eps == 0.25 + + +def test_reg_eps_setter_rejects_negative() -> None: + projector = QuadprogProjector() + with raises(ValueError, match="reg_eps"): + projector.reg_eps = -1e-9 + + +def test_qp_solver_based_failure() -> None: + """ + Tests that `QPSolverBased._project_weight_vector` raises an error when the input G has too large + values. 
+ """ + + projector = QuadprogProjector() -# large_J = np.random.randn(10, 100) * 1e5 -# large_G = large_J @ large_J.T -# with raises(ValueError): -# _project_weight_vector(np.ones(10), large_G, "proxsuite") + large_J = np.random.randn(10, 100) * 1e5 + large_G = large_J @ large_J.T + with raises(ValueError): + projector._project_weight_vector(np.ones(10), large_G) From 5b37228d49ba0d6ccbbaf2b8234a346581e1eb03 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 15 May 2026 13:51:45 +0200 Subject: [PATCH 07/15] expose ProxsuiteProjector documentation. --- docs/source/docs/linalg/dual_cone.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/docs/linalg/dual_cone.rst b/docs/source/docs/linalg/dual_cone.rst index f7db87ad..b4cf602e 100644 --- a/docs/source/docs/linalg/dual_cone.rst +++ b/docs/source/docs/linalg/dual_cone.rst @@ -7,3 +7,5 @@ Dual Cone Projectors :members: __call__ .. autoclass:: torchjd.linalg.QuadprogProjector + +.. autoclass:: torchjd.linalg.ProxsuiteProjector From d546f9191b6479a02a89baccc2f02aa2e692fbb9 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 15 May 2026 14:08:21 +0200 Subject: [PATCH 08/15] test UPGrad and DualProj with ProxsuiteProjector --- tests/unit/aggregation/test_dualproj.py | 26 ++++++++++++++++++++----- tests/unit/aggregation/test_upgrad.py | 22 ++++++++++++++++----- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py index 190eaaeb..cb3db432 100644 --- a/tests/unit/aggregation/test_dualproj.py +++ b/tests/unit/aggregation/test_dualproj.py @@ -3,7 +3,7 @@ from torch import Tensor from utils.tensors import ones_ -from torchjd._linalg import QuadprogProjector +from torchjd._linalg import ProxsuiteProjector, QuadprogProjector from torchjd.aggregation import ConstantWeighting, DualProj from ._asserts import ( @@ -15,10 +15,26 @@ ) from ._inputs import non_strong_matrices, scaled_matrices, typical_matrices 
-scaled_pairs = [(DualProj(), matrix) for matrix in scaled_matrices] -typical_pairs = [(DualProj(), matrix) for matrix in typical_matrices] -non_strong_pairs = [(DualProj(), matrix) for matrix in non_strong_matrices] -requires_grad_pairs = [(DualProj(), ones_(3, 5, requires_grad=True))] +projectors = [QuadprogProjector(), ProxsuiteProjector()] + +scaled_pairs = [ + (DualProj(projector=projector), matrix) + for matrix in scaled_matrices + for projector in projectors +] +typical_pairs = [ + (DualProj(projector=projector), matrix) + for matrix in typical_matrices + for projector in projectors +] +non_strong_pairs = [ + (DualProj(projector=projector), matrix) + for matrix in non_strong_matrices + for projector in projectors +] +requires_grad_pairs = [ + (DualProj(projector=projector), ones_(3, 5, requires_grad=True)) for projector in projectors +] @mark.parametrize(["aggregator", "matrix"], scaled_pairs + typical_pairs) diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py index b639763c..261fb717 100644 --- a/tests/unit/aggregation/test_upgrad.py +++ b/tests/unit/aggregation/test_upgrad.py @@ -3,7 +3,7 @@ from torch import Tensor from utils.tensors import ones_ -from torchjd._linalg import QuadprogProjector +from torchjd._linalg import ProxsuiteProjector, QuadprogProjector from torchjd.aggregation import ConstantWeighting, UPGrad from ._asserts import ( @@ -16,10 +16,22 @@ ) from ._inputs import non_strong_matrices, scaled_matrices, typical_matrices -scaled_pairs = [(UPGrad(), matrix) for matrix in scaled_matrices] -typical_pairs = [(UPGrad(), matrix) for matrix in typical_matrices] -non_strong_pairs = [(UPGrad(), matrix) for matrix in non_strong_matrices] -requires_grad_pairs = [(UPGrad(), ones_(3, 5, requires_grad=True))] +projectors = [QuadprogProjector(), ProxsuiteProjector()] + +scaled_pairs = [ + (UPGrad(projector=projector), matrix) for matrix in scaled_matrices for projector in projectors +] +typical_pairs = [ + 
(UPGrad(projector=projector), matrix) for matrix in typical_matrices for projector in projectors +] +non_strong_pairs = [ + (UPGrad(projector=projector), matrix) + for matrix in non_strong_matrices + for projector in projectors +] +requires_grad_pairs = [ + (UPGrad(projector=projector), ones_(3, 5, requires_grad=True)) for projector in projectors +] @mark.parametrize(["aggregator", "matrix"], scaled_pairs + typical_pairs) From 3d866fd9315ad5b3084dcaa735199afe7d35a23a Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 15 May 2026 14:20:31 +0200 Subject: [PATCH 09/15] Add back ProxsuiteProjector --- src/torchjd/_linalg/__init__.py | 8 +++- src/torchjd/_linalg/_dual_cone.py | 59 +++++++++++++++++++++++++++++ src/torchjd/linalg/__init__.py | 2 + tests/unit/linalg/test_dual_cone.py | 14 +++++-- 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/src/torchjd/_linalg/__init__.py b/src/torchjd/_linalg/__init__.py index fce72e8e..b34b39f8 100644 --- a/src/torchjd/_linalg/__init__.py +++ b/src/torchjd/_linalg/__init__.py @@ -1,4 +1,9 @@ -from ._dual_cone import DualConeProjector, QuadprogProjector, projector_or_default +from ._dual_cone import ( + DualConeProjector, + ProxsuiteProjector, + QuadprogProjector, + projector_or_default, +) from ._generalized_gramian import flatten, movedim, reshape from ._gramian import compute_gramian, normalize, regularize from ._matrix import Matrix, PSDMatrix, PSDTensor, is_matrix, is_psd_matrix, is_psd_tensor @@ -18,5 +23,6 @@ "movedim", "DualConeProjector", "QuadprogProjector", + "ProxsuiteProjector", "projector_or_default", ] diff --git a/src/torchjd/_linalg/_dual_cone.py b/src/torchjd/_linalg/_dual_cone.py index d3d8fe25..f86a53e8 100644 --- a/src/torchjd/_linalg/_dual_cone.py +++ b/src/torchjd/_linalg/_dual_cone.py @@ -1,7 +1,9 @@ +import os from abc import ABC, abstractmethod import numpy as np import torch +from proxsuite import proxqp from qpsolvers import solve_qp from torch import Tensor @@ -115,6 +117,63 @@ def 
_project_weight_vector(self, u: np.ndarray, G: np.ndarray) -> np.ndarray: return w +class ProxsuiteProjector(DualConeProjector): + r""" + Solves the quadratic program defined in :meth:`DualConeProjector.__call__` using the + `proxsuite <https://github.com/Simple-Robotics/proxsuite>`_ QP solver. + """ + + def __init__(self) -> None: + pass + + def __repr__(self) -> str: + return "ProxsuiteProjector()" + + def __call__(self, U: Tensor, G: PSDMatrix) -> Tensor: + original_shape = U.shape + m = G.shape[0] + U_flat = U.reshape(-1, m) # [nBatch, m] + + W = self._project_weight_vector_batch(U_flat, G) + + return W.reshape(original_shape) + + @torch.no_grad() + def _project_weight_vector_batch(self, U: Tensor, G: Tensor) -> Tensor: + + n, m = U.shape + device = U.device + dtype = U.dtype + + Q_np = _to_array(G) + p_np = np.zeros(m, dtype=np.float64) + C_np = -np.eye(m, dtype=np.float64) + lb_np = np.full(m, -1e20, dtype=np.float64) + ub_np = _to_array(U) + + batch_qps = proxqp.dense.BatchQP() + default_rho = 5.0e-5 + + for i in range(n): + qp = batch_qps.init_qp_in_place(m, 0, m) + qp.settings.primal_infeasibility_solving = False + qp.settings.max_iter = 1000 + qp.settings.max_iter_in = 100 + qp.settings.default_rho = default_rho + qp.settings.refactor_rho_threshold = default_rho + qp.settings.eps_abs = 1e-9 + qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=-ub_np[i], rho=default_rho) + + num_threads = max(1, (os.cpu_count() or 2) // 2) + proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps) + + zhats_np = np.empty((n, m), dtype=np.float64) + for i in range(n): + zhats_np[i] = batch_qps.get(i).results.x + + return torch.from_numpy(zhats_np).to(device=device, dtype=dtype) + + def _to_array(tensor: Tensor) -> np.ndarray: """Transforms a tensor into a numpy array with float64 dtype.""" diff --git a/src/torchjd/linalg/__init__.py b/src/torchjd/linalg/__init__.py index 15476b73..aef297f6 100644 --- a/src/torchjd/linalg/__init__.py +++ b/src/torchjd/linalg/__init__.py @@ -6,6 +6,7 @@ from
torchjd._linalg import ( DualConeProjector, Matrix, + ProxsuiteProjector, PSDMatrix, QuadprogProjector, ) @@ -15,4 +16,5 @@ "Matrix", "PSDMatrix", "QuadprogProjector", + "ProxsuiteProjector", ] diff --git a/tests/unit/linalg/test_dual_cone.py b/tests/unit/linalg/test_dual_cone.py index 35d91ed0..2799024a 100644 --- a/tests/unit/linalg/test_dual_cone.py +++ b/tests/unit/linalg/test_dual_cone.py @@ -2,14 +2,22 @@ import numpy as np import torch -from pytest import mark +from pytest import mark, raises from torch.testing import assert_close from utils.tensors import rand_, randn_ -from torchjd._linalg import DualConeProjector, PSDMatrix, QuadprogProjector, compute_gramian +from torchjd._linalg import ( + DualConeProjector, + ProxsuiteProjector, + PSDMatrix, + QuadprogProjector, + compute_gramian, +) +projectors = [QuadprogProjector(reg_eps=0.0, norm_eps=0.0), ProxsuiteProjector()] -@mark.parametrize("projector", [QuadprogProjector(reg_eps=0.0, norm_eps=0.0)]) + +@mark.parametrize("projector", projectors) @mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)]) def test_solution_weights(projector: DualConeProjector, shape: tuple[int, int]) -> None: r""" From 8f2bb914858921e137d8b684b6fbfd1ac467f9fc Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 15 May 2026 14:35:12 +0200 Subject: [PATCH 10/15] Make `_project_weight_vector_batch` handle only numpy arrays. 
--- src/torchjd/_linalg/_dual_cone.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/torchjd/_linalg/_dual_cone.py b/src/torchjd/_linalg/_dual_cone.py index f86a53e8..eaaf8f8b 100644 --- a/src/torchjd/_linalg/_dual_cone.py +++ b/src/torchjd/_linalg/_dual_cone.py @@ -132,24 +132,23 @@ def __repr__(self) -> str: def __call__(self, U: Tensor, G: PSDMatrix) -> Tensor: original_shape = U.shape m = G.shape[0] - U_flat = U.reshape(-1, m) # [nBatch, m] + G_ = _to_array(G) + U_flat = _to_array(U.reshape(-1, m)) # [nBatch, m] - W = self._project_weight_vector_batch(U_flat, G) + W = self._project_weight_vector_batch(U_flat, G_) - return W.reshape(original_shape) + return torch.as_tensor(W, device=G.device, dtype=G.dtype).reshape(original_shape) @torch.no_grad() - def _project_weight_vector_batch(self, U: Tensor, G: Tensor) -> Tensor: + def _project_weight_vector_batch(self, U: np.ndarray, G: np.ndarray) -> np.ndarray: n, m = U.shape - device = U.device - dtype = U.dtype - Q_np = _to_array(G) + Q_np = G p_np = np.zeros(m, dtype=np.float64) C_np = -np.eye(m, dtype=np.float64) lb_np = np.full(m, -1e20, dtype=np.float64) - ub_np = _to_array(U) + ub_np = U batch_qps = proxqp.dense.BatchQP() default_rho = 5.0e-5 @@ -171,7 +170,7 @@ def _project_weight_vector_batch(self, U: Tensor, G: Tensor) -> Tensor: for i in range(n): zhats_np[i] = batch_qps.get(i).results.x - return torch.from_numpy(zhats_np).to(device=device, dtype=dtype) + return zhats_np def _to_array(tensor: Tensor) -> np.ndarray: From ab04850f2a91ded092961101c99ba97e41df3706 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 15 May 2026 14:38:16 +0200 Subject: [PATCH 11/15] readd qpsolvers in pyproject.toml --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 7dab2866..769cb07f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,9 @@ authors = [ requires-python = ">=3.10" dependencies = [ "torch>=2.3.0", # 
Problems before 2.4.0, especially with autogram. + "quadprog>=0.1.9, != 0.1.10", # Doesn't work before 0.1.9, 0.1.10 is yanked "numpy>=1.21.2", # Does not work before 1.21. No python 3.10 wheel before 1.21.2. + "qpsolvers>=1.0.1", # Does not work before 1.0.1 "proxsuite>=0.7.2", ] classifiers = [ @@ -101,6 +103,8 @@ lower_bounds = [ "torch==2.3.0", "numpy==1.21.2", "proxsuite==0.7.2", + "quadprog==0.1.9", + "qpsolvers==1.0.1", ] [project.optional-dependencies] From 88c4557622f8bb159c5f58538ceb1ac0a506333e Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Fri, 15 May 2026 14:51:17 +0200 Subject: [PATCH 12/15] Add repr tests. --- tests/unit/linalg/test_dual_cone.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/unit/linalg/test_dual_cone.py b/tests/unit/linalg/test_dual_cone.py index 2799024a..f3ae5650 100644 --- a/tests/unit/linalg/test_dual_cone.py +++ b/tests/unit/linalg/test_dual_cone.py @@ -138,6 +138,18 @@ def test_reg_eps_setter_rejects_negative() -> None: projector.reg_eps = -1e-9 +def test_quadprog_repr() -> None: + A = QuadprogProjector(norm_eps=0.001, reg_eps=0.01) + assert repr(A) == "QuadprogProjector(norm_eps=0.001, reg_eps=0.01)" + assert str(A) == "QuadprogProjector(norm_eps=0.001, reg_eps=0.01)" + + +def test_proxsuite_repr() -> None: + A = ProxsuiteProjector() + assert repr(A) == "ProxsuiteProjector()" + assert str(A) == "ProxsuiteProjector()" + + def test_qp_solver_based_failure() -> None: """ Tests that `QPSolverBased._project_weight_vector` raises an error when the input G has too large From b1c3a457c03205493893420aea29faec46e67ee2 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Sun, 17 May 2026 12:03:15 +0200 Subject: [PATCH 13/15] add initial guess (doesn't solve permutation invariance but is a good idea). 
--- src/torchjd/_linalg/_dual_cone.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/torchjd/_linalg/_dual_cone.py b/src/torchjd/_linalg/_dual_cone.py index eaaf8f8b..a73bf608 100644 --- a/src/torchjd/_linalg/_dual_cone.py +++ b/src/torchjd/_linalg/_dual_cone.py @@ -161,7 +161,23 @@ def _project_weight_vector_batch(self, U: np.ndarray, G: np.ndarray) -> np.ndarr qp.settings.default_rho = default_rho qp.settings.refactor_rho_threshold = default_rho qp.settings.eps_abs = 1e-9 - qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=-ub_np[i], rho=default_rho) + + u = -ub_np[i] + + qp.init( + H=Q_np, + g=p_np, + A=None, + b=None, + C=C_np, + l=lb_np, + u=u, + rho=default_rho, + ) + + # Initial guess + qp.results.x = u.copy() + qp.results.z = np.maximum(0.0, Q_np @ u) num_threads = max(1, (os.cpu_count() or 2) // 2) proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps) From b7b6c424fdde852b4d6e0349b271fc5c584ef345 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Sun, 17 May 2026 12:08:23 +0200 Subject: [PATCH 14/15] Decrease absolute precision (which makes convergence faster). 
--- src/torchjd/_linalg/_dual_cone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchjd/_linalg/_dual_cone.py b/src/torchjd/_linalg/_dual_cone.py index a73bf608..de0c29da 100644 --- a/src/torchjd/_linalg/_dual_cone.py +++ b/src/torchjd/_linalg/_dual_cone.py @@ -160,7 +160,7 @@ def _project_weight_vector_batch(self, U: np.ndarray, G: np.ndarray) -> np.ndarr qp.settings.max_iter_in = 100 qp.settings.default_rho = default_rho qp.settings.refactor_rho_threshold = default_rho - qp.settings.eps_abs = 1e-9 + qp.settings.eps_abs = 1e-6 u = -ub_np[i] From cc4674414d136f864c54fa8e76bf013d8a906398 Mon Sep 17 00:00:00 2001 From: Pierre Quinton Date: Sun, 17 May 2026 12:12:25 +0200 Subject: [PATCH 15/15] Update precision of permutation invariance tests (it's not too bad given that it failed only for rank deficient matrices). --- tests/unit/aggregation/test_dualproj.py | 2 +- tests/unit/aggregation/test_upgrad.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py index cb3db432..655ad730 100644 --- a/tests/unit/aggregation/test_dualproj.py +++ b/tests/unit/aggregation/test_dualproj.py @@ -49,7 +49,7 @@ def test_non_conflicting(aggregator: DualProj, matrix: Tensor) -> None: @mark.parametrize(["aggregator", "matrix"], typical_pairs) def test_permutation_invariant(aggregator: DualProj, matrix: Tensor) -> None: - assert_permutation_invariant(aggregator, matrix, n_runs=5, atol=2e-07, rtol=2e-07) + assert_permutation_invariant(aggregator, matrix, n_runs=5, atol=6e-05, rtol=2e-07) @mark.parametrize(["aggregator", "matrix"], non_strong_pairs) diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py index 261fb717..f7c6160b 100644 --- a/tests/unit/aggregation/test_upgrad.py +++ b/tests/unit/aggregation/test_upgrad.py @@ -46,7 +46,7 @@ def test_non_conflicting(aggregator: UPGrad, matrix: Tensor) -> None: 
@mark.parametrize(["aggregator", "matrix"], typical_pairs) def test_permutation_invariant(aggregator: UPGrad, matrix: Tensor) -> None: - assert_permutation_invariant(aggregator, matrix, n_runs=5, atol=5e-07, rtol=5e-07) + assert_permutation_invariant(aggregator, matrix, n_runs=5, atol=7e-05, rtol=5e-07) @mark.parametrize(["aggregator", "matrix"], typical_pairs)