Files
solver/kindred_solver/bfgs.py
forbes-0023 b4b8724ff1 feat(solver): diagnostics, half-space preference, and weight vectors (phase 4)
- Add per-entity DOF analysis via Jacobian SVD (diagnostics.py)
- Add overconstrained detection: redundant vs conflicting constraints
- Add half-space tracking to preserve configuration branch (preference.py)
- Add minimum-movement weighting for least-squares solve
- Extend BFGS fallback with weight vector and quaternion renormalization
- Add snapshot/restore and env accessor to ParamTable
- Fix DistancePointPointConstraint sign for half-space tracking
2026-02-20 23:32:45 -06:00

162 lines
4.4 KiB
Python

"""L-BFGS-B fallback solver for when Newton-Raphson fails to converge.
Minimizes f(x) = 0.5 * sum(r_i(x)^2) using scipy's L-BFGS-B with
analytic gradient from the Expr DAG's symbolic differentiation.
"""
from __future__ import annotations
import math
from typing import List
import numpy as np
from .expr import Expr
from .params import ParamTable
try:
from scipy.optimize import minimize as _scipy_minimize
_HAS_SCIPY = True
except ImportError:
_HAS_SCIPY = False
def bfgs_solve(
residuals: List[Expr],
params: ParamTable,
quat_groups: List[tuple[str, str, str, str]] | None = None,
max_iter: int = 200,
tol: float = 1e-10,
weight_vector: "np.ndarray | None" = None,
) -> bool:
"""Solve ``residuals == 0`` by minimizing sum of squared residuals.
Falls back gracefully to False if scipy is not available.
When *weight_vector* is provided, residuals are scaled by
``sqrt(w)`` so that the objective becomes
``0.5 * sum(w_i * r_i^2)`` — equivalent to weighted least-squares.
Returns True if converged (||r|| < tol).
"""
if not _HAS_SCIPY:
return False
free = params.free_names()
n_free = len(free)
n_res = len(residuals)
if n_free == 0 or n_res == 0:
return True
# Build symbolic gradient expressions once: d(r_i)/d(x_j)
jac_exprs: List[List[Expr]] = []
for r in residuals:
row = []
for name in free:
row.append(r.diff(name).simplify())
jac_exprs.append(row)
# Pre-compute scaling for weighted minimum-norm
if weight_vector is not None:
w_sqrt = np.sqrt(weight_vector)
w_inv_sqrt = 1.0 / w_sqrt
else:
w_sqrt = None
w_inv_sqrt = None
def objective_and_grad(y_vec):
# Transform back from scaled space if weighted
if w_inv_sqrt is not None:
x_vec = y_vec * w_inv_sqrt
else:
x_vec = y_vec
# Update params
params.set_free_vector(x_vec)
if quat_groups:
_renormalize_quats(params, quat_groups)
env = params.get_env()
# Evaluate residuals
r_vals = np.array([r.eval(env) for r in residuals])
f = 0.5 * np.dot(r_vals, r_vals)
# Evaluate Jacobian
J = np.empty((n_res, n_free))
for i in range(n_res):
for j in range(n_free):
J[i, j] = jac_exprs[i][j].eval(env)
# Gradient of f w.r.t. x = J^T @ r
grad_x = J.T @ r_vals
# Chain rule: df/dy = df/dx * dx/dy = grad_x * w_inv_sqrt
if w_inv_sqrt is not None:
grad = grad_x * w_inv_sqrt
else:
grad = grad_x
return f, grad
x0 = params.get_free_vector().copy()
# Transform initial guess to scaled space
if w_sqrt is not None:
y0 = x0 * w_sqrt
else:
y0 = x0
result = _scipy_minimize(
objective_and_grad,
y0,
method="L-BFGS-B",
jac=True,
options={"maxiter": max_iter, "ftol": tol * tol, "gtol": tol},
)
# Apply final result (transform back from scaled space)
if w_inv_sqrt is not None:
params.set_free_vector(result.x * w_inv_sqrt)
else:
params.set_free_vector(result.x)
if quat_groups:
_renormalize_quats(params, quat_groups)
# Check convergence on actual residual norm
env = params.get_env()
r_vals = np.array([r.eval(env) for r in residuals])
return bool(np.linalg.norm(r_vals) < tol)
def _renormalize_quats(
params: ParamTable,
groups: List[tuple[str, str, str, str]],
):
"""Project quaternion params back onto the unit sphere."""
for qw_name, qx_name, qy_name, qz_name in groups:
if (
params.is_fixed(qw_name)
and params.is_fixed(qx_name)
and params.is_fixed(qy_name)
and params.is_fixed(qz_name)
):
continue
w = params.get_value(qw_name)
x = params.get_value(qx_name)
y = params.get_value(qy_name)
z = params.get_value(qz_name)
norm = math.sqrt(w * w + x * x + y * y + z * z)
if norm < 1e-15:
params.set_value(qw_name, 1.0)
params.set_value(qx_name, 0.0)
params.set_value(qy_name, 0.0)
params.set_value(qz_name, 0.0)
else:
params.set_value(qw_name, w / norm)
params.set_value(qx_name, x / norm)
params.set_value(qy_name, y / norm)
params.set_value(qz_name, z / norm)