perf(solver): cache compiled system across drag steps
During interactive drag, the constraint topology is invariant — only the dragged part's parameter values change between steps. Previously, drag_step() called solve(), which rebuilt everything from scratch each frame: new ParamTable, new Expr trees, symbolic differentiation, CSE, and compilation (~150 ms overhead per frame). Now pre_drag() builds and caches the system, symbolic Jacobian, compiled evaluator, half-spaces, and weight vector. drag_step() reuses all cached artifacts, only updating the dragged part's 7 parameter values before running Newton-Raphson. Expected ~1.5-2x speedup in drag-step latency (eliminating the rebuild overhead and leaving only the irreducible Newton iteration cost).
This commit is contained in:
@@ -91,6 +91,7 @@ class KindredSolver(kcsolve.IKCSolver):
|
||||
super().__init__()
|
||||
self._drag_ctx = None
|
||||
self._drag_parts = None
|
||||
self._drag_cache = None
|
||||
self._limits_warned = False
|
||||
|
||||
def name(self):
|
||||
@@ -244,8 +245,86 @@ class KindredSolver(kcsolve.IKCSolver):
|
||||
self._drag_ctx = ctx
|
||||
self._drag_parts = set(drag_parts)
|
||||
self._drag_step_count = 0
|
||||
result = self.solve(ctx)
|
||||
log.info("pre_drag: initial solve status=%s", result.status)
|
||||
|
||||
# Build the system once and cache everything for drag_step() reuse.
|
||||
t0 = time.perf_counter()
|
||||
system = _build_system(ctx)
|
||||
|
||||
half_spaces = compute_half_spaces(
|
||||
system.constraint_objs,
|
||||
system.constraint_indices,
|
||||
system.params,
|
||||
)
|
||||
weight_vec = build_weight_vector(system.params)
|
||||
|
||||
if half_spaces:
|
||||
post_step_fn = lambda p: apply_half_space_correction(p, half_spaces)
|
||||
else:
|
||||
post_step_fn = None
|
||||
|
||||
residuals = substitution_pass(system.all_residuals, system.params)
|
||||
residuals = single_equation_pass(residuals, system.params)
|
||||
|
||||
# Build symbolic Jacobian + compile once
|
||||
from .codegen import try_compile_system
|
||||
|
||||
free = system.params.free_names()
|
||||
n_res = len(residuals)
|
||||
n_free = len(free)
|
||||
jac_exprs = [[r.diff(name).simplify() for name in free] for r in residuals]
|
||||
compiled_eval = try_compile_system(residuals, jac_exprs, n_res, n_free)
|
||||
|
||||
# Initial solve
|
||||
converged = newton_solve(
|
||||
residuals,
|
||||
system.params,
|
||||
quat_groups=system.quat_groups,
|
||||
max_iter=100,
|
||||
tol=1e-10,
|
||||
post_step=post_step_fn,
|
||||
weight_vector=weight_vec,
|
||||
jac_exprs=jac_exprs,
|
||||
compiled_eval=compiled_eval,
|
||||
)
|
||||
if not converged:
|
||||
converged = bfgs_solve(
|
||||
residuals,
|
||||
system.params,
|
||||
quat_groups=system.quat_groups,
|
||||
max_iter=200,
|
||||
tol=1e-10,
|
||||
weight_vector=weight_vec,
|
||||
jac_exprs=jac_exprs,
|
||||
compiled_eval=compiled_eval,
|
||||
)
|
||||
|
||||
# Cache for drag_step() reuse
|
||||
cache = _DragCache()
|
||||
cache.system = system
|
||||
cache.residuals = residuals
|
||||
cache.jac_exprs = jac_exprs
|
||||
cache.compiled_eval = compiled_eval
|
||||
cache.half_spaces = half_spaces
|
||||
cache.weight_vec = weight_vec
|
||||
cache.post_step_fn = post_step_fn
|
||||
self._drag_cache = cache
|
||||
|
||||
# Build result
|
||||
dof = count_dof(residuals, system.params, jac_exprs=jac_exprs)
|
||||
result = kcsolve.SolveResult()
|
||||
result.status = (
|
||||
kcsolve.SolveStatus.Success if converged else kcsolve.SolveStatus.Failed
|
||||
)
|
||||
result.dof = dof
|
||||
result.placements = _extract_placements(system.params, system.bodies)
|
||||
|
||||
elapsed = (time.perf_counter() - t0) * 1000
|
||||
log.info(
|
||||
"pre_drag: initial solve %s in %.1f ms — dof=%d",
|
||||
"converged" if converged else "FAILED",
|
||||
elapsed,
|
||||
dof,
|
||||
)
|
||||
return result
|
||||
|
||||
def drag_step(self, drag_placements):
|
||||
@@ -254,19 +333,73 @@ class KindredSolver(kcsolve.IKCSolver):
|
||||
log.warning("drag_step: no drag context (pre_drag not called?)")
|
||||
return kcsolve.SolveResult()
|
||||
self._drag_step_count = getattr(self, "_drag_step_count", 0) + 1
|
||||
|
||||
# Update dragged part placements in ctx (for caller consistency)
|
||||
for pr in drag_placements:
|
||||
for part in ctx.parts:
|
||||
if part.id == pr.id:
|
||||
part.placement = pr.placement
|
||||
break
|
||||
t0 = time.perf_counter()
|
||||
result = self.solve(ctx)
|
||||
elapsed = (time.perf_counter() - t0) * 1000
|
||||
if result.status != kcsolve.SolveStatus.Success:
|
||||
log.warning(
|
||||
"drag_step #%d: solve %s in %.1f ms",
|
||||
|
||||
cache = getattr(self, "_drag_cache", None)
|
||||
if cache is None:
|
||||
# Fallback: no cache, do a full solve
|
||||
log.debug(
|
||||
"drag_step #%d: no cache, falling back to full solve",
|
||||
self._drag_step_count,
|
||||
)
|
||||
return self.solve(ctx)
|
||||
|
||||
t0 = time.perf_counter()
|
||||
params = cache.system.params
|
||||
|
||||
# Update only the dragged part's 7 parameter values
|
||||
for pr in drag_placements:
|
||||
pfx = pr.id + "/"
|
||||
params.set_value(pfx + "tx", pr.placement.position[0])
|
||||
params.set_value(pfx + "ty", pr.placement.position[1])
|
||||
params.set_value(pfx + "tz", pr.placement.position[2])
|
||||
params.set_value(pfx + "qw", pr.placement.quaternion[0])
|
||||
params.set_value(pfx + "qx", pr.placement.quaternion[1])
|
||||
params.set_value(pfx + "qy", pr.placement.quaternion[2])
|
||||
params.set_value(pfx + "qz", pr.placement.quaternion[3])
|
||||
|
||||
# Solve with cached artifacts — no rebuild
|
||||
converged = newton_solve(
|
||||
cache.residuals,
|
||||
params,
|
||||
quat_groups=cache.system.quat_groups,
|
||||
max_iter=100,
|
||||
tol=1e-10,
|
||||
post_step=cache.post_step_fn,
|
||||
weight_vector=cache.weight_vec,
|
||||
jac_exprs=cache.jac_exprs,
|
||||
compiled_eval=cache.compiled_eval,
|
||||
)
|
||||
if not converged:
|
||||
converged = bfgs_solve(
|
||||
cache.residuals,
|
||||
params,
|
||||
quat_groups=cache.system.quat_groups,
|
||||
max_iter=200,
|
||||
tol=1e-10,
|
||||
weight_vector=cache.weight_vec,
|
||||
jac_exprs=cache.jac_exprs,
|
||||
compiled_eval=cache.compiled_eval,
|
||||
)
|
||||
|
||||
result = kcsolve.SolveResult()
|
||||
result.status = (
|
||||
kcsolve.SolveStatus.Success if converged else kcsolve.SolveStatus.Failed
|
||||
)
|
||||
result.dof = -1 # skip DOF counting during drag for speed
|
||||
result.placements = _extract_placements(params, cache.system.bodies)
|
||||
|
||||
elapsed = (time.perf_counter() - t0) * 1000
|
||||
if not converged:
|
||||
log.warning(
|
||||
"drag_step #%d: solve FAILED in %.1f ms",
|
||||
self._drag_step_count,
|
||||
result.status,
|
||||
elapsed,
|
||||
)
|
||||
else:
|
||||
@@ -283,6 +416,7 @@ class KindredSolver(kcsolve.IKCSolver):
|
||||
self._drag_ctx = None
|
||||
self._drag_parts = None
|
||||
self._drag_step_count = 0
|
||||
self._drag_cache = None
|
||||
|
||||
# ── Diagnostics ─────────────────────────────────────────────────
|
||||
|
||||
@@ -300,6 +434,26 @@ class KindredSolver(kcsolve.IKCSolver):
|
||||
return True
|
||||
|
||||
|
||||
class _DragCache:
|
||||
"""Cached artifacts from pre_drag() reused across drag_step() calls.
|
||||
|
||||
During interactive drag the constraint topology is invariant — only
|
||||
the dragged part's parameter values change. Caching the built
|
||||
system, symbolic Jacobian, and compiled evaluator eliminates the
|
||||
expensive rebuild overhead (~150 ms) on every frame.
|
||||
"""
|
||||
|
||||
__slots__ = (
|
||||
"system", # _System — owns ParamTable + Expr trees
|
||||
"residuals", # list[Expr] — after substitution + single-equation pass
|
||||
"jac_exprs", # list[list[Expr]] — symbolic Jacobian
|
||||
"compiled_eval", # Callable or None
|
||||
"half_spaces", # list[HalfSpace]
|
||||
"weight_vec", # ndarray or None
|
||||
"post_step_fn", # Callable or None
|
||||
)
|
||||
|
||||
|
||||
class _System:
|
||||
"""Intermediate representation of a built constraint system."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user