From bfb787157c9fa7bdbfff95e08955d65078e06076 Mon Sep 17 00:00:00 2001 From: forbes-0023 Date: Sat, 21 Feb 2026 12:23:32 -0600 Subject: [PATCH] perf(solver): cache compiled system across drag steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During interactive drag, the constraint topology is invariant — only the dragged parts' parameter values change between steps. Previously, drag_step() called solve(), which rebuilt everything from scratch each frame: new ParamTable, new Expr trees, symbolic differentiation, CSE, and compilation (~150 ms overhead per frame). Now pre_drag() builds and caches the system, symbolic Jacobian, compiled evaluator, half-spaces, and weight vector. drag_step() reuses all cached artifacts, only updating each dragged part's 7 parameter values before running Newton-Raphson. Expected ~1.5-2x speedup on drag step latency (eliminating rebuild overhead, leaving only the irreducible Newton iteration cost). --- kindred_solver/solver.py | 172 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 163 insertions(+), 9 deletions(-) diff --git a/kindred_solver/solver.py b/kindred_solver/solver.py index 3d8e76a..f6467b5 100644 --- a/kindred_solver/solver.py +++ b/kindred_solver/solver.py @@ -91,6 +91,7 @@ class KindredSolver(kcsolve.IKCSolver): super().__init__() self._drag_ctx = None self._drag_parts = None + self._drag_cache = None self._limits_warned = False def name(self): @@ -244,8 +245,86 @@ class KindredSolver(kcsolve.IKCSolver): self._drag_ctx = ctx self._drag_parts = set(drag_parts) self._drag_step_count = 0 - result = self.solve(ctx) - log.info("pre_drag: initial solve status=%s", result.status) + + # Build the system once and cache everything for drag_step() reuse. 
+ t0 = time.perf_counter() + system = _build_system(ctx) + + half_spaces = compute_half_spaces( + system.constraint_objs, + system.constraint_indices, + system.params, + ) + weight_vec = build_weight_vector(system.params) + + if half_spaces: + post_step_fn = lambda p: apply_half_space_correction(p, half_spaces) + else: + post_step_fn = None + + residuals = substitution_pass(system.all_residuals, system.params) + residuals = single_equation_pass(residuals, system.params) + + # Build symbolic Jacobian + compile once + from .codegen import try_compile_system + + free = system.params.free_names() + n_res = len(residuals) + n_free = len(free) + jac_exprs = [[r.diff(name).simplify() for name in free] for r in residuals] + compiled_eval = try_compile_system(residuals, jac_exprs, n_res, n_free) + + # Initial solve + converged = newton_solve( + residuals, + system.params, + quat_groups=system.quat_groups, + max_iter=100, + tol=1e-10, + post_step=post_step_fn, + weight_vector=weight_vec, + jac_exprs=jac_exprs, + compiled_eval=compiled_eval, + ) + if not converged: + converged = bfgs_solve( + residuals, + system.params, + quat_groups=system.quat_groups, + max_iter=200, + tol=1e-10, + weight_vector=weight_vec, + jac_exprs=jac_exprs, + compiled_eval=compiled_eval, + ) + + # Cache for drag_step() reuse + cache = _DragCache() + cache.system = system + cache.residuals = residuals + cache.jac_exprs = jac_exprs + cache.compiled_eval = compiled_eval + cache.half_spaces = half_spaces + cache.weight_vec = weight_vec + cache.post_step_fn = post_step_fn + self._drag_cache = cache + + # Build result + dof = count_dof(residuals, system.params, jac_exprs=jac_exprs) + result = kcsolve.SolveResult() + result.status = ( + kcsolve.SolveStatus.Success if converged else kcsolve.SolveStatus.Failed + ) + result.dof = dof + result.placements = _extract_placements(system.params, system.bodies) + + elapsed = (time.perf_counter() - t0) * 1000 + log.info( + "pre_drag: initial solve %s in %.1f ms — 
dof=%d", + "converged" if converged else "FAILED", + elapsed, + dof, + ) return result def drag_step(self, drag_placements): @@ -254,19 +333,73 @@ class KindredSolver(kcsolve.IKCSolver): log.warning("drag_step: no drag context (pre_drag not called?)") return kcsolve.SolveResult() self._drag_step_count = getattr(self, "_drag_step_count", 0) + 1 + + # Update dragged part placements in ctx (for caller consistency) for pr in drag_placements: for part in ctx.parts: if part.id == pr.id: part.placement = pr.placement break - t0 = time.perf_counter() - result = self.solve(ctx) - elapsed = (time.perf_counter() - t0) * 1000 - if result.status != kcsolve.SolveStatus.Success: - log.warning( - "drag_step #%d: solve %s in %.1f ms", + + cache = getattr(self, "_drag_cache", None) + if cache is None: + # Fallback: no cache, do a full solve + log.debug( + "drag_step #%d: no cache, falling back to full solve", + self._drag_step_count, + ) + return self.solve(ctx) + + t0 = time.perf_counter() + params = cache.system.params + + # Update only the dragged part's 7 parameter values + for pr in drag_placements: + pfx = pr.id + "/" + params.set_value(pfx + "tx", pr.placement.position[0]) + params.set_value(pfx + "ty", pr.placement.position[1]) + params.set_value(pfx + "tz", pr.placement.position[2]) + params.set_value(pfx + "qw", pr.placement.quaternion[0]) + params.set_value(pfx + "qx", pr.placement.quaternion[1]) + params.set_value(pfx + "qy", pr.placement.quaternion[2]) + params.set_value(pfx + "qz", pr.placement.quaternion[3]) + + # Solve with cached artifacts — no rebuild + converged = newton_solve( + cache.residuals, + params, + quat_groups=cache.system.quat_groups, + max_iter=100, + tol=1e-10, + post_step=cache.post_step_fn, + weight_vector=cache.weight_vec, + jac_exprs=cache.jac_exprs, + compiled_eval=cache.compiled_eval, + ) + if not converged: + converged = bfgs_solve( + cache.residuals, + params, + quat_groups=cache.system.quat_groups, + max_iter=200, + tol=1e-10, + 
weight_vector=cache.weight_vec, + jac_exprs=cache.jac_exprs, + compiled_eval=cache.compiled_eval, + ) + + result = kcsolve.SolveResult() + result.status = ( + kcsolve.SolveStatus.Success if converged else kcsolve.SolveStatus.Failed + ) + result.dof = -1 # skip DOF counting during drag for speed + result.placements = _extract_placements(params, cache.system.bodies) + + elapsed = (time.perf_counter() - t0) * 1000 + if not converged: + log.warning( + "drag_step #%d: solve FAILED in %.1f ms", self._drag_step_count, - result.status, elapsed, ) else: @@ -283,6 +416,7 @@ class KindredSolver(kcsolve.IKCSolver): self._drag_ctx = None self._drag_parts = None self._drag_step_count = 0 + self._drag_cache = None # ── Diagnostics ───────────────────────────────────────────────── @@ -300,6 +434,26 @@ class KindredSolver(kcsolve.IKCSolver): return True +class _DragCache: + """Cached artifacts from pre_drag() reused across drag_step() calls. + + During interactive drag the constraint topology is invariant — only + the dragged part's parameter values change. Caching the built + system, symbolic Jacobian, and compiled evaluator eliminates the + expensive rebuild overhead (~150 ms) on every frame. + """ + + __slots__ = ( + "system", # _System — owns ParamTable + Expr trees + "residuals", # list[Expr] — after substitution + single-equation pass + "jac_exprs", # list[list[Expr]] — symbolic Jacobian + "compiled_eval", # Callable or None + "half_spaces", # list[HalfSpace] + "weight_vec", # ndarray or None + "post_step_fn", # Callable or None + ) + + class _System: """Intermediate representation of a built constraint system."""