From 5f144878d6a84342be35dfd3c43b2b99dce8d48c Mon Sep 17 00:00:00 2001 From: Forbes Date: Fri, 20 Feb 2026 12:08:34 -0600 Subject: [PATCH] =?UTF-8?q?feat(api):=20solver=20service=20Phase=203b=20?= =?UTF-8?q?=E2=80=94=20server=20endpoints,=20job=20definitions,=20and=20re?= =?UTF-8?q?sult=20cache?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add server-side solver service module with REST API endpoints, database schema, job definitions, and runner result caching. New files: - migrations/021_solver_results.sql: solver_results table with upsert constraint - internal/db/solver_results.go: SolverResultRepository (Upsert, GetByItem, GetByItemRevision) - internal/api/solver_handlers.go: solver API handlers and maybeCacheSolverResult hook - jobdefs/assembly-solve.yaml: manual solve job definition - jobdefs/assembly-validate.yaml: auto-validate on revision creation - jobdefs/assembly-kinematic.yaml: manual kinematic simulation job Modified: - internal/config/config.go: SolverConfig struct with max_context_size_mb, default_timeout - internal/modules/modules.go, loader.go: register solver module (depends on jobs) - internal/db/jobs.go: ListSolverJobs helper with definition_name prefix filter - internal/api/handlers.go: wire SolverResultRepository into Server - internal/api/routes.go: /api/solver/* routes + /api/items/{partNumber}/solver/results - internal/api/runner_handlers.go: async result cache hook on job completion API endpoints: - POST /api/solver/jobs — submit solver job (editor) - GET /api/solver/jobs — list solver jobs with filters - GET /api/solver/jobs/{id} — get solver job status - POST /api/solver/jobs/{id}/cancel — cancel solver job (editor) - GET /api/solver/solvers — registry of available solvers - GET /api/items/{pn}/solver/results — cached results for item Also fixes pre-existing test compilation errors (missing workflows param in NewServer calls across 6 test files). --- internal/api/auth_handlers_test.go | 1 + internal/api/bom_handlers_test.go | 1 + internal/api/csv_handlers_test.go | 1 + internal/api/dag_handlers_test.go | 1 + internal/api/handlers.go | 3 + internal/api/job_handlers_test.go | 1 + internal/api/routes.go | 16 + internal/api/runner_handlers.go | 3 + internal/api/settings_handlers_test.go | 1 + internal/api/solver_handlers.go | 551 +++++++++++++++++++++++++ internal/config/config.go | 19 + internal/db/jobs.go | 49 +++ internal/db/solver_results.go | 121 ++++++ internal/modules/loader.go | 1 + internal/modules/modules.go | 2 + internal/modules/modules_test.go | 4 +- jobdefs/assembly-kinematic.yaml | 23 ++ jobdefs/assembly-solve.yaml | 21 + jobdefs/assembly-validate.yaml | 15 +- migrations/021_solver_results.sql | 29 ++ 20 files changed, 853 insertions(+), 10 deletions(-) create mode 100644 internal/api/solver_handlers.go create mode 100644 internal/db/solver_results.go create mode 100644 jobdefs/assembly-kinematic.yaml create mode 100644 jobdefs/assembly-solve.yaml create mode 100644 migrations/021_solver_results.sql diff --git a/internal/api/auth_handlers_test.go b/internal/api/auth_handlers_test.go index d16769a..6632a3b 100644 --- a/internal/api/auth_handlers_test.go +++ b/internal/api/auth_handlers_test.go @@ -43,6 +43,7 @@ func newAuthTestServer(t *testing.T) *Server { "", // jobDefsDir modules.NewRegistry(), // modules nil, // cfg + nil, // workflows ) } diff --git a/internal/api/bom_handlers_test.go b/internal/api/bom_handlers_test.go index 4b890f4..578c120 100644 --- a/internal/api/bom_handlers_test.go +++ b/internal/api/bom_handlers_test.go @@ -40,6 +40,7 @@ func newTestServer(t *testing.T) *Server { "", // jobDefsDir modules.NewRegistry(), // modules nil, // cfg + nil, // workflows ) } diff --git a/internal/api/csv_handlers_test.go b/internal/api/csv_handlers_test.go index bc194a7..63bd2e3 100644 --- a/internal/api/csv_handlers_test.go +++ b/internal/api/csv_handlers_test.go @@ -69,6 +69,7 @@ func newTestServerWithSchemas(t *testing.T) *Server { "", // jobDefsDir modules.NewRegistry(), // modules nil, // cfg + nil, // workflows ) } diff --git a/internal/api/dag_handlers_test.go b/internal/api/dag_handlers_test.go index f987054..6b2cbb4 100644 --- a/internal/api/dag_handlers_test.go +++ b/internal/api/dag_handlers_test.go @@ -31,6 +31,7 @@ func newDAGTestServer(t *testing.T) *Server { broker, state, nil, "", modules.NewRegistry(), nil, + nil, ) } diff --git a/internal/api/handlers.go b/internal/api/handlers.go index c123666..66e77aa 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -61,6 +61,7 @@ type Server struct { macros *db.ItemMacroRepository approvals *db.ItemApprovalRepository workflows map[string]*workflow.Workflow + solverResults *db.SolverResultRepository } // NewServer creates a new API server. @@ -94,6 +95,7 @@ func NewServer( itemDeps := db.NewItemDependencyRepository(database) itemMacros := db.NewItemMacroRepository(database) itemApprovals := db.NewItemApprovalRepository(database) + solverResults := db.NewSolverResultRepository(database) seqStore := &dbSequenceStore{db: database, schemas: schemas} partgen := partnum.NewGenerator(schemas, seqStore) @@ -127,6 +129,7 @@ func NewServer( macros: itemMacros, approvals: itemApprovals, workflows: workflows, + solverResults: solverResults, } } diff --git a/internal/api/job_handlers_test.go b/internal/api/job_handlers_test.go index d7eab3b..a152894 100644 --- a/internal/api/job_handlers_test.go +++ b/internal/api/job_handlers_test.go @@ -32,6 +32,7 @@ func newJobTestServer(t *testing.T) *Server { broker, state, nil, "", modules.NewRegistry(), nil, + nil, ) } diff --git a/internal/api/routes.go b/internal/api/routes.go index 677114a..3e4a722 100644 --- a/internal/api/routes.go +++ b/internal/api/routes.go @@ -181,6 +181,7 @@ func NewRouter(server *Server, logger zerolog.Logger) http.Handler { r.Get("/macros", server.HandleGetMacros) r.Get("/macros/{filename}", server.HandleGetMacro) r.Get("/approvals", server.HandleGetApprovals) + r.Get("/solver/results", server.HandleGetSolverResults) // DAG (gated by dag module) r.Route("/dag", func(r chi.Router) { @@ -250,6 +251,21 @@ func NewRouter(server *Server, logger zerolog.Logger) http.Handler { }) }) + // Solver (gated by solver module) + r.Route("/solver", func(r chi.Router) { + r.Use(server.RequireModule("solver")) + r.Get("/solvers", server.HandleGetSolverRegistry) + r.Get("/jobs", server.HandleListSolverJobs) + r.Get("/jobs/{jobID}", server.HandleGetSolverJob) + + r.Group(func(r chi.Router) { + r.Use(server.RequireWritable) + r.Use(server.RequireRole(auth.RoleEditor)) + r.Post("/jobs", server.HandleSubmitSolverJob) + r.Post("/jobs/{jobID}/cancel", server.HandleCancelSolverJob) + }) + }) + // Sheets (editor) r.Group(func(r chi.Router) { r.Use(server.RequireWritable) diff --git a/internal/api/runner_handlers.go b/internal/api/runner_handlers.go index 2d659b7..d27d9e6 100644 --- a/internal/api/runner_handlers.go +++ b/internal/api/runner_handlers.go @@ -142,6 +142,9 @@ func (s *Server) HandleRunnerCompleteJob(w http.ResponseWriter, r *http.Request) return } + // Cache solver results asynchronously (no-op for non-solver jobs). + go s.maybeCacheSolverResult(context.Background(), jobID) + s.broker.Publish("job.completed", mustMarshal(map[string]any{ "job_id": jobID, "runner_id": runner.ID, diff --git a/internal/api/settings_handlers_test.go b/internal/api/settings_handlers_test.go index d2c8a64..579c21e 100644 --- a/internal/api/settings_handlers_test.go +++ b/internal/api/settings_handlers_test.go @@ -61,6 +61,7 @@ func newSettingsTestServer(t *testing.T) *Server { "", // jobDefsDir modules.NewRegistry(), // modules cfg, + nil, // workflows ) } diff --git a/internal/api/solver_handlers.go b/internal/api/solver_handlers.go new file mode 100644 index 0000000..138ba14 --- /dev/null +++ b/internal/api/solver_handlers.go @@ -0,0 +1,551 @@ +package api + +import ( + "context" + "encoding/json" + "net/http" + "strconv" + "strings" + + "github.com/go-chi/chi/v5" + "github.com/kindredsystems/silo/internal/auth" + "github.com/kindredsystems/silo/internal/db" +) + +// SubmitSolveRequest is the JSON body for POST /api/solver/jobs. +type SubmitSolveRequest struct { + Solver string `json:"solver"` + Operation string `json:"operation"` + Context json.RawMessage `json:"context"` + Priority *int `json:"priority,omitempty"` + ItemPartNumber string `json:"item_part_number,omitempty"` + RevisionNumber *int `json:"revision_number,omitempty"` +} + +// SolverJobResponse is the JSON response for solver job creation. +type SolverJobResponse struct { + JobID string `json:"job_id"` + Status string `json:"status"` + CreatedAt string `json:"created_at"` +} + +// SolverResultResponse is the JSON response for cached solver results. +type SolverResultResponse struct { + ID string `json:"id"` + RevisionNumber int `json:"revision_number"` + JobID *string `json:"job_id,omitempty"` + Operation string `json:"operation"` + SolverName string `json:"solver_name"` + Status string `json:"status"` + DOF *int `json:"dof,omitempty"` + Diagnostics json.RawMessage `json:"diagnostics"` + Placements json.RawMessage `json:"placements"` + NumFrames int `json:"num_frames"` + SolveTimeMS *float64 `json:"solve_time_ms,omitempty"` + CreatedAt string `json:"created_at"` +} + +// operationToDefinition maps solve operations to job definition names. +var operationToDefinition = map[string]string{ + "solve": "assembly-solve", + "diagnose": "assembly-validate", + "kinematic": "assembly-kinematic", +} + +// HandleSubmitSolverJob creates a solver job via the existing job queue. +// POST /api/solver/jobs +func (s *Server) HandleSubmitSolverJob(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + // Enforce max context size at the HTTP boundary. + maxBytes := int64(s.cfg.Solver.MaxContextSizeMB) * 1024 * 1024 + r.Body = http.MaxBytesReader(w, r.Body, maxBytes) + + var req SubmitSolveRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err.Error() == "http: request body too large" { + writeError(w, http.StatusRequestEntityTooLarge, "context_too_large", + "SolveContext exceeds maximum size") + return + } + writeError(w, http.StatusBadRequest, "invalid_body", "Invalid JSON body") + return + } + + // Validate operation. + if req.Operation == "" { + req.Operation = "solve" + } + defName, ok := operationToDefinition[req.Operation] + if !ok { + writeError(w, http.StatusBadRequest, "invalid_operation", + "Operation must be 'solve', 'diagnose', or 'kinematic'") + return + } + + // Context is required. + if len(req.Context) == 0 { + writeError(w, http.StatusBadRequest, "missing_context", "SolveContext is required") + return + } + + // Look up job definition. + def, err := s.jobs.GetDefinition(ctx, defName) + if err != nil { + s.logger.Error().Err(err).Str("definition", defName).Msg("failed to look up solver job definition") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to look up job definition") + return + } + if def == nil { + writeError(w, http.StatusNotFound, "definition_not_found", + "Solver job definition '"+defName+"' not found; ensure job definition YAML is loaded") + return + } + + // Resolve item_part_number → item_id (optional). + var itemID *string + if req.ItemPartNumber != "" { + item, err := s.items.GetByPartNumber(ctx, req.ItemPartNumber) + if err != nil { + s.logger.Error().Err(err).Msg("failed to get item for solver job") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to resolve item") + return + } + if item == nil { + writeError(w, http.StatusNotFound, "item_not_found", + "Item '"+req.ItemPartNumber+"' not found") + return + } + itemID = &item.ID + } + + // Pack solver-specific data into scope_metadata. + scopeMeta := map[string]any{ + "solver": req.Solver, + "operation": req.Operation, + "context": req.Context, + } + if req.RevisionNumber != nil { + scopeMeta["revision_number"] = *req.RevisionNumber + } + if req.ItemPartNumber != "" { + scopeMeta["item_part_number"] = req.ItemPartNumber + } + + priority := def.Priority + if req.Priority != nil { + priority = *req.Priority + } + + username := "" + if user := auth.UserFromContext(ctx); user != nil { + username = user.Username + } + + job := &db.Job{ + JobDefinitionID: &def.ID, + DefinitionName: def.Name, + Priority: priority, + ItemID: itemID, + ScopeMetadata: scopeMeta, + RunnerTags: def.RunnerTags, + TimeoutSeconds: def.TimeoutSeconds, + MaxRetries: def.MaxRetries, + CreatedBy: &username, + } + + // Use solver default timeout if the definition has none. + if job.TimeoutSeconds == 0 { + job.TimeoutSeconds = s.cfg.Solver.DefaultTimeout + } + + if err := s.jobs.CreateJob(ctx, job); err != nil { + s.logger.Error().Err(err).Msg("failed to create solver job") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to create solver job") + return + } + + s.broker.Publish("job.created", mustMarshal(map[string]any{ + "job_id": job.ID, + "definition_name": job.DefinitionName, + "trigger": "manual", + "item_id": job.ItemID, + })) + + writeJSON(w, http.StatusCreated, SolverJobResponse{ + JobID: job.ID, + Status: job.Status, + CreatedAt: job.CreatedAt.UTC().Format("2006-01-02T15:04:05Z"), + }) +} + +// HandleGetSolverJob returns a single solver job. +// GET /api/solver/jobs/{jobID} +func (s *Server) HandleGetSolverJob(w http.ResponseWriter, r *http.Request) { + jobID := chi.URLParam(r, "jobID") + + job, err := s.jobs.GetJob(r.Context(), jobID) + if err != nil { + s.logger.Error().Err(err).Msg("failed to get solver job") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to get job") + return + } + if job == nil { + writeError(w, http.StatusNotFound, "not_found", "Job not found") + return + } + + writeJSON(w, http.StatusOK, job) +} + +// HandleListSolverJobs lists solver jobs with optional filters. +// GET /api/solver/jobs +func (s *Server) HandleListSolverJobs(w http.ResponseWriter, r *http.Request) { + status := r.URL.Query().Get("status") + itemPartNumber := r.URL.Query().Get("item") + operation := r.URL.Query().Get("operation") + + limit := 20 + if v := r.URL.Query().Get("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 && n <= 100 { + limit = n + } + } + offset := 0 + if v := r.URL.Query().Get("offset"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 0 { + offset = n + } + } + + // Resolve item part number to ID if provided. + var itemID string + if itemPartNumber != "" { + item, err := s.items.GetByPartNumber(r.Context(), itemPartNumber) + if err != nil { + s.logger.Error().Err(err).Msg("failed to resolve item for solver job list") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to resolve item") + return + } + if item == nil { + writeJSON(w, http.StatusOK, map[string]any{ + "jobs": []*db.Job{}, + "total": 0, + "limit": limit, + "offset": offset, + }) + return + } + itemID = item.ID + } + + jobs, err := s.jobs.ListSolverJobs(r.Context(), status, itemID, operation, limit, offset) + if err != nil { + s.logger.Error().Err(err).Msg("failed to list solver jobs") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to list solver jobs") + return + } + + writeJSON(w, http.StatusOK, map[string]any{ + "jobs": jobs, + "limit": limit, + "offset": offset, + }) +} + +// HandleCancelSolverJob cancels a solver job. +// POST /api/solver/jobs/{jobID}/cancel +func (s *Server) HandleCancelSolverJob(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + jobID := chi.URLParam(r, "jobID") + user := auth.UserFromContext(ctx) + + cancelledBy := "system" + if user != nil { + cancelledBy = user.Username + } + + if err := s.jobs.CancelJob(ctx, jobID, cancelledBy); err != nil { + writeError(w, http.StatusBadRequest, "cancel_failed", err.Error()) + return + } + + s.broker.Publish("job.cancelled", mustMarshal(map[string]any{ + "job_id": jobID, + "cancelled_by": cancelledBy, + })) + + writeJSON(w, http.StatusOK, map[string]string{ + "job_id": jobID, + "status": "cancelled", + }) +} + +// HandleGetSolverRegistry returns available solvers from online runners. +// GET /api/solver/solvers +func (s *Server) HandleGetSolverRegistry(w http.ResponseWriter, r *http.Request) { + runners, err := s.jobs.ListRunners(r.Context()) + if err != nil { + s.logger.Error().Err(err).Msg("failed to list runners for solver registry") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to list runners") + return + } + + type solverInfo struct { + Name string `json:"name"` + DisplayName string `json:"display_name,omitempty"` + Deterministic bool `json:"deterministic,omitempty"` + SupportedJoints []string `json:"supported_joints,omitempty"` + RunnerCount int `json:"runner_count"` + } + + solverMap := make(map[string]*solverInfo) + + for _, runner := range runners { + if runner.Status != "online" { + continue + } + // Check runner has the solver tag. + hasSolverTag := false + for _, tag := range runner.Tags { + if tag == "solver" { + hasSolverTag = true + break + } + } + if !hasSolverTag { + continue + } + + // Extract solver capabilities from runner metadata. + if runner.Metadata == nil { + continue + } + solvers, ok := runner.Metadata["solvers"] + if !ok { + continue + } + + // solvers can be []any (array of solver objects or strings). + solverList, ok := solvers.([]any) + if !ok { + continue + } + + for _, entry := range solverList { + switch v := entry.(type) { + case string: + // Simple string entry: just the solver name. + if _, exists := solverMap[v]; !exists { + solverMap[v] = &solverInfo{Name: v} + } + solverMap[v].RunnerCount++ + case map[string]any: + // Rich entry with name, display_name, supported_joints, etc. + name, _ := v["name"].(string) + if name == "" { + continue + } + if _, exists := solverMap[name]; !exists { + info := &solverInfo{Name: name} + if dn, ok := v["display_name"].(string); ok { + info.DisplayName = dn + } + if det, ok := v["deterministic"].(bool); ok { + info.Deterministic = det + } + if joints, ok := v["supported_joints"].([]any); ok { + for _, j := range joints { + if js, ok := j.(string); ok { + info.SupportedJoints = append(info.SupportedJoints, js) + } + } + } + solverMap[name] = info + } + solverMap[name].RunnerCount++ + } + } + } + + solverList := make([]*solverInfo, 0, len(solverMap)) + for _, info := range solverMap { + solverList = append(solverList, info) + } + + writeJSON(w, http.StatusOK, map[string]any{ + "solvers": solverList, + "default_solver": s.cfg.Solver.DefaultSolver, + }) +} + +// HandleGetSolverResults returns cached solver results for an item. +// GET /api/items/{partNumber}/solver/results +func (s *Server) HandleGetSolverResults(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + partNumber := chi.URLParam(r, "partNumber") + + item, err := s.items.GetByPartNumber(ctx, partNumber) + if err != nil { + s.logger.Error().Err(err).Msg("failed to get item for solver results") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to get item") + return + } + if item == nil { + writeError(w, http.StatusNotFound, "not_found", "Item not found") + return + } + + results, err := s.solverResults.GetByItem(ctx, item.ID) + if err != nil { + s.logger.Error().Err(err).Msg("failed to list solver results") + writeError(w, http.StatusInternalServerError, "internal_error", "Failed to list solver results") + return + } + + resp := make([]SolverResultResponse, len(results)) + for i, r := range results { + diag := json.RawMessage(r.Diagnostics) + if diag == nil { + diag = json.RawMessage("[]") + } + place := json.RawMessage(r.Placements) + if place == nil { + place = json.RawMessage("[]") + } + resp[i] = SolverResultResponse{ + ID: r.ID, + RevisionNumber: r.RevisionNumber, + JobID: r.JobID, + Operation: r.Operation, + SolverName: r.SolverName, + Status: r.Status, + DOF: r.DOF, + Diagnostics: diag, + Placements: place, + NumFrames: r.NumFrames, + SolveTimeMS: r.SolveTimeMS, + CreatedAt: r.CreatedAt.UTC().Format("2006-01-02T15:04:05Z"), + } + } + + writeJSON(w, http.StatusOK, resp) +} + +// maybeCacheSolverResult is called asynchronously after a job completes. +// It checks if the job is a solver job and upserts the result into solver_results. +func (s *Server) maybeCacheSolverResult(ctx context.Context, jobID string) { + job, err := s.jobs.GetJob(ctx, jobID) + if err != nil || job == nil { + s.logger.Warn().Err(err).Str("job_id", jobID).Msg("solver result cache: failed to get job") + return + } + + if !strings.HasPrefix(job.DefinitionName, "assembly-") { + return + } + if !s.modules.IsEnabled("solver") { + return + } + if job.ItemID == nil { + return + } + + // Extract fields from scope_metadata. + operation, _ := job.ScopeMetadata["operation"].(string) + if operation == "" { + operation = "solve" + } + solverName, _ := job.ScopeMetadata["solver"].(string) + + var revisionNumber int + if rn, ok := job.ScopeMetadata["revision_number"].(float64); ok { + revisionNumber = int(rn) + } + + // Extract fields from result. + if job.Result == nil { + return + } + + status, _ := job.Result["status"].(string) + if status == "" { + // Try nested result object. + if inner, ok := job.Result["result"].(map[string]any); ok { + status, _ = inner["status"].(string) + } + } + if status == "" { + status = "Unknown" + } + + // Solver name from result takes precedence. + if sn, ok := job.Result["solver_name"].(string); ok && sn != "" { + solverName = sn + } + if solverName == "" { + solverName = "unknown" + } + + var dof *int + if d, ok := job.Result["dof"].(float64); ok { + v := int(d) + dof = &v + } else if inner, ok := job.Result["result"].(map[string]any); ok { + if d, ok := inner["dof"].(float64); ok { + v := int(d) + dof = &v + } + } + + var solveTimeMS *float64 + if t, ok := job.Result["solve_time_ms"].(float64); ok { + solveTimeMS = &t + } + + // Marshal diagnostics and placements as raw JSONB. + var diagnostics, placements []byte + if d, ok := job.Result["diagnostics"]; ok { + diagnostics, _ = json.Marshal(d) + } else if inner, ok := job.Result["result"].(map[string]any); ok { + if d, ok := inner["diagnostics"]; ok { + diagnostics, _ = json.Marshal(d) + } + } + if p, ok := job.Result["placements"]; ok { + placements, _ = json.Marshal(p) + } else if inner, ok := job.Result["result"].(map[string]any); ok { + if p, ok := inner["placements"]; ok { + placements, _ = json.Marshal(p) + } + } + + numFrames := 0 + if nf, ok := job.Result["num_frames"].(float64); ok { + numFrames = int(nf) + } else if inner, ok := job.Result["result"].(map[string]any); ok { + if nf, ok := inner["num_frames"].(float64); ok { + numFrames = int(nf) + } + } + + result := &db.SolverResult{ + ItemID: *job.ItemID, + RevisionNumber: revisionNumber, + JobID: &job.ID, + Operation: operation, + SolverName: solverName, + Status: status, + DOF: dof, + Diagnostics: diagnostics, + Placements: placements, + NumFrames: numFrames, + SolveTimeMS: solveTimeMS, + } + + if err := s.solverResults.Upsert(ctx, result); err != nil { + s.logger.Warn().Err(err).Str("job_id", jobID).Msg("solver result cache: failed to upsert") + } else { + s.logger.Info().Str("job_id", jobID).Str("operation", operation).Msg("cached solver result") + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 59ee464..9ab3a63 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -19,6 +19,7 @@ type Config struct { Auth AuthConfig `yaml:"auth"` Jobs JobsConfig `yaml:"jobs"` Workflows WorkflowsConfig `yaml:"workflows"` + Solver SolverConfig `yaml:"solver"` Modules ModulesConfig `yaml:"modules"` } @@ -32,6 +33,7 @@ type ModulesConfig struct { FreeCAD *ModuleToggle `yaml:"freecad"` Jobs *ModuleToggle `yaml:"jobs"` DAG *ModuleToggle `yaml:"dag"` + Solver *ModuleToggle `yaml:"solver"` } // ModuleToggle holds an optional enabled flag. The pointer allows @@ -146,6 +148,14 @@ type WorkflowsConfig struct { Directory string `yaml:"directory"` // default /etc/silo/workflows } +// SolverConfig holds assembly solver service settings. +type SolverConfig struct { + DefaultSolver string `yaml:"default_solver"` + MaxContextSizeMB int `yaml:"max_context_size_mb"` + DefaultTimeout int `yaml:"default_timeout"` + AutoDiagnoseOnCommit bool `yaml:"auto_diagnose_on_commit"` +} + // OdooConfig holds Odoo ERP integration settings. type OdooConfig struct { Enabled bool `yaml:"enabled"` @@ -204,6 +214,12 @@ func Load(path string) (*Config, error) { if cfg.Workflows.Directory == "" { cfg.Workflows.Directory = "/etc/silo/workflows" } + if cfg.Solver.MaxContextSizeMB == 0 { + cfg.Solver.MaxContextSizeMB = 10 + } + if cfg.Solver.DefaultTimeout == 0 { + cfg.Solver.DefaultTimeout = 300 + } // Override with environment variables if v := os.Getenv("SILO_DB_HOST"); v != "" { @@ -221,6 +237,9 @@ func Load(path string) (*Config, error) { if v := os.Getenv("SILO_STORAGE_ROOT_DIR"); v != "" { cfg.Storage.Filesystem.RootDir = v } + if v := os.Getenv("SILO_SOLVER_DEFAULT"); v != "" { + cfg.Solver.DefaultSolver = v + } // Auth defaults if cfg.Auth.LDAP.UserAttr == "" { diff --git a/internal/db/jobs.go b/internal/db/jobs.go index ef36edf..10a7d47 100644 --- a/internal/db/jobs.go +++ b/internal/db/jobs.go @@ -328,6 +328,55 @@ func (r *JobRepository) ListJobs(ctx context.Context, status, itemID string, lim return scanJobs(rows) } +// ListSolverJobs returns solver jobs (definition_name LIKE 'assembly-%') with optional filters. +func (r *JobRepository) ListSolverJobs(ctx context.Context, status, itemID, operation string, limit, offset int) ([]*Job, error) { + query := ` + SELECT id, job_definition_id, definition_name, status, priority, + item_id, project_id, scope_metadata, runner_id, runner_tags, + created_at, claimed_at, started_at, completed_at, + timeout_seconds, expires_at, progress, progress_message, + result, error_message, retry_count, max_retries, + created_by, cancelled_by + FROM jobs WHERE definition_name LIKE 'assembly-%'` + args := []any{} + argN := 1 + + if status != "" { + query += fmt.Sprintf(" AND status = $%d", argN) + args = append(args, status) + argN++ + } + if itemID != "" { + query += fmt.Sprintf(" AND item_id = $%d", argN) + args = append(args, itemID) + argN++ + } + if operation != "" { + query += fmt.Sprintf(" AND scope_metadata->>'operation' = $%d", argN) + args = append(args, operation) + argN++ + } + + query += " ORDER BY created_at DESC" + + if limit > 0 { + query += fmt.Sprintf(" LIMIT $%d", argN) + args = append(args, limit) + argN++ + } + if offset > 0 { + query += fmt.Sprintf(" OFFSET $%d", argN) + args = append(args, offset) + } + + rows, err := r.db.pool.Query(ctx, query, args...) + if err != nil { + return nil, fmt.Errorf("querying solver jobs: %w", err) + } + defer rows.Close() + return scanJobs(rows) +} + // ClaimJob atomically claims the next available job matching the runner's tags. // Uses SELECT FOR UPDATE SKIP LOCKED for exactly-once delivery. func (r *JobRepository) ClaimJob(ctx context.Context, runnerID string, tags []string) (*Job, error) { diff --git a/internal/db/solver_results.go b/internal/db/solver_results.go new file mode 100644 index 0000000..57313ed --- /dev/null +++ b/internal/db/solver_results.go @@ -0,0 +1,121 @@ +package db + +import ( + "context" + "fmt" + "time" + + "github.com/jackc/pgx/v5" +) + +// SolverResult represents a row in the solver_results table. +type SolverResult struct { + ID string + ItemID string + RevisionNumber int + JobID *string + Operation string // solve, diagnose, kinematic + SolverName string + Status string // SolveStatus string (Success, Failed, etc.) + DOF *int + Diagnostics []byte // raw JSONB + Placements []byte // raw JSONB + NumFrames int + SolveTimeMS *float64 + CreatedAt time.Time +} + +// SolverResultRepository provides solver_results database operations. +type SolverResultRepository struct { + db *DB +} + +// NewSolverResultRepository creates a new solver result repository. +func NewSolverResultRepository(db *DB) *SolverResultRepository { + return &SolverResultRepository{db: db} +} + +// Upsert inserts or updates a solver result. The UNIQUE(item_id, revision_number, operation) +// constraint means each revision has at most one result per operation type. +func (r *SolverResultRepository) Upsert(ctx context.Context, s *SolverResult) error { + err := r.db.pool.QueryRow(ctx, ` + INSERT INTO solver_results (item_id, revision_number, job_id, operation, + solver_name, status, dof, diagnostics, placements, + num_frames, solve_time_ms) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + ON CONFLICT (item_id, revision_number, operation) DO UPDATE SET + job_id = EXCLUDED.job_id, + solver_name = EXCLUDED.solver_name, + status = EXCLUDED.status, + dof = EXCLUDED.dof, + diagnostics = EXCLUDED.diagnostics, + placements = EXCLUDED.placements, + num_frames = EXCLUDED.num_frames, + solve_time_ms = EXCLUDED.solve_time_ms, + created_at = now() + RETURNING id, created_at + `, s.ItemID, s.RevisionNumber, s.JobID, s.Operation, + s.SolverName, s.Status, s.DOF, s.Diagnostics, s.Placements, + s.NumFrames, s.SolveTimeMS, + ).Scan(&s.ID, &s.CreatedAt) + if err != nil { + return fmt.Errorf("upserting solver result: %w", err) + } + return nil +} + +// GetByItem returns all solver results for an item, ordered by revision descending. +func (r *SolverResultRepository) GetByItem(ctx context.Context, itemID string) ([]*SolverResult, error) { + rows, err := r.db.pool.Query(ctx, ` + SELECT id, item_id, revision_number, job_id, operation, + solver_name, status, dof, diagnostics, placements, + num_frames, solve_time_ms, created_at + FROM solver_results + WHERE item_id = $1 + ORDER BY revision_number DESC, operation + `, itemID) + if err != nil { + return nil, fmt.Errorf("listing solver results: %w", err) + } + defer rows.Close() + return scanSolverResults(rows) +} + +// GetByItemRevision returns a single solver result for an item/revision/operation. +func (r *SolverResultRepository) GetByItemRevision(ctx context.Context, itemID string, revision int, operation string) (*SolverResult, error) { + s := &SolverResult{} + err := r.db.pool.QueryRow(ctx, ` + SELECT id, item_id, revision_number, job_id, operation, + solver_name, status, dof, diagnostics, placements, + num_frames, solve_time_ms, created_at + FROM solver_results + WHERE item_id = $1 AND revision_number = $2 AND operation = $3 + `, itemID, revision, operation).Scan( + &s.ID, &s.ItemID, &s.RevisionNumber, &s.JobID, &s.Operation, + &s.SolverName, &s.Status, &s.DOF, &s.Diagnostics, &s.Placements, + &s.NumFrames, &s.SolveTimeMS, &s.CreatedAt, + ) + if err == pgx.ErrNoRows { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("getting solver result: %w", err) + } + return s, nil +} + +func scanSolverResults(rows pgx.Rows) ([]*SolverResult, error) { + var results []*SolverResult + for rows.Next() { + s := &SolverResult{} + if err := rows.Scan( + &s.ID, &s.ItemID, &s.RevisionNumber, &s.JobID, &s.Operation, + &s.SolverName, &s.Status, &s.DOF, &s.Diagnostics, &s.Placements, + &s.NumFrames, &s.SolveTimeMS, &s.CreatedAt, + ); err != nil { + return nil, fmt.Errorf("scanning solver result: %w", err) + } + results = append(results, s) + } + return results, rows.Err() +} diff --git a/internal/modules/loader.go b/internal/modules/loader.go index 83d4bf1..90a99e5 100644 --- a/internal/modules/loader.go +++ b/internal/modules/loader.go @@ -33,6 +33,7 @@ func LoadState(r *Registry, cfg *config.Config, pool *pgxpool.Pool) error { applyToggle(r, FreeCAD, cfg.Modules.FreeCAD) applyToggle(r, Jobs, cfg.Modules.Jobs) applyToggle(r, DAG, cfg.Modules.DAG) + applyToggle(r, Solver, cfg.Modules.Solver) // Step 3: Apply database overrides (highest precedence). if pool != nil { diff --git a/internal/modules/modules.go b/internal/modules/modules.go index 14f2f9a..7360aa4 100644 --- a/internal/modules/modules.go +++ b/internal/modules/modules.go @@ -21,6 +21,7 @@ const ( FreeCAD = "freecad" Jobs = "jobs" DAG = "dag" + Solver = "solver" ) // ModuleInfo describes a module's metadata. @@ -58,6 +59,7 @@ var builtinModules = []ModuleInfo{ {ID: FreeCAD, Name: "Create Integration", Description: "URI scheme, executable path, client settings", DefaultEnabled: true}, {ID: Jobs, Name: "Job Queue", Description: "Async compute jobs, runner management", DependsOn: []string{Auth}}, {ID: DAG, Name: "Dependency DAG", Description: "Feature DAG sync, validation states, interference detection", DependsOn: []string{Jobs}}, + {ID: Solver, Name: "Solver", Description: "Assembly constraint solving via server-side runners", DependsOn: []string{Jobs}}, } // NewRegistry creates a registry with all builtin modules set to their default state. diff --git a/internal/modules/modules_test.go b/internal/modules/modules_test.go index 1747591..c89b0b8 100644 --- a/internal/modules/modules_test.go +++ b/internal/modules/modules_test.go @@ -137,8 +137,8 @@ func TestAll_ReturnsAllModules(t *testing.T) { r := NewRegistry() all := r.All() - if len(all) != 10 { - t.Errorf("expected 10 modules, got %d", len(all)) + if len(all) != 11 { + t.Errorf("expected 11 modules, got %d", len(all)) } // Should be sorted by ID. diff --git a/jobdefs/assembly-kinematic.yaml b/jobdefs/assembly-kinematic.yaml new file mode 100644 index 0000000..3b37d11 --- /dev/null +++ b/jobdefs/assembly-kinematic.yaml @@ -0,0 +1,23 @@ +job: + name: assembly-kinematic + version: 1 + description: "Run kinematic simulation" + + trigger: + type: manual + + scope: + type: assembly + + compute: + type: custom + command: solver-kinematic + args: + operation: kinematic + + runner: + tags: [solver] + + timeout: 1800 + max_retries: 0 + priority: 100 diff --git a/jobdefs/assembly-solve.yaml b/jobdefs/assembly-solve.yaml new file mode 100644 index 0000000..2ceaa4b --- /dev/null +++ b/jobdefs/assembly-solve.yaml @@ -0,0 +1,21 @@ +job: + name: assembly-solve + version: 1 + description: "Solve assembly constraints on server" + + trigger: + type: manual + + scope: + type: assembly + + compute: + type: custom + command: solver-run + + runner: + tags: [solver] + + timeout: 300 + max_retries: 1 + priority: 50 diff --git a/jobdefs/assembly-validate.yaml b/jobdefs/assembly-validate.yaml index abbee0a..af65ea3 100644 --- a/jobdefs/assembly-validate.yaml +++ b/jobdefs/assembly-validate.yaml @@ -1,7 +1,7 @@ job: name: assembly-validate version: 1 - description: "Validate assembly by rebuilding its dependency subgraph" + description: "Validate assembly constraints on commit" trigger: type: revision_created @@ -12,15 +12,14 @@ job: type: assembly compute: - type: validate - command: create-validate + type: custom + command: solver-diagnose args: - rebuild_mode: incremental - check_interference: true + operation: diagnose runner: - tags: [create] + tags: [solver] - timeout: 900 + timeout: 120 max_retries: 2 - priority: 50 + priority: 75 diff --git a/migrations/021_solver_results.sql b/migrations/021_solver_results.sql new file mode 100644 index 0000000..ad673ad --- /dev/null +++ b/migrations/021_solver_results.sql @@ -0,0 +1,29 @@ +-- Migration 021: Solver result cache table +-- +-- Stores the latest solve/diagnose/kinematic result per item revision. +-- The UNIQUE constraint means re-running an operation overwrites the previous result. +-- See docs/SOLVER.md Section 9. + +BEGIN; + +CREATE TABLE solver_results ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + item_id UUID NOT NULL REFERENCES items(id) ON DELETE CASCADE, + revision_number INTEGER NOT NULL, + job_id UUID REFERENCES jobs(id) ON DELETE SET NULL, + operation TEXT NOT NULL, -- 'solve', 'diagnose', 'kinematic' + solver_name TEXT NOT NULL, + status TEXT NOT NULL, -- SolveStatus string ('Success', 'Failed', etc.) + dof INTEGER, + diagnostics JSONB DEFAULT '[]', + placements JSONB DEFAULT '[]', + num_frames INTEGER DEFAULT 0, + solve_time_ms DOUBLE PRECISION, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + UNIQUE(item_id, revision_number, operation) +); + +CREATE INDEX idx_solver_results_item ON solver_results(item_id); +CREATE INDEX idx_solver_results_status ON solver_results(status); + +COMMIT; -- 2.49.1