Files
silo/pkg/calc/pythonpath/silo_calc/sync_engine.py
Zoe Forbes 36a8d9995d feat: LibreOffice Calc extension, ODS library, AI description, audit design
Calc extension (pkg/calc/):
- Python UNO ProtocolHandler with 8 toolbar commands
- SiloClient HTTP client adapted from FreeCAD workbench
- Pull BOM/Project: populates sheets with 28-col format, hidden property
  columns, row hash tracking, auto project tagging
- Push: row classification, create/update items, conflict detection
- Completion wizard: 3-step category/description/fields with PN conflict
  resolution dialog
- OpenRouter AI integration: generate standardized descriptions from seller
  text, configurable model/instructions, review dialog
- Settings: JSON persistence, env var fallbacks, OpenRouter fields
- 31 unit tests (no UNO/network required)

Go ODS library (internal/ods/):
- Pure Go ODS read/write (ZIP of XML, no headless LibreOffice)
- Writer, reader, 10 round-trip tests

Server ODS endpoints (internal/api/ods.go):
- GET /api/items/export.ods, template.ods, POST import.ods
- GET /api/items/{pn}/bom/export.ods
- GET /api/projects/{code}/sheet.ods
- POST /api/sheets/diff

Documentation:
- docs/CALC_EXTENSION.md: extension progress report
- docs/COMPONENT_AUDIT.md: web audit tool design with weighted scoring,
  assembly computed fields, batch AI assistance plan
2026-02-01 10:06:20 -06:00

161 lines
4.9 KiB
Python

"""Row hashing, diff classification, and sync state tracking.
Used by push/pull commands to detect which rows have been modified locally
since the last pull, and to detect conflicts with server-side changes.
"""
import hashlib
import json
from typing import Any, Dict, List, Optional, Tuple
from . import sheet_format as sf
# Row statuses
# Stored hash matches the hash recomputed from the row's current data.
STATUS_SYNCED = "synced"
# Stored hash exists but no longer matches the row's current data.
STATUS_MODIFIED = "modified"
# Row has data but no stored hash (never pulled from the server).
STATUS_NEW = "new"
# NOTE(review): not assigned by any function in this module -- presumably set
# by push/pull callers when a row fails to sync; confirm against those callers.
STATUS_ERROR = "error"
# NOTE(review): not assigned in this module either; build_push_diff reports
# conflicts under its "conflicts" key rather than via this constant -- confirm.
STATUS_CONFLICT = "conflict"
def compute_row_hash(cells: List[str]) -> str:
    """Return the SHA-256 hex digest of a row's data columns.

    Only the leading ``sf.COL_PROP_END`` cells (the visible and property
    columns) contribute to the digest; the sync tracking columns are not
    hashed.  Every falsy cell (empty string, ``None``, numeric zero) is
    treated as the empty string, and every other cell is stringified and
    stripped of surrounding whitespace before hashing.
    """
    fields = []
    for value in cells[: sf.COL_PROP_END]:
        # Falsy values collapse to "" so blank cells always hash identically.
        fields.append(str(value).strip() if value else "")
    digest = hashlib.sha256()
    # Tab-separated and UTF-8 encoded: the byte layout behind stored hashes.
    digest.update("\t".join(fields).encode("utf-8"))
    return digest.hexdigest()
def classify_row(cells: List[str]) -> str:
    """Return the sync status of a single row.

    The hash stored in ``sf.COL_ROW_HASH`` is compared with a hash freshly
    computed from the row's data columns:

    * no stored hash and no data  -> ``""`` (blank row)
    * no stored hash but some data -> ``STATUS_NEW``
    * stored hash equals the current hash -> ``STATUS_SYNCED``
    * stored hash differs -> ``STATUS_MODIFIED``

    Cells are normalised the same way ``compute_row_hash`` normalises them
    (falsy -> ``""``, otherwise ``str(value).strip()``), so rows holding
    non-string values such as numbers or ``None`` are classified instead of
    raising ``AttributeError``.

    Note: *cells* is padded in place with empty strings up to
    ``sf.BOM_TOTAL_COLS`` entries; pass a copy to keep the original intact.
    """
    # Pad short rows so the sync tracking columns can be indexed safely.
    shortfall = sf.BOM_TOTAL_COLS - len(cells)
    if shortfall > 0:
        cells.extend([""] * shortfall)

    raw_hash = cells[sf.COL_ROW_HASH]
    stored_hash = str(raw_hash).strip() if raw_hash else ""

    if not stored_hash:
        # No hash -> never pulled from the server: either new or blank.
        has_data = any(
            str(value).strip() for value in cells[: sf.COL_PROP_END] if value
        )
        return STATUS_NEW if has_data else ""

    # A hash exists: the row is unchanged only if its data still matches it.
    if compute_row_hash(cells) == stored_hash:
        return STATUS_SYNCED
    return STATUS_MODIFIED
def classify_rows(all_rows: List[List[str]]) -> List[Tuple[int, str, List[str]]]:
    """Classify the data rows of a sheet.

    The first row (index 0) is treated as the header and skipped.  Every
    other row is passed to ``classify_row``; rows for which it returns an
    empty status (blank separator rows) are omitted.

    Returns a list of ``(row_index, status, cells)`` tuples, where ``cells``
    is a fresh list copy of the original row.
    """
    numbered = enumerate(all_rows)
    next(numbered, None)  # discard the header row
    classified = []
    for index, row in numbered:
        # classify_row gets its own copy; the result carries a separate one.
        status = classify_row(list(row))
        if not status:
            continue
        classified.append((index, status, list(row)))
    return classified
def _cell_text(cells: List[str], index: int) -> str:
    """Return cell *index* as stripped text, or "" if missing or blank."""
    if index >= len(cells):
        return ""
    value = cells[index]
    return str(value).strip() if value else ""


def build_push_diff(
    classified: List[Tuple[int, str, List[str]]],
    server_timestamps: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
    """Build a push diff from classified rows.

    *classified* is a list of ``(row_index, status, cells)`` tuples as
    produced by ``classify_rows``.  *server_timestamps* maps part numbers to
    their server ``updated_at`` values and is used for conflict detection.

    A modified row is reported as a conflict when both the timestamp stored
    in the row and the server timestamp for its part number are non-empty
    and differ; otherwise it is reported as a plain modification.  Rows
    whose status is neither new, modified nor synced are ignored.

    Returns a dict with the keys ``new``, ``modified`` and ``conflicts``
    (each a list of row-info dicts) plus ``unchanged`` (an ``int`` count of
    synced rows).  Every row-info dict holds ``row_index``, ``part_number``,
    ``description`` and ``cells`` (the data columns only); conflict entries
    additionally carry ``local_ts`` and ``server_ts``.
    """
    server_ts = server_timestamps or {}
    new_rows = []
    modified_rows = []
    conflicts = []
    unchanged = 0

    for row_idx, status, cells in classified:
        if status == STATUS_SYNCED:
            unchanged += 1
            continue
        if status not in (STATUS_NEW, STATUS_MODIFIED):
            # Nothing to push for other statuses.
            continue

        pn = _cell_text(cells, sf.COL_PN)
        row_info = {
            "row_index": row_idx,
            "part_number": pn,
            "description": _cell_text(cells, sf.COL_DESCRIPTION),
            "cells": cells[: sf.COL_PROP_END],
        }

        if status == STATUS_NEW:
            new_rows.append(row_info)
            continue

        # Modified row: a conflict if the server changed since we pulled.
        stored_ts = _cell_text(cells, sf.COL_UPDATED_AT)
        server_updated = server_ts.get(pn, "")
        if stored_ts and server_updated and server_updated != stored_ts:
            row_info["local_ts"] = stored_ts
            row_info["server_ts"] = server_updated
            conflicts.append(row_info)
        else:
            modified_rows.append(row_info)

    return {
        "new": new_rows,
        "modified": modified_rows,
        "conflicts": conflicts,
        "unchanged": unchanged,
    }
def update_row_sync_state(
    cells: List[str],
    status: str,
    updated_at: str = "",
    parent_pn: str = "",
) -> List[str]:
    """Write the sync tracking columns of a row and return the row.

    *cells* is modified in place: it is padded with empty strings up to
    ``sf.BOM_TOTAL_COLS`` entries, its hash column is set to a hash freshly
    computed by ``compute_row_hash``, and its status column is set to
    *status*.  The updated-at and parent part-number columns are only
    overwritten when a non-empty *updated_at* / *parent_pn* is supplied.
    """
    missing = sf.BOM_TOTAL_COLS - len(cells)
    if missing > 0:
        cells.extend([""] * missing)

    # Hash first: it is stored in the hash column before the other sync
    # columns are touched.
    cells[sf.COL_ROW_HASH] = compute_row_hash(cells)
    cells[sf.COL_ROW_STATUS] = status

    # Empty arguments mean "keep whatever the row already holds".
    if updated_at:
        cells[sf.COL_UPDATED_AT] = updated_at
    if parent_pn:
        cells[sf.COL_PARENT_PN] = parent_pn
    return cells