package ods

import (
	"archive/zip"
	"bytes"
	"encoding/xml"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// Read decodes an in-memory ODS archive into a Workbook.
//
// The bytes are opened as a zip archive. Every entry named "content.xml" is
// parsed into the workbook's sheets and every entry named "meta.xml" into its
// metadata map; all other entries are ignored. If an entry name occurs more
// than once, the last occurrence wins. When "content.xml" is absent the
// returned workbook has no sheets, and when "meta.xml" is absent Meta is an
// empty, non-nil map.
//
// Errors from opening the archive or from either parser are wrapped with the
// step that failed.
func Read(data []byte) (*Workbook, error) {
	archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("open zip: %w", err)
	}

	workbook := &Workbook{Meta: map[string]string{}}
	for _, entry := range archive.File {
		if entry.Name == "content.xml" {
			parsed, err := readContent(entry)
			if err != nil {
				return nil, fmt.Errorf("read content.xml: %w", err)
			}
			workbook.Sheets = parsed
			continue
		}
		if entry.Name == "meta.xml" {
			parsed, err := readMeta(entry)
			if err != nil {
				return nil, fmt.Errorf("read meta.xml: %w", err)
			}
			workbook.Meta = parsed
		}
	}
	return workbook, nil
}

// readContent opens the given archive entry, reads it fully into memory and
// hands the bytes to parseContentXML. The entry is closed before returning.
func readContent(f *zip.File) ([]Sheet, error) {
	src, err := f.Open()
	if err != nil {
		return nil, err
	}
	defer src.Close()

	raw, err := io.ReadAll(src)
	if err != nil {
		return nil, err
	}
	return parseContentXML(raw)
}

// parseContentXML extracts sheet data from content.xml bytes.
func parseContentXML(data []byte) ([]Sheet, error) { decoder := xml.NewDecoder(bytes.NewReader(data)) var sheets []Sheet var currentSheet *Sheet var currentRow *Row var currentCellText strings.Builder var inTextP bool // Current cell attributes for the cell being parsed var cellValueType string var cellValue string var cellFormula string var cellRepeated int // Track row repeated var rowRepeated int for { tok, err := decoder.Token() if err == io.EOF { break } if err != nil { return nil, fmt.Errorf("xml decode: %w", err) } switch t := tok.(type) { case xml.StartElement: localName := t.Name.Local switch localName { case "table": name := getAttr(t.Attr, "name") sheets = append(sheets, Sheet{Name: name}) currentSheet = &sheets[len(sheets)-1] case "table-column": if currentSheet != nil { col := Column{} vis := getAttr(t.Attr, "visibility") if vis == "collapse" || vis == "hidden" { col.Hidden = true } width := getAttrNS(t.Attr, "column-width") if width != "" { col.Width = width } // Handle repeated columns rep := getAttr(t.Attr, "number-columns-repeated") count := 1 if rep != "" { if n, err := strconv.Atoi(rep); err == nil && n > 0 { count = n } } // Cap at reasonable max to avoid memory issues from huge repeated counts if count > 1024 { count = 1024 } for i := 0; i < count; i++ { currentSheet.Columns = append(currentSheet.Columns, col) } } case "table-row": rowRepeated = 1 rep := getAttr(t.Attr, "number-rows-repeated") if rep != "" { if n, err := strconv.Atoi(rep); err == nil && n > 0 { rowRepeated = n } } currentRow = &Row{} case "table-cell": cellValueType = getAttrNS(t.Attr, "value-type") cellValue = getAttrNS(t.Attr, "value") if cellValue == "" { cellValue = getAttrNS(t.Attr, "date-value") } cellFormula = getAttr(t.Attr, "formula") cellRepeated = 1 rep := getAttr(t.Attr, "number-columns-repeated") if rep != "" { if n, err := strconv.Atoi(rep); err == nil && n > 0 { cellRepeated = n } } currentCellText.Reset() case "covered-table-cell": // Merged cell continuation -- 
treat as empty if currentRow != nil { rep := getAttr(t.Attr, "number-columns-repeated") count := 1 if rep != "" { if n, err := strconv.Atoi(rep); err == nil && n > 0 { count = n } } if count > 1024 { count = 1024 } for i := 0; i < count; i++ { currentRow.Cells = append(currentRow.Cells, Cell{Type: CellEmpty}) } } case "p": inTextP = true } case xml.CharData: if inTextP { currentCellText.Write(t) } case xml.EndElement: localName := t.Name.Local switch localName { case "table": currentSheet = nil case "table-row": if currentRow != nil && currentSheet != nil { // Determine if the row is blank isBlank := true for _, c := range currentRow.Cells { if c.Type != CellEmpty && c.Value != "" { isBlank = false break } } currentRow.IsBlank = isBlank && len(currentRow.Cells) == 0 // Cap row repeats to avoid memory blow-up from trailing empty rows if rowRepeated > 1 && isBlank { // Only emit one blank row for large repeats (trailing whitespace) if rowRepeated > 2 { rowRepeated = 1 } } for i := 0; i < rowRepeated; i++ { rowCopy := Row{ IsBlank: currentRow.IsBlank, Cells: make([]Cell, len(currentRow.Cells)), } copy(rowCopy.Cells, currentRow.Cells) currentSheet.Rows = append(currentSheet.Rows, rowCopy) } } currentRow = nil case "table-cell": if currentRow != nil { cell := buildCell(cellValueType, cellValue, cellFormula, currentCellText.String()) // Cap repeated to avoid memory issues from trailing empties if cellRepeated > 256 && cell.Type == CellEmpty && cell.Value == "" { cellRepeated = 1 } for i := 0; i < cellRepeated; i++ { currentRow.Cells = append(currentRow.Cells, cell) } } cellValueType = "" cellValue = "" cellFormula = "" cellRepeated = 1 currentCellText.Reset() case "p": inTextP = false } } } // Trim trailing empty rows from each sheet for i := range sheets { sheets[i].Rows = trimTrailingBlankRows(sheets[i].Rows) } // Trim trailing empty cells from each row for i := range sheets { for j := range sheets[i].Rows { sheets[i].Rows[j].Cells = 
trimTrailingEmptyCells(sheets[i].Rows[j].Cells) } } return sheets, nil } func buildCell(valueType, value, formula, text string) Cell { if formula != "" { return Cell{ Type: CellFormula, Formula: formula, Value: text, } } switch valueType { case "float": // Prefer the office:value attribute for precision; fall back to text v := value if v == "" { v = text } return Cell{Type: CellFloat, Value: v} case "currency": v := value if v == "" { v = strings.TrimPrefix(text, "$") v = strings.ReplaceAll(v, ",", "") } return Cell{Type: CellCurrency, Value: v} case "date": v := value if v == "" { v = text } return Cell{Type: CellDate, Value: v} case "string": return Cell{Type: CellString, Value: text} default: if text != "" { return Cell{Type: CellString, Value: text} } return Cell{Type: CellEmpty} } } // readMeta parses meta.xml for custom Silo metadata. func readMeta(f *zip.File) (map[string]string, error) { rc, err := f.Open() if err != nil { return nil, err } defer rc.Close() data, err := io.ReadAll(rc) if err != nil { return nil, err } return parseMetaXML(data) } func parseMetaXML(data []byte) (map[string]string, error) { decoder := xml.NewDecoder(bytes.NewReader(data)) meta := make(map[string]string) var inUserDefined bool var userDefName string var textBuf strings.Builder for { tok, err := decoder.Token() if err == io.EOF { break } if err != nil { return nil, err } switch t := tok.(type) { case xml.StartElement: if t.Name.Local == "user-defined" { inUserDefined = true userDefName = getAttrNS(t.Attr, "name") textBuf.Reset() } case xml.CharData: if inUserDefined { textBuf.Write(t) } case xml.EndElement: if t.Name.Local == "user-defined" && inUserDefined { if userDefName == "_silo_meta" { // Parse key=value pairs for _, line := range strings.Split(textBuf.String(), "\n") { line = strings.TrimSpace(line) if idx := strings.Index(line, "="); idx > 0 { meta[line[:idx]] = line[idx+1:] } } } else if userDefName != "" { meta[userDefName] = textBuf.String() } inUserDefined = false 
userDefName = "" } } } return meta, nil } // getAttr returns the value of a local-name attribute (no namespace). func getAttr(attrs []xml.Attr, localName string) string { for _, a := range attrs { if a.Name.Local == localName { return a.Value } } return "" } // getAttrNS returns the value of a local-name attribute, ignoring namespace. func getAttrNS(attrs []xml.Attr, localName string) string { for _, a := range attrs { if a.Name.Local == localName { return a.Value } } return "" } func trimTrailingBlankRows(rows []Row) []Row { for len(rows) > 0 { last := rows[len(rows)-1] if last.IsBlank || isRowEmpty(last) { rows = rows[:len(rows)-1] } else { break } } return rows } func isRowEmpty(row Row) bool { for _, c := range row.Cells { if c.Type != CellEmpty && c.Value != "" { return false } } return true } func trimTrailingEmptyCells(cells []Cell) []Cell { for len(cells) > 0 { last := cells[len(cells)-1] if last.Type == CellEmpty && last.Value == "" { cells = cells[:len(cells)-1] } else { break } } return cells }