From 74f09bbf8d8224fb20de6e4a0c9e3edce320bca6 Mon Sep 17 00:00:00 2001 From: wmayer Date: Fri, 7 Mar 2025 17:46:09 +0100 Subject: [PATCH] Sheet: Fix reading xlsx files. For the specs of the XLSX file format, see: https://jkp-ads.com/articles/excel2007fileformat00.aspx or https://www.data2type.de/xml-xslt-xslfo/spreadsheetml/xlsx-format In order to find the correct sheet file, one first has to read xl/_rels/workbook.xml.rels and save the mapping from each relationship's Id to its Target. The file xl/workbook.xml contains a list of sheet elements, and each element has the attribute 'r:id' set. This attribute can be used to look up the actual data file. This fixes issue 19757. --- src/Mod/Spreadsheet/importXLSX.py | 36 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/Mod/Spreadsheet/importXLSX.py b/src/Mod/Spreadsheet/importXLSX.py index 2b06bfa45e..6c28188544 100644 --- a/src/Mod/Spreadsheet/importXLSX.py +++ b/src/Mod/Spreadsheet/importXLSX.py @@ -364,8 +364,22 @@ def handleCells(cellList, actCellSheet, sList): actCellSheet.set(ref, (sList[int(theValue)])) -def handleWorkBook(theBook, sheetDict, Doc): +def handleWorkBookRels(theBookRels): + theRels = theBookRels.getElementsByTagName("Relationship") + idTarget = {} + for rel in theRels: + relAtts = rel.attributes + idRef = relAtts.getNamedItem("Id") + relRef = getText(idRef.childNodes) + targetRef = relAtts.getNamedItem("Target") + relTarget = getText(targetRef.childNodes) + idTarget[relRef] = relTarget + return idTarget + + +def handleWorkBook(theBook, theBookRels, sheetDict, Doc): theSheets = theBook.getElementsByTagName("sheet") + theIdTargetMap = handleWorkBookRels(theBookRels) # print("theSheets: ", theSheets) for sheet in theSheets: sheetAtts = sheet.attributes @@ -373,7 +387,7 @@ def handleWorkBook(theBook, sheetDict, Doc): sheetName = getText(nameRef.childNodes) # print("table name: ", sheetName) idRef = sheetAtts.getNamedItem("r:id") - sheetFile = "sheet" +
getText(idRef.childNodes)[3:] + ".xml" + sheetFile = theIdTargetMap[getText(idRef.childNodes)] # print("sheetFile: ", sheetFile) # add FreeCAD-spreadsheet sheetDict[sheetName] = (Doc.addObject("Spreadsheet::Sheet", sheetName), sheetFile) @@ -397,10 +411,10 @@ def handleWorkBook(theBook, sheetDict, Doc): def handleStrings(theStr, sList): - print("process Strings: ") + # print("process Strings: ") stringElements = theStr.getElementsByTagName("t") for sElem in stringElements: - print("string: ", getText(sElem.childNodes)) + # print("string: ", getText(sElem.childNodes)) sList.append(getText(sElem.childNodes)) @@ -416,8 +430,11 @@ def open(nameXLSX): theBookFile = z.open("xl/workbook.xml") theBook = xml.dom.minidom.parse(theBookFile) - handleWorkBook(theBook, sheetDict, theDoc) + theBookRelsFile = z.open("xl/_rels/workbook.xml.rels") + theBookRels = xml.dom.minidom.parse(theBookRelsFile) + handleWorkBook(theBook, theBookRels, sheetDict, theDoc) theBook.unlink() + theBookRels.unlink() if "xl/sharedStrings.xml" in z.namelist(): theStringFile = z.open("xl/sharedStrings.xml") @@ -428,7 +445,7 @@ def open(nameXLSX): for sheetSpec in sheetDict: # print("sheetSpec: ", sheetSpec) theSheet, sheetFile = sheetDict[sheetSpec] - f = z.open("xl/worksheets/" + sheetFile) + f = z.open("xl/" + sheetFile) myDom = xml.dom.minidom.parse(f) handleWorkSheet(myDom, theSheet, stringList) @@ -455,8 +472,11 @@ def insert(nameXLSX, docname): z = zipfile.ZipFile(nameXLSX) theBookFile = z.open("xl/workbook.xml") theBook = xml.dom.minidom.parse(theBookFile) - handleWorkBook(theBook, sheetDict, theDoc) + theBookRelsFile = z.open("xl/_rels/workbook.xml.rels") + theBookRels = xml.dom.minidom.parse(theBookRelsFile) + handleWorkBook(theBook, theBookRels, sheetDict, theDoc) theBook.unlink() + theBookRels.unlink() if "xl/sharedStrings.xml" in z.namelist(): theStringFile = z.open("xl/sharedStrings.xml") @@ -467,7 +487,7 @@ def insert(nameXLSX, docname): for sheetSpec in sheetDict: # print("sheetSpec: ", 
sheetSpec) theSheet, sheetFile = sheetDict[sheetSpec] - f = z.open("xl/worksheets/" + sheetFile) + f = z.open("xl/" + sheetFile) myDom = xml.dom.minidom.parse(f) handleWorkSheet(myDom, theSheet, stringList)