Sheet: Fix reading xlsx files

For the specification of the XLSX file format, see:
https://jkp-ads.com/articles/excel2007fileformat00.aspx or
https://www.data2type.de/xml-xslt-xslfo/spreadsheetml/xlsx-format

To find the correct sheet file, one first has to read
xl/_rels/workbook.xml.rels and store the mapping from each relationship
Id to its Target (a path that the importer resolves relative to xl/).
The file xl/workbook.xml contains a list of sheet elements, each of which
has the attribute 'r:id' set. This attribute can be used
to look up the actual data file.

This fixes issue 19757
This commit is contained in:
wmayer
2025-03-07 17:46:09 +01:00
committed by Ladislav Michl
parent 4d123e8ae9
commit 74f09bbf8d

View File

@@ -364,8 +364,22 @@ def handleCells(cellList, actCellSheet, sList):
actCellSheet.set(ref, (sList[int(theValue)]))
def handleWorkBook(theBook, sheetDict, Doc):
def handleWorkBookRels(theBookRels):
    """Build a lookup table from relationship ids to relationship targets.

    Every ``Relationship`` element found in the parsed relationships
    document contributes one entry: the value of its ``Id`` attribute is
    the key and the value of its ``Target`` attribute is the value. The
    target string is stored exactly as written in the file.

    Parameters
    ----------
    theBookRels
        A DOM document/node (as produced by ``xml.dom.minidom``) holding
        the ``Relationship`` elements.

    Returns
    -------
    dict
        Mapping of ``Id`` -> ``Target``. If an id occurs more than once,
        the last occurrence wins.
    """
    idTarget = {}
    for rel in theBookRels.getElementsByTagName("Relationship"):
        # An entry without both attributes cannot be resolved to a file;
        # skip it instead of failing on a missing attribute node.
        if not (rel.hasAttribute("Id") and rel.hasAttribute("Target")):
            continue
        idTarget[rel.getAttribute("Id")] = rel.getAttribute("Target")
    return idTarget
def handleWorkBook(theBook, theBookRels, sheetDict, Doc):
theSheets = theBook.getElementsByTagName("sheet")
theIdTargetMap = handleWorkBookRels(theBookRels)
# print("theSheets: ", theSheets)
for sheet in theSheets:
sheetAtts = sheet.attributes
@@ -373,7 +387,7 @@ def handleWorkBook(theBook, sheetDict, Doc):
sheetName = getText(nameRef.childNodes)
# print("table name: ", sheetName)
idRef = sheetAtts.getNamedItem("r:id")
sheetFile = "sheet" + getText(idRef.childNodes)[3:] + ".xml"
sheetFile = theIdTargetMap[getText(idRef.childNodes)]
# print("sheetFile: ", sheetFile)
# add FreeCAD-spreadsheet
sheetDict[sheetName] = (Doc.addObject("Spreadsheet::Sheet", sheetName), sheetFile)
@@ -397,10 +411,10 @@ def handleWorkBook(theBook, sheetDict, Doc):
def handleStrings(theStr, sList):
print("process Strings: ")
# print("process Strings: ")
stringElements = theStr.getElementsByTagName("t")
for sElem in stringElements:
print("string: ", getText(sElem.childNodes))
# print("string: ", getText(sElem.childNodes))
sList.append(getText(sElem.childNodes))
@@ -416,8 +430,11 @@ def open(nameXLSX):
theBookFile = z.open("xl/workbook.xml")
theBook = xml.dom.minidom.parse(theBookFile)
handleWorkBook(theBook, sheetDict, theDoc)
theBookRelsFile = z.open("xl/_rels/workbook.xml.rels")
theBookRels = xml.dom.minidom.parse(theBookRelsFile)
handleWorkBook(theBook, theBookRels, sheetDict, theDoc)
theBook.unlink()
theBookRels.unlink()
if "xl/sharedStrings.xml" in z.namelist():
theStringFile = z.open("xl/sharedStrings.xml")
@@ -428,7 +445,7 @@ def open(nameXLSX):
for sheetSpec in sheetDict:
# print("sheetSpec: ", sheetSpec)
theSheet, sheetFile = sheetDict[sheetSpec]
f = z.open("xl/worksheets/" + sheetFile)
f = z.open("xl/" + sheetFile)
myDom = xml.dom.minidom.parse(f)
handleWorkSheet(myDom, theSheet, stringList)
@@ -455,8 +472,11 @@ def insert(nameXLSX, docname):
z = zipfile.ZipFile(nameXLSX)
theBookFile = z.open("xl/workbook.xml")
theBook = xml.dom.minidom.parse(theBookFile)
handleWorkBook(theBook, sheetDict, theDoc)
theBookRelsFile = z.open("xl/_rels/workbook.xml.rels")
theBookRels = xml.dom.minidom.parse(theBookRelsFile)
handleWorkBook(theBook, theBookRels, sheetDict, theDoc)
theBook.unlink()
theBookRels.unlink()
if "xl/sharedStrings.xml" in z.namelist():
theStringFile = z.open("xl/sharedStrings.xml")
@@ -467,7 +487,7 @@ def insert(nameXLSX, docname):
for sheetSpec in sheetDict:
# print("sheetSpec: ", sheetSpec)
theSheet, sheetFile = sheetDict[sheetSpec]
f = z.open("xl/worksheets/" + sheetFile)
f = z.open("xl/" + sheetFile)
myDom = xml.dom.minidom.parse(f)
handleWorkSheet(myDom, theSheet, stringList)