create/src/Mod/Spreadsheet/importXLSX.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#***************************************************************************
#*   Copyright (c) 2016 Ulrich Brammer <ulrich1a@users.sourceforge.net>    *
#*                                                                         *
#*   This program is free software; you can redistribute it and/or modify  *
#*   it under the terms of the GNU General Public License (GPL)            *
#*   as published by the Free Software Foundation; either version 2 of     *
#*   the License, or (at your option) any later version.                   *
#*   for detail see the LICENCE text file.                                 *
#*                                                                         *
#*   FreeCAD is distributed in the hope that it will be useful,            *
#*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
#*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
#*   GNU Library General Public License for more details.                  *
#*                                                                         *
#*   You should have received a copy of the GNU Library General Public     *
#*   License along with FreeCAD; if not, write to the Free Software        *
#*   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  *
#*   USA                                                                   *
#***************************************************************************/

from __future__ import print_function

# Module metadata: human-readable title, author contact and project URL(s).
__title__  = "FreeCAD Spreadsheet Workbench - XLSX importer"
__author__ = "Ulrich Brammer <ulrich1a@users.sourceforge.net>"
__url__    = ["https://www.freecadweb.org"]

'''
This library imports an Excel-XLSX-file into FreeCAD.

Version 1.1, Nov. 2016:
Changed parser, adds rad-unit to trigonometric functions in order
to give the same result in FreeCAD.
Added factor to arcus-function in order to give the same result in FreeCAD
Added support for celltype "inlineStr"

Version 1.0:
It uses a minimal parser, in order to translate the IF-function into
the different FreeCAD version.
The other function-names are translated by search and replace.
Features:
- Imports tables defined inside Excel-document
- Set alias definitions
- Translate formulas known by FreeCAD. (see tokenDic as by version 1.1)
- set cross table references
- strings are imported
- references to cells with strings are working

known issues:
- units are not imported
- string support is minimal, the same as in FreeCAD
'''


import zipfile
import xml.dom.minidom
import FreeCAD as App
import sys

# Detect whether the FreeCAD GUI module can be used.  A missing module raises
# ImportError, which has to be caught as well; otherwise the whole importer
# fails to load in an environment without the GUI.
# NOTE(review): ValueError is what the original code guarded against; it is
# kept for backward compatibility - confirm which set-up raises it.
try:
  import FreeCADGui
except (ImportError, ValueError):
  gui = False
else:
  gui = True

# Keep a reference to the builtin open(): this module defines its own open()
# further down, which would otherwise hide the builtin.
if open.__module__ in ['__builtin__','io']:
  pythonopen = open


# The sepToken structure is used in the tokenizer functions isKey and
# getNextToken.
# sepToken defines a search tree for separator tokens with length of 1 to 3 characters
# it is also used as a list of separators between other tokens.
# Every character listed here is a separator.  The double quote and the
# semicolon are deliberately not part of the list.
# A value of None marks a complete one-character token; the two angle
# brackets instead carry the name of the follow-up table that lists the
# characters which may extend them to a two-character operator.
sepToken = dict.fromkeys('(=<>) ,!+-*/^')
sepToken['<'] = 'branchLower'
sepToken['>'] = 'branchHigher'

# '<' may be continued to '<>' or '<='.
branchLower = dict.fromkeys('>=')

# '>' may be continued to '>='.
branchHigher = dict.fromkeys('=')


# Resolves the table names stored in sepToken to the tables themselves.
treeDict = dict(branchLower=branchLower, branchHigher=branchHigher)

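# The tokenDic is used in parseExpr.
# Each tokenDic entry is a tuple with the following information:
# levelchange: +1: step down into a new child node (opening parenthesis),
#              -1: step back up to the parent node (closing parenthesis),
#               0: stay on the current node
# replacement token
# function-state: needed to do something special in the parser
#     0 = normal, 1 = the pi-case, 2 = angle-function, 3 = IF-function
#     parseExpr additionally sets the node states 4 = IF-truecase and
#     6 = IF-falsecase while it walks through the arguments of an IF.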


# Maps an Excel token to the triple
# (level change, FreeCAD replacement text, function-state).
tokenDic = {
  # Parentheses are the only tokens with a non-zero level change.
  '(': (1, '(', 0),
  ')': (-1, ')', 0),

  # Comparison operators.
  '=': (0, '==', 0),
  '<>': (0, '!=', 0),
  '>=': (0, '>=', 0),
  '<=': (0, '<=', 0),
  '<': (0, '<', 0),
  '>': (0, '>', 0),

  # List separator and the connector to cells on other sheets.
  ',': (0, ',', 0),
  '!': (0, '.', 0),

  # Arithmetic operators keep their spelling.
  '+': (0, '+', 0),
  '-': (0, '-', 0),
  '*': (0, '*', 0),
  '/': (0, '/', 0),
  '^': (0, '^', 0),

  # IF has no replacement text; function-state 3 selects the IF handling.
  'IF': (0, '', 3),

  # Functions that only get a different name.
  'ABS': (0, 'abs', 0),
  'EXP': (0, 'exp', 0),
  'LOG': (0, 'log', 0),
  'LOG10': (0, 'log10', 0),
  'MOD': (0, 'mod', 0),
  'POWER': (0, 'pow', 0),
  'SQRT': (0, 'sqrt', 0),
  'AVERAGE': (0, 'average', 0),
  'COUNT': (0, 'count', 0),
  'MAX': (0, 'max', 0),
  'MIN': (0, 'min', 0),
  'STDEVA': (0, 'stddev', 0),
  'SUM': (0, 'sum', 0),
  '_xlfn.CEILING.MATH': (0, 'ceil', 0),
  '_xlfn.FLOOR.MATH': (0, 'floor', 0),

  # Arcus functions get a conversion factor in front of the call.
  'ACOS': (0, 'pi/180deg*acos', 0),
  'ASIN': (0, 'pi/180deg*asin', 0),
  'ATAN': (0, 'pi/180deg*atan', 0),
  'ATAN2': (0, 'pi/180deg*atan2', 0),

  # Function-state 2: the argument gets wrapped by the angle handling.
  'COS': (0, 'cos', 2),
  'COSH': (0, 'cosh', 2),
  'SIN': (0, 'sin', 2),
  'SINH': (0, 'sinh', 2),
  'TAN': (0, 'tan', 2),
  'TANH': (0, 'tanh', 2),

  # Function-state 1: the pi-case.
  'PI': (0, 'pi', 1),
  }


class exprNode(object):
  ''' One node of the tree that is built while an expression is parsed.
  Each nesting level of the expression gets a node of its own, which
  collects the translated text of that level.'''
  def __init__(self, parent, state, actIndex):
    self.result = ''        # translated text collected on this level so far
    self.lIndex = actIndex  # position in the token list
    self.parent = parent    # node of the enclosing level
    self.state = state      # parser state of this level (angle-function, IF-function, ...)


class FormulaTranslator(object):
  ''' This class translates a cell-formula from Excel to FreeCAD.

  The formula text is split into tokens by getNextToken; parseExpr then
  rewrites the tokens with the help of the module-level tables sepToken,
  treeDict and tokenDic.  The leading '=' of the result is supplied by the
  class itself (it is the first entry of the token list).'''
  def __init__(self):
    self.tokenList = ['=']

  def translateForm(self, actExpr):
    ''' Translate the formula text actExpr and return the FreeCAD expression.

    Raises IndexError if actExpr does not yield any token (empty formula),
    because parseExpr then has nothing to read.'''
    # Start with a fresh token list, so that one instance can translate
    # several formulas without mixing their tokens.
    self.tokenList = ['=']
    self.getNextToken(actExpr)
    #print("tokenList: ", self.tokenList)
    self.resultTree = exprNode(None, 0, 1)
    self.resultTree.result = self.tokenList[0]
    self.parseExpr(self.resultTree)
    #print('parseResult: ', self.resultTree.result)
    return self.resultTree.result

  def getNextToken(self, theExpr):
    ''' This is the tokenizer for an excel formula.
    It appends all identified tokens to self.tokenList.

    The tokenizer works iteratively, so the length of a formula is not
    limited by the recursion depth of the interpreter.'''
    while len(theExpr) > 0:
      theTok = theExpr[0]
      theExpr = theExpr[1:]
      if theTok in sepToken:
        # A separator.  Extend it while the follow-up table of the current
        # token lists the next character ('<' -> '<=' or '<>', '>' -> '>=').
        # The length check keeps a formula that ends with '<' or '>' from
        # raising an IndexError.
        branch = sepToken[theTok]
        while branch and len(theExpr) > 0 and theExpr[0] in treeDict[branch]:
          branch = treeDict[branch][theExpr[0]]
          theTok = theTok + theExpr[0]
          theExpr = theExpr[1:]
      else:
        # Anything else (name, number, cell reference): collect characters
        # up to the next separator or the end of the formula.
        while len(theExpr) > 0 and not self.isKey(theExpr):
          theTok = theTok + theExpr[0]
          theExpr = theExpr[1:]
      self.tokenList.append(theTok)


  def isKey(self, theExpr):
    ''' Return True if theExpr starts with a separator token.

    Every separator token starts with a character of sepToken and every
    character of sepToken is a token of its own, so looking at the first
    character is sufficient.  (The former walk through the follow-up tables
    looked them up with the wrong character and raised a KeyError for
    '<=', '>=' and '<>' following a name or number.)'''
    #print('look up: ', theExpr)
    return len(theExpr) > 0 and theExpr[0] in sepToken


  def parseExpr(self, treeNode):
    ''' Translate the token at treeNode.lIndex and continue with the next ones.

    The translated text is collected in treeNode.result.  An opening
    parenthesis creates a child node; a closing parenthesis appends the text
    of the node to its parent.'''
    token = self.tokenList[treeNode.lIndex]
    treeNode.lIndex += 1
    if token in tokenDic:
      lChange, newToken, funcState = tokenDic[token]
    else:
      # Unknown tokens (numbers, cell references, names) are copied unchanged.
      lChange = 0
      newToken = token
      funcState = 0
    #print('treeNode.state: ', treeNode.state, ' my.index: ', treeNode.lIndex-1, ' ', token, ' fState: ', funcState)

    if token == ',':
      # On the level of an IF node the first comma becomes '?' and the second
      # one ':'.  State 4 has to be tested before state 3, otherwise the
      # first comma would be handled twice.
      if (treeNode.state == 4):
        newToken = ':'
        treeNode.state = 6
      if (treeNode.state == 3):
        newToken = '?'
        treeNode.state = 4

    if funcState == 3:
      # IF: nothing is written for the function name itself, its arguments
      # are collected by a node of their own.
      funcState = 0
      newNode = exprNode(treeNode, 3, treeNode.lIndex)
      self.parseIF(newNode)
    else:
      treeNode.result = treeNode.result + newToken

    if funcState == 2:
      # Angle function: parseAngle opens two parentheses, the token list
      # only closes one of them, so the second one is closed here.
      funcState = 0
      newNode = exprNode(treeNode, 2, treeNode.lIndex)
      self.parseAngle(newNode)
      treeNode.result = treeNode.result + ')'
    elif funcState == 1:
      treeNode.lIndex += 2  # do skip the 2 parentheses of the PI()

    if lChange == -1:
      #print 'state: ', treeNode.state, 'parent.result: ', treeNode.parent.result, ' mine: ', treeNode.result
      treeNode.parent.result = treeNode.parent.result + treeNode.result
      treeNode.parent.lIndex = treeNode.lIndex
      #print('Go one level up, state: ', treeNode.state)
      # Nodes created for IF or an angle function (state >= 2) return to
      # their creator, which continues parsing; plain nodes continue with
      # the parent here.
      if (treeNode.state < 2):
        #print(' Look up more token above')
        if treeNode.lIndex < len(self.tokenList):
          self.parseExpr(treeNode.parent)

    elif lChange == 1:
      #print('Go one level down')
      newNode = exprNode(treeNode, 1, treeNode.lIndex)
      self.parseExpr(newNode)
      treeNode.lIndex = newNode.lIndex
    else:
      if treeNode.lIndex < len(self.tokenList):
        #print('parse to the end')
        self.parseExpr(treeNode)


  def parseIF(self, treeNode):
    ''' Collect the arguments of an IF function in treeNode.'''
    #print('IF state: ', treeNode.state)
    treeNode.result = treeNode.result + '('
    # Skip the token that follows IF (the opening parenthesis of a
    # well-formed formula); it has been written in the line above.
    treeNode.lIndex += 1
    self.parseExpr(treeNode)
    #print('IF result: ', treeNode.result)
    return

  def parseAngle(self, treeNode):
    ''' Collect the argument of an angle function and give it the unit rad.'''
    #print('Angle state: ', treeNode.state)
    treeNode.result = treeNode.result + '(1rad*('
    # Skip the token that follows the function name (the opening parenthesis
    # of a well-formed formula); it has been written in the line above.
    treeNode.lIndex += 1
    self.parseExpr(treeNode)
    #print('angle result: ', treeNode.result)


def getText(nodelist):
  ''' Return the concatenated data of all text nodes in nodelist.

  Nodes of other types are skipped.  An empty nodelist gives an empty
  string.  With Python 2 the result is encoded as UTF-8.'''
  rc = []
  for node in nodelist:
    if node.nodeType == node.TEXT_NODE:
      rc.append(node.data)
  # The result is built after the loop: returning from inside the loop
  # dropped every node but the first one and gave None for an empty list.
  if sys.version_info.major >= 3:
    return ''.join(rc)
  else:
    return ''.join(rc).encode('utf8')


def handleWorkSheet(theDom, actSheet, strList):
  ''' Pass the "c" elements of every "row" element in theDom to handleCells,
  together with the target sheet actSheet and the string list strList.'''
  for rowElement in theDom.getElementsByTagName("row"):
    cellElements = rowElement.getElementsByTagName("c")
    handleCells(cellElements, actSheet, strList)


def handleCells(cellList, actCellSheet, sList):
  ''' Write the content of the "c" elements in cellList to actCellSheet.

  cellList     -- "c" elements of one row
  actCellSheet -- target sheet, filled by its set(reference, content) method
  sList        -- shared strings; cells of type 's' hold an index to this list

  A formula takes precedence over the stored value.  A formula element
  without text (for example the follower cells of a shared formula) can not
  be translated; such a cell falls back to its stored value.'''
  for cell in cellList:
    cellAtts = cell.attributes
    refRef = cellAtts.getNamedItem("r")
    ref = getText(refRef.childNodes)

    refType = cellAtts.getNamedItem("t")
    if refType is not None:
      cellType = getText(refType.childNodes)
    else:
      cellType = 'n'   # FIXME: some cells don't have t and s attributes

    #print("reference: ", ref, ' Cell type: ', cellType)

    if cellType == 'inlineStr':
      iStringList = cell.getElementsByTagName("is")
      #print('iString: ', iStringList)
      for stringEle in iStringList:
        tElement = stringEle.getElementsByTagName('t')[0]
        theString = getText(tElement.childNodes)

        #print('theString: ', theString)
        actCellSheet.set(ref, theString)

    theFormula = None
    formulaRef = cell.getElementsByTagName("f")
    if len(formulaRef)==1:
      theFormula = getText(formulaRef[0].childNodes)
      #print("theFormula: ", theFormula)

    if theFormula:
      fTrans = FormulaTranslator()
      actCellSheet.set(ref, fTrans.translateForm(theFormula))
      continue

    # No usable formula: take the stored value of the cell.
    valueRef = cell.getElementsByTagName("v")
    #print('valueRef: ', valueRef)
    if len(valueRef) != 1:
      continue
    theValue = getText(valueRef[0].childNodes)
    #print("theValue: ", theValue)
    if not theValue:
      # Empty value element: nothing to write (and no index to convert).
      continue
    if cellType == 'n':
      actCellSheet.set(ref, theValue)
    elif cellType == 's':
      actCellSheet.set(ref, (sList[int(theValue)]))


def handleWorkBook(theBook, sheetDict, Doc):
  ''' Create the sheets of the workbook and set the alias definitions.

  theBook   -- parsed workbook document
  sheetDict -- filled with: sheet name -> (created sheet object, file name)
  Doc       -- document that gets one 'Spreadsheet::Sheet' object per sheet

  A defined name becomes an alias only if it points to exactly one cell with
  an absolute address on a known sheet (Sheet!$A$1).  Other definitions
  (ranges, several areas, relative addresses, unknown sheets) are skipped
  instead of raising an IndexError or KeyError.'''
  theSheets = theBook.getElementsByTagName("sheet")
  #print("theSheets: ", theSheets)
  for sheet in theSheets:
    sheetAtts = sheet.attributes
    nameRef = sheetAtts.getNamedItem("name")
    sheetName = getText(nameRef.childNodes)
    #print("table name: ", sheetName)
    idRef = sheetAtts.getNamedItem("sheetId")
    # NOTE(review): the file name is derived from the sheetId attribute; the
    # relationships part of the workbook is not consulted - verify this for
    # workbooks whose sheets have been deleted or reordered.
    sheetFile = "sheet" + getText(idRef.childNodes) + '.xml'
    #print("sheetFile: ", sheetFile)
    # add FreeCAD-spreadsheet
    sheetDict[sheetName] = (Doc.addObject('Spreadsheet::Sheet', sheetName), sheetFile)

  theAliases = theBook.getElementsByTagName("definedName")
  for theAlias in theAliases:
    aliAtts = theAlias.attributes
    nameRef = aliAtts.getNamedItem("name")
    aliasName = getText(nameRef.childNodes)
    #print("aliasName: ", aliasName)

    aliasRef = getText(theAlias.childNodes) # aliasRef can be None or empty
    if not aliasRef or '$' not in aliasRef:
      continue

    # Exactly one 'Sheet!$' part is expected; more than one means several areas.
    refList = aliasRef.split('!$')
    if len(refList) != 2:
      continue

    # Sheet names that need quoting are written as 'My Sheet', with a
    # quote inside the name doubled.
    aliasSheet = refList[0]
    if len(aliasSheet) >= 2 and aliasSheet[0] == "'" and aliasSheet[-1] == "'":
      aliasSheet = aliasSheet[1:-1].replace("''", "'")
    if aliasSheet not in sheetDict:
      continue

    # A single cell splits into column letters and row digits; a range like
    # A$1:$B$2 gives more parts and is skipped.
    adressList = refList[1].split('$')
    if len(adressList) != 2:
      continue
    if not (adressList[0].isalpha() and adressList[1].isdigit()):
      continue

    #print("aliasRef: ", aliasRef)
    #print('Sheet Name: ', refList[0])
    #print('Address: ', adressList[0] + adressList[1])
    actSheet, sheetFile = sheetDict[aliasSheet]
    actSheet.setAlias(adressList[0]+adressList[1], aliasName)

def handleStrings(theStr, sList):
  ''' Append the shared strings of the document theStr to sList.

  One entry is appended per "si" element, because cells refer to a shared
  string by the index of its "si" element.  An "si" element may hold
  several "t" elements (one per formatted run); their texts are joined.
  "t" elements directly below an "rPh" element (phonetic text) are not part
  of the string and are left out.'''
  print('process Strings: ')
  stringItems = theStr.getElementsByTagName('si')
  for sItem in stringItems:
    textParts = []
    for sElem in sItem.getElementsByTagName('t'):
      if sElem.parentNode.nodeName == 'rPh':
        continue
      # getText may give None for an element without child nodes
      textParts.append(getText(sElem.childNodes) or '')
    theString = ''.join(textParts)
    print('string: ', theString)
    sList.append(theString)

def open(nameXLSX):
  ''' Import the XLSX file nameXLSX into a new document.

  Returns the new document, or None if nameXLSX is empty.
  The archive and the files read from it are closed even if the import
  fails with an exception.'''

  if len(nameXLSX) == 0:
    return

  sheetDict = dict()
  stringList = []

  with zipfile.ZipFile(nameXLSX) as z:
    theDoc = App.newDocument()

    with z.open('xl/workbook.xml') as theBookFile:
      theBook = xml.dom.minidom.parse(theBookFile)
    handleWorkBook(theBook, sheetDict, theDoc)
    theBook.unlink()

    if 'xl/sharedStrings.xml' in z.namelist():
      with z.open('xl/sharedStrings.xml') as theStringFile:
        theStrings = xml.dom.minidom.parse(theStringFile)
      handleStrings(theStrings, stringList)
      theStrings.unlink()

    for sheetSpec in sheetDict:
      #print("sheetSpec: ", sheetSpec)
      theSheet, sheetFile = sheetDict[sheetSpec]
      with z.open('xl/worksheets/' + sheetFile) as f:
        myDom = xml.dom.minidom.parse(f)

      handleWorkSheet(myDom, theSheet, stringList)
      myDom.unlink()

  # This is needed more than once, otherwise some references are not calculated!
  theDoc.recompute()
  theDoc.recompute()
  theDoc.recompute()
  return theDoc

def insert(nameXLSX,docname):
  ''' Import the XLSX file nameXLSX into the document named docname.

  If getDocument raises a NameError for docname, a new document with this
  name is created.  The document becomes the active document.
  The archive and the files read from it are closed even if the import
  fails with an exception.'''
  try:
    theDoc=App.getDocument(docname)
  except NameError:
    theDoc=App.newDocument(docname)
  App.ActiveDocument = theDoc

  sheetDict = dict()
  stringList = []

  with zipfile.ZipFile(nameXLSX) as z:
    with z.open('xl/workbook.xml') as theBookFile:
      theBook = xml.dom.minidom.parse(theBookFile)
    handleWorkBook(theBook, sheetDict, theDoc)
    theBook.unlink()

    if 'xl/sharedStrings.xml' in z.namelist():
      with z.open('xl/sharedStrings.xml') as theStringFile:
        theStrings = xml.dom.minidom.parse(theStringFile)
      handleStrings(theStrings, stringList)
      theStrings.unlink()

    for sheetSpec in sheetDict:
      #print("sheetSpec: ", sheetSpec)
      theSheet, sheetFile = sheetDict[sheetSpec]
      with z.open('xl/worksheets/' + sheetFile) as f:
        myDom = xml.dom.minidom.parse(f)

      handleWorkSheet(myDom, theSheet, stringList)
      myDom.unlink()

  # This is needed more than once, otherwise some references are not calculated!
  theDoc.recompute()
  theDoc.recompute()
  theDoc.recompute()