Merge pull request #4346 from chennes/validateStartHTMLWithW3C

[Start] Modify generated HTML to comply with W3C Validator Recommendations
2021-02-02 13:59:09 +01:00
parent bdc07a1d22 1958e30f84
commit 253ef2a99a
3 changed files with 130 additions and 31 deletions
--- a/src/Mod/Start/StartPage/StartPage.html
+++ b/src/Mod/Start/StartPage/StartPage.html
@@ -1,9 +1,10 @@
-<html>
+<!DOCTYPE html>
+<html lang="BCP47_LANGUAGE">
    <head>
        <title>T_TITLE</title>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
-        <script language="javascript">JS</script>
-        <style type="text/css">CSS</style>
+        <script>JS</script>
+        <style>CSS</style>
        <!--QSS-->
    </head>
    <body onload="load()">
@@ -12,7 +13,7 @@
                <div class="version">
                    VERSIONSTRING 
                    <a title="T_VTOOLTIP" href="OpenSettings.py">
-                        <img class="settingsicon" src="IMAGE_SRC_SETTINGS">
+                        <img class="settingsicon" src="IMAGE_SRC_SETTINGS" alt="T_VTOOLTIP">
                    </a>
                </div>
            </div>
@@ -57,25 +58,25 @@
                    <h2>T_GENERALDOCUMENTATION</h2>

                    <h3>
-                        <img src="IMAGE_SRC_USERHUB" />
+                        <img src="IMAGE_SRC_USERHUB" alt="T_USERHUB"/>
                        <a href="https://www.freecadweb.org/wiki/User_hub">T_USERHUB</a>
                    </h3>
                    <p>T_DESCR_USERHUB</p>

                    <h3>
-                        <img src="IMAGE_SRC_POWERHUB" />
+                        <img src="IMAGE_SRC_POWERHUB" alt="T_POWERHUB"/>
                        <a href="https://www.freecadweb.org/wiki/Power_users_hub">T_POWERHUB</a>
                    </h3>
                    <p>T_DESCR_POWERHUB</p>

                    <h3>
-                        <img src="IMAGE_SRC_DEVHUB" />
+                        <img src="IMAGE_SRC_DEVHUB" alt="T_DEVHUB"/>
                        <a href="https://www.freecadweb.org/wiki/Developer_hub">T_DEVHUB</a>
                    </h3>
                    <p>T_DESCR_DEVHUB</p>

                    <h3>
-                        <img src="IMAGE_SRC_MANUAL" />
+                        <img src="IMAGE_SRC_MANUAL" alt="T_MANUAL"/>
                        <a href="https://www.freecadweb.org/wiki/Manual:Introduction">T_MANUAL</a>
                    </h3>
                    <p>T_DESCR_MANUAL</p>
--- a/src/Mod/Start/StartPage/StartPage.py
+++ b/src/Mod/Start/StartPage/StartPage.py
@@ -148,19 +148,19 @@ def getInfo(filename):
            if files[0] == "Document.xml":
                doc = str(zfile.read(files[0]))
                doc = doc.replace("\n"," ")
-                r = re.findall("Property name=\"CreatedBy.*?String value=\"(.*?)\"\/>",doc)
+                r = re.findall("Property name=\"CreatedBy.*?String value=\"(.*?)\"/>",doc)
                if r:
                    author = r[0]
                    # remove email if present in author field
                    if "&lt;" in author:
                        author = author.split("&lt;")[0].strip()
-                r = re.findall("Property name=\"Company.*?String value=\"(.*?)\"\/>",doc)
+                r = re.findall("Property name=\"Company.*?String value=\"(.*?)\"/>",doc)
                if r:
                    company = r[0]
-                r = re.findall("Property name=\"License.*?String value=\"(.*?)\"\/>",doc)
+                r = re.findall("Property name=\"License.*?String value=\"(.*?)\"/>",doc)
                if r:
                    lic = r[0]
-                r = re.findall("Property name=\"Comment.*?String value=\"(.*?)\"\/>",doc)
+                r = re.findall("Property name=\"Comment.*?String value=\"(.*?)\"/>",doc)
                if r:
                    descr = r[0]
                if "thumbnails/Thumbnail.png" in files:
@@ -247,16 +247,16 @@ def buildCard(filename,method,arg=None):
            if finfo[5]:
                infostring += "\n\n" + encode(finfo[5])
            if size:
-                result += '<a href="'+method+arg+'" title="'+infostring+'">'
                result += '<li class="icon">'
-                result += '<img src="file:///'+image+'">'
+                result += '<a href="'+method+arg+'" title="'+infostring+'">'
+                result += '<img src="file:///'+image.replace('\\','/')+'" alt="'+encode(basename)+'">'
                result += '<div class="caption">'
                result += '<h4>'+encode(basename)+'</h4>'
                result += '<p>'+encode(author)+'</p>'
                result += '<p>'+size+'</p>'
                result += '</div>'
-                result += '</li>'
                result += '</a>'
+                result += '</li>'
    return result


@@ -298,6 +298,10 @@ def handle():
    HTML = HTML.replace("CSS",CSS)
    HTML = encode(HTML)

+    # set the language
+
+    HTML = HTML.replace("BCP47_LANGUAGE",QtCore.QLocale().bcp47Name())
+
    # get the stylesheet if we are using one

    if FreeCAD.ParamGet("User parameter:BaseApp/Preferences/Mod/Start").GetBool("UseStyleSheet",False):
@@ -370,17 +374,17 @@ def handle():
    rfcount = rf.GetInt("RecentFiles",0)
    SECTION_RECENTFILES = encode("<h2>"+TranslationTexts.T_RECENTFILES+"</h2>")
    SECTION_RECENTFILES += "<ul>"
-    SECTION_RECENTFILES += '<a href="LoadNew.py" title="'+encode(TranslationTexts.T_CREATENEW)+'">'
    SECTION_RECENTFILES += '<li class="icon">'
+    SECTION_RECENTFILES += '<a href="LoadNew.py" title="'+encode(TranslationTexts.T_CREATENEW)+'">'
    if FreeCAD.ParamGet("User parameter:BaseApp/Preferences/Mod/Start").GetBool("NewFileGradient",False):
-        SECTION_RECENTFILES += '<img src="file:///'+encode(iconbank["createimg"])+'">'
+        SECTION_RECENTFILES += '<img src="file:///'+encode(iconbank["createimg"]).replace('\\','/')+'" alt="'+encode(TranslationTexts.T_CREATENEW)+'">'
    else:
-        SECTION_RECENTFILES += '<img src="file:///'+os.path.join(resources_dir, "images/new_file_thumbnail.svg")+'">'
+        SECTION_RECENTFILES += '<img src="file:///'+os.path.join(resources_dir, "images/new_file_thumbnail.svg").replace('\\','/')+'" alt="'+encode(TranslationTexts.T_CREATENEW)+'">'
    SECTION_RECENTFILES += '<div class="caption">'
    SECTION_RECENTFILES += '<h4>'+encode(TranslationTexts.T_CREATENEW)+'</h4>'
    SECTION_RECENTFILES += '</div>'
-    SECTION_RECENTFILES += '</li>'
    SECTION_RECENTFILES += '</a>'
+    SECTION_RECENTFILES += '</li>'
    for i in range(rfcount):
        filename = rf.GetString("MRU%d" % (i))
        SECTION_RECENTFILES += encode(buildCard(filename,method="LoadMRU.py?MRU=",arg=str(i)))
@@ -424,14 +428,12 @@ def handle():

    # build IMAGE_SRC paths

-    HTML = HTML.replace("IMAGE_SRC_USERHUB",'file:///'+os.path.join(resources_dir, 'images/userhub.png'))
-    HTML = HTML.replace("IMAGE_SRC_POWERHUB",'file:///'+os.path.join(resources_dir, 'images/poweruserhub.png'))
-    HTML = HTML.replace("IMAGE_SRC_DEVHUB",'file:///'+os.path.join(resources_dir, 'images/developerhub.png'))
-    HTML = HTML.replace("IMAGE_SRC_MANUAL",'file:///'+os.path.join(resources_dir, 'images/manual.png'))
-    HTML = HTML.replace("IMAGE_SRC_SETTINGS",'file:///'+os.path.join(resources_dir, 'images/settings.png'))
-    imagepath= 'file:///'+os.path.join(resources_dir, 'images/installed.png')
-    imagepath = imagepath.replace('\\','/')  # replace Windows backslash with slash to make the path javascript compatible
-    HTML = HTML.replace("IMAGE_SRC_INSTALLED",imagepath)
+    HTML = HTML.replace("IMAGE_SRC_USERHUB",'file:///'+os.path.join(resources_dir, 'images/userhub.png').replace('\\','/'))
+    HTML = HTML.replace("IMAGE_SRC_POWERHUB",'file:///'+os.path.join(resources_dir, 'images/poweruserhub.png').replace('\\','/'))
+    HTML = HTML.replace("IMAGE_SRC_DEVHUB",'file:///'+os.path.join(resources_dir, 'images/developerhub.png').replace('\\','/'))
+    HTML = HTML.replace("IMAGE_SRC_MANUAL",'file:///'+os.path.join(resources_dir, 'images/manual.png').replace('\\','/'))
+    HTML = HTML.replace("IMAGE_SRC_SETTINGS",'file:///'+os.path.join(resources_dir, 'images/settings.png').replace('\\','/'))
+    HTML = HTML.replace("IMAGE_SRC_INSTALLED",'file:///'+os.path.join(resources_dir, 'images/installed.png').replace('\\','/'))

    # build UL_WORKBENCHES

@@ -481,7 +483,7 @@ def handle():
                    xpm = w.Icon
                    if "XPM" in xpm:
                        xpm = xpm.replace("\n        ","\n") # some XPMs have some indent that QT doesn't like
-                        r = [s[:-1].strip('"') for s in re.findall("(?s)\{(.*?)\};",xpm)[0].split("\n")[1:]]
+                        r = [s[:-1].strip('"') for s in re.findall("(?s){(.*?)};",xpm)[0].split("\n")[1:]]
                        p = QtGui.QPixmap(r)
                        p = p.scaled(24,24)
                        img = tempfile.mkstemp(dir=tempfolder,suffix='.png')[1]
@@ -492,7 +494,7 @@ def handle():
                    img = os.path.join(resources_dir,"images/freecad.png")
            iconbank[wb] = img
        UL_WORKBENCHES += '<li>'
-        UL_WORKBENCHES += '<img src="file:///'+img+'">&nbsp;'
+        UL_WORKBENCHES += '<img src="file:///'+img.replace('\\','/')+'" alt="'+wn+'">&nbsp;'
        UL_WORKBENCHES += '<a href="https://www.freecadweb.org/wiki/'+wn+'_Workbench">'+wn.replace("ReverseEngineering","ReverseEng")+'</a>'
        UL_WORKBENCHES += '</li>'
    UL_WORKBENCHES += '</ul>'
--- a/src/Mod/Start/TestStart/TestStartPage.py
+++ b/src/Mod/Start/TestStart/TestStartPage.py
@@ -24,7 +24,7 @@ import unittest
 import FreeCAD
 import Start
 from StartPage import StartPage
-from html.parser import HTMLParser
+import re

 class TestStartPage(unittest.TestCase):
    """Basic validation of the generated Start page."""
@@ -35,6 +35,7 @@ class TestStartPage(unittest.TestCase):
    def setUp(self):
        pass

+
    def test_all_css_placeholders_removed(self):
        """Check to see if all of the CSS placeholders have been replaced."""
        placeholders = ["BACKGROUND","BGTCOLOR","FONTFAMILY","FONTSIZE","LINKCOLOR",
@@ -44,6 +45,7 @@ class TestStartPage(unittest.TestCase):
        for placeholder in placeholders:
            self.assertNotIn (placeholder, page, "{} was not removed from the CSS".format(placeholder))

+
    def test_all_js_placeholders_removed(self):
        """Check to see if all of the JavaScript placeholders have been replaced."""
        placeholders = ["IMAGE_SRC_INSTALLED"]
@@ -51,6 +53,7 @@ class TestStartPage(unittest.TestCase):
        for placeholder in placeholders:
            self.assertNotIn (placeholder, page, "{} was not removed from the JS".format(placeholder))

+
    def test_all_html_placeholders_removed(self):
        """Check to see if all of the HTML placeholders have been replaced."""
        placeholders = ["T_TITLE","VERSIONSTRING","T_DOCUMENTS","T_HELP","T_ACTIVITY",
@@ -69,4 +72,97 @@ class TestStartPage(unittest.TestCase):
        page = StartPage.handle()
        for placeholder in placeholders:
            self.assertNotIn (placeholder, page, "{} was not removed from the HTML".format(placeholder))
-    
+
+
+    def test_files_do_not_contain_backslashes(self):
+        """Check that no file:/// URL in the generated page contains a backslash.
+
+        This would be caught by the W3C validator if we didn't sanitize the filenames before sending them.
+        """
+        page = StartPage.handle()
+
+        # Capture everything between the file:/// prefix and the closing double quote
+        filenames = re.findall(r'"file:///(.*?)"', page)
+        badFilenames = [name for name in filenames if "\\" in name]
+
+        if badFilenames:
+            self.fail("The following filenames contain backslashes, which is prohibited in HTML: {}".format(badFilenames))
+    
+
+    def test_html_validates(self):
+        """Send the sanitized Start page to the W3C validator and fail on errors or warnings.
+
+        Informational messages are only reported on the console. If the validator cannot
+        be reached, answers with an HTTP error or reports a non-document error, a warning
+        is printed and the test ends without failing.
+        """
+        import json
+        import urllib.error
+        import urllib.request
+
+        page = self.sanitize(StartPage.handle()) # Remove potentially sensitive data
+
+        # For debugging, if you want to ensure that the sanitization worked correctly:
+        # from pathlib import Path; Path.home().joinpath("test.html").write_text(page)
+
+        validation_url = "https://validator.w3.org/nu/?out=json"
+        data = page.encode('utf-8') # data should be bytes
+        req = urllib.request.Request(validation_url, data)
+        req.add_header("Content-type","text/html; charset=utf-8")
+        try:
+            # Bound the wait: without a timeout an unresponsive server would hang the test run
+            with urllib.request.urlopen(req, timeout=30) as response:
+                responseJSON = json.loads(response.read())
+        except urllib.error.HTTPError as e:
+            FreeCAD.Console.PrintWarning("W3C validator returned response code {}".format(e.code))
+            return
+        except OSError:
+            # URLError derives from OSError; this also covers timeouts while reading the response
+            FreeCAD.Console.PrintWarning("Could not communicate with W3C validator")
+            return
+
+        errorCount = 0
+        warningCount = 0
+        infoCount = 0
+        validationResultString = ""
+        for message in responseJSON["messages"]:
+            messageType = message.get("type")
+            if messageType == "non-document-error":
+                FreeCAD.Console.PrintWarning("W3C validator returned a non-document error:\n {}".format(message))
+                return
+            if messageType == "error":
+                errorCount += 1
+                validationResultString += "ERROR: {}\n".format(ascii(message["message"]))
+            elif messageType == "info" and "subtype" not in message:
+                infoCount += 1
+                validationResultString += "INFO: {}\n".format(ascii(message["message"]))
+            elif messageType == "info" and message["subtype"] == "warning":
+                warningCount += 1
+                validationResultString += "WARNING: {}\n".format(ascii(message["message"]))
+
+        if errorCount > 0 or warningCount > 0:
+            StartPage.exportTestFile()
+            FreeCAD.Console.PrintWarning("HTML validation failed: Start page source written to your home directory for analysis.")
+            self.fail("W3C Validator analysis shows the Start page has {} errors and {} warnings:\n\n{}".format(errorCount, warningCount, validationResultString))
+        elif infoCount > 0:
+            FreeCAD.Console.PrintWarning("The Start page is valid HTML, but the W3C sent back {} informative messages:\n{}.".format(infoCount,validationResultString))
+
+    def sanitize(self, html):
+        """Replace potentially sensitive user data in html with fixed placeholders and return it."""
+        # (pattern, placeholder) pairs, applied in order
+        substitutions = (
+            # Anonymize all local filenames
+            (r'"file:///.*?"', r'"file:///A/B/C"'),
+            # Anonymize titles, which are used for mouseover text and might contain document
+            # information (some titles have newlines in them)
+            (r'title="[\s\S]*?"', r'title="Y"'),
+            # Anonymize image alt text: the recent-file cards use the document name for it
+            (r'\balt="[\s\S]*?"', r'alt="W"'),
+            # Anonymize the document names, which we display in <h4> tags
+            (r'<h4>.*?</h4>', r'<h4>Z</h4>'),
+            # Remove any simple single-line paragraphs, which might contain document author
+            # information, file size information, etc.
+            (r'<p>[^<]*?</p>', r'<p>X</p>'),
+        )
+        for pattern, placeholder in substitutions:
+            html = re.sub(pattern, placeholder, html)
+        return html