Merge pull request #4346 from chennes/validateStartHTMLWithW3C

[Start] Modify generated HTML to comply with W3C Validator Recommendations
This commit is contained in:
Yorik van Havre
2021-02-02 13:59:09 +01:00
committed by GitHub
3 changed files with 130 additions and 31 deletions

View File

@@ -1,9 +1,10 @@
<html>
<!DOCTYPE html>
<html lang="BCP47_LANGUAGE">
<head>
<title>T_TITLE</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<script language="javascript">JS</script>
<style type="text/css">CSS</style>
<script>JS</script>
<style>CSS</style>
<!--QSS-->
</head>
<body onload="load()">
@@ -12,7 +13,7 @@
<div class="version">
VERSIONSTRING
<a title="T_VTOOLTIP" href="OpenSettings.py">
<img class="settingsicon" src="IMAGE_SRC_SETTINGS">
<img class="settingsicon" src="IMAGE_SRC_SETTINGS" alt="T_VTOOLTIP">
</a>
</div>
</div>
@@ -57,25 +58,25 @@
<h2>T_GENERALDOCUMENTATION</h2>
<h3>
<img src="IMAGE_SRC_USERHUB" />
<img src="IMAGE_SRC_USERHUB" alt="T_USERHUB"/>
<a href="https://www.freecadweb.org/wiki/User_hub">T_USERHUB</a>
</h3>
<p>T_DESCR_USERHUB</p>
<h3>
<img src="IMAGE_SRC_POWERHUB" />
<img src="IMAGE_SRC_POWERHUB" alt="T_POWERHUB"/>
<a href="https://www.freecadweb.org/wiki/Power_users_hub">T_POWERHUB</a>
</h3>
<p>T_DESCR_POWERHUB</p>
<h3>
<img src="IMAGE_SRC_DEVHUB" />
<img src="IMAGE_SRC_DEVHUB" alt="T_DEVHUB"/>
<a href="https://www.freecadweb.org/wiki/Developer_hub">T_DEVHUB</a>
</h3>
<p>T_DESCR_DEVHUB</p>
<h3>
<img src="IMAGE_SRC_MANUAL" />
<img src="IMAGE_SRC_MANUAL" alt="T_MANUAL"/>
<a href="https://www.freecadweb.org/wiki/Manual:Introduction">T_MANUAL</a>
</h3>
<p>T_DESCR_MANUAL</p>

View File

@@ -148,19 +148,19 @@ def getInfo(filename):
if files[0] == "Document.xml":
doc = str(zfile.read(files[0]))
doc = doc.replace("\n"," ")
r = re.findall("Property name=\"CreatedBy.*?String value=\"(.*?)\"\/>",doc)
r = re.findall("Property name=\"CreatedBy.*?String value=\"(.*?)\"/>",doc)
if r:
author = r[0]
# remove email if present in author field
if "&lt;" in author:
author = author.split("&lt;")[0].strip()
r = re.findall("Property name=\"Company.*?String value=\"(.*?)\"\/>",doc)
r = re.findall("Property name=\"Company.*?String value=\"(.*?)\"/>",doc)
if r:
company = r[0]
r = re.findall("Property name=\"License.*?String value=\"(.*?)\"\/>",doc)
r = re.findall("Property name=\"License.*?String value=\"(.*?)\"/>",doc)
if r:
lic = r[0]
r = re.findall("Property name=\"Comment.*?String value=\"(.*?)\"\/>",doc)
r = re.findall("Property name=\"Comment.*?String value=\"(.*?)\"/>",doc)
if r:
descr = r[0]
if "thumbnails/Thumbnail.png" in files:
@@ -247,16 +247,16 @@ def buildCard(filename,method,arg=None):
if finfo[5]:
infostring += "\n\n" + encode(finfo[5])
if size:
result += '<a href="'+method+arg+'" title="'+infostring+'">'
result += '<li class="icon">'
result += '<img src="file:///'+image+'">'
result += '<a href="'+method+arg+'" title="'+infostring+'">'
result += '<img src="file:///'+image.replace('\\','/')+'" alt="'+encode(basename)+'">'
result += '<div class="caption">'
result += '<h4>'+encode(basename)+'</h4>'
result += '<p>'+encode(author)+'</p>'
result += '<p>'+size+'</p>'
result += '</div>'
result += '</li>'
result += '</a>'
result += '</li>'
return result
@@ -298,6 +298,10 @@ def handle():
HTML = HTML.replace("CSS",CSS)
HTML = encode(HTML)
# set the language
HTML = HTML.replace("BCP47_LANGUAGE",QtCore.QLocale().bcp47Name())
# get the stylesheet if we are using one
if FreeCAD.ParamGet("User parameter:BaseApp/Preferences/Mod/Start").GetBool("UseStyleSheet",False):
@@ -370,17 +374,17 @@ def handle():
rfcount = rf.GetInt("RecentFiles",0)
SECTION_RECENTFILES = encode("<h2>"+TranslationTexts.T_RECENTFILES+"</h2>")
SECTION_RECENTFILES += "<ul>"
SECTION_RECENTFILES += '<a href="LoadNew.py" title="'+encode(TranslationTexts.T_CREATENEW)+'">'
SECTION_RECENTFILES += '<li class="icon">'
SECTION_RECENTFILES += '<a href="LoadNew.py" title="'+encode(TranslationTexts.T_CREATENEW)+'">'
if FreeCAD.ParamGet("User parameter:BaseApp/Preferences/Mod/Start").GetBool("NewFileGradient",False):
SECTION_RECENTFILES += '<img src="file:///'+encode(iconbank["createimg"])+'">'
SECTION_RECENTFILES += '<img src="file:///'+encode(iconbank["createimg"]).replace('\\','/')+'" alt="'+encode(TranslationTexts.T_CREATENEW)+'">'
else:
SECTION_RECENTFILES += '<img src="file:///'+os.path.join(resources_dir, "images/new_file_thumbnail.svg")+'">'
SECTION_RECENTFILES += '<img src="file:///'+os.path.join(resources_dir, "images/new_file_thumbnail.svg").replace('\\','/')+'" alt="'+encode(TranslationTexts.T_CREATENEW)+'">'
SECTION_RECENTFILES += '<div class="caption">'
SECTION_RECENTFILES += '<h4>'+encode(TranslationTexts.T_CREATENEW)+'</h4>'
SECTION_RECENTFILES += '</div>'
SECTION_RECENTFILES += '</li>'
SECTION_RECENTFILES += '</a>'
SECTION_RECENTFILES += '</li>'
for i in range(rfcount):
filename = rf.GetString("MRU%d" % (i))
SECTION_RECENTFILES += encode(buildCard(filename,method="LoadMRU.py?MRU=",arg=str(i)))
@@ -424,14 +428,12 @@ def handle():
# build IMAGE_SRC paths
HTML = HTML.replace("IMAGE_SRC_USERHUB",'file:///'+os.path.join(resources_dir, 'images/userhub.png'))
HTML = HTML.replace("IMAGE_SRC_POWERHUB",'file:///'+os.path.join(resources_dir, 'images/poweruserhub.png'))
HTML = HTML.replace("IMAGE_SRC_DEVHUB",'file:///'+os.path.join(resources_dir, 'images/developerhub.png'))
HTML = HTML.replace("IMAGE_SRC_MANUAL",'file:///'+os.path.join(resources_dir, 'images/manual.png'))
HTML = HTML.replace("IMAGE_SRC_SETTINGS",'file:///'+os.path.join(resources_dir, 'images/settings.png'))
imagepath= 'file:///'+os.path.join(resources_dir, 'images/installed.png')
imagepath = imagepath.replace('\\','/') # replace Windows backslash with slash to make the path javascript compatible
HTML = HTML.replace("IMAGE_SRC_INSTALLED",imagepath)
HTML = HTML.replace("IMAGE_SRC_USERHUB",'file:///'+os.path.join(resources_dir, 'images/userhub.png').replace('\\','/'))
HTML = HTML.replace("IMAGE_SRC_POWERHUB",'file:///'+os.path.join(resources_dir, 'images/poweruserhub.png').replace('\\','/'))
HTML = HTML.replace("IMAGE_SRC_DEVHUB",'file:///'+os.path.join(resources_dir, 'images/developerhub.png').replace('\\','/'))
HTML = HTML.replace("IMAGE_SRC_MANUAL",'file:///'+os.path.join(resources_dir, 'images/manual.png').replace('\\','/'))
HTML = HTML.replace("IMAGE_SRC_SETTINGS",'file:///'+os.path.join(resources_dir, 'images/settings.png').replace('\\','/'))
HTML = HTML.replace("IMAGE_SRC_INSTALLED",'file:///'+os.path.join(resources_dir, 'images/installed.png').replace('\\','/'))
# build UL_WORKBENCHES
@@ -481,7 +483,7 @@ def handle():
xpm = w.Icon
if "XPM" in xpm:
xpm = xpm.replace("\n ","\n") # some XPMs have some indent that QT doesn't like
r = [s[:-1].strip('"') for s in re.findall("(?s)\{(.*?)\};",xpm)[0].split("\n")[1:]]
r = [s[:-1].strip('"') for s in re.findall("(?s){(.*?)};",xpm)[0].split("\n")[1:]]
p = QtGui.QPixmap(r)
p = p.scaled(24,24)
img = tempfile.mkstemp(dir=tempfolder,suffix='.png')[1]
@@ -492,7 +494,7 @@ def handle():
img = os.path.join(resources_dir,"images/freecad.png")
iconbank[wb] = img
UL_WORKBENCHES += '<li>'
UL_WORKBENCHES += '<img src="file:///'+img+'">&nbsp;'
UL_WORKBENCHES += '<img src="file:///'+img.replace('\\','/')+'" alt="'+wn+'">&nbsp;'
UL_WORKBENCHES += '<a href="https://www.freecadweb.org/wiki/'+wn+'_Workbench">'+wn.replace("ReverseEngineering","ReverseEng")+'</a>'
UL_WORKBENCHES += '</li>'
UL_WORKBENCHES += '</ul>'

View File

@@ -24,7 +24,7 @@ import unittest
import FreeCAD
import Start
from StartPage import StartPage
from html.parser import HTMLParser
import re
class TestStartPage(unittest.TestCase):
"""Basic validation of the generated Start page."""
@@ -35,6 +35,7 @@ class TestStartPage(unittest.TestCase):
def setUp(self):
pass
def test_all_css_placeholders_removed(self):
"""Check to see if all of the CSS placeholders have been replaced."""
placeholders = ["BACKGROUND","BGTCOLOR","FONTFAMILY","FONTSIZE","LINKCOLOR",
@@ -44,6 +45,7 @@ class TestStartPage(unittest.TestCase):
for placeholder in placeholders:
self.assertNotIn (placeholder, page, "{} was not removed from the CSS".format(placeholder))
def test_all_js_placeholders_removed(self):
"""Check to see if all of the JavaScript placeholders have been replaced."""
placeholders = ["IMAGE_SRC_INSTALLED"]
@@ -51,6 +53,7 @@ class TestStartPage(unittest.TestCase):
for placeholder in placeholders:
self.assertNotIn (placeholder, page, "{} was not removed from the JS".format(placeholder))
def test_all_html_placeholders_removed(self):
"""Check to see if all of the HTML placeholders have been replaced."""
placeholders = ["T_TITLE","VERSIONSTRING","T_DOCUMENTS","T_HELP","T_ACTIVITY",
@@ -69,4 +72,97 @@ class TestStartPage(unittest.TestCase):
page = StartPage.handle()
for placeholder in placeholders:
self.assertNotIn (placeholder, page, "{} was not removed from the HTML".format(placeholder))
def test_files_do_not_contain_backslashes(self):
    """Check that no file:/// URL in the generated page contains a backslash.

    The W3C validator would reject such URLs, so the page generator is
    expected to convert Windows path separators to forward slashes before
    the paths are embedded in the HTML.
    """
    page = StartPage.handle()
    # Capture only the path portion of every double-quoted file:/// URL.
    badFilenames = [
        path
        for path in re.findall(r'"file:///(.*?)"', page)
        if "\\" in path
    ]
    if badFilenames:
        self.fail("The following filenames contain backslashes, which is prohibited in HTML: {}".format(badFilenames))
def test_html_validates(self):
    """Send the sanitized Start page to the W3C Nu validator and fail on errors or warnings.

    The check is best-effort with respect to the validator service: if it
    cannot be reached, does not answer in time, answers with an HTTP error,
    reports a non-document error, or returns a reply that is not valid JSON,
    a warning is printed to the FreeCAD console and the test ends without
    failing. Informational messages from the validator are printed but do
    not fail the test.
    """
    import json
    import urllib.error
    import urllib.request

    page = self.sanitize(StartPage.handle())  # Remove potentially sensitive data

    # For debugging the sanitization, write `page` to a scratch file here and
    # inspect exactly what is about to be sent.

    validation_url = "https://validator.w3.org/nu/?out=json"
    # Without a timeout a stalled connection would block the test run forever.
    timeout_seconds = 30
    req = urllib.request.Request(validation_url, page.encode('utf-8'))  # data must be bytes
    req.add_header("Content-type", "text/html; charset=utf-8")

    try:
        with urllib.request.urlopen(req, timeout=timeout_seconds) as response:
            responseJSON = json.loads(response.read())
    except urllib.error.HTTPError as e:
        FreeCAD.Console.PrintWarning("W3C validator returned response code {}".format(e.code))
        return
    except OSError:
        # Covers urllib.error.URLError as well as socket timeouts and
        # connection resets raised while reading the response body.
        FreeCAD.Console.PrintWarning("Could not communicate with W3C validator")
        return
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        FreeCAD.Console.PrintWarning("W3C validator returned a response that could not be parsed")
        return

    errorCount = 0
    warningCount = 0
    infoCount = 0
    resultLines = []
    for message in responseJSON["messages"]:
        messageType = message.get("type")
        if messageType == "info":
            # The validator reports warnings as "info" messages carrying a
            # "warning" subtype; every other "info" message is informational.
            if message.get("subtype") == "warning":
                warningCount += 1
                resultLines.append("WARNING: {}\n".format(ascii(message["message"])))
            else:
                infoCount += 1
                resultLines.append("INFO: {}\n".format(ascii(message["message"])))
        elif messageType == "error":
            errorCount += 1
            resultLines.append("ERROR: {}\n".format(ascii(message["message"])))
        elif messageType == "non-document-error":
            # The validator itself had a problem; that says nothing about our page.
            FreeCAD.Console.PrintWarning("W3C validator returned a non-document error:\n {}".format(message))
            return
    validationResultString = "".join(resultLines)

    if errorCount > 0 or warningCount > 0:
        # Export the page so the reported problems can be examined locally.
        StartPage.exportTestFile()
        FreeCAD.Console.PrintWarning("HTML validation failed: Start page source written to your home directory for analysis.")
        self.fail("W3C Validator analysis shows the Start page has {} errors and {} warnings:\n\n{}".format(errorCount, warningCount, validationResultString))
    elif infoCount > 0:
        FreeCAD.Console.PrintWarning("The Start page is valid HTML, but the W3C sent back {} informative messages:\n{}.".format(infoCount,validationResultString))
def sanitize(self, html):
    """Return *html* with potentially sensitive local data replaced by fixed placeholders.

    The result is meant to be sent to an external validator, so everything
    that may reveal local paths or document details is anonymized while the
    markup structure is left untouched:

    * double-quoted ``file:///`` URLs (local file system paths)
    * ``title`` attributes (mouseover text, may span several lines)
    * non-empty ``alt`` attributes (thumbnail alt text carries the document name)
    * ``<h4>`` contents (displayed document names)
    * simple ``<p>`` paragraphs without nested tags (author, file size, ...)
    """
    # (pattern, placeholder) pairs, applied in this order.
    substitutions = (
        (r'"file:///.*?"', '"file:///A/B/C"'),
        (r'title="[\s\S]*?"', 'title="Y"'),  # [\s\S] because some titles contain newlines
        # Empty alt values are left alone: they hold no data and are
        # meaningful to the validator.
        (r'\balt="[^"]+"', 'alt="W"'),
        (r'<h4>.*?</h4>', '<h4>Z</h4>'),
        (r'<p>[^<]*?</p>', '<p>X</p>'),
    )
    for pattern, placeholder in substitutions:
        html = re.sub(pattern, placeholder, html)
    return html