Addon Manager: change lookup mechanism

Switch the addon lookup mechanism from parsing an HTML page to extracting info
from the .gitmodules file.
This simplifies the logic and allows using non-GitHub repos.

Readmes for GitHub repos are extracted from HTML pages using a regex.

GitLab pages are converted to HTML using the Python Markdown library if present,
falling back to displaying raw Markdown.
In this case, image links are converted from relative to absolute paths.
This commit is contained in:
Matsievskiy S.V
2020-09-17 20:17:03 +03:00
committed by Yorik van Havre
parent 2b49d60ac5
commit 07db27d0dd
3 changed files with 141 additions and 90 deletions

View File

@@ -116,8 +116,10 @@ def urlopen(url):
urllib2.install_opener(opener)
# Url opening
req = urllib2.Request(url,
headers={'User-Agent' : "Magic Browser"})
try:
u = urllib2.urlopen(url, timeout=timeout)
u = urllib2.urlopen(req, timeout=timeout)
except:
return None
else:
@@ -259,7 +261,7 @@ def getZipUrl(baseurl):
url = getserver(baseurl).strip("/")
if url.endswith("github.com"):
return baseurl+"/archive/master.zip"
elif url.endswith("framagit.org"):
elif url.endswith("framagit.org") or url.endswith("gitlab.com"):
# https://framagit.org/freecad-france/mooc-workbench/-/archive/master/mooc-workbench-master.zip
reponame = baseurl.strip("/").split("/")[-1]
return baseurl+"/-/archive/master/"+reponame+"-master.zip"
@@ -272,12 +274,37 @@ def getReadmeUrl(url):
"Returns the location of a readme file"
if ("github" in url) or ("framagit" in url):
return url+"/blob/master/README.md"
print("Debug: addonmanager_utilities.getReadmeUrl: Unknown git host:",url)
if "github" in url or "framagit" in url or "gitlab" in url:
return url+"/raw/master/README.md"
else:
print("Debug: addonmanager_utilities.getReadmeUrl: Unknown git host:",url)
return None
def getDescRegex(url):
    """Returns a regex string that extracts a WB description to be displayed in the description
    panel of the Addon manager, if the README could not be found

    The git host is recognised by a plain substring test on url. The returned
    pattern has a single capture group holding the description text.
    Returns None (after printing a debug message) when the host is unknown."""

    # Raw strings are used so that the backslash in "\:" reaches the regex
    # engine without relying on Python passing an invalid string escape through
    # (which raises a DeprecationWarning/SyntaxWarning on recent interpreters).
    # The resulting pattern text is unchanged.
    if "github" in url:
        return r'<meta property="og:description" content="(.*?)"'
    elif "framagit" in url or "gitlab" in url:
        return r'<meta.*?content="(.*?)".*?og\:description.*?>'
    print("Debug: addonmanager_utilities.getDescRegex: Unknown git host:",url)
    return None
def getReadmeHTMLUrl(url):
    """Returns the location of a html file containing readme

    Only URLs containing "github" are handled: the rendered README page of the
    master branch is returned. For any other host a debug message is printed
    and None is returned."""

    if "github" in url:
        return url+"/blob/master/README.md"
    # The message names this function (it previously named getReadmeUrl,
    # which sent readers of the log to the wrong place).
    print("Debug: addonmanager_utilities.getReadmeHTMLUrl: Unknown git host:",url)
    return None
def getReadmeRegex(url):
"""Return a regex string that extracts the contents to be displayed in the description
@@ -285,32 +312,24 @@ def getReadmeRegex(url):
if ("github" in url):
return "<article.*?>(.*?)</article>"
elif ("framagit" in url):
return None # the readme content on framagit is generated by javascript so unretrievable by urlopen
print("Debug: addonmanager_utilities.getReadmeRegex: Unknown git host:",url)
return None
else:
print("Debug: addonmanager_utilities.getReadmeRegex: Unknown git host:",url)
return None
def getDescRegex(url):
"""Returns a regex string that extracts a WB description to be displayed in the description
panel of the Addon manager, if the README could not be found"""
def fixRelativeLinks(text, base_url):
if ("github" in url):
return "<meta property=\"og:description\" content=\"(.*?)\""
elif ("framagit" in url):
return "<meta.*?content=\"(.*?)\".*?og\:description.*?>"
print("Debug: addonmanager_utilities.getDescRegex: Unknown git host:",url)
return None
"""Replace markdown image relative links with
absolute ones using the base URL"""
def getRepoUrl(text):
"finds an URL in a given piece of text extracted from github's HTML"
if ("href" in text):
return "https://github.com/" + re.findall("href=\"\/(.*?)\/tree",text)[0]
elif ("MOOC" in text):
# Bad hack for now... We need to do better
return "https://framagit.org/freecad-france/mooc-workbench"
print("Debug: addonmanager_utilities.getRepoUrl: Unable to find repo:",text)
return None
new_text = ""
for line in text.splitlines():
for link in (re.findall(r"!\[.*?\]\((.*?)\)", line) +
re.findall(r"src\s*=\s*[\"'](.+?)[\"']", line)):
parts = link.split('/')
if len(parts) < 2 or not re.match(r"^http|^www|^.+\.|^/", parts[0]):
newlink = os.path.join(base_url, link.lstrip('./'))
line = line.replace(link, newlink)
print("Debug: replaced " + link + " with " + newlink)
new_text = new_text + '\n' + line
return new_text