Addon Manager: change lookup mechanism

Switch the addon lookup mechanism from parsing an HTML page to extracting info from the .gitmodules file. This simplifies the logic and allows using non-GitHub repos. Readmes for GitHub repos are extracted from HTML pages using a regex. GitLab pages are converted to HTML using the Python Markdown library if present, falling back to displaying raw Markdown. In this case, image links are converted from relative to absolute paths.
2020-09-17 20:17:03 +03:00
parent 2b49d60ac5
commit 07db27d0dd
3 changed files with 141 additions and 90 deletions
--- a/src/Mod/AddonManager/addonmanager_utilities.py
+++ b/src/Mod/AddonManager/addonmanager_utilities.py
@@ -116,8 +116,10 @@ def urlopen(url):
    urllib2.install_opener(opener) 
    
    # Url opening
+    req = urllib2.Request(url,
+                          headers={'User-Agent' : "Magic Browser"})
    try:
-        u = urllib2.urlopen(url, timeout=timeout)
+        u = urllib2.urlopen(req, timeout=timeout)
    except:
        return None
    else:
@@ -259,7 +261,7 @@ def getZipUrl(baseurl):
    url = getserver(baseurl).strip("/")
    if url.endswith("github.com"):
        return baseurl+"/archive/master.zip"
-    elif url.endswith("framagit.org"):
+    elif url.endswith("framagit.org") or url.endswith("gitlab.com"):
        # https://framagit.org/freecad-france/mooc-workbench/-/archive/master/mooc-workbench-master.zip
        reponame = baseurl.strip("/").split("/")[-1]
        return baseurl+"/-/archive/master/"+reponame+"-master.zip"
@@ -272,12 +274,37 @@ def getReadmeUrl(url):
    
    "Returns the location of a readme file"

-    if ("github" in url) or ("framagit" in url):
-        return url+"/blob/master/README.md"
-    print("Debug: addonmanager_utilities.getReadmeUrl: Unknown git host:",url)
+    if "github" in url or "framagit" in url or "gitlab" in url:
+        return url+"/raw/master/README.md"
+    else:
+        print("Debug: addonmanager_utilities.getReadmeUrl: Unknown git host:",url)
    return None


+def getDescRegex(url):
+    
+    """Returns a regex string that extracts a WB description to be displayed in the description
+    panel of the Addon manager, if the README could not be found"""
+
+    if "github" in url:
+        return "<meta property=\"og:description\" content=\"(.*?)\""
+    elif "framagit" in url or "gitlab" in url:
+        return "<meta.*?content=\"(.*?)\".*?og\:description.*?>"
+    print("Debug: addonmanager_utilities.getDescRegex: Unknown git host:",url)
+    return None
+
+
+def getReadmeHTMLUrl(url):
+    
+    "Returns the location of a html file containing readme"
+
+    if ("github" in url):
+        return url+"/blob/master/README.md"
+    else:
+        print("Debug: addonmanager_utilities.getReadmeUrl: Unknown git host:",url)
+        return None
+
+
 def getReadmeRegex(url):
    
    """Return a regex string that extracts the contents to be displayed in the description
@@ -285,32 +312,24 @@ def getReadmeRegex(url):
    
    if ("github" in url):
        return "<article.*?>(.*?)</article>"
-    elif ("framagit" in url):
-        return None # the readme content on framagit is generated by javascript so unretrievable by urlopen
-    print("Debug: addonmanager_utilities.getReadmeRegex: Unknown git host:",url)
-    return None
+    else:
+        print("Debug: addonmanager_utilities.getReadmeRegex: Unknown git host:",url)
+        return None


-def getDescRegex(url):
-    
-    """Returns a regex string that extracts a WB description to be displayed in the description
-    panel of the Addon manager, if the README could not be found"""
+def fixRelativeLinks(text, base_url):

-    if ("github" in url):
-        return "<meta property=\"og:description\" content=\"(.*?)\""
-    elif ("framagit" in url):
-        return "<meta.*?content=\"(.*?)\".*?og\:description.*?>"
-    print("Debug: addonmanager_utilities.getDescRegex: Unknown git host:",url)
-    return None
+    """Replace markdown image relative links with
+    absolute ones using the base URL"""

-def getRepoUrl(text):
-    
-    "finds an URL in a given piece of text extracted from github's HTML"
-    
-    if ("href" in text):
-        return "https://github.com/" + re.findall("href=\"\/(.*?)\/tree",text)[0]
-    elif ("MOOC" in text):
-        # Bad hack for now... We need to do better
-        return "https://framagit.org/freecad-france/mooc-workbench"
-    print("Debug: addonmanager_utilities.getRepoUrl: Unable to find repo:",text)
-    return None
+    new_text = ""
+    for line in text.splitlines():
+        for link in (re.findall(r"!\[.*?\]\((.*?)\)", line) +
+                     re.findall(r"src\s*=\s*[\"'](.+?)[\"']", line)):
+            parts = link.split('/')
+            if len(parts) < 2 or not re.match(r"^http|^www|^.+\.|^/", parts[0]):
+                newlink = os.path.join(base_url, link.lstrip('./'))
+                line = line.replace(link, newlink)
+                print("Debug: replaced " + link + " with " + newlink)
+        new_text = new_text + '\n' + line
+    return new_text