Removed old wiki2qhelp file - issue 7120
This commit is contained in:
1
lgtm.yml
1
lgtm.yml
@@ -12,7 +12,6 @@ path_classifiers:
|
||||
- "src/CXX/"
|
||||
template:
|
||||
- "src/Tools/examplePy2wiki.py"
|
||||
- "src/Tools/wiki2qhelp.py"
|
||||
- "src/Mode/TemplatePyMod/"
|
||||
unmaintained:
|
||||
- "src/Mod/Robot/"
|
||||
|
||||
@@ -1,622 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
#***************************************************************************
|
||||
#* *
|
||||
#* Copyright (c) 2009 Yorik van Havre <yorik@uncreated.net> *
|
||||
#* *
|
||||
#* This program is free software; you can redistribute it and/or modify *
|
||||
#* it under the terms of the GNU Library General Public License (LGPL) *
|
||||
#* as published by the Free Software Foundation; either version 2 of *
|
||||
#* the License, or (at your option) any later version. *
|
||||
#* for detail see the LICENCE text file. *
|
||||
#* *
|
||||
#* This program is distributed in the hope that it will be useful, *
|
||||
#* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
#* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
#* GNU Library General Public License for more details. *
|
||||
#* *
|
||||
#* You should have received a copy of the GNU Library General Public *
|
||||
#* License along with this program; if not, write to the Free Software *
|
||||
#* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *
|
||||
#* USA *
|
||||
#* *
|
||||
#***************************************************************************
|
||||
|
||||
"""
This script retrieves the contents of a wiki site and saves it locally,
then calls qt help compiler to produce a qhelp-assistant help file.
The script can be called without arguments, it will then use the default
url below, or by passing it an url and optionally a TOC name.
"""

# The description above must be the first statement of the module so that it
# becomes the module docstring (__doc__); placed after the assignments it was
# only a discarded string expression.
__title__="wiki2qhelp"
__author__ = "Yorik van Havre <yorik@uncreated.net>"
__url__ = "http://www.freecadweb.org"
|
||||
|
||||
import sys, os, re, tempfile, getopt
|
||||
from urllib2 import urlopen, HTTPError
|
||||
|
||||
# CONFIGURATION #################################################

# The default URL needs an explicit scheme: urlopen() rejects scheme-less
# URLs and webroot() looks for the "http://" prefix.
DEFAULTURL = "http://www.freecadweb.org/wiki" #default URL if no URL is passed
INDEX = "Online_Help_Toc" # the start page from where to crawl the wiki
NORETRIEVE = ['Manual','Developer_hub','Power_users_hub','Users_hub','Source_documentation', 'User_hub','Main_Page','About_this_site'] # pages that won't be fetched (kept online)
GETTRANSLATIONS = True # Set true if you want to get the translations too.
MAXFAIL = 3 # max number of retries if download fails
VERBOSE = True # to display what's going on. Otherwise, runs totally silent.
COMPILE = True # Whether Qt assistant will be used to compile the final help file
OUTPUTPATH = os.path.expanduser("~")+os.sep+'.FreeCAD' # Where to store the qch file
QHELPCOMPILER = 'qhelpgenerator'
# NOTE: the name below is misspelled ("COLLECTIOM") but it is referenced under
# this spelling throughout the script, so it is kept as is.
QCOLLECTIOMGENERATOR = 'qcollectiongenerator'
PDFOUTPUT = False # if true, a pdf file will be generated instead of qhelp.
REMOVE = True # if true, the temp html files are removed after successful operation
PDFCONVERTER = 'pisa' # can be 'pisa' or 'htmldoc'

# END CONFIGURATION ##############################################

URL = DEFAULTURL # site currently being crawled
TMPFOLDER = tempfile.mkdtemp() # working folder, created as soon as the module is loaded
wikiindex = "/index.php?title=" # appended to URL in front of a page name
processed = [] # names of the pages and images already downloaded
pisa = None # placeholder for the optional pisa module (only needed for pdf output)
|
||||
usage='''
|
||||
wiki2qhelp [options] [url] [index page]
|
||||
|
||||
fetches wiki pages from the specified url, starting from specified
|
||||
index page, and outputs a .qch file in the specified output path.
|
||||
You must have qassistant installed.
|
||||
|
||||
If no url, index page or output path is specified, the following
|
||||
default values will be used:
|
||||
url: '''+DEFAULTURL+'''
|
||||
index page: '''+INDEX+'''
|
||||
output path: '''+OUTPUTPATH+'''
|
||||
|
||||
Options:
|
||||
|
||||
-v: Verbose mode
|
||||
-c filename or --helpcompiler-exe filename: Uses filename as qt help compiler
|
||||
-g filename or --helpgenerator-exe filename: Uses filename as qt collection generator
|
||||
-o path or --out-path path: Specifies an output path
|
||||
-h or --help: Displays this help message
|
||||
-p [converter] or --pdf [converter]: Outputs a pdf file instead of qhelp. Converter
|
||||
can be pisa (default) or htmldoc
|
||||
-t path or --tempfolder path: Uses path as temp folder for storing html files
|
||||
|
||||
'''
|
||||
css = """/* Basic CSS for offline wiki rendering */
|
||||
|
||||
body {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
font-size: 13px;
|
||||
text-align: justify;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 2.2em;
|
||||
font-weight: bold;
|
||||
background: #46A4D0;
|
||||
color: white;
|
||||
padding: 5px;
|
||||
-moz-border-radius: 5px;
|
||||
-webkit-border-radius: 5px;
|
||||
}
|
||||
|
||||
pre {
|
||||
border: 1px dashed #333333;
|
||||
text-align: left;
|
||||
background: #EEEEEE;
|
||||
padding: 5px;
|
||||
}
|
||||
|
||||
a:link, a:visited {
|
||||
font-weight: bold;
|
||||
text-decoration: none;
|
||||
color: #0084FF;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.printfooter {
|
||||
font-size: 0.8em;
|
||||
color: #333333;
|
||||
border-top: 1px solid #333333;
|
||||
}
|
||||
|
||||
.wikitable #toc {
|
||||
font-size: 0.8em;
|
||||
}
|
||||
|
||||
#toc,.docnav {
|
||||
display: none;
|
||||
}
|
||||
|
||||
"""
|
||||
fcount = dcount = 0  # running totals of files / directories removed by rmall()

def rmall(dirPath):
    """Delete dirPath together with everything below it.

    Each removed file increments the module counter fcount and each removed
    directory increments dcount.
    """
    global fcount, dcount
    for entry in os.listdir(dirPath):
        target = os.path.join(dirPath, entry)
        if os.path.isdir(target):
            # empty the sub-directory first; the recursive call removes it too
            rmall(target)
            continue
        os.remove(target)
        fcount += 1
    # everything inside is gone now, so the directory itself can be removed
    os.rmdir(dirPath)
    dcount += 1
|
||||
|
||||
def crawl(site=DEFAULTURL):
    """Download an entire wiki site and build the offline documentation.

    Starting from the INDEX page, every linked wiki page that is not listed
    in NORETRIEVE is fetched into TMPFOLDER. Depending on the configuration
    the result is then converted to a single pdf (PDFOUTPUT) or compiled
    into Qt help files (COMPILE).

    site -- base url of the wiki to crawl.

    Returns 0 on success, 1 when a required tool is missing or the Qt help
    compilation fails.
    """
    # These names are read as module globals by fetchpage(), fetchimage(),
    # cleanlinks(), createpdf_pisa() and joinpdf(); without the global
    # statement the assignments/imports below would only create locals.
    global URL, pisa, PdfFileReader, PdfFileWriter

    # tests ###############################################

    # The Qt tools are only needed when a qhelp file is actually produced.
    if COMPILE and not PDFOUTPUT:
        if os.system(QHELPCOMPILER +' -v'):
            print ("Error: QAssistant not fully installed, exiting.")
            print (QHELPCOMPILER)
            return 1
        if os.system(QCOLLECTIOMGENERATOR +' -v'):
            print ("Error: QAssistant not fully installed, exiting.")
            return 1
    if PDFOUTPUT:
        if PDFCONVERTER == 'pisa':
            try:
                import ho.pisa as pisa
            except Exception:
                print ("Error: Python-pisa not installed, exiting.")
                return 1
        else:
            if os.system('htmldoc --version'):
                print ("Error: Htmldoc not found, exiting.")
                return 1
        try:
            from pyPdf import PdfFileReader,PdfFileWriter
        except Exception:
            print ("Error: Python-pypdf not installed, exiting.")
            return 1

    # run ########################################################

    URL = site
    if VERBOSE: print ("crawling " + URL + ", saving in " + TMPFOLDER)
    if not os.path.isdir(TMPFOLDER): os.mkdir(TMPFOLDER)
    with open(TMPFOLDER + os.sep + "wiki.css",'wb') as cssfile:
        cssfile.write(css)
    todolist = []
    count = 1
    indexpages = get(INDEX)
    todolist.extend(indexpages)
    while todolist:
        targetpage = todolist.pop()
        if not targetpage in NORETRIEVE:
            if VERBOSE: print (count, ": Fetching ", targetpage)
            pages = get(targetpage)
            count += 1
            processed.append(targetpage)
            for p in pages:
                if (not (p in todolist)) and (not (p in processed)):
                    todolist.append(p)
    if VERBOSE: print ("Fetched ", count, " pages")

    # The helpers are called with explicit arguments: their default values
    # were bound when the module was loaded and would not reflect a temp
    # folder or converter selected later through main().
    if PDFOUTPUT:
        buildpdffiles(TMPFOLDER, PDFCONVERTER)
        joinpdf(TMPFOLDER, INDEX)
        if REMOVE:
            if VERBOSE: print ("Deleting temp files...")
            rmall(TMPFOLDER)
    elif COMPILE:
        # pdf output replaces the qhelp output; building the qhelp files
        # after the temp folder was removed above could not work anyway.
        qhp = buildtoc(TMPFOLDER, INDEX)
        qhcp = createCollProjectFile(TMPFOLDER)
        if generate(qhcp) or compile(qhp):
            print ("Temp Folder ",TMPFOLDER," has not been deleted.")
            return 1
        else:
            if REMOVE:
                if VERBOSE: print ("Deleting temp files...")
                rmall(TMPFOLDER)
    if VERBOSE: print ("All done!")
    return 0
|
||||
|
||||
def buildpdffiles(folder=TMPFOLDER,converter=PDFCONVERTER):
    """Convert every .html file found in folder to a pdf file.

    folder    -- directory that is scanned (not recursively).
    converter -- 'pisa' selects createpdf_pisa(); any other value selects
                 createpdf_htmldoc().

    NOTE: the default values are evaluated once, when the function is
    defined, not at call time.
    """
    # the listing is taken up front, before any conversion starts
    htmlfiles = [name for name in os.listdir(folder) if name[-5:] == '.html']
    use_pisa = (converter == 'pisa')
    for name in htmlfiles:
        pagename = name[:-5]  # file name without the '.html' extension
        if use_pisa:
            createpdf_pisa(pagename,folder)
        else:
            createpdf_htmldoc(pagename,folder)
|
||||
|
||||
def fetch_resources(uri, rel):
    """Link callback handed to pisa so it can locate images, stylesheets, etc.

    uri -- the href attribute of the html link element.
    rel -- a relative path; it is not used here.

    Returns uri, with every "./" removed, joined onto TMPFOLDER.

    The original author marked this callback as "Not working!!".
    """
    relative = uri.replace("./", "")
    return os.path.join(TMPFOLDER, relative)
|
||||
|
||||
def createpdf_pisa(pagename,folder=TMPFOLDER):
    """Create a pdf file from a saved page using pisa (python module).

    pagename -- name of the page; folder/pagename.html is read and
                folder/pagename.pdf is written.
    folder   -- directory holding the saved html pages.

    Returns 0 on success, otherwise the error value reported by pisa.
    """
    global pisa
    if pisa is None:
        # The module-level placeholder is still unset: load pisa on demand
        # instead of failing with an AttributeError on None.
        import ho.pisa as pisa
    base = folder + os.sep + pagename
    if VERBOSE: print ("Converting " + pagename + " to pdf...")
    # 'ro' is not a valid open() mode; the page is simply read. Both files
    # are closed even when the conversion raises.
    with open(base + '.html','r') as infile:
        with open(base + '.pdf','wb') as outfile:
            pdf = pisa.CreatePDF(infile,outfile,folder,link_callback=fetch_resources)
    if pdf.err: return pdf.err
    return 0
|
||||
|
||||
def createpdf_htmldoc(pagename,folder=TMPFOLDER):
    """Create a pdf file from a saved page using htmldoc (external app, but supports images).

    pagename -- name of the page; folder/pagename.html is converted to
                folder/pagename.pdf.
    folder   -- directory holding the saved html pages.

    Returns the exit code of htmldoc (0 on success, non-zero on failure).
    """
    import subprocess
    infile = folder + os.sep + pagename+'.html'
    outfile = folder + os.sep + pagename+'.pdf'
    # Page names come from the crawled wiki, so the command is passed as an
    # argument list (no shell): names containing spaces or shell
    # metacharacters are handled safely.
    try:
        return subprocess.call(['htmldoc', '--webpage', '-f', outfile, infile])
    except OSError:
        # htmldoc could not be started at all
        print ("Error: unable to run htmldoc on " + infile)
        return 127
|
||||
|
||||
def joinpdf(folder=TMPFOLDER,startpage=INDEX,outputname='freecad.pdf'):
    """Create one pdf file from several others, following the order of startpage.

    folder     -- directory holding the saved html pages and their pdf files.
    startpage  -- name of the page whose first list gives the page order.
    outputname -- name of the merged pdf, written into OUTPUTPATH.
    """
    # Imported here so the names are available no matter how (or whether)
    # crawl() imported pyPdf.
    from pyPdf import PdfFileReader, PdfFileWriter

    if VERBOSE: print ("Building table of contents...")
    with open(folder+os.sep+startpage+'.html') as f:
        html = f.read()
    html = html.replace("\n"," ")
    html = html.replace("> <","><")
    lists = re.findall("<ul.*/ul>",html)
    if not lists:
        print ("Error: no list of pages found in " + startpage)
        return
    pages = re.findall('href="(.*?)"',lists[0])
    pages.insert(1,startpage+".html")
    result = PdfFileWriter()
    # The source files stay open until the merged document has been written
    # (presumably pyPdf reads page data lazily -- TODO confirm); they are all
    # closed afterwards.
    opened = []
    try:
        for p in pages:
            if exists(p[:-5]):
                if VERBOSE: print ('Appending ' + p)
                try:
                    handle = open(folder+os.sep+p[:-5]+'.pdf','rb')
                    opened.append(handle)
                    inputfile = PdfFileReader(handle)
                except Exception:
                    print ('Unable to append ' + p)
                else:
                    for i in range(inputfile.getNumPages()):
                        result.addPage(inputfile.getPage(i))
        with open(OUTPUTPATH + os.sep + outputname,'wb') as outputfile:
            result.write(outputfile)
    finally:
        for handle in opened:
            handle.close()
    if VERBOSE: print ('Successfully created ' + OUTPUTPATH + os.sep + outputname)
|
||||
|
||||
def compile(qhpfile,outputname='freecad.qch'):
    """Compile the whole html doc with the Qt help compiler.

    qhpfile    -- path of the .qhp project file.
    outputname -- name of the .qch file written into OUTPUTPATH.

    Returns 0 on success and the non-zero status of the compiler otherwise,
    so that the caller can detect a failed run.
    """
    qchfile = OUTPUTPATH + os.sep + outputname
    status = os.system(QHELPCOMPILER + ' '+qhpfile+' -o '+qchfile)
    if status:
        # a failure used to fall through and return None, which reads as success
        return status
    if VERBOSE: print ("Successfully created",qchfile)
    return 0
|
||||
|
||||
def generate(qhcpfile):
    """Generate qassistant-specific settings like icon, title, ...

    Writes about.txt into TMPFOLDER and runs the Qt collection generator on
    qhcpfile, producing freecad.qhc in OUTPUTPATH.

    Returns 0 on success and the non-zero status of the generator otherwise,
    so that the caller can detect a failed run.
    """
    txt="""
The help files for FreeCAD.
"""
    with open(TMPFOLDER + os.sep + "about.txt","w") as about:
        about.write(txt)
    qhcfile = OUTPUTPATH + os.sep + "freecad.qhc"
    status = os.system(QCOLLECTIOMGENERATOR+' '+qhcpfile+' -o '+qhcfile)
    if status:
        # a failure used to fall through and return None, which reads as success
        return status
    if VERBOSE: print ("Successfully created ",qhcfile)
    return 0
|
||||
|
||||
def createCollProjectFile(folder=TMPFOLDER):
    """Write the Qt help collection project file (freecad.qhcp) into folder.

    Returns the path of the written file.
    """
    if VERBOSE: print ("Building project file...")
    target = folder + os.sep + "freecad.qhcp"
    # static project description: assistant settings plus the qhp/qch pair
    # that is generated and registered
    project = '''<?xml version="1.0" encoding="UTF-8"?>
<QHelpCollectionProject version="1.0">
<assistant>
<title>FreeCAD User Manual</title>
<applicationIcon>Crystal_Clear_app_tutorials.png</applicationIcon>
<cacheDirectory>freecad/freecad</cacheDirectory>
<startPage>qthelp://org.freecad.usermanual_0.9/doc/Online_Help_Startpage.html</startPage>
<aboutMenuText>
<text>About FreeCAD</text>
</aboutMenuText>
<aboutDialog>
<file>about.txt</file>
<!--
<icon>images/icon.png</icon>
-->
<icon>Crystal_Clear_app_tutorials.png</icon>
</aboutDialog>
<enableDocumentationManager>true</enableDocumentationManager>
<enableAddressBar>true</enableAddressBar>
<enableFilterFunctionality>true</enableFilterFunctionality>
</assistant>
<docFiles>
<generate>
<file>
<input>freecad.qhp</input>
<output>freecad.qch</output>
</file>
</generate>
<register>
<file>freecad.qch</file>
</register>
</docFiles>
</QHelpCollectionProject>
'''
    handle = open(target,'w')
    handle.write(project)
    handle.close()
    if VERBOSE: print ("Done writing qhcp file.")
    return target
|
||||
|
||||
def buildtoc(folder=TMPFOLDER,page=INDEX):
    """Parse the table of contents page and write the Qt help project file.

    The first list found in folder/page.html is turned into nested
    <section> entries and <keyword> entries, every file present in folder is
    listed in the <files> section, and the result is written to
    folder/freecad.qhp.

    Returns the path of the written .qhp file.
    """

    # <inserttoc>, <insertkeywords> and <insertfiles> are placeholders that
    # are filled in at the end of this function.
    qhelpfile = '''<?xml version="1.0" encoding="UTF-8"?>
<QtHelpProject version="1.0">
<namespace>org.freecad.usermanual_0.9</namespace>
<virtualFolder>doc</virtualFolder>
<!--
<customFilter name="FreeCAD 0.10">
<filterAttribute>FreeCAD</filterAttribute>
<filterAttribute>0.10</filterAttribute>
</customFilter>
-->
<filterSection>
<!--
<filterAttribute>FreeCAD</filterAttribute>
<filterAttribute>0.10</filterAttribute>
-->
<toc>
<inserttoc>
</toc>
<keywords>
<insertkeywords>
</keywords>
<insertfiles>
</filterSection>
</QtHelpProject>
'''

    def getname(line):
        "returns the (title, link) of a list item; link is '' when the item has no anchor"
        line = line.replace('<li>','').replace('</li>','')
        title = line.strip()
        link = ''
        if "<a" in line:
            titles = re.findall('<a[^>]*>(.*?)</a>',line)
            links = re.findall('href="(.*?)"',line)
            # only use the anchor when it is complete, instead of raising
            # IndexError on malformed markup
            if titles and links:
                title = titles[0].strip()
                link = links[0].strip()
        return title,link

    if VERBOSE: print ("Building table of contents...")
    with open(folder+os.sep+page+'.html') as f:
        html = f.read()
    html = html.replace("\n"," ")
    html = html.replace("> <","><")
    html = re.findall("<ul.*/ul>",html)[0]
    items = re.findall('<li[^>]*>.*?</li>|</ul></li>',html)
    inserttoc = '<section title="Table of Contents">\n'
    insertkeywords = ''
    for item in items:
        if not ("<ul>" in item):
            if ("</ul>" in item):
                # end of a nested list: close the section opened for it
                inserttoc += '</section>\n'
            else:
                title,link=getname(item)
                if link:
                    link='" ref="'+link
                    insertkeywords += ('<keyword name="'+title+link+'"/>\n')
                inserttoc += ('<section title="'+title+link+'"></section>\n')
        else:
            # the item opens a nested list: every part but the last one
            # starts a section that stays open for its children
            subitems = item.split("<ul>")
            last = len(subitems)-1
            for i, subitem in enumerate(subitems):
                title,link=getname(subitem)
                if link:
                    link='" ref="'+link
                    insertkeywords += ('<keyword name="'+title+link+'"/>\n')
                trail = ''
                if i == last: trail = '</section>'
                inserttoc += ('<section title="'+title+link+'">'+trail+'\n')
    inserttoc += '</section>\n'

    insertfiles = "<files>\n"
    for fil in os.listdir(folder):
        insertfiles += ("<file>"+fil+"</file>\n")
    insertfiles += "</files>\n"

    # Plain string replacement: with re.sub() any backslash in a page title
    # would be interpreted as an escape of the replacement template.
    qhelpfile = qhelpfile.replace('<insertkeywords>',insertkeywords)
    qhelpfile = qhelpfile.replace('<inserttoc>',inserttoc)
    qhelpfile = qhelpfile.replace('<insertfiles>',insertfiles)
    qfilename = folder + os.sep + "freecad.qhp"
    with open(qfilename,'wb') as f:
        f.write(qhelpfile)
    if VERBOSE: print ("Done writing qhp file.")
    return qfilename
|
||||
|
||||
def get(page):
    """Download a single page, save it locally and return the pages it links to.

    Returns an empty list when the page could not be downloaded.
    """
    html = fetchpage(page)
    if html is None:
        # fetchpage() already reported the failure; there is nothing to
        # clean or save, and no links to follow
        return []
    html = cleanhtml(html)
    pages = getlinks(html)
    html = cleanlinks(html,pages)
    html = cleanimagelinks(html)
    output(html,page)
    return pages
|
||||
|
||||
def cleanhtml(html):
    """Clean given html code from dirty script stuff.

    Keeps only the content part of the wiki page and removes comments,
    scripts and navigation elements. Returns the cleaned html.
    """
    # Line breaks are swapped for a marker so that '.' in the patterns below
    # can match across the original lines; they are restored at the end.
    html = html.replace('\n','Wlinebreak') # removing linebreaks for regex processing
    html = re.sub(r'(.*)<div[^>]+column-content+[^>]+>','',html) # stripping before content
    html = re.sub(r'<div[^>]+column-one+[^>]+>.*','',html) # stripping after content
    html = re.sub(r'<!--[^>]+-->','',html) # removing comment tags
    html = re.sub(r'<script[^>]*>.*?</script>','',html) # removing script tags
    html = re.sub(r'<!--\[if[^>]*>.*?endif\]-->','',html) # removing IE tags
    html = re.sub(r'<div id="jump-to-nav"[^>]*>.*?</div>','',html) # removing nav div
    html = re.sub(r'<h3 id="siteSub"[^>]*>.*?</h3>','',html) # removing print subtitle
    html = html.replace('Retrieved from','Online version:') # changing online title
    # '[^>]*' (instead of a single '[^>]') also matches when the div carries
    # further attributes after its id
    html = re.sub(r'<div id="mw-normal-catlinks[^>]*>.*?</div>','',html) # removing catlinks
    html = re.sub(r'<div class="NavHead.*?</div>','',html) # removing nav stuff
    html = re.sub(r'<div class="NavContent.*?</div>','',html) # removing nav stuff
    html = re.sub(r'<div class="NavEnd.*?</div>','',html) # removing nav stuff
    if not GETTRANSLATIONS:
        html = re.sub(r'<div class="languages.*?</div>','',html) # removing translations links
    html = html.replace('Wlinebreak','\n') # restoring original linebreaks
    return html
|
||||
|
||||
def getlinks(html):
    """Return the list of wiki pages linked from the given html.

    Only anchors pointing to "...php?title=<page>" are considered; a
    "#section" suffix is dropped. Every page is listed once, in order of
    first appearance.

    Side effect: page names containing ':', ';' or '&' (and, when
    GETTRANSLATIONS is off, names containing '/') are added to NORETRIEVE so
    that they are kept as online links.
    """
    links = re.findall('<a[^>]*>.*?</a>',html)
    pages = []
    for l in links:
        rg = re.findall(r'href=.*?php\?title=(.*?)"',l)
        if not rg:
            continue
        name = rg[0].split('#')[0]
        if not name:
            # a link made only of an anchor names no page; an empty name
            # would later match every link in cleanlinks()
            continue
        keeponline = (":" in name or ";" in name or "&" in name
                      or ("/" in name and not GETTRANSLATIONS))
        if keeponline and name not in NORETRIEVE:
            # added only once: the list used to grow with every occurrence
            NORETRIEVE.append(name)
        if name not in pages:
            # duplicates would make the crawler fetch the same page twice
            pages.append(name)
    return pages
|
||||
|
||||
def getimagelinks(html):
    """Return the src value of every <img> tag found in the given html."""
    srcpattern = re.compile('<img.*?src="(.*?)"')
    return srcpattern.findall(html)
|
||||
|
||||
def cleanlinks(html, pages=None):
    """Rewrite the wiki page links found in html.

    Links to pages listed in NORETRIEVE are turned into absolute links to
    the online wiki; links to all other pages are pointed at the local
    "<page>.html" file ('/' in the page name replaced by '-').

    html  -- the html code to process.
    pages -- page names to rewrite; extracted with getlinks() when empty.

    Returns the modified html.
    """
    if not pages: pages = getlinks(html)
    for page in pages:
        if page in NORETRIEVE:
            newlink = 'href="' + URL + wikiindex + page + '"'
        else:
            newlink = 'href="' + page.replace("/","-") + '.html"'
        # The page name is escaped, so regex metacharacters in it are taken
        # literally, and anchored to 'php?title=' so that a page whose name
        # merely ends with this name is not rewritten as well.
        pattern = re.compile(r'href="[^"]*php\?title=' + re.escape(page) + '"')
        # A function is used as replacement so that backslashes in the new
        # link are not interpreted as template escapes.
        html = pattern.sub(lambda match, newlink=newlink: newlink, html)
    return html
|
||||
|
||||
def cleanimagelinks(html,links=None):
    """Point the image links of html at local files and download the images.

    html  -- the html code to process.
    links -- image links to handle; extracted with getimagelinks() when empty.

    Returns the modified html.
    """
    if not links:
        links = getimagelinks(html)
    for link in links:
        # everything after the last '/' is the local file name
        basename = re.findall('.*/(.*)',link)
        if basename:
            html = html.replace(link,basename[0])
        fetchimage(link)
    return html
|
||||
|
||||
def fetchpage(page):
    """Retrieve the given page from the wiki.

    The download is attempted up to MAXFAIL times.

    Returns the html of the page, or None when every attempt failed.
    """
    for attempt in range(MAXFAIL):
        try:
            return urlopen(URL + wikiindex + page).read()
        except IOError:
            # IOError is the base class of HTTPError and URLError, so
            # connection problems are retried too instead of aborting the
            # whole crawl.
            continue
    print ('Error: unable to fetch page ' + page)
    return None
|
||||
|
||||
def fetchimage(imagelink):
    """Retrieve the given image from the wiki and save it in the temp folder.

    imagelink -- link to the image, relative to the web root of the wiki.

    Images that were already downloaded (listed in processed) are skipped.
    The download is attempted up to MAXFAIL times; failures are reported but
    never raised.
    """
    if '/' not in imagelink:
        # such a link cannot be resolved against the web root (this used to
        # raise an IndexError)
        print ('Error: unable to fetch file ' + imagelink)
        return
    filename = imagelink.rsplit('/',1)[-1]
    if not filename or filename in processed:
        return
    for attempt in range(MAXFAIL):
        try:
            if VERBOSE: print ("Fetching " + filename)
            data = urlopen(webroot(URL) + imagelink).read()
            # the file is closed even when writing fails
            with open(local(filename,image=True),'wb') as imagefile:
                imagefile.write(data)
        except Exception:
            # best effort: any failure simply counts as one failed attempt
            continue
        processed.append(filename)
        return
    print ('Error: unable to fetch file ' + filename)
|
||||
|
||||
def local(page,image=False):
    """Return the path inside TMPFOLDER for a given page or image.

    Pages get a '.html' extension appended; image names are used as they are.
    """
    extension = '' if image else '.html'
    return TMPFOLDER + os.sep + page + extension
|
||||
|
||||
def exists(page,image=False):
    """Tell whether the local file of the given page/image already exists."""
    return os.path.exists(local(page,image))
|
||||
|
||||
def webroot(url):
    """Return the scheme and host part of url, e.g. 'http://host'.

    Both http and https urls are supported, with or without a path. When url
    carries no scheme, 'http://' is assumed and everything before the first
    '/' is taken as the host.
    """
    found = re.search(r'https?://[^/]+', url)
    if found:
        return found.group(0)
    return 'http://' + url.split('/',1)[0]
|
||||
|
||||
def output(html,page):
    """Wrap raw html code into a complete html document and save it.

    The page name is used as the document title, the shared wiki.css
    stylesheet is linked, and the result is written to the local file of the
    page ('/' in the page name replaced by '-').
    """
    head = "".join([
        "<html><head>",
        "<title>",
        page,
        "</title>",
        "<link type='text/css' href='wiki.css' rel='stylesheet'>",
        "</head><body>",
    ])
    document = head + html + "</body></html>"
    target = local(page.replace("/","-"))
    handle = open(target,'wb')
    handle.write(document)
    handle.close()
|
||||
|
||||
def main(arg):
    """Parse the command line options and run the crawler.

    arg -- list of command line arguments without the program name;
           sys.argv[1:] is used when arg is None.

    Returns the status of crawl() (0 on success, 1 on failure). Exits with
    status 2 on invalid options and after printing the help for -h/--help.
    """
    global QHELPCOMPILER,QCOLLECTIOMGENERATOR,OUTPUTPATH,PDFOUTPUT,PDFCONVERTER,TMPFOLDER,VERBOSE,REMOVE
    if arg is None:
        arg = sys.argv[1:]
    try:
        # "v" is part of the short options so that the documented -v switch
        # is accepted instead of being rejected as an unknown option
        opts, args = getopt.getopt(arg, "hvp:t:c:g:o:", ["help", "pdf=", "noremove", "tempfolder=", "helpcompiler-exe=", "out-path=", "helpgenerator-exe="])
    except getopt.GetoptError:
        # print help information and exit:
        sys.stderr.write(usage)
        sys.exit(2)

    # checking on the options
    for o, a in opts:
        if o == "-v":
            VERBOSE = True
        elif o == "--noremove":
            # this option was accepted before but had no effect
            REMOVE = False
        elif o in ("-p","--pdf"):
            PDFOUTPUT = True
            if a in ['pisa','htmldoc']:
                print ("using pdf converter:",a)
                PDFCONVERTER = a
        elif o in ("-t","--tempfolder"):
            print ("using tempfolder:",a)
            TMPFOLDER = a
        elif o in ("-h", "--help"):
            sys.stderr.write(usage)
            sys.exit()
        elif o in ("-c", "--helpcompiler-exe"):
            QHELPCOMPILER = a
            print ('Using: ',QHELPCOMPILER)
        elif o in ("-g", "--helpgenerator-exe"):
            QCOLLECTIOMGENERATOR = a
        elif o in ("-o", "--out-path"):
            print ("Using output path:",a)
            OUTPUTPATH = a

    # the first positional argument, when given, is the url of the wiki
    if args:
        return crawl(args[0])
    return crawl()
|
||||
|
||||
if __name__ == "__main__":
    # The crawler is intentionally not started when the file is run as a
    # script: the call to main() is disabled and only a warning is printed.
    # main(sys.argv[1:])
    print ("Warning! This script is obsolete. Use the scripts in the offlinedocs folder...")
|
||||
|
||||
Reference in New Issue
Block a user