Código fuente de MediaBook
From Software libre para los países en desarrollo
Este es el código fuente de una versión preliminar de MediaBook. A diferencia de lo que señala la impronta de este trabajo, el código de este programa (y, por consiguiente, este listado particular) se distribuye bajo los términos de la licencia pública GNU GPL, versión 2.
Para ejecutarlo:
- instalar Python 2 (el listado usa sintaxis de Python 2, por ejemplo la sentencia `print`)
- instalar httplib2
- depositar el código en un archivo de texto
- ejecutar el archivo con Python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# MediaBook
# copyright 2006 Manuel Amador (Rudd-O) [[http://rudd-o.com/]]
# Reuses the magnificent Boom! standard
# laid out at http://alistapart.com/articles/boom
# version 0.0.1alpha
# distributed under the GNU GPL, version 2
import sys
import os
import urllib
#from xml.dom.ext.reader import Sax2
import xml.dom
xml.dom.getDOMImplementation("minidom")
import xml.dom.Element
import time
import re
import new
import httplib2
from sets import Set
# resources
# FIXME option to make self-contained file with data: URIs
# FIXME make the META header generation, at least for AUTHOR, easier
# maybe we should leave this particular concern to an external tool such as htmlxmlutils?
# FIXME the title should be detected from the title page if present
# FIXME audit because the trailing slash MATTERS A LOT
def walkTree(node):
    """Yield ``node`` and then each of its descendants, depth first.

    Children are visited in ``childNodes`` order.  An object without a
    ``childNodes`` attribute is yielded on its own.
    """
    yield node
    if hasattr(node, "childNodes"):
        for child in node.childNodes:
            for descendant in walkTree(child):
                yield descendant
# manipulation functions
# HTTP client shared by every download.  The path is handed to httplib2 as
# its first constructor argument (the cache location).
httpobject = httplib2.Http(os.path.expanduser("~/.mediabook.cache"))


def fetch_url(url):
    """Download ``url`` with the shared client and return the response body.

    Raises:
        Exception: when the response status is not 200, 301, 302 or 304.
    """
    print("Requesting %s" % url)
    response, body = httpobject.request(url)
    status = response["status"]
    if int(status) not in (200, 301, 304, 302):
        raise Exception("Unexpected status %s while retrieving %s" % (status, url))
    return body
def parse_document(string):
    """Parse the XML text ``string`` with minidom and return the document."""
    return xml.dom.minidom.parseString(string)
def get_document(url):
    """Fetch ``url`` and return its contents parsed as a DOM document.

    Errors raised while fetching propagate unchanged.

    Raises:
        Exception: naming the URL, when the downloaded body cannot be parsed.
    """
    contents = fetch_url(url)
    try:
        return parse_document(contents)
    except Exception:
        # Deliberately not a bare ``except:``: an interrupt or interpreter
        # exit must not be reported as an XML parsing error.
        raise Exception("XML parsing error in URL %s" % url)
def firstChildElement(e):
    """Return the first child of ``e`` whose nodeType is 1 (an element).

    Raises:
        IndexError: when ``e`` has no such child.
    """
    for child in e.childNodes:
        if child.nodeType == 1:
            return child
    raise IndexError("list index out of range")
def firstChildTextNode(e):
    """Return the first child of ``e`` whose nodeType is 3 (a text node).

    Raises:
        IndexError: when ``e`` has no such child.
    """
    for child in e.childNodes:
        if child.nodeType == 3:
            return child
    raise IndexError("list index out of range")
def nextSiblingElement(e):
    """Return the nearest following sibling of ``e`` that is an element.

    Returns:
        The sibling element, or None when no element follows ``e``.
    """
    sibling = e.nextSibling
    # Test for None before reading nodeType: a node that is the last child
    # has no next sibling at all, which used to raise AttributeError.
    while sibling is not None:
        if sibling.nodeType == 1:
            return sibling
        sibling = sibling.nextSibling
    return None
def replaceNode(old,new):
    """Put ``new`` where ``old`` sits under old's parent and return ``new``."""
    parent = old.parentNode
    parent.replaceChild(new, old)
    return new
def morphElement(old,newTagName):
    """Rebuild ``old`` as a ``newTagName`` element and return the new element.

    Attributes are copied and the child nodes are moved (not cloned) to the
    new element.  When ``old`` has a parent, the new element takes its place
    there.
    """
    replacement = old.ownerDocument.createElement(newTagName)
    for name, value in old.attributes.items():
        replacement.setAttribute(name, value)
    # Iterate over a copy: appendChild() detaches each child from ``old``.
    for child in old.childNodes[:]:
        replacement.appendChild(child)
    if old.parentNode:
        old.parentNode.replaceChild(replacement, old)
    return replacement
def removeNode(node):
    """Detach ``node`` from its parent node."""
    node.parentNode.removeChild(node)
def heightDifference(start,ancestor):
    """Return the number of ``parentNode`` steps from ``start`` up to ``ancestor``.

    Returns 0 when both are the same node.  ``ancestor`` must lie on the
    parent chain of ``start``; otherwise the climb runs off the top of the
    tree and fails with AttributeError.
    """
    height = 0
    current = start
    while current != ancestor:
        height += 1
        current = current.parentNode
    return height
def genid(prefix):
    """Generate an endless series of ids: prefix + "1", prefix + "2", ...

    Every id is a unicode string.
    """
    counter = 0
    while True:
        counter += 1
        # A unicode format instead of the ``unicode()`` builtin, which only
        # exists on Python 2; the resulting string is the same.
        yield u"%s%s" % (prefix, counter)
def transformtotext(node):
    """Return the values of all text nodes under ``node`` joined by spaces.

    ``node`` itself is included in the walk, so passing a text node returns
    its own value.
    """
    pieces = [n.nodeValue for n in walkTree(node) if n.nodeType == 3]
    return " ".join(pieces)
# Matches any single character that is not a regex "word" character.
# Raw string: "\w" in a plain literal is an invalid string escape.
sanitizer = re.compile(r"[^\w]")


def sanitize_anchor(anchortext):
    """Return ``anchortext`` with every non-word character replaced by "-"."""
    return sanitizer.sub("-", anchortext)
def gethtmlheadingnames():
    """Return a new list of the HTML heading tag names, "h1" through "h6".

    Callers reverse the result in place, so a fresh list is built on every
    call.
    """
    return ["h" + str(number) for number in range(1, 7)]
def addClass(node,clas):
    """Add ``clas`` to the ``class`` attribute of ``node``.

    When the attribute is missing or empty it is set to ``clas``; otherwise
    ``clas`` is appended after a single space.
    """
    existing = node.getAttribute("class")
    if existing:
        node.setAttribute("class", existing + u" " + clas)
    else:
        node.setAttribute("class", clas)
def insertAfter(newnode,afterwhich):
    """Insert ``newnode`` as the sibling immediately after ``afterwhich``."""
    parent = afterwhich.parentNode
    following = afterwhich.nextSibling
    if following:
        parent.insertBefore(newnode, following)
    else:
        # ``afterwhich`` is the last child, so the new node goes at the end.
        parent.appendChild(newnode)
# ids of the front-page <div> elements that make up the book, grouped by
# the part of the book they belong to and listed in output order.
frontmattersections = [
    "frontcover",
    "halftitlepage",
    "titlepage",
    "imprint",
    "preface",
    "dedication",
    "thanks",
    "toc",
]
contentsections = [
    "bookcontents",
    "appendices",
]
endmattersections = [
    "references",
    "glossary",
    "colophon",
    "backcover",
]
# Sections whose headings get the "no-toc" class.
# FIXME check which ones have to be notocized
notocsections = [
    "frontcover",
    "halftitlepage",
    "titlepage",
    "backcover",
]
def bootstrap(url):
    """Fetch and parse the front page at ``url`` and return the document.

    The returned document gains a ``get_key_element(id)`` method that returns
    the first ``div`` whose ``id`` attribute equals ``id`` and raises
    IndexError when no such ``div`` exists.
    """
    import types

    def get_key_element(self, element):
        matches = [div for div in self.getElementsByTagName("div")
                   if div.getAttribute("id") == element]
        return matches[0]

    document = get_document(url)
    # Attach the helper to this document only.  Writing a method bound to
    # this instance into the class __dict__ made every other document of the
    # same class search the front page instead of itself.
    document.get_key_element = types.MethodType(get_key_element, document)
    return document
def sectionize(group,mode):
    """Turn the ``ol``/``li`` outline under ``group`` into nested ``div``s.

    Every ``li`` first receives a class derived from its depth, then becomes
    a ``div``; finally each ``ol`` is dissolved by hoisting its children in
    front of it and removing it.

    Args:
        group: element containing the outline.
        mode: "bookcontents" (classes "chapter", "section",
            "subsection-level-N") or "appendices" (classes "appendix",
            "subappendix-level-N").

    Raises:
        ValueError: for any other ``mode``, once an ``li`` is processed.
    """
    def treatListElement(le):
        # Halved presumably because each outline level adds two ancestors,
        # an <ol> and its <li>.  ``//`` keeps the integer division explicit.
        distance = heightDifference(le, group) // 2
        if mode == "appendices":
            if distance == 1:
                classname = "appendix"
            else:
                classname = "subappendix-level-" + str(distance)
        elif mode == "bookcontents":
            if distance == 1:
                classname = "chapter"
            elif distance == 2:
                classname = "section"
            else:
                classname = "subsection-level-" + str(distance)
        else:
            raise ValueError("invalid mode %s passed" % mode)
        le.setAttribute("class", classname)

    # Classes must be assigned while the <ol> levels still exist, because
    # the depth computation counts them.
    for item in group.getElementsByTagName("li"):
        treatListElement(item)
    for item in group.getElementsByTagName("li"):
        morphElement(item, "div")
    for ol in group.getElementsByTagName("ol"):
        for child in ol.childNodes[:]:
            ol.parentNode.insertBefore(child, ol)
    for ol in group.getElementsByTagName("ol"):
        removeNode(ol)
def subdocument_integration(front_page,url):
    """Replace the links in each book section by the pages they point to.

    For every section present in the front page, everything except ``li``,
    ``ol``, ``a`` and ``div`` nodes and the direct children of links is
    removed.  Each link is then replaced by the ``div`` with id "content" of
    the linked document, which is tagged with class "subdocument" and a
    ``sourceurl`` attribute holding the link's original href.

    Content sections integrate every link they contain.  Front and end
    matter sections integrate only their first link and receive the section
    name as their class.

    Raises:
        IndexError: when a front/end matter section holds no link, or a
            linked document has no ``div`` with id "content".
    """
    def nodeGood(node):
        # "Good" means good to delete.
        if node.nodeName in ["li", "ol", "a", "div"]:
            return False
        if node.parentNode.nodeName == "a":
            return False
        return True

    def strip_decoration(container):
        # Snapshot first: removing nodes during the walk would disturb it.
        doomed = [node for node in walkTree(container) if nodeGood(node)]
        for node in doomed:
            removeNode(node)

    def links_in(container):
        return [anchor for anchor in container.getElementsByTagName("a")
                if anchor.getAttribute("href")]

    def splice_subdocument(link):
        href = link.getAttribute("href")
        subdocument = get_document(urllib.basejoin(url, href.split("#")[0]))
        contents = [div for div in subdocument.getElementsByTagName("div")
                    if div.getAttribute("id") == "content"][0]
        contents.setAttribute("sourceurl", href)
        contents.setAttribute("class", "subdocument")
        contents.removeAttribute("id")
        replaceNode(link, contents)

    for name in contentsections:
        try:
            group = front_page.get_key_element(name)
        except IndexError:
            continue
        strip_decoration(group)
        for link in links_in(group):
            splice_subdocument(link)

    for name in frontmattersections + endmattersections:
        # FIXME define as resource!
        try:
            page = front_page.get_key_element(name)
        except IndexError:
            continue
        strip_decoration(page)
        link = links_in(page)[0]
        splice_subdocument(link)
        page.setAttribute("class", name)
def anchor_fixup(front_page,url):
    """Make anchors unique across merged pages and hrefs absolute.

    Three passes, in this order:

    1. Every ``a`` with a ``name`` and every non-``a`` element with an ``id``
       inside a book section gets its name/id replaced by a sanitized
       "<sourceurl of its parent>---<old name or id>".
    2. Every ``a`` without an ``href`` hands its id to the next sibling
       element and is removed.
    3. Every ``a`` with an ``href`` has it resolved against ``url``; the
       previous value is kept in an ``originalhref`` attribute.

    Raises:
        Exception: in pass 2, when the receiving element already has an id.
    """
    def jumpids(node):
        nextsibling = nextSiblingElement(node)
        if nextsibling is None:
            # No element follows to take the id over; keep the anchor.
            return
        if nextsibling.getAttribute("id"):
            print(node)
            print(nextsibling)
            raise Exception("already had an id %s" % nextsibling.getAttribute("id"))
        nextsibling.setAttribute("id", node.getAttribute("id"))
        removeNode(node)

    def getsourceurl(node):
        return node.parentNode.getAttribute("sourceurl")

    def namespace_anchor(anchor):
        namespace = getsourceurl(anchor)
        original_anchor_name = anchor.getAttribute("name")
        if not original_anchor_name:
            original_anchor_name = anchor.getAttribute("id")
        newanchor = sanitize_anchor(namespace + u"---" + original_anchor_name)
        anchor.setAttribute("name", newanchor)
        anchor.setAttribute("id", newanchor)

    def makeintoabsolutehref(tag):
        newlink = urllib.basejoin(url, tag.getAttribute("href"))
        tag.setAttribute("originalhref", tag.getAttribute("href"))
        tag.setAttribute("href", newlink)

    def has_own_id(node):
        return (node.nodeName != "a" and hasattr(node, "getAttribute")
                and node.getAttribute("id"))

    tobemapped = [anchor for anchor in front_page.getElementsByTagName("a")
                  if anchor.getAttribute("name")]
    for name in frontmattersections + endmattersections + contentsections:
        try:
            for child in front_page.get_key_element(name).childNodes:
                tobemapped = tobemapped + [node for node in walkTree(child)
                                           if has_own_id(node)]
        except IndexError:
            # Section missing from this book.
            continue

    for anchor in tobemapped:
        namespace_anchor(anchor)
    # Each list is built in full before its pass starts, because the passes
    # remove nodes and rewrite the attributes being filtered on.
    plain_anchors = [anchor for anchor in front_page.getElementsByTagName("a")
                     if not anchor.getAttribute("href")]
    for anchor in plain_anchors:
        jumpids(anchor)
    links = [anchor for anchor in front_page.getElementsByTagName("a")
             if anchor.getAttribute("href")]
    for link in links:
        makeintoabsolutehref(link)
def fix_hyperlinks(front_page,url):
    """Turn links between merged pages into in-document fragment links.

    For every ``a`` with an ``href``:

    * If its ``originalhref`` starts with "#", the href becomes
      "#" + sanitized "<sourceurl of the nearest ancestor>---<fragment>".
    * Otherwise, if the href without its fragment is the absolute URL of a
      page merged into the book, the href becomes "#" + sanitized
      "<originalhref without fragment>---<fragment, or 'top'>".
    * Any other href is written back unchanged.

    Links rewritten to a fragment get " fragmentref" appended to their class.
    """
    def getsourceurl(node):
        # Climb until a node carries a sourceurl attribute.  Walking past
        # the root raises AttributeError, which processahref() handles.
        while not node.getAttribute("sourceurl"):
            node = node.parentNode
        return node.getAttribute("sourceurl")

    def mark_fragmentref(tag):
        classes = tag.getAttribute("class")
        if not classes:
            classes = u""
        tag.setAttribute("class", classes + u" fragmentref")

    # Absolute URLs (fragment dropped) of every page merged into the book.
    # A set, because it is only ever used for membership tests.
    cache = set([
        urllib.basejoin(url, node.getAttribute("sourceurl").split("#")[0])
        for node in walkTree(front_page)
        if hasattr(node, "getAttribute") and node.getAttribute("sourceurl")
    ])

    def processahref(tag):
        href = tag.getAttribute("href")
        if not href:
            return
        # startswith() rather than [0], so an empty originalhref is simply
        # "not a fragment link" instead of an IndexError.
        if tag.getAttribute("originalhref").startswith("#"):
            # Link inside its own page: every id is namespaced by now, so
            # the fragment has to be namespaced the same way.
            try:
                namespace = getsourceurl(tag)
            except Exception:
                print("Skipping URL: No source URL can be found for URL %s found in document" % href)
                return
            fragment = sanitize_anchor(namespace + u"---" + href.split("#")[1])
            mark_fragmentref(tag)
            tag.setAttribute("href", u"#" + fragment)
            return
        if "#" in href:
            uri, fragment = href.split("#", 1)
        else:
            uri, fragment = href, None
        if uri in cache:
            attr = ""
            namespace = tag.getAttribute("originalhref").split("#")[0]
            if fragment is None:
                original_anchor_name = "top"
            else:
                original_anchor_name = fragment
            fragment = sanitize_anchor(namespace + u"---" + original_anchor_name)
        else:
            attr = uri
        if fragment is not None:
            attr = attr + "#" + fragment
        tag.setAttribute("href", attr)
        if attr.startswith("#"):
            mark_fragmentref(tag)

    for tag in front_page.getElementsByTagName("a"):
        processahref(tag)
def remove_moles(front_page):
    """Strip unwanted page furniture and normalise figures.

    The ids and classes handled here look like MediaWiki page chrome; that
    is an assumption to confirm against the site.  In order, this:

    * removes elements with specific ids and classes (sub-title, jump links,
      per-page TOC, file TOC, file history, clearing divs, print footer,
      magnify icons);
    * unwraps every ``div`` with id "bodyContent", keeping its children;
    * turns each "thumbcaption" ``div`` into ``<p class="caption">`` placed
      first inside its parent;
    * renames class "thumb" to "figure" on thumbnail ``div``s and unwraps
      their first child;
    * adds class "no-toc" to every heading in ``notocsections``;
    * removes the ``h1`` headings of the imprint and dedication sections.
    """
    def elements(tagname, attribute, values):
        return [element for element in front_page.getElementsByTagName(tagname)
                if element.getAttribute(attribute) in values]

    def remove_all(nodes):
        for node in nodes:
            removeNode(node)

    remove_all(elements("div", "id", ["contentSub", "jump-to-nav"]))
    remove_all(elements("table", "id", ["toc"]))
    remove_all(elements("ul", "id", ["filetoc"]))

    filehistories = [node for node in walkTree(front_page)
                     if hasattr(node, "getAttribute")
                     and node.getAttribute("id") in ["filehistory"]]
    for marker in filehistories:
        # Up to seven following siblings go together with the marker.  Stop
        # early when fewer exist instead of failing on a missing sibling.
        for _ in range(7):
            if marker.nextSibling is None:
                break
            removeNode(marker.nextSibling)
        removeNode(marker)

    remove_all(elements("h3", "id", ["siteSub"]))
    # "editsection" is deliberately not in this list (disabled).
    remove_all(elements("div", "class", ["visualClear", "printfooter", "magnify"]))

    bodycontents = elements("div", "id", ["bodyContent"])
    for bodycontent in bodycontents:
        for snippet in bodycontent.childNodes[:]:
            bodycontent.parentNode.insertBefore(snippet, bodycontent)
    remove_all(bodycontents)

    def morphthumbcaption(node):
        element = morphElement(node, "p")
        element.setAttribute("class", "caption")
        parent = element.parentNode
        # Inserting a node before itself makes minidom detach it and then
        # raise NotFoundErr, so skip the move when it is already first.
        if parent.firstChild is not element:
            parent.insertBefore(element, parent.firstChild)

    for caption in elements("div", "class", ["thumbcaption"]):
        morphthumbcaption(caption)

    def morphfigure(node):
        newlist = []
        for classname in node.getAttribute("class").split(" "):
            if classname == "thumb":
                newlist.append("figure")
            else:
                newlist.append(classname)
        node.setAttribute("class", " ".join(newlist))
        # Unwrap the first child: adopt its children, then drop it.
        for grandchild in node.childNodes[0].childNodes[:]:
            node.appendChild(grandchild)
        removeNode(node.childNodes[0])

    for figure in elements("div", "class", ["thumb tnone", "thumb tleft", "thumb tright"]):
        morphfigure(figure)

    for keyelement in notocsections:
        try:
            section = front_page.get_key_element(keyelement)
        except IndexError:
            continue
        for tagname in gethtmlheadingnames():
            for heading in section.getElementsByTagName(tagname):
                addClass(heading, "no-toc")

    # FIXME instead of doing this hard, let's do it with CSS
    for keyelement in ["imprint", "dedication"]:
        try:
            section = front_page.get_key_element(keyelement)
        except IndexError:
            continue
        remove_all(section.getElementsByTagName("h1"))
def remove_duplicate_ids(front_page):
    """Drop the ``id`` attribute from every element that repeats an earlier id.

    Nodes are visited in walkTree() order; the first element carrying a given
    id keeps it.  Elements without an id are left untouched.
    """
    seen = set()
    for node in walkTree(front_page):
        if not hasattr(node, "getAttribute"):
            continue
        identifier = node.getAttribute("id")
        if not identifier:
            continue
        if identifier in seen:
            node.removeAttribute("id")
        else:
            seen.add(identifier)
def demotion(front_page):
    """Demote headings of merged pages according to their nesting depth.

    Every ``div`` of class "subdocument" under the "bookcontents" section
    has its headings pushed down by (depth below the section - 2) levels.
    A heading that cannot be pushed that far (it would go past ``h6``) is
    left as it is.

    Raises:
        IndexError: when the front page has no "bookcontents" section.
    """
    toc = front_page.get_key_element("bookcontents")

    def demote_headers_in_node(node, steps):
        # Walk from h6 up to h1, so a heading that was just demoted is not
        # picked up again when its new tag name comes up.
        tagarray = gethtmlheadingnames()
        tagarray.reverse()
        for position, oldtagname in enumerate(tagarray):
            oldheadings = node.getElementsByTagName(oldtagname)
            if position - steps < 0 or not oldheadings:
                continue
            newtagname = tagarray[position - steps]
            for oldheading in oldheadings:
                morphElement(oldheading, newtagname)

    contentNodes = [div for div in toc.getElementsByTagName("div")
                    if div.getAttribute("class") == "subdocument"]
    for node in contentNodes:
        demotion_level = heightDifference(node, toc) - 2
        # Only positive levels are demotions.  A negative level would
        # promote headings or index past the end of the tag list.
        if demotion_level > 0:
            demote_headers_in_node(node, demotion_level)
def footnotes_and_citations(front_page):
    """Append ``<span class="footnote">`` elements to citations and links.

    Within each book section:

    * every ``cite`` gets a footnote listing its ``title`` and the href of
      its first link, separated by ``<br>``.  The texts are remembered per
      href, so a later ``cite`` with the same link reuses them.  The link
      inside the ``cite`` is marked with ``footnoted="1"``.
    * every other link whose href is non-empty, does not start with "#" and
      does not contain "&action=edit" is followed by a footnote showing
      the href.

    TODO (from the original author): use the propagated citation titles to
    generate the list of references and the page numbers where they appear.
    """
    def get_link(fromcite):
        anchors = fromcite.getElementsByTagName("a")
        if anchors:
            return anchors[0].getAttribute("href")
        return None

    def new_footnote(document):
        span = document.createElement("span")
        span.setAttribute("class", "footnote")
        return span

    for name in frontmattersections + contentsections + endmattersections:
        try:
            key = front_page.get_key_element(name)
        except IndexError:
            continue

        # href -> list of texts shown in the footnote; kept per section.
        cite_cache = {}

        def add_footnote_after_cite(citenode):
            document = citenode.ownerDocument
            link = get_link(citenode)
            list_of_texts = None
            if link:
                list_of_texts = cite_cache.get(link)
            if not list_of_texts:
                candidates = [citenode.getAttribute("title"), link]
                list_of_texts = [text for text in candidates if text]
            if not list_of_texts:
                return
            span = new_footnote(document)
            for position, text in enumerate(list_of_texts):
                if position:
                    span.appendChild(document.createElement("br"))
                span.appendChild(document.createTextNode(text))
            citenode.appendChild(span)
            anchors = citenode.getElementsByTagName("a")
            if anchors:
                # Keeps the hyperlink pass below from footnoting it again.
                anchors[0].setAttribute("footnoted", "1")
            if link:
                cite_cache[link] = list_of_texts

        def add_footnote_after_hyperlink(anode):
            href = anode.getAttribute("href")
            if "&action=edit" in href:
                return  # we exclude wiki edit links!
            span = new_footnote(anode.ownerDocument)
            span.appendChild(anode.ownerDocument.createTextNode(href))
            insertAfter(span, anode)

        for citenode in key.getElementsByTagName("cite"):
            add_footnote_after_cite(citenode)
        # Collected after the cite pass so the "footnoted" marks are seen.
        hyperlinks = [anchor for anchor in key.getElementsByTagName("a")
                      if anchor.getAttribute("href")
                      and anchor.getAttribute("href")[0] != "#"
                      and not anchor.getAttribute("footnoted")]
        for anchor in hyperlinks:
            add_footnote_after_hyperlink(anchor)
def definitions_abbreviations_acronyms(front_page):
    """Build the glossary from the ``abbr``, ``acronym`` and ``dfn`` elements.

    Does nothing when the front page has no "glossary" section.  Otherwise a
    ``dl`` is appended to the glossary's "subdocument" ``div`` with one
    ``dt``/``dd`` pair per distinct term, sorted case-insensitively:

    * the definition is the ``title`` of the first occurrence that has one;
      terms without any title get a placeholder text;
    * every occurrence receives a generated id ("dfn1", "dfn2", ...) and the
      ``dt`` gets a "(ref)" link back to it.

    FIXME make it so unmarked text gets automatic definitions.  Should this
    be a job of MediaWiki?

    Raises:
        IndexError: when the glossary has no "subdocument" ``div``.
    """
    try:
        glossarypage = front_page.get_key_element("glossary")
    except IndexError:
        return
    gendfnid = genid("dfn").next
    dl = glossarypage.ownerDocument.createElement("dl")

    nodes = []
    for name in frontmattersections + contentsections + endmattersections:
        try:
            key = front_page.get_key_element(name)
        except IndexError:
            continue
        for tagname in ["abbr", "acronym", "dfn"]:
            nodes.extend(key.getElementsByTagName(tagname))

    term2definitionnode = {}
    term2sourcenodes = {}
    for node in nodes:
        term = transformtotext(node)
        term2sourcenodes.setdefault(term, []).append(node)
        if node.getAttribute("title") and term not in term2definitionnode:
            term2definitionnode[term] = node

    def alphasort(listorset):
        # Sort on the lower-cased UTF-8 form; return the original spelling.
        decorated = [(term.lower().encode("utf-8"), term) for term in listorset]
        decorated.sort()
        return [pair[1] for pair in decorated]

    # The dictionary keys are exactly the distinct terms.
    terms = alphasort(term2sourcenodes.keys())

    for term in terms:
        definitionnode = term2definitionnode.get(term)
        if definitionnode is None:
            print("definition for term %s missing in list of definitions" % term)
        dt = front_page.createElement("dt")
        dd = front_page.createElement("dd")
        # fill the DT out with the term
        if definitionnode is not None:
            for child in definitionnode.childNodes:
                dt.appendChild(child.cloneNode(True))
            dd.appendChild(front_page.createTextNode(definitionnode.getAttribute("title")))
        else:
            dt.appendChild(front_page.createTextNode("(no node found for term %s)" % term))
            # The closing parenthesis was missing from this placeholder.
            dd.appendChild(front_page.createTextNode("(no node found for term %s)" % term))
        for refnode in term2sourcenodes[term]:
            dfnid = gendfnid()
            refnode.setAttribute("id", dfnid)
            dt.appendChild(front_page.createTextNode(" "))
            pointertoabove = front_page.createElement("a")
            pointertoabove.setAttribute("href", u"#" + dfnid)
            pointertoabove.setAttribute("class", "glossaryref")
            pointertoabove.appendChild(front_page.createTextNode("(ref)"))
            dt.appendChild(pointertoabove)
        dl.appendChild(dt)
        dl.appendChild(dd)

    contents = [div for div in glossarypage.getElementsByTagName("div")
                if div.getAttribute("class") == "subdocument"][0]
    contents.appendChild(dl)
def gen_ref_list(front_page):
    """Build the list of references from the ``cite`` elements of the book.

    Does nothing when the front page has no "references" section.  Otherwise
    a ``ul`` is appended to that section's "subdocument" ``div`` with one
    ``li`` per distinct cited URL, in order of first citation:

    * the description is the ``title`` of the first ``cite`` for that URL
      that has one, or a placeholder text;
    * the URL itself follows on its own line;
    * every ``cite`` for the URL receives a generated id ("citation1", ...)
      and the ``li`` gets a "(ref)" link back to it.

    ``cite`` elements without a link are ignored.

    Raises:
        IndexError: when the references section has no "subdocument" ``div``.
    """
    try:
        referencespage = front_page.get_key_element("references")
    except IndexError:
        return
    gencitationid = genid("citation").next

    def geturl(node):
        # href of the first link inside ``node`` that has one, else None.
        for anchor in node.getElementsByTagName("a"):
            href = anchor.getAttribute("href")
            if href:
                return href
        return None

    ul = referencespage.ownerDocument.createElement("ul")

    nodes = []
    for name in frontmattersections + contentsections + endmattersections:
        try:
            key = front_page.get_key_element(name)
        except IndexError:
            continue
        nodes.extend(key.getElementsByTagName("cite"))

    url2definitionnode = {}
    url2sourcenodes = {}
    # Distinct URLs in order of first appearance, so the generated list is
    # the same on every run (it used to follow set iteration order).
    urls = []
    for node in nodes:
        url = geturl(node)
        if not url:
            continue
        if url not in url2sourcenodes:
            urls.append(url)
            url2sourcenodes[url] = []
        url2sourcenodes[url].append(node)
        if node.getAttribute("title") and url not in url2definitionnode:
            url2definitionnode[url] = node

    for url in urls:
        definitionnode = url2definitionnode.get(url)
        if definitionnode is None:
            print("citation for url %s missing in citation database" % url)
        li = front_page.createElement("li")
        if definitionnode is not None:
            li.appendChild(front_page.createTextNode(definitionnode.getAttribute("title")))
        else:
            li.appendChild(front_page.createTextNode("(no reference description found in any CITE title)"))
            print("Warning: no reference found for %s" % url)
        li.appendChild(front_page.createElement("br"))
        li.appendChild(front_page.createTextNode(url))
        li.appendChild(front_page.createElement("br"))
        for refnode in url2sourcenodes[url]:
            citationid = gencitationid()
            refnode.setAttribute("id", citationid)
            li.appendChild(front_page.createTextNode(" "))
            pointertoabove = front_page.createElement("a")
            pointertoabove.setAttribute("href", u"#" + citationid)
            pointertoabove.setAttribute("class", "citationref")
            pointertoabove.appendChild(front_page.createTextNode("(ref)"))
            li.appendChild(pointertoabove)
        ul.appendChild(li)

    contents = [div for div in referencespage.getElementsByTagName("div")
                if div.getAttribute("class") == "subdocument"][0]
    contents.appendChild(ul)
def gen_toc(front_page):
    """Generate the table of contents from the headings of the book.

    Does nothing when the front page has no "toc" section.  Otherwise every
    ``h1``-``h6`` in the other sections that is not marked "no-toc" becomes
    an ``li`` holding a link to the heading, and the items are nested in
    ``ul`` lists following the heading levels.  Items built from an ``h1``
    are classed "chapter", "appendix", "frontmatter" or "endmatter"
    according to the class of the heading's grandparent.  The outermost
    ``ul`` (class "toc") is appended to the toc section's "subdocument"
    ``div``.

    Raises:
        IndexError: when the toc section has no "subdocument" ``div``.
    """
    try:
        tocpage = front_page.get_key_element("toc")
    except IndexError:
        return
    level = {"h1": 1, "h2": 2, "h3": 3, "h4": 4, "h5": 5, "h6": 6}
    # Fixed order; the dictionary's key order is not a dependable one.
    heading_tags = ["h1", "h2", "h3", "h4", "h5", "h6"]
    root = front_page.createElement("ul")
    root.setAttribute("class", "toc")
    current_container = root
    oldtoclevel = 1

    def gentocelement(bodyelement):
        link = bodyelement.ownerDocument.createElement("a")
        link.appendChild(bodyelement.cloneNode(True))
        # Flatten the copied heading: nested anchors and heading tags are
        # unwrapped, and the first id met on the way becomes the target.
        for tag in ["a"] + heading_tags:
            for node in link.getElementsByTagName(tag):
                if not link.getAttribute("href") and node.getAttribute("id"):
                    link.setAttribute("href", u"#" + node.getAttribute("id"))
                for child in node.childNodes[:]:
                    node.parentNode.insertBefore(child, node)
                removeNode(node)
        li = bodyelement.ownerDocument.createElement("li")
        li.appendChild(link)
        return li

    sections = [name for name
                in frontmattersections + contentsections + endmattersections
                if name != "toc"]

    def istagandinclass(node, tagname, clas):
        if node.nodeName != tagname:
            return False
        classes = node.parentNode.parentNode.getAttribute("class").split(" ")
        return clas in classes

    def isinanyclass(node, classnames):
        for classname in classnames:
            if istagandinclass(node, "h1", classname):
                return True
        return False

    def ischaptertitle(node):
        return istagandinclass(node, "h1", "chapter")

    def isappendixtitle(node):
        return istagandinclass(node, "h1", "appendix")

    def isfrontmattertitle(node):
        return isinanyclass(node, frontmattersections)

    def isendmattertitle(node):
        return isinanyclass(node, endmattersections)

    # Explicit table instead of looking the testers up through locals().
    title_kinds = [
        ("chapter", ischaptertitle),
        ("appendix", isappendixtitle),
        ("frontmatter", isfrontmattertitle),
        ("endmatter", isendmattertitle),
    ]

    for section in sections:
        try:
            page = front_page.get_key_element(section)
        except IndexError:
            continue
        for node in walkTree(page):
            if node.nodeType != 1 or node.nodeName not in level:
                continue
            # we skip the no-tocs marked ones
            classes = node.getAttribute("class")
            if classes and "no-toc" in classes.split(" "):
                continue
            # if we reach here, it's a title!
            toclevel = level[node.nodeName]
            if toclevel < oldtoclevel:
                # Climb out of one <ul> and its <li> per level.
                for _ in range(oldtoclevel - toclevel):
                    current_container = current_container.parentNode.parentNode
            elif toclevel > oldtoclevel:
                # Open one <li><ul> pair per level.
                for _ in range(toclevel - oldtoclevel):
                    li = front_page.createElement("li")
                    current_container.appendChild(li)
                    ul = front_page.createElement("ul")
                    li.appendChild(ul)
                    current_container = ul
            element = gentocelement(node)
            for label, matches in title_kinds:
                if matches(node):
                    element.setAttribute("class", label)
            current_container.appendChild(element)
            oldtoclevel = toclevel

    contents = [div for div in tocpage.getElementsByTagName("div")
                if div.getAttribute("class") == "subdocument"][0]
    contents.appendChild(root)
def download_images(front_page,url,folder):
    """Download every ``img`` of the book into ``<folder>/images``.

    Each image is fetched, saved under the URL-unquoted base name of its
    ``src``, and its ``src`` is rewritten to "images/<base name>".  For an
    image under "images/thumb", the URL actually fetched is the directory
    part of the thumbnail URL with "images/thumb/" replaced by "images/"
    (presumably the full-size original; confirm against the site layout).

    The ``images`` directory is created when it does not exist.
    """
    target_dir = os.path.join(folder, "images")
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    # FIXME audit because the trailing slash in URL MATTERS A LOT
    thumb_prefix = urllib.basejoin(url, "images/thumb")
    for image in front_page.getElementsByTagName("img"):
        src = image.getAttribute("src")
        filename = os.path.basename(src)
        imgurl = urllib.basejoin(url, src)
        if imgurl.startswith(thumb_prefix):
            imgurl = re.sub("images/thumb/", "images/", os.path.dirname(imgurl))
        contents = fetch_url(imgurl)
        # NOTE(review): the name comes from the remote document; unquoting
        # could introduce path separators.  Confirm this is acceptable.
        local_name = urllib.unquote_plus(filename.encode("utf-8"))
        output = open(os.path.join(target_dir, local_name), "wb")
        try:
            output.write(contents)
        finally:
            # Close explicitly instead of relying on garbage collection.
            output.close()
        image.setAttribute("src", "images/" + filename)
def storage(front_page,save_file,title,extra_head_markup):
    """Write the finished book to ``save_file`` as one XHTML document.

    A skeleton document is built with ``extra_head_markup`` inserted in its
    ``head`` and ``title`` as its title.  Every book section present in
    ``front_page`` is then moved into the body, front matter first, and the
    result is serialised as UTF-8.
    """
    carcass = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
%s
</head>
<body>
</body>
</html>"""%extra_head_markup
    #FIXME at some point, this would require more than the TOC
    target_document = parse_document(carcass)
    title_element = target_document.createElement("title")
    title_element.appendChild(target_document.createTextNode(title))
    target_document.getElementsByTagName("head")[0].appendChild(title_element)
    body = target_document.getElementsByTagName("body")[0]
    for name in frontmattersections + contentsections + endmattersections:
        try:
            body.appendChild(front_page.get_key_element(name))
        except IndexError:
            # Section missing from this book.
            continue
    # Not named ``xml``, which would shadow the module inside this function.
    serialized = target_document.toxml("UTF-8")
    # Binary mode: the data is already encoded.
    output = open(save_file, "wb")
    try:
        output.write(serialized)
    finally:
        output.close()
def gen_mediawiki_book(url,folder,title,extra_head):
    """Run the whole pipeline: front page at ``url`` -> ``<folder>/index.html``.

    Args:
        url: address of the front page that lists the book's sections.
        folder: output directory for index.html and the images.
        title: text of the generated ``<title>`` element.
        extra_head: markup inserted verbatim into the generated ``<head>``.
    """
    front_page = bootstrap(url)
    for name in contentsections:
        try:
            sectionize(front_page.get_key_element(name), name)
        except IndexError:
            continue
    # The short strings printed below are progress markers, one per stage.
    print("a")
    subdocument_integration(front_page, url)
    print("b")
    remove_moles(front_page)
    print("c")
    anchor_fixup(front_page, url)
    print("d")
    remove_duplicate_ids(front_page)
    print("e")
    fix_hyperlinks(front_page, url)
    print("f")
    demotion(front_page)
    print("g")
    download_images(front_page, url, folder)
    print("i")
    gen_toc(front_page)
    print("j")
    definitions_abbreviations_acronyms(front_page)
    print("k")
    footnotes_and_citations(front_page)
    print("l")
    gen_ref_list(front_page)
    print("msunia")
    storage(front_page, os.path.join(folder, "index.html"), title, extra_head)
def main():
    """Build the book "Software libre para los países en desarrollo" into ./book."""
    gen_mediawiki_book(
        "http://software-libre.rudd-o.com/",
        "book",
        u"Software libre para los países en desarrollo",
        '<link rel="stylesheet" href="book.css"/><link rel="stylesheet" href="bookextra.css"/>',
    )


if __name__ == "__main__":
    main()

