From 35590bd990f2fe28a4d39f41155d9020fffe096d Mon Sep 17 00:00:00 2001
From: Dirk Baechle <dl9obn@darc.de>
Date: Tue, 9 Jun 2020 16:49:00 +0200
Subject: First set of changes, started to rip out libxml2/libxslt.

---
 SCons/Tool/docbook/__init__.py     | 180 +++---------
 bin/SConsDoc.py                    | 571 ++++++++++---------------------------
 bin/SConsExamples.py               |   1 +
 bin/docs-create-example-outputs.py |   1 +
 bin/docs-update-generated.py       |   1 +
 bin/docs-validate.py               |   1 +
 bin/scons-proc.py                  |  30 +-
 7 files changed, 209 insertions(+), 576 deletions(-)

diff --git a/SCons/Tool/docbook/__init__.py b/SCons/Tool/docbook/__init__.py
index 7f47e9d..3b725da 100644
--- a/SCons/Tool/docbook/__init__.py
+++ b/SCons/Tool/docbook/__init__.py
@@ -51,20 +51,14 @@ scriptpath = os.path.dirname(os.path.realpath(__file__))
 # Local folder for the collection of DocBook XSLs
 db_xsl_folder = 'docbook-xsl-1.76.1'
 
-# Do we have libxml2/libxslt/lxml?
-has_libxml2 = True
+# Do we have lxml?
 has_lxml = True
 try:
-    import libxml2
-    import libxslt
-except:
-    has_libxml2 = False
-try:
     import lxml
 except:
     has_lxml = False
 
-# Set this to True, to prefer xsltproc over libxml2 and lxml
+# Set this to True, to prefer xsltproc over lxml
 prefer_xsltproc = False
 
 # Regexs for parsing Docbook XML sources of MAN pages
@@ -95,20 +89,12 @@ def __init_xsl_stylesheet(kw, env, user_xsl_var, default_path):
             xsl_style = os.path.join(*path_args)
         kw['DOCBOOK_XSL'] =  xsl_style
 
-def __select_builder(lxml_builder, libxml2_builder, cmdline_builder):
+def __select_builder(lxml_builder, cmdline_builder):
     """ Selects a builder, based on which Python modules are present. """
-    if prefer_xsltproc:
-        return cmdline_builder
-
-    if not has_libxml2:
-        # At the moment we prefer libxml2 over lxml, the latter can lead
-        # to conflicts when installed together with libxml2.
-        if has_lxml:
-            return lxml_builder
-        else:
-            return cmdline_builder
+    if has_lxml and not prefer_xsltproc:
+        return lxml_builder
 
-    return libxml2_builder
+    return cmdline_builder
 
 def __ensure_suffix(t, suffix):
     """ Ensure that the target t has the given suffix. """
@@ -207,7 +193,7 @@ def _detect(env):
     if env.get('DOCBOOK_PREFER_XSLTPROC',''):
         prefer_xsltproc = True
 
-    if (not has_libxml2 and not has_lxml) or prefer_xsltproc:
+    if (not has_lxml) or prefer_xsltproc:
         # Try to find the XSLT processors
         __detect_cl_tool(env, 'DOCBOOK_XSLTPROC', xsltproc_com, xsltproc_com_priority)
         __detect_cl_tool(env, 'DOCBOOK_XMLLINT', xmllint_com)
@@ -233,44 +219,26 @@ def __xml_scan(node, env, path, arg):
         return sentity_re.findall(contents)
 
     xsl_file = os.path.join(scriptpath,'utils','xmldepend.xsl')
-    if not has_libxml2 or prefer_xsltproc:
-        if has_lxml and not prefer_xsltproc:
-
-            from lxml import etree
-
-            xsl_tree = etree.parse(xsl_file)
-            doc = etree.parse(str(node))
-            result = doc.xslt(xsl_tree)
-
+    if not has_lxml or prefer_xsltproc:
+        # Try to call xsltproc
+        xsltproc = env.subst("$DOCBOOK_XSLTPROC")
+        if xsltproc and xsltproc.endswith('xsltproc'):
+            result = env.backtick(' '.join([xsltproc, xsl_file, str(node)]))
             depfiles = [x.strip() for x in str(result).splitlines() if x.strip() != "" and not x.startswith("<?xml ")]
             return depfiles
         else:
-            # Try to call xsltproc
-            xsltproc = env.subst("$DOCBOOK_XSLTPROC")
-            if xsltproc and xsltproc.endswith('xsltproc'):
-                result = env.backtick(' '.join([xsltproc, xsl_file, str(node)]))
-                depfiles = [x.strip() for x in str(result).splitlines() if x.strip() != "" and not x.startswith("<?xml ")]
-                return depfiles
-            else:
-                # Use simple pattern matching, there is currently no support
-                # for xi:includes...
-                contents = node.get_text_contents()
-                return include_re.findall(contents)
-
-    styledoc = libxml2.parseFile(xsl_file)
-    style = libxslt.parseStylesheetDoc(styledoc)
-    doc = libxml2.readFile(str(node), None, libxml2.XML_PARSE_NOENT)
-    result = style.applyStylesheet(doc, None)
-
-    depfiles = []
-    for x in str(result).splitlines():
-        if x.strip() != "" and not x.startswith("<?xml "):
-            depfiles.extend(x.strip().split())
-
-    style.freeStylesheet()
-    doc.freeDoc()
-    result.freeDoc()
+            # Use simple pattern matching, there is currently no support
+            # for xi:includes...
+            contents = node.get_text_contents()
+            return include_re.findall(contents)
+        
+    from lxml import etree
 
+    xsl_tree = etree.parse(xsl_file)
+    doc = etree.parse(str(node))
+    result = doc.xslt(xsl_tree)
+
+    depfiles = [x.strip() for x in str(result).splitlines() if x.strip() != "" and not x.startswith("<?xml ")]
     return depfiles
 
 # Creating the instance of our XML dependency scanner
@@ -308,27 +276,6 @@ def __emit_xsl_basedir(target, source, env):
 #
 # Builders
 #
-def __build_libxml2(target, source, env):
-    """
-    General XSLT builder (HTML/FO), using the libxml2 module.
-    """
-    xsl_style = env.subst('$DOCBOOK_XSL')
-    styledoc = libxml2.parseFile(xsl_style)
-    style = libxslt.parseStylesheetDoc(styledoc)
-    doc = libxml2.readFile(str(source[0]),None,libxml2.XML_PARSE_NOENT)
-    # Support for additional parameters
-    parampass = {}
-    if parampass:
-        result = style.applyStylesheet(doc, parampass)
-    else:
-        result = style.applyStylesheet(doc, None)
-    style.saveResultToFilename(str(target[0]), result, 0)
-    style.freeStylesheet()
-    doc.freeDoc()
-    result.freeDoc()
-
-    return None
-
 def __build_lxml(target, source, env):
     """
     General XSLT builder (HTML/FO), using the lxml module.
@@ -351,27 +298,12 @@ def __build_lxml(target, source, env):
     else:
         result = transform(doc)
 
-    # we'd like the resulting output to be readably formatted,
-    # so try pretty-print. Sometimes (esp. if the output is
-    # not an xml file) we end up with a None type somewhere in
-    # the transformed tree and tostring throws TypeError,
-    # so provide a fallback.
     try:
+# TODO DB Check file encoding for unicode/utf-8
         with open(str(target[0]), "wb") as of:
-            of.write(etree.tostring(result, pretty_print=True))
-    except TypeError:
-        result.write_output(str(target[0]))
-
-    return None
-
-def __xinclude_libxml2(target, source, env):
-    """
-    Resolving XIncludes, using the libxml2 module.
-    """
-    doc = libxml2.readFile(str(source[0]), None, libxml2.XML_PARSE_NOENT)
-    doc.xincludeProcessFlags(libxml2.XML_PARSE_NOENT)
-    doc.saveFile(str(target[0]))
-    doc.freeDoc()
+            of.write(etree.tostring(result, encoding="utf-8", pretty_print=True))
+    except Exception as e:  # best effort: report the failure instead of silently dropping it
+        print("ERROR: Failed to write %s: %s" % (str(target[0]), e))
 
     return None
 
@@ -391,22 +323,12 @@ def __xinclude_lxml(target, source, env):
 
     return None
 
-__libxml2_builder = SCons.Builder.Builder(
-        action = __build_libxml2,
-        src_suffix = '.xml',
-        source_scanner = docbook_xml_scanner,
-        emitter = __emit_xsl_basedir)
 __lxml_builder = SCons.Builder.Builder(
         action = __build_lxml,
         src_suffix = '.xml',
         source_scanner = docbook_xml_scanner,
         emitter = __emit_xsl_basedir)
 
-__xinclude_libxml2_builder = SCons.Builder.Builder(
-        action = __xinclude_libxml2,
-        suffix = '.xml',
-        src_suffix = '.xml',
-        source_scanner = docbook_xml_scanner)
 __xinclude_lxml_builder = SCons.Builder.Builder(
         action = __xinclude_lxml,
         suffix = '.xml',
@@ -472,33 +394,7 @@ def DocbookEpub(env, target, source=None, *args, **kw):
             return
 
         hrefs = []
-        if has_libxml2:
-            nsmap = {'opf' : 'http://www.idpf.org/2007/opf'}
-            # Read file and resolve entities
-            doc = libxml2.readFile(content_file, None, 0)
-            opf = doc.getRootElement()
-            # Create xpath context
-            xpath_context = doc.xpathNewContext()
-            # Register namespaces
-            for key, val in nsmap.items():
-                xpath_context.xpathRegisterNs(key, val)
-
-            if hasattr(opf, 'xpathEval') and xpath_context:
-                # Use the xpath context
-                xpath_context.setContextNode(opf)
-                items = xpath_context.xpathEval(".//opf:item")
-            else:
-                items = opf.findall(".//{'http://www.idpf.org/2007/opf'}item")
-
-            for item in items:
-                if hasattr(item, 'prop'):
-                    hrefs.append(item.prop('href'))
-                else:
-                    hrefs.append(item.attrib['href'])
-
-            doc.freeDoc()
-            xpath_context.xpathFreeContext()
-        elif has_lxml:
+        if has_lxml:
             from lxml import etree
 
             opf = etree.parse(content_file)
@@ -521,7 +417,7 @@ def DocbookEpub(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_EPUB', ['epub','docbook.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Create targets
     result = []
@@ -562,7 +458,7 @@ def DocbookHtml(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_HTML', ['html','docbook.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Create targets
     result = []
@@ -590,7 +486,7 @@ def DocbookHtmlChunked(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_HTMLCHUNKED', ['html','chunkfast.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Detect base dir
     base_dir = kw.get('base_dir', '')
@@ -625,7 +521,7 @@ def DocbookHtmlhelp(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_HTMLHELP', ['htmlhelp','htmlhelp.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Detect base dir
     base_dir = kw.get('base_dir', '')
@@ -654,7 +550,7 @@ def DocbookPdf(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_PDF', ['fo','docbook.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Create targets
     result = []
@@ -678,7 +574,7 @@ def DocbookMan(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_MAN', ['manpages','docbook.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Create targets
     result = []
@@ -744,7 +640,7 @@ def DocbookSlidesPdf(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_SLIDESPDF', ['slides','fo','plain.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Create targets
     result = []
@@ -774,7 +670,7 @@ def DocbookSlidesHtml(env, target, source=None, *args, **kw):
     __init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_SLIDESHTML', ['slides','html','plain.xsl'])
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Detect base dir
     base_dir = kw.get('base_dir', '')
@@ -800,7 +696,7 @@ def DocbookXInclude(env, target, source, *args, **kw):
     target, source = __extend_targets_sources(target, source)
 
     # Setup builder
-    __builder = __select_builder(__xinclude_lxml_builder,__xinclude_libxml2_builder,__xmllint_builder)
+    __builder = __select_builder(__xinclude_lxml_builder,__xmllint_builder)
 
     # Create targets
     result = []
@@ -820,7 +716,7 @@ def DocbookXslt(env, target, source=None, *args, **kw):
     kw['DOCBOOK_XSL'] = kw.get('xsl', 'transform.xsl')
 
     # Setup builder
-    __builder = __select_builder(__lxml_builder, __libxml2_builder, __xsltproc_builder)
+    __builder = __select_builder(__lxml_builder, __xsltproc_builder)
 
     # Create targets
     result = []
diff --git a/bin/SConsDoc.py b/bin/SConsDoc.py
index edf38a9..63aa610 100644
--- a/bin/SConsDoc.py
+++ b/bin/SConsDoc.py
@@ -24,6 +24,8 @@
 #
 # Module for handling SCons documentation processing.
 #
+# TODO DB Check file encoding for unicode/utf-8
+
 __doc__ = r"""
 This module parses home-brew XML files that document various things
 in SCons.  Right now, it handles Builders, functions, construction
@@ -116,34 +118,19 @@ import sys
 import copy
 import importlib
 
-# Do we have libxml2/libxslt/lxml?
-has_libxml2 = True
+# Do we have lxml?
 try:
-    import libxml2
-    import libxslt
+    import lxml
 except ImportError:
-    has_libxml2 = False
-    try:
-        import lxml
-    except ImportError:
-        raise ImportError("Failed to import either libxml2/libxslt or lxml")
+    raise ImportError("Failed to import lxml")
 
-has_etree = False
-if not has_libxml2:
-    try:
-        from lxml import etree
-        has_etree = True
-    except ImportError:
-        pass
-if not has_etree:
+try:
+    from lxml import etree
+except ImportError:
     try:
-        # TODO: this is for Python 2.7, cElementTee is deprecated since Py3.3
-        import xml.etree.cElementTree as etree
+        import xml.etree.ElementTree as etree
     except ImportError:
-        try:
-            import xml.etree.ElementTree as etree
-        except ImportError:
-            raise ImportError("Failed to import ElementTree from any known place")
+        raise ImportError("Failed to import ElementTree from any known place")
 
 # patterns to help trim XML passed in as strings
 re_entity = re.compile(r"&([^;]+);")
@@ -250,374 +237,148 @@ class DoctypeDeclaration:
 
         return content
 
-if not has_libxml2:
-    class TreeFactory:
-        def __init__(self):
-            pass
-
-        @staticmethod
-        def newNode(tag, **kwargs):
-            return etree.Element(tag, **kwargs)
-
-        @staticmethod
-        def newSubNode(parent, tag, **kwargs):
-            return etree.SubElement(parent, tag, **kwargs)
-
-        @staticmethod
-        def newEtreeNode(tag, init_ns=False, **kwargs):
-            if init_ns:
-                NSMAP = {None: dbxsd,
-                         'xsi' : xsi}
-                return etree.Element(tag, nsmap=NSMAP, **kwargs)
-
-            return etree.Element(tag, **kwargs)
-
-        @staticmethod
-        def copyNode(node):
-            return copy.deepcopy(node)
-
-        @staticmethod
-        def appendNode(parent, child):
-            parent.append(child)
-
-        @staticmethod
-        def hasAttribute(node, att):
-            return att in node.attrib
-
-        @staticmethod
-        def getAttribute(node, att):
-            return node.attrib[att]
-
-        @staticmethod
-        def setAttribute(node, att, value):
-            node.attrib[att] = value
-
-        @staticmethod
-        def getText(root):
-            return root.text
-
-        @staticmethod
-        def setText(root, txt):
-            root.text = txt
-
-        @staticmethod
-        def getTail(root):
-            return root.tail
-
-        @staticmethod
-        def setTail(root, txt):
-            root.tail = txt
-
-        @staticmethod
-        def writeGenTree(root, fp):
-            dt = DoctypeDeclaration()
-            encfun = str
-            fp.write(etree.tostring(root, encoding=encfun,
-                                    pretty_print=True,
-                                    doctype=dt.createDoctype()))
-
-        @staticmethod
-        def writeTree(root, fpath):
-            encfun = "utf-8"
-            with open(fpath, 'wb') as fp:
-                fp.write(etree.tostring(root, encoding=encfun,
-                                        pretty_print=True))
-
-        @staticmethod
-        def prettyPrintFile(fpath):
-            with open(fpath,'rb') as fin:
-                tree = etree.parse(fin)
-                pretty_content = etree.tostring(tree, pretty_print=True)
-
-            with open(fpath,'wb') as fout:
-                fout.write(pretty_content)
-
-        @staticmethod
-        def decorateWithHeader(root):
-            root.attrib["{"+xsi+"}schemaLocation"] = "%s %s/scons.xsd" % (dbxsd, dbxsd)
-            return root
-
-        def newXmlTree(self, root):
-            """ Return a XML file tree with the correct namespaces set,
-                the element root as top entry and the given header comment.
-            """
-            NSMAP = {None: dbxsd, 'xsi' : xsi}
-            t = etree.Element(root, nsmap=NSMAP)
-            return self.decorateWithHeader(t)
-
-        # singleton to cache parsed xmlschema..
-        xmlschema = None
-
-        @staticmethod
-        def validateXml(fpath, xmlschema_context):
-
-            if TreeFactory.xmlschema is None:
-                TreeFactory.xmlschema = etree.XMLSchema(xmlschema_context)
-            try:
-                doc = etree.parse(fpath)
-            except Exception as e:
-                print("ERROR: %s fails to parse:"%fpath)
-                print(e)
-                return False
-            doc.xinclude()
-            try:
-                TreeFactory.xmlschema.assertValid(doc)
-            except Exception as e:
-                print("ERROR: %s fails to validate:" % fpath)
-                print(e)
-                return False
-            return True
+class TreeFactory:
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def newNode(tag, **kwargs):
+        return etree.Element(tag, **kwargs)
+
+    @staticmethod
+    def newSubNode(parent, tag, **kwargs):
+        return etree.SubElement(parent, tag, **kwargs)
+
+    @staticmethod
+    def newEtreeNode(tag, init_ns=False, **kwargs):
+        if init_ns:
+            NSMAP = {None: dbxsd,
+                     'xsi' : xsi}
+            return etree.Element(tag, nsmap=NSMAP, **kwargs)
+
+        return etree.Element(tag, **kwargs)
+
+    @staticmethod
+    def copyNode(node):
+        return copy.deepcopy(node)
+
+    @staticmethod
+    def appendNode(parent, child):
+        parent.append(child)
+
+    @staticmethod
+    def hasAttribute(node, att):
+        return att in node.attrib
+
+    @staticmethod
+    def getAttribute(node, att):
+        return node.attrib[att]
+
+    @staticmethod
+    def setAttribute(node, att, value):
+        node.attrib[att] = value
+
+    @staticmethod
+    def getText(root):
+        return root.text
+
+    @staticmethod
+    def setText(root, txt):
+        root.text = txt
+
+    @staticmethod
+    def getTail(root):
+        return root.tail
+
+    @staticmethod
+    def setTail(root, txt):
+        root.tail = txt
+
+    @staticmethod
+    def writeGenTree(root, fp):
+        dt = DoctypeDeclaration()
+# TODO DB Check file encoding for unicode/utf-8
+        fp.write(etree.tostring(root, encoding="utf-8",
+                                pretty_print=True,
+                                doctype=dt.createDoctype()))
+
+    @staticmethod
+    def writeTree(root, fpath):
+# TODO DB Check file encoding for unicode/utf-8
+        with open(fpath, 'wb') as fp:
+# TODO DB Check file encoding for unicode/utf-8
+            fp.write(etree.tostring(root, encoding="utf-8",
+                                    pretty_print=True))
+
+    @staticmethod
+    def prettyPrintFile(fpath):
+        """Re-serialize the XML file at fpath in place, pretty-printed and UTF-8 encoded."""
+# TODO DB Check file encoding for unicode/utf-8
+        with open(fpath,'rb') as fin:
+            tree = etree.parse(fin)
+# TODO DB Check file encoding for unicode/utf-8
+            pretty_content = etree.tostring(tree, encoding="utf-8", pretty_print=True)
+
+        with open(fpath,'wb') as fout:
+            fout.write(pretty_content)
+
+    @staticmethod
+    def decorateWithHeader(root):
+        root.attrib["{"+xsi+"}schemaLocation"] = "%s %s/scons.xsd" % (dbxsd, dbxsd)
+        return root
+
+    def newXmlTree(self, root):
+        """ Return a XML file tree with the correct namespaces set,
+            the element root as top entry and the given header comment.
+        """
+        NSMAP = {None: dbxsd, 'xsi' : xsi}
+        t = etree.Element(root, nsmap=NSMAP)
+        return self.decorateWithHeader(t)
 
-        @staticmethod
-        def findAll(root, tag, ns=None, xp_ctxt=None, nsmap=None):
-            expression = ".//{%s}%s" % (nsmap[ns], tag)
-            if not ns or not nsmap:
-                expression = ".//%s" % tag
-            return root.findall(expression)
-
-        @staticmethod
-        def findAllChildrenOf(root, tag, ns=None, xp_ctxt=None, nsmap=None):
-            expression = "./{%s}%s/*" % (nsmap[ns], tag)
-            if not ns or not nsmap:
-                expression = "./%s/*" % tag
-            return root.findall(expression)
-
-        @staticmethod
-        def convertElementTree(root):
-            """ Convert the given tree of etree.Element
-                entries to a list of tree nodes for the
-                current XML toolkit.
-            """
-            return [root]
-
-else:
-    class TreeFactory:
-        def __init__(self):
-            pass
-
-        @staticmethod
-        def newNode(tag, **kwargs):
-            return etree.Element(tag, **kwargs)
-
-        @staticmethod
-        def newSubNode(parent, tag, **kwargs):
-            return etree.SubElement(parent, tag, **kwargs)
-
-        @staticmethod
-        def newEtreeNode(tag, init_ns=False, **kwargs):
-            return etree.Element(tag, **kwargs)
-
-        @staticmethod
-        def copyNode(node):
-            return node.copyNode(1)
-
-        @staticmethod
-        def appendNode(parent, child):
-            if hasattr(parent, 'addChild'):
-                parent.addChild(child)
-            else:
-                parent.append(child)
-
-        @staticmethod
-        def hasAttribute(node, att):
-            if hasattr(node, 'hasProp'):
-                return node.hasProp(att)
-            return att in node.attrib
-
-        @staticmethod
-        def getAttribute(node, att):
-            if hasattr(node, 'prop'):
-                return node.prop(att)
-            return node.attrib[att]
-
-        @staticmethod
-        def setAttribute(node, att, value):
-            if hasattr(node, 'setProp'):
-                node.setProp(att, value)
-            else:
-                node.attrib[att] = value
-
-        @staticmethod
-        def getText(root):
-            if hasattr(root, 'getContent'):
-                return root.getContent()
-            return root.text
-
-        @staticmethod
-        def setText(root, txt):
-            if hasattr(root, 'setContent'):
-                root.setContent(txt)
-            else:
-                root.text = txt
-
-        @staticmethod
-        def getTail(root):
-            return root.tail
-
-        @staticmethod
-        def setTail(root, txt):
-            root.tail = txt
-
-        @staticmethod
-        def writeGenTree(root, fp):
-            doc = libxml2.newDoc('1.0')
-            dtd = doc.newDtd("sconsdoc", None, None)
-            doc.addChild(dtd)
-            doc.setRootElement(root)
-            content = doc.serialize("UTF-8", 1)
-            dt = DoctypeDeclaration()
-            # This is clearly a hack, but unfortunately libxml2
-            # doesn't support writing PERs (Parsed Entity References).
-            # So, we simply replace the empty doctype with the
-            # text we need...
-            content = content.replace("<!DOCTYPE sconsdoc>", dt.createDoctype())
-            fp.write(content)
-            doc.freeDoc()
-
-        @staticmethod
-        def writeTree(root, fpath):
-            with open(fpath, 'wb') as fp:
-                doc = libxml2.newDoc('1.0')
-                doc.setRootElement(root)
-                fp.write(doc.serialize("UTF-8", 1))
-                doc.freeDoc()
-
-        @staticmethod
-        def prettyPrintFile(fpath):
-            # Read file and resolve entities
-            doc = libxml2.readFile(fpath, None, libxml2d.XML_PARSE_NOENT)
-            with open(fpath, 'wb') as fp:
-                # Prettyprint
-                fp.write(doc.serialize("UTF-8", 1))
-            # Cleanup
-            doc.freeDoc()
-
-        @staticmethod
-        def decorateWithHeader(root):
-            # Register the namespaces
-            ns = root.newNs(dbxsd, None)
-            xi = root.newNs(xsi, 'xsi')
-            root.setNs(ns)  #put this node in the target namespace
-
-            root.setNsProp(xi, 'schemaLocation', "%s %s/scons.xsd" % (dbxsd, dbxsd))
-
-            return root
-
-        def newXmlTree(self, root):
-            """ Return a XML file tree with the correct namespaces set,
-                the element root as top entry and the given header comment.
-            """
-            t = libxml2.newNode(root)
-            return self.decorateWithHeader(t)
-
-        @staticmethod
-        def validateXml(fpath, xmlschema_context):
-            retval = True
-
-            # Create validation context
-            validation_context = xmlschema_context.schemaNewValidCtxt()
-            # Set error/warning handlers
-            eh = Libxml2ValidityHandler()
-            validation_context.setValidityErrorHandler(eh.error, eh.warning, ARG)
-            # Read file and resolve entities
-            doc = libxml2.readFile(fpath, None, libxml2.XML_PARSE_NOENT)
-            doc.xincludeProcessFlags(libxml2.XML_PARSE_NOENT)
-            err = validation_context.schemaValidateDoc(doc)
-
-            if err or eh.errors:
-                for e in eh.errors:
-                    print(e.rstrip("\n"))
-                # import pdb; pdb.set_trace()
-                print("%s fails to validate" % fpath)
-                retval = False
-
-            # Cleanup
-            doc.freeDoc()
-            del validation_context
-
-            return retval
-
-        @staticmethod
-        def findAll(root, tag, ns=None, xpath_context=None, nsmap=None):
-            if hasattr(root, 'xpathEval') and xpath_context:
-                # Use the xpath context
-                xpath_context.setContextNode(root)
-                expression = ".//%s" % tag
-                if ns:
-                    expression = ".//%s:%s" % (ns, tag)
-                return xpath_context.xpathEval(expression)
-            else:
-                expression = ".//{%s}%s" % (nsmap[ns], tag)
-                if not ns or not nsmap:
-                    expression = ".//%s" % tag
-                return root.findall(expression)
-
-        @staticmethod
-        def findAllChildrenOf(root, tag, ns=None, xpath_context=None, nsmap=None):
-            if hasattr(root, 'xpathEval') and xpath_context:
-                # Use the xpath context
-                xpath_context.setContextNode(root)
-                expression = "./%s/node()" % tag
-                if ns:
-                    expression = "./%s:%s/node()" % (ns, tag)
-
-                return xpath_context.xpathEval(expression)
-            else:
-                expression = "./{%s}%s/node()" % (nsmap[ns], tag)
-                if not ns or not nsmap:
-                    expression = "./%s/node()" % tag
-                return root.findall(expression)
-
-        def expandChildElements(self, child):
-            """ Helper function for convertElementTree,
-                converts a single child recursively.
-            """
-            nchild = self.newNode(child.tag)
-            # Copy attributes
-            for key, val in child.attrib:
-                self.setAttribute(nchild, key, val)
-            elements = []
-            # Add text
-            if child.text:
-                t = libxml2.newText(child.text)
-                self.appendNode(nchild, t)
-            # Add children
-            for c in child:
-                for n in self.expandChildElements(c):
-                    self.appendNode(nchild, n)
-            elements.append(nchild)
-            # Add tail
-            if child.tail:
-                tail = libxml2.newText(child.tail)
-                elements.append(tail)
-
-            return elements
-
-        def convertElementTree(self, root):
-            """ Convert the given tree of etree.Element
-                entries to a list of tree nodes for the
-                current XML toolkit.
-            """
-            nroot = self.newNode(root.tag)
-            # Copy attributes
-            for key, val in root.attrib:
-                self.setAttribute(nroot, key, val)
-            elements = []
-            # Add text
-            if root.text:
-                t = libxml2.newText(root.text)
-                self.appendNode(nroot, t)
-            # Add children
-            for c in root:
-                for n in self.expandChildElements(c):
-                    self.appendNode(nroot, n)
-            elements.append(nroot)
-            # Add tail
-            if root.tail:
-                tail = libxml2.newText(root.tail)
-                elements.append(tail)
-
-            return elements
+    # singleton to cache parsed xmlschema..
+    xmlschema = None
+
+    @staticmethod
+    def validateXml(fpath, xmlschema_context):
+
+        if TreeFactory.xmlschema is None:
+            TreeFactory.xmlschema = etree.XMLSchema(xmlschema_context)
+        try:
+            doc = etree.parse(fpath)
+        except Exception as e:
+            print("ERROR: %s fails to parse:"%fpath)
+            print(e)
+            return False
+        doc.xinclude()
+        try:
+            TreeFactory.xmlschema.assertValid(doc)
+        except Exception as e:
+            print("ERROR: %s fails to validate:" % fpath)
+            print(e)
+            return False
+        return True
+
+    @staticmethod
+    def findAll(root, tag, ns=None, xp_ctxt=None, nsmap=None):
+        """Return all descendants of root named tag; xp_ctxt is accepted but unused."""
+        # Qualify the tag only if ns and nsmap are both set; nsmap[ns] on the None default would raise.
+        expression = ".//{%s}%s" % (nsmap[ns], tag) if ns and nsmap else ".//%s" % tag
+        return root.findall(expression)
+
+    @staticmethod
+    def findAllChildrenOf(root, tag, ns=None, xp_ctxt=None, nsmap=None):
+        """Return the child elements of root's direct children named tag; xp_ctxt is unused."""
+        # Qualify the tag only if ns and nsmap are both set; nsmap[ns] on the None default would raise.
+        expression = "./{%s}%s/*" % (nsmap[ns], tag) if ns and nsmap else "./%s/*" % tag
+        return root.findall(expression)
+
+    @staticmethod
+    def convertElementTree(root):
+        """ Convert the given tree of etree.Element
+            entries to a list of tree nodes for the
+            current XML toolkit.
+        """
+        return [root]
 
 tf = TreeFactory()
 
@@ -641,19 +402,9 @@ class SConsDocTree:
         self.root = etree.fromstring(content)
 
     def parseXmlFile(self, fpath):
-        if not has_libxml2:
-            # Create domtree from file
-            domtree = etree.parse(fpath)
-            self.root = domtree.getroot()
-        else:
-            # Read file and resolve entities
-            self.doc = libxml2.readFile(fpath, None, libxml2.XML_PARSE_NOENT)
-            self.root = self.doc.getRootElement()
-            # Create xpath context
-            self.xpath_context = self.doc.xpathNewContext()
-            # Register namespaces
-            for key, val in self.nsmap.items():
-                self.xpath_context.xpathRegisterNs(key, val)
+        # Create domtree from file
+        domtree = etree.parse(fpath)
+        self.root = domtree.getroot()
 
     def __del__(self):
         if self.doc is not None:
@@ -664,15 +415,7 @@ class SConsDocTree:
 perc = "%"
 
 def validate_all_xml(dpaths, xsdfile=default_xsd):
-    xmlschema_context = None
-    if not has_libxml2:
-        # Use lxml
-        xmlschema_context = etree.parse(xsdfile)
-    else:
-        # Use libxml2 and prepare the schema validation context
-        ctxt = libxml2.schemaNewParserCtxt(xsdfile)
-        xmlschema_context = ctxt.schemaParse()
-        del ctxt
+    xmlschema_context = etree.parse(xsdfile)
 
     fpaths = []
     for dp in dpaths:
@@ -698,10 +441,6 @@ def validate_all_xml(dpaths, xsdfile=default_xsd):
             fails.append(fp)
             continue
 
-    if has_libxml2:
-        # Cleanup
-        del xmlschema_context
-
     if fails:
         return False
 
diff --git a/bin/SConsExamples.py b/bin/SConsExamples.py
index 46df103..bfc9002 100644
--- a/bin/SConsExamples.py
+++ b/bin/SConsExamples.py
@@ -85,6 +85,7 @@
 # Error output gets passed through to your error output so you
 # can see if there are any problems executing the command.
 #
+# TODO DB Check file encoding for unicode/utf-8
 
 import os
 import re
diff --git a/bin/docs-create-example-outputs.py b/bin/docs-create-example-outputs.py
index 0124435..e74c547 100644
--- a/bin/docs-create-example-outputs.py
+++ b/bin/docs-create-example-outputs.py
@@ -3,6 +3,7 @@
 # Searches through the whole doc/user tree and creates
 # all output files for the single examples.
 #
+# TODO DB Check file encoding for unicode/utf-8
 import os
 import sys
 import SConsExamples
diff --git a/bin/docs-update-generated.py b/bin/docs-update-generated.py
index 3687896..4226473 100644
--- a/bin/docs-update-generated.py
+++ b/bin/docs-update-generated.py
@@ -6,6 +6,7 @@
 # as well as the entity declarations for them.
 # Uses scons-proc.py under the hood...
 #
+# TODO DB Check file encoding for unicode/utf-8
 import os
 import sys
 import subprocess
diff --git a/bin/docs-validate.py b/bin/docs-validate.py
index c4dd3b7..53b6f86 100644
--- a/bin/docs-validate.py
+++ b/bin/docs-validate.py
@@ -3,6 +3,7 @@
 # Searches through the whole source tree and validates all
 # documentation files against our own XSD in docs/xsd.
 #
+# TODO DB Check file encoding for unicode/utf-8
 import sys,os
 import SConsDoc
 
diff --git a/bin/scons-proc.py b/bin/scons-proc.py
index 95a798c..e1a2d19 100644
--- a/bin/scons-proc.py
+++ b/bin/scons-proc.py
@@ -9,6 +9,7 @@
 # DocBook-formatted generated XML files containing the summary text
 # and/or .mod files containing the ENTITY definitions for each item.
 #
+# TODO DB Check file encoding for unicode/utf-8
 import getopt
 import os
 import sys
@@ -66,6 +67,7 @@ def parse_docs(args, include_entities=True):
                 raise
         else:
             # mode we read (text/bytes) has to match handling in SConsDoc
+            # TODO DB Check file encoding for unicode/utf-8
             with open(f, 'r') as fp:
                 content = fp.read()
             if content:
@@ -76,12 +78,14 @@ def parse_docs(args, include_entities=True):
                     raise
     return h
 
+# TODO DB Check file encoding for unicode/utf-8
 Warning = """\
 <!--
 THIS IS AN AUTOMATICALLY-GENERATED FILE.  DO NOT EDIT.
 -->
 """
 
+# TODO DB Check file encoding for unicode/utf-8
 Regular_Entities_Header = """\
 <!--
 
@@ -107,6 +111,7 @@ class SCons_XML:
     def fopen(self, name, mode='w'):
         if name == '-':
             return sys.stdout
+# TODO DB Check file encoding for unicode/utf-8
         return open(name, mode)
     
     def write(self, files):
@@ -138,32 +143,19 @@ class SCons_XML:
                     added = True
                     stf.appendNode(vl, stf.copyNode(s))
             
-            # Generate the text for sets/uses lists of construction vars.
-            # This used to include an entity reference which would be replaced
-            # by the link to the cvar, but with lxml, dumping out the tree
-            # with tostring() will encode the & introducing the entity,
-            # breaking it. Instead generate the actual link. (issue #3580)
             if v.sets:
                 added = True
                 vp = stf.newNode("para")
-                stf.setText(vp, "Sets: ")
-                for setv in v.sets:
-                    link = stf.newSubNode(vp, "link", linkend="cv-%s" % setv)
-                    linktgt = stf.newSubNode(link, "varname")
-                    stf.setText(linktgt, "$" + setv)
-                    stf.setTail(link, " ")
+                s = ['&cv-link-%s;' % x for x in v.sets]
+                stf.setText(vp, 'Sets:  ' + ', '.join(s) + '.')
                 stf.appendNode(vl, vp)
 
             if v.uses:
                 added = True
                 vp = stf.newNode("para")
-                stf.setText(vp, "Uses: ")
-                for use in v.uses:
-                    link = stf.newSubNode(vp, "link", linkend="cv-%s" % use)
-                    linktgt = stf.newSubNode(link, "varname")
-                    stf.setText(linktgt, "$" + use)
-                    stf.setTail(link, " ")
-                stf.appendNode(vl, vp)
+                u = ['&cv-link-%s;' % x for x in v.uses]
+                stf.setText(vp, 'Uses:  ' + ', '.join(u) + '.')
+                stf.appendNode(vl, vp)
                 
             # Still nothing added to this list item?
             if not added:
@@ -175,6 +167,7 @@ class SCons_XML:
             stf.appendNode(root, ve)
             
         # Write file        
+# TODO DB Check file encoding for unicode/utf-8
         f = self.fopen(filename)
         stf.writeGenTree(root, f)
         f.close()
@@ -195,6 +188,7 @@ class SCons_XML:
         f.write('\n')
         f.write(Regular_Entities_Header % description)
         f.write('\n')
+# TODO DB Check file encoding for unicode/utf-8
         for v in self.values:
             f.write('<!ENTITY %s%s "<%s xmlns=\'%s\'>%s</%s>">\n' %
                         (v.prefix, v.idfunc(),
-- 
cgit v0.12