From 35590bd990f2fe28a4d39f41155d9020fffe096d Mon Sep 17 00:00:00 2001 From: Dirk Baechle Date: Tue, 9 Jun 2020 16:49:00 +0200 Subject: First set of changes, started to rip out libxslt2. --- SCons/Tool/docbook/__init__.py | 180 +++--------- bin/SConsDoc.py | 571 ++++++++++--------------------------- bin/SConsExamples.py | 1 + bin/docs-create-example-outputs.py | 1 + bin/docs-update-generated.py | 1 + bin/docs-validate.py | 1 + bin/scons-proc.py | 30 +- 7 files changed, 209 insertions(+), 576 deletions(-) diff --git a/SCons/Tool/docbook/__init__.py b/SCons/Tool/docbook/__init__.py index 7f47e9d..3b725da 100644 --- a/SCons/Tool/docbook/__init__.py +++ b/SCons/Tool/docbook/__init__.py @@ -51,20 +51,14 @@ scriptpath = os.path.dirname(os.path.realpath(__file__)) # Local folder for the collection of DocBook XSLs db_xsl_folder = 'docbook-xsl-1.76.1' -# Do we have libxml2/libxslt/lxml? -has_libxml2 = True +# Do we have lxml? has_lxml = True try: - import libxml2 - import libxslt -except: - has_libxml2 = False -try: import lxml except: has_lxml = False -# Set this to True, to prefer xsltproc over libxml2 and lxml +# Set this to True, to prefer xsltproc over lxml prefer_xsltproc = False # Regexs for parsing Docbook XML sources of MAN pages @@ -95,20 +89,12 @@ def __init_xsl_stylesheet(kw, env, user_xsl_var, default_path): xsl_style = os.path.join(*path_args) kw['DOCBOOK_XSL'] = xsl_style -def __select_builder(lxml_builder, libxml2_builder, cmdline_builder): +def __select_builder(lxml_builder, cmdline_builder): """ Selects a builder, based on which Python modules are present. """ - if prefer_xsltproc: - return cmdline_builder - - if not has_libxml2: - # At the moment we prefer libxml2 over lxml, the latter can lead - # to conflicts when installed together with libxml2. - if has_lxml: - return lxml_builder - else: - return cmdline_builder + if has_lxml and not prefer_xsltproc: + return lxml_builder - return libxml2_builder + return cmdline_builder def __ensure_suffix(t, suffix): """ Ensure that the target t has the given suffix. """ @@ -207,7 +193,7 @@ def _detect(env): if env.get('DOCBOOK_PREFER_XSLTPROC',''): prefer_xsltproc = True - if (not has_libxml2 and not has_lxml) or prefer_xsltproc: + if (not has_lxml) or prefer_xsltproc: # Try to find the XSLT processors __detect_cl_tool(env, 'DOCBOOK_XSLTPROC', xsltproc_com, xsltproc_com_priority) __detect_cl_tool(env, 'DOCBOOK_XMLLINT', xmllint_com) @@ -233,44 +219,26 @@ def __xml_scan(node, env, path, arg): return sentity_re.findall(contents) xsl_file = os.path.join(scriptpath,'utils','xmldepend.xsl') - if not has_libxml2 or prefer_xsltproc: - if has_lxml and not prefer_xsltproc: - - from lxml import etree - - xsl_tree = etree.parse(xsl_file) - doc = etree.parse(str(node)) - result = doc.xslt(xsl_tree) - + if not has_lxml or prefer_xsltproc: + # Try to call xsltproc + xsltproc = env.subst("$DOCBOOK_XSLTPROC") + if xsltproc and xsltproc.endswith('xsltproc'): + result = env.backtick(' '.join([xsltproc, xsl_file, str(node)])) depfiles = [x.strip() for x in str(result).splitlines() if x.strip() != "" and not x.startswith("", dt.createDoctype()) - fp.write(content) - doc.freeDoc() - - @staticmethod - def writeTree(root, fpath): - with open(fpath, 'wb') as fp: - doc = libxml2.newDoc('1.0') - doc.setRootElement(root) - fp.write(doc.serialize("UTF-8", 1)) - doc.freeDoc() - - @staticmethod - def prettyPrintFile(fpath): - # Read file and resolve entities - doc = libxml2.readFile(fpath, None, libxml2d.XML_PARSE_NOENT) - with open(fpath, 'wb') as fp: - # Prettyprint - fp.write(doc.serialize("UTF-8", 1)) - # Cleanup - doc.freeDoc() - - @staticmethod - def decorateWithHeader(root): - # Register the namespaces - ns = root.newNs(dbxsd, None) - xi = root.newNs(xsi, 'xsi') - root.setNs(ns) #put this node in the target namespace - - root.setNsProp(xi, 'schemaLocation', "%s %s/scons.xsd" % (dbxsd, dbxsd)) - - return root - - def newXmlTree(self, root): - """ Return a XML file tree with the correct namespaces set, - the element root as top entry and the given header comment. - """ - t = libxml2.newNode(root) - return self.decorateWithHeader(t) - - @staticmethod - def validateXml(fpath, xmlschema_context): - retval = True - - # Create validation context - validation_context = xmlschema_context.schemaNewValidCtxt() - # Set error/warning handlers - eh = Libxml2ValidityHandler() - validation_context.setValidityErrorHandler(eh.error, eh.warning, ARG) - # Read file and resolve entities - doc = libxml2.readFile(fpath, None, libxml2.XML_PARSE_NOENT) - doc.xincludeProcessFlags(libxml2.XML_PARSE_NOENT) - err = validation_context.schemaValidateDoc(doc) - - if err or eh.errors: - for e in eh.errors: - print(e.rstrip("\n")) - # import pdb; pdb.set_trace() - print("%s fails to validate" % fpath) - retval = False - - # Cleanup - doc.freeDoc() - del validation_context - - return retval - - @staticmethod - def findAll(root, tag, ns=None, xpath_context=None, nsmap=None): - if hasattr(root, 'xpathEval') and xpath_context: - # Use the xpath context - xpath_context.setContextNode(root) - expression = ".//%s" % tag - if ns: - expression = ".//%s:%s" % (ns, tag) - return xpath_context.xpathEval(expression) - else: - expression = ".//{%s}%s" % (nsmap[ns], tag) - if not ns or not nsmap: - expression = ".//%s" % tag - return root.findall(expression) - - @staticmethod - def findAllChildrenOf(root, tag, ns=None, xpath_context=None, nsmap=None): - if hasattr(root, 'xpathEval') and xpath_context: - # Use the xpath context - xpath_context.setContextNode(root) - expression = "./%s/node()" % tag - if ns: - expression = "./%s:%s/node()" % (ns, tag) - - return xpath_context.xpathEval(expression) - else: - expression = "./{%s}%s/node()" % (nsmap[ns], tag) - if not ns or not nsmap: - expression = "./%s/node()" % tag - return root.findall(expression) - - def expandChildElements(self, child): - """ Helper function for convertElementTree, - converts a single child recursively. - """ - nchild = self.newNode(child.tag) - # Copy attributes - for key, val in child.attrib: - self.setAttribute(nchild, key, val) - elements = [] - # Add text - if child.text: - t = libxml2.newText(child.text) - self.appendNode(nchild, t) - # Add children - for c in child: - for n in self.expandChildElements(c): - self.appendNode(nchild, n) - elements.append(nchild) - # Add tail - if child.tail: - tail = libxml2.newText(child.tail) - elements.append(tail) - - return elements - - def convertElementTree(self, root): - """ Convert the given tree of etree.Element - entries to a list of tree nodes for the - current XML toolkit. - """ - nroot = self.newNode(root.tag) - # Copy attributes - for key, val in root.attrib: - self.setAttribute(nroot, key, val) - elements = [] - # Add text - if root.text: - t = libxml2.newText(root.text) - self.appendNode(nroot, t) - # Add children - for c in root: - for n in self.expandChildElements(c): - self.appendNode(nroot, n) - elements.append(nroot) - # Add tail - if root.tail: - tail = libxml2.newText(root.tail) - elements.append(tail) - - return elements + # singleton to cache parsed xmlschema.. + xmlschema = None + + @staticmethod + def validateXml(fpath, xmlschema_context): + + if TreeFactory.xmlschema is None: + TreeFactory.xmlschema = etree.XMLSchema(xmlschema_context) + try: + doc = etree.parse(fpath) + except Exception as e: + print("ERROR: %s fails to parse:"%fpath) + print(e) + return False + doc.xinclude() + try: + TreeFactory.xmlschema.assertValid(doc) + except Exception as e: + print("ERROR: %s fails to validate:" % fpath) + print(e) + return False + return True + + @staticmethod + def findAll(root, tag, ns=None, xp_ctxt=None, nsmap=None): + expression = ".//{%s}%s" % (nsmap[ns], tag) + if not ns or not nsmap: + expression = ".//%s" % tag + return root.findall(expression) + + @staticmethod + def findAllChildrenOf(root, tag, ns=None, xp_ctxt=None, nsmap=None): + expression = "./{%s}%s/*" % (nsmap[ns], tag) + if not ns or not nsmap: + expression = "./%s/*" % tag + return root.findall(expression) + + @staticmethod + def convertElementTree(root): + """ Convert the given tree of etree.Element + entries to a list of tree nodes for the + current XML toolkit. + """ + return [root] tf = TreeFactory() @@ -641,19 +402,9 @@ class SConsDocTree: self.root = etree.fromstring(content) def parseXmlFile(self, fpath): - if not has_libxml2: - # Create domtree from file - domtree = etree.parse(fpath) - self.root = domtree.getroot() - else: - # Read file and resolve entities - self.doc = libxml2.readFile(fpath, None, libxml2.XML_PARSE_NOENT) - self.root = self.doc.getRootElement() - # Create xpath context - self.xpath_context = self.doc.xpathNewContext() - # Register namespaces - for key, val in self.nsmap.items(): - self.xpath_context.xpathRegisterNs(key, val) + # Create domtree from file + domtree = etree.parse(fpath) + self.root = domtree.getroot() def __del__(self): if self.doc is not None: @@ -664,15 +415,7 @@ class SConsDocTree: perc = "%" def validate_all_xml(dpaths, xsdfile=default_xsd): - xmlschema_context = None - if not has_libxml2: - # Use lxml - xmlschema_context = etree.parse(xsdfile) - else: - # Use libxml2 and prepare the schema validation context - ctxt = libxml2.schemaNewParserCtxt(xsdfile) - xmlschema_context = ctxt.schemaParse() - del ctxt + xmlschema_context = etree.parse(xsdfile) fpaths = [] for dp in dpaths: @@ -698,10 +441,6 @@ def validate_all_xml(dpaths, xsdfile=default_xsd): fails.append(fp) continue - if has_libxml2: - # Cleanup - del xmlschema_context - if fails: return False diff --git a/bin/SConsExamples.py b/bin/SConsExamples.py index 46df103..bfc9002 100644 --- a/bin/SConsExamples.py +++ b/bin/SConsExamples.py @@ -85,6 +85,7 @@ # Error output gets passed through to your error output so you # can see if there are any problems executing the command. # +# TODO DB Check file encoding for unicode/utf-8 import os import re diff --git a/bin/docs-create-example-outputs.py b/bin/docs-create-example-outputs.py index 0124435..e74c547 100644 --- a/bin/docs-create-example-outputs.py +++ b/bin/docs-create-example-outputs.py @@ -3,6 +3,7 @@ # Searches through the whole doc/user tree and creates # all output files for the single examples. # +# TODO DB Check file encoding for unicode/utf-8 import os import sys import SConsExamples diff --git a/bin/docs-update-generated.py b/bin/docs-update-generated.py index 3687896..4226473 100644 --- a/bin/docs-update-generated.py +++ b/bin/docs-update-generated.py @@ -6,6 +6,7 @@ # as well as the entity declarations for them. # Uses scons-proc.py under the hood... # +# TODO DB Check file encoding for unicode/utf-8 import os import sys import subprocess diff --git a/bin/docs-validate.py b/bin/docs-validate.py index c4dd3b7..53b6f86 100644 --- a/bin/docs-validate.py +++ b/bin/docs-validate.py @@ -3,6 +3,7 @@ # Searches through the whole source tree and validates all # documentation files against our own XSD in docs/xsd. # +# TODO DB Check file encoding for unicode/utf-8 import sys,os import SConsDoc diff --git a/bin/scons-proc.py b/bin/scons-proc.py index 95a798c..e1a2d19 100644 --- a/bin/scons-proc.py +++ b/bin/scons-proc.py @@ -9,6 +9,7 @@ # DocBook-formatted generated XML files containing the summary text # and/or .mod files containing the ENTITY definitions for each item. # +# TODO DB Check file encoding for unicode/utf-8 import getopt import os import sys @@ -66,6 +67,7 @@ def parse_docs(args, include_entities=True): raise else: # mode we read (text/bytes) has to match handling in SConsDoc + # TODO DB Check file encoding for unicode/utf-8 with open(f, 'r') as fp: content = fp.read() if content: @@ -76,12 +78,14 @@ def parse_docs(args, include_entities=True): raise return h +# TODO DB Check file encoding for unicode/utf-8 Warning = """\ """ +# TODO DB Check file encoding for unicode/utf-8 Regular_Entities_Header = """\