Merge commit '0f851b6f17365c29b56d6a17564d58a33120508a' as 'libxml2'

author: William Joye <wjoye@cfa.harvard.edu> 2016-10-19 18:22:05 (GMT)
committer: William Joye <wjoye@cfa.harvard.edu> 2016-10-19 18:22:05 (GMT)
commit: 34be72f9ed749a5c013d3f7f47d810e8caf652cb (patch)
tree: 2f75b6050c088da53d774b3da3fb71e7fa49ec78 /libxml2/genChRanges.py
parent: 604eac5688c5d437ed6d585ee63e53fba6f558c0 (diff)
parent: 0f851b6f17365c29b56d6a17564d58a33120508a (diff)
download: blt-34be72f9ed749a5c013d3f7f47d810e8caf652cb.zip
blt-34be72f9ed749a5c013d3f7f47d810e8caf652cb.tar.gz
blt-34be72f9ed749a5c013d3f7f47d810e8caf652cb.tar.bz2
1 files changed, 578 insertions, 0 deletions
diff --git a/libxml2/genChRanges.py b/libxml2/genChRanges.py
new file mode 100755
index 0000000..c138319
--- /dev/null
+++ b/libxml2/genChRanges.py
@@ -0,0 +1,578 @@
+#!/usr/bin/python -u
+#
+# Portions of this script have been (shamelessly) stolen from the
+# prior work of Daniel Veillard (genUnicode.py)
+#
+# I, however, take full credit for any bugs, errors or difficulties :-)
+#
+# William Brack
+# October 2003
+#
+# 18 October 2003
+# Modified to maintain binary compatibility with previous library versions
+# by adding a suffix 'Q' ('quick') to the macro generated for the original,
+# function, and adding generation of a function (with the original name) which
+# instantiates the macro.
+#
+
+import sys
+import string
+import time
+
+#
+# A routine to take a list of yes/no (1, 0) values and turn it
+# into a list of ranges.  This will later be used to determine whether
+# to generate single-byte lookup tables, or inline comparisons
+#
+def makeRange(lst):
+    ret = []
+    pos = 0
+    while pos < len(lst):
+	try:		# index generates exception if not present
+	    s = lst[pos:].index(1)	# look for start of next range
+	except:
+	    break			# if no more, finished
+	pos += s			# pointer to start of possible range
+	try:
+	    e = lst[pos:].index(0)	# look for end of range
+	    e += pos
+	except:				# if no end, set to end of list
+	    e = len(lst)
+	ret.append((pos, e-1))		# append range tuple to list
+	pos = e + 1			# ready to check for next range
+    return ret
+
+sources = "chvalid.def"			# input filename
+
+# minTableSize gives the minimum number of ranges which must be present
+# before a 256-byte lookup table is produced.  If there are less than this
+# number, a macro with inline comparisons is generated
+minTableSize = 6
+
+# dictionary of functions, key=name, element contains char-map and range-list
+Functs = {}
+
+state = 0
+
+try:
+    defines = open("chvalid.def", "r")
+except:
+    print "Missing chvalid.def, aborting ..."
+    sys.exit(1)
+
+#
+# The lines in the .def file have three types:-
+#   name:   Defines a new function block
+#   ur:	    Defines individual or ranges of unicode values
+#   end:    Indicates the end of the function block
+#
+# These lines are processed below.
+#
+for line in defines.readlines():
+    # ignore blank lines, or lines beginning with '#'
+    if line[0] == '#':
+        continue
+    line = string.strip(line)
+    if line == '':
+        continue
+    # split line into space-separated fields, then split on type
+    try:
+        fields = string.split(line, ' ')
+	#
+	# name line:
+	#   validate any previous function block already ended
+	#   validate this function not already defined
+	#   initialize an entry in the function dicitonary
+	#	including a mask table with no values yet defined
+	#
+	if fields[0] == 'name':
+	    name = fields[1]
+	    if state != 0:
+		print "'name' %s found before previous name" \
+		      "completed" % (fields[1])
+		continue
+	    state = 1
+	    if Functs.has_key(name):
+		print "name '%s' already present - may give" \
+		      " wrong results" % (name)
+	    else:
+		# dict entry with two list elements (chdata, rangedata)
+		Functs[name] = [ [], [] ]
+		for v in range(256):
+		    Functs[name][0].append(0)
+	#
+	# end line:
+	#   validate there was a preceding function name line
+	#   set state to show no current function active
+	#
+	elif fields[0] == 'end':
+	    if state == 0:
+		print "'end' found outside of function block"
+		continue
+	    state = 0
+
+	#
+	# ur line:
+	#   validate function has been defined
+	#   process remaining fields on the line, which may be either
+	#	individual unicode values or ranges of values
+	#
+	elif fields[0] == 'ur':
+	    if state != 1:
+		raise ValidationError, "'ur' found outside of 'name' block"
+	    for el in fields[1:]:
+		pos = string.find(el, '..')
+		# pos <=0 means not a range, so must be individual value
+		if pos <= 0:
+		    # cheap handling of hex or decimal values
+		    if el[0:2] == '0x':
+		        value = int(el[2:],16)
+		    elif el[0] == "'":
+			value = ord(el[1])
+		    else:
+			value = int(el)
+		    if ((value < 0) | (value > 0x1fffff)):
+			raise ValidationError, 'Illegal value (%s) in ch for'\
+				' name %s' % (el,name)
+		    # for ur we have only ranges (makes things simpler),
+		    # so convert val to range
+		    currange = (value, value)
+		# pos > 0 means this is a range, so isolate/validate
+		# the interval
+		else:
+		    # split the range into it's first-val, last-val
+		    (first, last) = string.split(el, "..")
+		    # convert values from text into binary
+		    if first[0:2] == '0x':	
+			start = int(first[2:],16)
+		    elif first[0] == "'":
+			start = ord(first[1])
+		    else:
+			start = int(first)
+		    if last[0:2] == '0x':
+			end = int(last[2:],16)
+		    elif last[0] == "'":
+			end = ord(last[1])
+		    else:
+			end = int(last)
+		    if (start < 0) | (end > 0x1fffff) | (start > end):
+			raise ValidationError, "Invalid range '%s'" % el
+		    currange = (start, end)
+		# common path - 'currange' has the range, now take care of it
+		# We split on single-byte values vs. multibyte
+		if currange[1] < 0x100:	# single-byte
+		    for ch in range(currange[0],currange[1]+1):
+			# validate that value not previously defined
+			if Functs[name][0][ch]:
+			    msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
+			    raise ValidationError, msg
+			Functs[name][0][ch] = 1
+		else:			# multi-byte
+		    if currange in Functs[name][1]:
+			raise ValidationError, "range already defined in" \
+				" function"
+		    else:
+			Functs[name][1].append(currange)
+
+    except:
+	print "Failed to process line: %s" % (line)
+	raise
+#
+# At this point, the entire definition file has been processed.  Now we
+# enter the output phase, where we generate the two files chvalid.c and'
+# chvalid.h
+#
+# To do this, we first output the 'static' data (heading, fixed
+# definitions, etc.), then output the 'dynamic' data (the results
+# of the above processing), and finally output closing 'static' data
+# (e.g. the subroutine to process the ranges)
+#
+
+#
+# Generate the headings:
+#
+try:
+    header = open("include/libxml/chvalid.h", "w")
+except:
+    print "Failed to open include/libxml/chvalid.h"
+    sys.exit(1)
+
+try:
+    output = open("chvalid.c", "w")
+except:
+    print "Failed to open chvalid.c"
+    sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * Summary: Unicode character range checking
+ * Description: this module exports interfaces for the character
+ *               range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Author: William Brack <wbrack@mmm.com.hk>
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#include <libxml/xmlversion.h>
+#include <libxml/xmlstring.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+    unsigned short	low;
+    unsigned short	high;
+};
+
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+    unsigned int	low;
+    unsigned int	high;
+};
+
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+    int			nbShortRange;
+    int			nbLongRange;
+    const xmlChSRange	*shortRange;	/* points to an array of ranges */
+    const xmlChLRange	*longRange;
+};
+
+/**
+ * Range checking routine
+ */
+XMLPUBFUN int XMLCALL
+		xmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
+
+""" % (date, sources));
+output.write(
+"""/*
+ * chvalid.c:	this module implements the character range
+ *		validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+#include <libxml/chvalid.h>
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group.  Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters.  If the table byte is set, the character is
+ * allowed.
+ *
+ */
+""" % (date, sources));
+
+#
+# Now output the generated data.
+# We try to produce the best execution times.  Tests have shown that validation
+# with direct table lookup is, when there are a "small" number of valid items,
+# still not as fast as a sequence of inline compares.  So, if the single-byte
+# portion of a range has a "small" number of ranges, we output a macro for inline
+# compares, otherwise we output a 256-byte table and a macro to use it.
+#
+
+fkeys = Functs.keys()	# Dictionary of all defined functions
+fkeys.sort()		# Put some order to our output
+
+for f in fkeys:
+
+# First we convert the specified single-byte values into a group of ranges.
+# If the total number of such ranges is less than minTableSize, we generate
+# an inline macro for direct comparisons; if greater, we generate a lookup
+# table.
+    if max(Functs[f][0]) > 0:	# only check if at least one entry
+        rangeTable = makeRange(Functs[f][0])
+	numRanges = len(rangeTable)
+	if numRanges >= minTableSize:	# table is worthwhile
+	    header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
+	    header.write("""
+/**
+ * %s_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+""" % f)
+	    header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
+
+	    # write the constant data to the code file
+	    output.write("const unsigned char %s_tab[256] = {\n" % f)
+	    pline = "   "
+	    for n in range(255):
+		pline += " 0x%02x," % Functs[f][0][n]
+		if len(pline) > 72:
+		    output.write(pline + "\n")
+		    pline = "   "
+	    output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
+
+	else:		# inline check is used
+	    # first another little optimisation - if space is present,
+	    # put it at the front of the list so it is checked first
+	    try:
+		ix = rangeTable.remove((0x20, 0x20))
+		rangeTable.insert(0, (0x20, 0x20))
+	    except:
+		pass
+	    firstFlag = 1
+	    
+	    header.write("""
+/**
+ * %s_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+""" % f)
+	    # okay, I'm tired of the messy lineup - let's automate it!
+	    pline = "#define %s_ch(c)" % f
+	    # 'ntab' is number of tabs needed to position to col. 33 from name end
+	    ntab = 4 - (len(pline)) / 8
+	    if ntab < 0:
+		ntab = 0
+	    just = ""
+	    for i in range(ntab):
+		just += "\t"
+	    pline = pline + just + "("
+	    for rg in rangeTable:
+		if not firstFlag:
+		    pline += " || \\\n\t\t\t\t "
+		else:
+		    firstFlag = 0
+		if rg[0] == rg[1]:		# single value - check equal
+		    pline += "((c) == 0x%x)" % rg[0]
+		else:				# value range
+		# since we are doing char, also change range ending in 0xff
+		    if rg[1] != 0xff:
+		        pline += "((0x%x <= (c)) &&" % rg[0]
+		        pline += " ((c) <= 0x%x))" % rg[1]
+		    else:
+		        pline += " (0x%x <= (c))" % rg[0]
+	    pline += ")\n"
+	    header.write(pline)
+
+    header.write("""
+/**
+ * %sQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+""" % f)
+    pline = "#define %sQ(c)" % f
+    ntab = 4 - (len(pline)) / 8
+    if ntab < 0:
+	ntab = 0
+    just = ""
+    for i in range(ntab):
+	just += "\t"
+    header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ")
+    if max(Functs[f][0]) > 0:
+	header.write("%s_ch((c)) :" % f)
+    else:
+	header.write("0 :")
+
+    # if no ranges defined, value invalid if >= 0x100
+    numRanges = len(Functs[f][1])
+    if numRanges == 0:
+	header.write(" 0)\n\n")
+    else:
+	if numRanges >= minTableSize:
+	    header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n"  % f)
+	else:		# if < minTableSize, generate inline code
+	    firstFlag = 1
+	    for rg in Functs[f][1]:
+		if not firstFlag:
+		    pline += " || \\\n\t\t\t\t "
+		else:
+		    firstFlag = 0
+		    pline = "\\\n\t\t\t\t("
+		if rg[0] == rg[1]:		# single value - check equal
+		    pline += "((c) == 0x%x)" % rg[0]
+		else:				# value range
+		    pline += "((0x%x <= (c)) &&" % rg[0]
+		    pline += " ((c) <= 0x%x))" % rg[1]
+	    pline += "))\n\n"
+	    header.write(pline)
+
+
+    if len(Functs[f][1]) > 0:
+	header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
+
+
+#
+# Next we do the unicode ranges
+#
+
+for f in fkeys:
+    if len(Functs[f][1]) > 0:	# only generate if unicode ranges present
+	rangeTable = Functs[f][1]
+	rangeTable.sort()	# ascending tuple sequence
+	numShort = 0
+	numLong  = 0
+	for rg in rangeTable:
+	    if rg[1] < 0x10000:	# if short value
+		if numShort == 0:	# first occurence
+		    pline = "static const xmlChSRange %s_srng[] = { " % f
+		else:
+		    pline += ", "
+		numShort += 1
+		if len(pline) > 60:
+		    output.write(pline + "\n")
+		    pline = "    "
+		pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+	    else:		# if long value
+		if numLong == 0:	# first occurence
+		    if numShort > 0:	# if there were shorts, finish them off
+			output.write(pline + "};\n")
+		    pline = "static const xmlChLRange %s_lrng[] = { " % f
+		else:
+		    pline += ", "
+		numLong += 1
+		if len(pline) > 60:
+		    output.write(pline + "\n")
+		    pline = "    "
+		pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+	output.write(pline + "};\n")	# finish off last group
+
+	pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong)
+	if numShort > 0:
+	    pline += "%s_srng" % f
+	else:
+	    pline += "(xmlChSRangePtr)0"
+	if numLong > 0:
+	    pline += ", %s_lrng" % f
+	else:
+	    pline += ", (xmlChLRangePtr)0"
+	
+	output.write(pline + "};\n\n")
+
+output.write(
+"""
+/**
+ * xmlCharInRange:
+ * @val: character to be validated
+ * @rptr: pointer to range to be used to validate
+ *
+ * Does a binary search of the range table to determine if char
+ * is valid
+ *
+ * Returns: true if character valid, false otherwise
+ */
+int
+xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
+    int low, high, mid;
+    const xmlChSRange *sptr;
+    const xmlChLRange *lptr;
+
+    if (rptr == NULL) return(0);
+    if (val < 0x10000) {	/* is val in 'short' or 'long'  array? */
+	if (rptr->nbShortRange == 0)
+	    return 0;
+	low = 0;
+	high = rptr->nbShortRange - 1;
+	sptr = rptr->shortRange;
+	while (low <= high) {
+	    mid = (low + high) / 2;
+	    if ((unsigned short) val < sptr[mid].low) {
+		high = mid - 1;
+	    } else {
+	        if ((unsigned short) val > sptr[mid].high) {
+		    low = mid + 1;
+		} else {
+		    return 1;
+		}
+	    }
+	}
+    } else {
+	if (rptr->nbLongRange == 0) {
+	    return 0;
+	}
+	low = 0;
+	high = rptr->nbLongRange - 1;
+	lptr = rptr->longRange;
+	while (low <= high) {
+	    mid = (low + high) / 2;
+	    if (val < lptr[mid].low) {
+		high = mid - 1;
+	    } else {
+	        if (val > lptr[mid].high) {
+		    low = mid + 1;
+		} else {
+		    return 1;
+		}
+	    }
+	}
+    }
+    return 0;
+}
+
+""");
+
+#
+# finally, generate the ABI compatibility functions
+#
+for f in fkeys:
+    output.write("""
+/**
+ * %s:
+ * @ch:  character to validate
+ *
+ * This function is DEPRECATED.
+""" % f);
+    if max(Functs[f][0]) > 0:
+        output.write(" * Use %s_ch or %sQ instead" % (f, f))
+    else:
+        output.write(" * Use %sQ instead" % f)
+    output.write("""
+ *
+ * Returns true if argument valid, false otherwise
+ */
+""")
+    output.write("int\n%s(unsigned int ch) {\n    return(%sQ(ch));\n}\n\n" % (f,f))
+    header.write("XMLPUBFUN int XMLCALL\n\t\t%s(unsigned int ch);\n" % f);
+#
+# Run complete - write trailers and close the output files
+#
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
+""")
+
+header.close()
+
+output.write("""#define bottom_chvalid
+#include "elfgcchack.h"
+""")
+output.close()
+
author	William Joye <wjoye@cfa.harvard.edu>	2016-10-19 18:22:05 (GMT)
committer	William Joye <wjoye@cfa.harvard.edu>	2016-10-19 18:22:05 (GMT)
commit	34be72f9ed749a5c013d3f7f47d810e8caf652cb (patch)
tree	2f75b6050c088da53d774b3da3fb71e7fa49ec78 /libxml2/genChRanges.py
parent	604eac5688c5d437ed6d585ee63e53fba6f558c0 (diff)
parent	0f851b6f17365c29b56d6a17564d58a33120508a (diff)
download	blt-34be72f9ed749a5c013d3f7f47d810e8caf652cb.zip blt-34be72f9ed749a5c013d3f7f47d810e8caf652cb.tar.gz blt-34be72f9ed749a5c013d3f7f47d810e8caf652cb.tar.bz2