diff options
Diffstat (limited to 'libxml2/genChRanges.py')
-rwxr-xr-x | libxml2/genChRanges.py | 578 |
1 files changed, 0 insertions, 578 deletions
diff --git a/libxml2/genChRanges.py b/libxml2/genChRanges.py deleted file mode 100755 index c138319..0000000 --- a/libxml2/genChRanges.py +++ /dev/null @@ -1,578 +0,0 @@ -#!/usr/bin/python -u -# -# Portions of this script have been (shamelessly) stolen from the -# prior work of Daniel Veillard (genUnicode.py) -# -# I, however, take full credit for any bugs, errors or difficulties :-) -# -# William Brack -# October 2003 -# -# 18 October 2003 -# Modified to maintain binary compatibility with previous library versions -# by adding a suffix 'Q' ('quick') to the macro generated for the original, -# function, and adding generation of a function (with the original name) which -# instantiates the macro. -# - -import sys -import string -import time - -# -# A routine to take a list of yes/no (1, 0) values and turn it -# into a list of ranges. This will later be used to determine whether -# to generate single-byte lookup tables, or inline comparisons -# -def makeRange(lst): - ret = [] - pos = 0 - while pos < len(lst): - try: # index generates exception if not present - s = lst[pos:].index(1) # look for start of next range - except: - break # if no more, finished - pos += s # pointer to start of possible range - try: - e = lst[pos:].index(0) # look for end of range - e += pos - except: # if no end, set to end of list - e = len(lst) - ret.append((pos, e-1)) # append range tuple to list - pos = e + 1 # ready to check for next range - return ret - -sources = "chvalid.def" # input filename - -# minTableSize gives the minimum number of ranges which must be present -# before a 256-byte lookup table is produced. If there are less than this -# number, a macro with inline comparisons is generated -minTableSize = 6 - -# dictionary of functions, key=name, element contains char-map and range-list -Functs = {} - -state = 0 - -try: - defines = open("chvalid.def", "r") -except: - print "Missing chvalid.def, aborting ..." - sys.exit(1) - -# -# The lines in the .def file have three types:- -# name: Defines a new function block -# ur: Defines individual or ranges of unicode values -# end: Indicates the end of the function block -# -# These lines are processed below. -# -for line in defines.readlines(): - # ignore blank lines, or lines beginning with '#' - if line[0] == '#': - continue - line = string.strip(line) - if line == '': - continue - # split line into space-separated fields, then split on type - try: - fields = string.split(line, ' ') - # - # name line: - # validate any previous function block already ended - # validate this function not already defined - # initialize an entry in the function dicitonary - # including a mask table with no values yet defined - # - if fields[0] == 'name': - name = fields[1] - if state != 0: - print "'name' %s found before previous name" \ - "completed" % (fields[1]) - continue - state = 1 - if Functs.has_key(name): - print "name '%s' already present - may give" \ - " wrong results" % (name) - else: - # dict entry with two list elements (chdata, rangedata) - Functs[name] = [ [], [] ] - for v in range(256): - Functs[name][0].append(0) - # - # end line: - # validate there was a preceding function name line - # set state to show no current function active - # - elif fields[0] == 'end': - if state == 0: - print "'end' found outside of function block" - continue - state = 0 - - # - # ur line: - # validate function has been defined - # process remaining fields on the line, which may be either - # individual unicode values or ranges of values - # - elif fields[0] == 'ur': - if state != 1: - raise ValidationError, "'ur' found outside of 'name' block" - for el in fields[1:]: - pos = string.find(el, '..') - # pos <=0 means not a range, so must be individual value - if pos <= 0: - # cheap handling of hex or decimal values - if el[0:2] == '0x': - value = int(el[2:],16) - elif el[0] == "'": - value = ord(el[1]) - else: - value = int(el) - if ((value < 0) | (value > 0x1fffff)): - raise ValidationError, 'Illegal value (%s) in ch for'\ - ' name %s' % (el,name) - # for ur we have only ranges (makes things simpler), - # so convert val to range - currange = (value, value) - # pos > 0 means this is a range, so isolate/validate - # the interval - else: - # split the range into it's first-val, last-val - (first, last) = string.split(el, "..") - # convert values from text into binary - if first[0:2] == '0x': - start = int(first[2:],16) - elif first[0] == "'": - start = ord(first[1]) - else: - start = int(first) - if last[0:2] == '0x': - end = int(last[2:],16) - elif last[0] == "'": - end = ord(last[1]) - else: - end = int(last) - if (start < 0) | (end > 0x1fffff) | (start > end): - raise ValidationError, "Invalid range '%s'" % el - currange = (start, end) - # common path - 'currange' has the range, now take care of it - # We split on single-byte values vs. multibyte - if currange[1] < 0x100: # single-byte - for ch in range(currange[0],currange[1]+1): - # validate that value not previously defined - if Functs[name][0][ch]: - msg = "Duplicate ch value '%s' for name '%s'" % (el, name) - raise ValidationError, msg - Functs[name][0][ch] = 1 - else: # multi-byte - if currange in Functs[name][1]: - raise ValidationError, "range already defined in" \ - " function" - else: - Functs[name][1].append(currange) - - except: - print "Failed to process line: %s" % (line) - raise -# -# At this point, the entire definition file has been processed. Now we -# enter the output phase, where we generate the two files chvalid.c and' -# chvalid.h -# -# To do this, we first output the 'static' data (heading, fixed -# definitions, etc.), then output the 'dynamic' data (the results -# of the above processing), and finally output closing 'static' data -# (e.g. the subroutine to process the ranges) -# - -# -# Generate the headings: -# -try: - header = open("include/libxml/chvalid.h", "w") -except: - print "Failed to open include/libxml/chvalid.h" - sys.exit(1) - -try: - output = open("chvalid.c", "w") -except: - print "Failed to open chvalid.c" - sys.exit(1) - -date = time.asctime(time.localtime(time.time())) - -header.write( -"""/* - * Summary: Unicode character range checking - * Description: this module exports interfaces for the character - * range validation APIs - * - * This file is automatically generated from the cvs source - * definition files using the genChRanges.py Python script - * - * Generation date: %s - * Sources: %s - * Author: William Brack <wbrack@mmm.com.hk> - */ - -#ifndef __XML_CHVALID_H__ -#define __XML_CHVALID_H__ - -#include <libxml/xmlversion.h> -#include <libxml/xmlstring.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Define our typedefs and structures - * - */ -typedef struct _xmlChSRange xmlChSRange; -typedef xmlChSRange *xmlChSRangePtr; -struct _xmlChSRange { - unsigned short low; - unsigned short high; -}; - -typedef struct _xmlChLRange xmlChLRange; -typedef xmlChLRange *xmlChLRangePtr; -struct _xmlChLRange { - unsigned int low; - unsigned int high; -}; - -typedef struct _xmlChRangeGroup xmlChRangeGroup; -typedef xmlChRangeGroup *xmlChRangeGroupPtr; -struct _xmlChRangeGroup { - int nbShortRange; - int nbLongRange; - const xmlChSRange *shortRange; /* points to an array of ranges */ - const xmlChLRange *longRange; -}; - -/** - * Range checking routine - */ -XMLPUBFUN int XMLCALL - xmlCharInRange(unsigned int val, const xmlChRangeGroup *group); - -""" % (date, sources)); -output.write( -"""/* - * chvalid.c: this module implements the character range - * validation APIs - * - * This file is automatically generated from the cvs source - * definition files using the genChRanges.py Python script - * - * Generation date: %s - * Sources: %s - * William Brack <wbrack@mmm.com.hk> - */ - -#define IN_LIBXML -#include "libxml.h" -#include <libxml/chvalid.h> - -/* - * The initial tables ({func_name}_tab) are used to validate whether a - * single-byte character is within the specified group. Each table - * contains 256 bytes, with each byte representing one of the 256 - * possible characters. If the table byte is set, the character is - * allowed. - * - */ -""" % (date, sources)); - -# -# Now output the generated data. -# We try to produce the best execution times. Tests have shown that validation -# with direct table lookup is, when there are a "small" number of valid items, -# still not as fast as a sequence of inline compares. So, if the single-byte -# portion of a range has a "small" number of ranges, we output a macro for inline -# compares, otherwise we output a 256-byte table and a macro to use it. -# - -fkeys = Functs.keys() # Dictionary of all defined functions -fkeys.sort() # Put some order to our output - -for f in fkeys: - -# First we convert the specified single-byte values into a group of ranges. -# If the total number of such ranges is less than minTableSize, we generate -# an inline macro for direct comparisons; if greater, we generate a lookup -# table. - if max(Functs[f][0]) > 0: # only check if at least one entry - rangeTable = makeRange(Functs[f][0]) - numRanges = len(rangeTable) - if numRanges >= minTableSize: # table is worthwhile - header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) - header.write(""" -/** - * %s_ch: - * @c: char to validate - * - * Automatically generated by genChRanges.py - */ -""" % f) - header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f)) - - # write the constant data to the code file - output.write("const unsigned char %s_tab[256] = {\n" % f) - pline = " " - for n in range(255): - pline += " 0x%02x," % Functs[f][0][n] - if len(pline) > 72: - output.write(pline + "\n") - pline = " " - output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255]) - - else: # inline check is used - # first another little optimisation - if space is present, - # put it at the front of the list so it is checked first - try: - ix = rangeTable.remove((0x20, 0x20)) - rangeTable.insert(0, (0x20, 0x20)) - except: - pass - firstFlag = 1 - - header.write(""" -/** - * %s_ch: - * @c: char to validate - * - * Automatically generated by genChRanges.py - */ -""" % f) - # okay, I'm tired of the messy lineup - let's automate it! - pline = "#define %s_ch(c)" % f - # 'ntab' is number of tabs needed to position to col. 33 from name end - ntab = 4 - (len(pline)) / 8 - if ntab < 0: - ntab = 0 - just = "" - for i in range(ntab): - just += "\t" - pline = pline + just + "(" - for rg in rangeTable: - if not firstFlag: - pline += " || \\\n\t\t\t\t " - else: - firstFlag = 0 - if rg[0] == rg[1]: # single value - check equal - pline += "((c) == 0x%x)" % rg[0] - else: # value range - # since we are doing char, also change range ending in 0xff - if rg[1] != 0xff: - pline += "((0x%x <= (c)) &&" % rg[0] - pline += " ((c) <= 0x%x))" % rg[1] - else: - pline += " (0x%x <= (c))" % rg[0] - pline += ")\n" - header.write(pline) - - header.write(""" -/** - * %sQ: - * @c: char to validate - * - * Automatically generated by genChRanges.py - */ -""" % f) - pline = "#define %sQ(c)" % f - ntab = 4 - (len(pline)) / 8 - if ntab < 0: - ntab = 0 - just = "" - for i in range(ntab): - just += "\t" - header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ") - if max(Functs[f][0]) > 0: - header.write("%s_ch((c)) :" % f) - else: - header.write("0 :") - - # if no ranges defined, value invalid if >= 0x100 - numRanges = len(Functs[f][1]) - if numRanges == 0: - header.write(" 0)\n\n") - else: - if numRanges >= minTableSize: - header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f) - else: # if < minTableSize, generate inline code - firstFlag = 1 - for rg in Functs[f][1]: - if not firstFlag: - pline += " || \\\n\t\t\t\t " - else: - firstFlag = 0 - pline = "\\\n\t\t\t\t(" - if rg[0] == rg[1]: # single value - check equal - pline += "((c) == 0x%x)" % rg[0] - else: # value range - pline += "((0x%x <= (c)) &&" % rg[0] - pline += " ((c) <= 0x%x))" % rg[1] - pline += "))\n\n" - header.write(pline) - - - if len(Functs[f][1]) > 0: - header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) - - -# -# Next we do the unicode ranges -# - -for f in fkeys: - if len(Functs[f][1]) > 0: # only generate if unicode ranges present - rangeTable = Functs[f][1] - rangeTable.sort() # ascending tuple sequence - numShort = 0 - numLong = 0 - for rg in rangeTable: - if rg[1] < 0x10000: # if short value - if numShort == 0: # first occurence - pline = "static const xmlChSRange %s_srng[] = { " % f - else: - pline += ", " - numShort += 1 - if len(pline) > 60: - output.write(pline + "\n") - pline = " " - pline += "{0x%x, 0x%x}" % (rg[0], rg[1]) - else: # if long value - if numLong == 0: # first occurence - if numShort > 0: # if there were shorts, finish them off - output.write(pline + "};\n") - pline = "static const xmlChLRange %s_lrng[] = { " % f - else: - pline += ", " - numLong += 1 - if len(pline) > 60: - output.write(pline + "\n") - pline = " " - pline += "{0x%x, 0x%x}" % (rg[0], rg[1]) - output.write(pline + "};\n") # finish off last group - - pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong) - if numShort > 0: - pline += "%s_srng" % f - else: - pline += "(xmlChSRangePtr)0" - if numLong > 0: - pline += ", %s_lrng" % f - else: - pline += ", (xmlChLRangePtr)0" - - output.write(pline + "};\n\n") - -output.write( -""" -/** - * xmlCharInRange: - * @val: character to be validated - * @rptr: pointer to range to be used to validate - * - * Does a binary search of the range table to determine if char - * is valid - * - * Returns: true if character valid, false otherwise - */ -int -xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { - int low, high, mid; - const xmlChSRange *sptr; - const xmlChLRange *lptr; - - if (rptr == NULL) return(0); - if (val < 0x10000) { /* is val in 'short' or 'long' array? */ - if (rptr->nbShortRange == 0) - return 0; - low = 0; - high = rptr->nbShortRange - 1; - sptr = rptr->shortRange; - while (low <= high) { - mid = (low + high) / 2; - if ((unsigned short) val < sptr[mid].low) { - high = mid - 1; - } else { - if ((unsigned short) val > sptr[mid].high) { - low = mid + 1; - } else { - return 1; - } - } - } - } else { - if (rptr->nbLongRange == 0) { - return 0; - } - low = 0; - high = rptr->nbLongRange - 1; - lptr = rptr->longRange; - while (low <= high) { - mid = (low + high) / 2; - if (val < lptr[mid].low) { - high = mid - 1; - } else { - if (val > lptr[mid].high) { - low = mid + 1; - } else { - return 1; - } - } - } - } - return 0; -} - -"""); - -# -# finally, generate the ABI compatibility functions -# -for f in fkeys: - output.write(""" -/** - * %s: - * @ch: character to validate - * - * This function is DEPRECATED. -""" % f); - if max(Functs[f][0]) > 0: - output.write(" * Use %s_ch or %sQ instead" % (f, f)) - else: - output.write(" * Use %sQ instead" % f) - output.write(""" - * - * Returns true if argument valid, false otherwise - */ -""") - output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f)) - header.write("XMLPUBFUN int XMLCALL\n\t\t%s(unsigned int ch);\n" % f); -# -# Run complete - write trailers and close the output files -# - -header.write(""" -#ifdef __cplusplus -} -#endif -#endif /* __XML_CHVALID_H__ */ -""") - -header.close() - -output.write("""#define bottom_chvalid -#include "elfgcchack.h" -""") -output.close() - |