diff options
-rw-r--r-- | Modules/expat/ascii.h | 7 | ||||
-rw-r--r-- | Modules/expat/expat.h | 867 | ||||
-rw-r--r-- | Modules/expat/internal.h | 73 | ||||
-rw-r--r-- | Modules/expat/xmlparse.c | 5374 | ||||
-rw-r--r-- | Modules/expat/xmlrole.c | 999 | ||||
-rw-r--r-- | Modules/expat/xmlrole.h | 30 | ||||
-rw-r--r-- | Modules/expat/xmltok.c | 740 | ||||
-rw-r--r-- | Modules/expat/xmltok.h | 260 | ||||
-rw-r--r-- | Modules/expat/xmltok_impl.c | 951 | ||||
-rw-r--r-- | Modules/expat/xmltok_ns.c | 84 | ||||
-rw-r--r-- | setup.py | 7 |
11 files changed, 5447 insertions, 3945 deletions
diff --git a/Modules/expat/ascii.h b/Modules/expat/ascii.h index 6376b1f..337e5bb 100644 --- a/Modules/expat/ascii.h +++ b/Modules/expat/ascii.h @@ -1,6 +1,5 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #define ASCII_A 0x41 @@ -69,7 +68,7 @@ See the file COPYING for copying permission. #define ASCII_9 0x39 #define ASCII_TAB 0x09 -#define ASCII_SPACE 0x20 +#define ASCII_SPACE 0x20 #define ASCII_EXCL 0x21 #define ASCII_QUOT 0x22 #define ASCII_AMP 0x26 diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 123b6e5..0b70302 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -1,30 +1,91 @@ -/* -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifndef XmlParse_INCLUDED #define XmlParse_INCLUDED 1 +#ifdef __VMS +/* 0 1 2 3 0 1 2 3 + 1234567890123456789012345678901 1234567890123456789012345678901 */ +#define XML_SetProcessingInstructionHandler XML_SetProcessingInstrHandler +#define XML_SetUnparsedEntityDeclHandler XML_SetUnparsedEntDeclHandler +#define XML_SetStartNamespaceDeclHandler XML_SetStartNamespcDeclHandler +#define XML_SetExternalEntityRefHandlerArg XML_SetExternalEntRefHandlerArg +#endif + #include <stdlib.h> #ifndef XMLPARSEAPI -# if defined(__declspec) && !defined(__BEOS__) && !defined(__CYGWIN__) -# define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl -# else -# define XMLPARSEAPI(type) type -# endif +#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__) +#ifdef XML_STATIC +#define XMLPARSEAPI(type) type __cdecl +#else +#define XMLPARSEAPI(type) __declspec(dllimport) type __cdecl +#endif +#else +#define XMLPARSEAPI(type) type +#endif #endif /* not defined XMLPARSEAPI */ #ifdef __cplusplus extern "C" { #endif -typedef void *XML_Parser; +#ifdef XML_UNICODE_WCHAR_T +#define XML_UNICODE +#endif + +struct XML_ParserStruct; +typedef struct XML_ParserStruct *XML_Parser; -/* Information is UTF-8 encoded. */ +#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ +#ifdef XML_UNICODE_WCHAR_T +typedef wchar_t XML_Char; +typedef wchar_t XML_LChar; +#else +typedef unsigned short XML_Char; +typedef char XML_LChar; +#endif /* XML_UNICODE_WCHAR_T */ +#else /* Information is UTF-8 encoded. */ typedef char XML_Char; typedef char XML_LChar; +#endif /* XML_UNICODE */ + +/* Should this be defined using stdbool.h when C99 is available? */ +typedef unsigned char XML_Bool; +#define XML_TRUE ((XML_Bool) 1) +#define XML_FALSE ((XML_Bool) 0) + +enum XML_Error { + XML_ERROR_NONE, + XML_ERROR_NO_MEMORY, + XML_ERROR_SYNTAX, + XML_ERROR_NO_ELEMENTS, + XML_ERROR_INVALID_TOKEN, + XML_ERROR_UNCLOSED_TOKEN, + XML_ERROR_PARTIAL_CHAR, + XML_ERROR_TAG_MISMATCH, + XML_ERROR_DUPLICATE_ATTRIBUTE, + XML_ERROR_JUNK_AFTER_DOC_ELEMENT, + XML_ERROR_PARAM_ENTITY_REF, + XML_ERROR_UNDEFINED_ENTITY, + XML_ERROR_RECURSIVE_ENTITY_REF, + XML_ERROR_ASYNC_ENTITY, + XML_ERROR_BAD_CHAR_REF, + XML_ERROR_BINARY_ENTITY_REF, + XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, + XML_ERROR_MISPLACED_XML_PI, + XML_ERROR_UNKNOWN_ENCODING, + XML_ERROR_INCORRECT_ENCODING, + XML_ERROR_UNCLOSED_CDATA_SECTION, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + XML_ERROR_NOT_STANDALONE, + XML_ERROR_UNEXPECTED_STATE, + XML_ERROR_ENTITY_DECLARED_IN_PE, + XML_ERROR_FEATURE_REQUIRES_XML_DTD, + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING +}; enum XML_Content_Type { XML_CTYPE_EMPTY = 1, @@ -63,11 +124,11 @@ enum XML_Content_Quant { typedef struct XML_cp XML_Content; struct XML_cp { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - XML_Char * name; - unsigned int numchildren; - XML_Content * children; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + XML_Char * name; + unsigned int numchildren; + XML_Content * children; }; @@ -75,53 +136,49 @@ struct XML_cp { description of the model argument. It's the caller's responsibility to free model when finished with it. */ - typedef void (*XML_ElementDeclHandler) (void *userData, const XML_Char *name, XML_Content *model); XMLPARSEAPI(void) XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl); - -/* - The Attlist declaration handler is called for *each* attribute. So - a single Attlist declaration with multiple attributes declared will - generate multiple calls to this handler. The "default" parameter - may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword. - The "isrequired" parameter will be true and the default value will - be NULL in the case of "#REQUIRED". If "isrequired" is true and - default is non-NULL, then this is a "#FIXED" default. - */ - -typedef void (*XML_AttlistDeclHandler) (void *userData, + XML_ElementDeclHandler eldecl); + +/* The Attlist declaration handler is called for *each* attribute. So + a single Attlist declaration with multiple attributes declared will + generate multiple calls to this handler. The "default" parameter + may be NULL in the case of the "#IMPLIED" or "#REQUIRED" + keyword. The "isrequired" parameter will be true and the default + value will be NULL in the case of "#REQUIRED". If "isrequired" is + true and default is non-NULL, then this is a "#FIXED" default. +*/ +typedef void (*XML_AttlistDeclHandler) (void *userData, const XML_Char *elname, const XML_Char *attname, const XML_Char *att_type, const XML_Char *dflt, - int isrequired); + int isrequired); XMLPARSEAPI(void) XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl); - - - /* The XML declaration handler is called for *both* XML declarations and - text declarations. The way to distinguish is that the version parameter - will be null for text declarations. The encoding parameter may be null - for XML declarations. The standalone parameter will be -1, 0, or 1 - indicating respectively that there was no standalone parameter in - the declaration, that it was given as no, or that it was given as yes. - */ - -typedef void (*XML_XmlDeclHandler) (void *userData, - const XML_Char *version, - const XML_Char *encoding, - int standalone); + XML_AttlistDeclHandler attdecl); + +/* The XML declaration handler is called for *both* XML declarations + and text declarations. The way to distinguish is that the version + parameter will be NULL for text declarations. The encoding + parameter may be NULL for XML declarations. The standalone + parameter will be -1, 0, or 1 indicating respectively that there + was no standalone parameter in the declaration, that it was given + as no, or that it was given as yes. +*/ +typedef void (*XML_XmlDeclHandler) (void *userData, + const XML_Char *version, + const XML_Char *encoding, + int standalone); XMLPARSEAPI(void) XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler xmldecl); + XML_XmlDeclHandler xmldecl); typedef struct { @@ -131,26 +188,27 @@ typedef struct { } XML_Memory_Handling_Suite; /* Constructs a new parser; encoding is the encoding specified by the -external protocol or null if there is none specified. */ - + external protocol or NULL if there is none specified. +*/ XMLPARSEAPI(XML_Parser) XML_ParserCreate(const XML_Char *encoding); /* Constructs a new parser and namespace processor. Element type -names and attribute names that belong to a namespace will be expanded; -unprefixed attribute names are never expanded; unprefixed element type -names are expanded only if there is a default namespace. The expanded -name is the concatenation of the namespace URI, the namespace -separator character, and the local part of the name. If the namespace -separator is '\0' then the namespace URI and the local part will be -concatenated without any separator. When a namespace is not declared, -the name and prefix will be passed through without expansion. */ - + names and attribute names that belong to a namespace will be + expanded; unprefixed attribute names are never expanded; unprefixed + element type names are expanded only if there is a default + namespace. The expanded name is the concatenation of the namespace + URI, the namespace separator character, and the local part of the + name. If the namespace separator is '\0' then the namespace URI + and the local part will be concatenated without any separator. + When a namespace is not declared, the name and prefix will be + passed through without expansion. +*/ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); -/* Constructs a new parser using the memory management suit referred to +/* Constructs a new parser using the memory management suite referred to by memsuite. If memsuite is NULL, then use the standard library memory suite. If namespaceSeparator is non-NULL it creates a parser with namespace processing as described above. The character pointed at @@ -159,15 +217,26 @@ XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); All further memory operations used for the created parser will come from the given suite. */ - XMLPARSEAPI(XML_Parser) XML_ParserCreate_MM(const XML_Char *encoding, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *namespaceSeparator); + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *namespaceSeparator); -/* atts is array of name/value pairs, terminated by 0; - names and values are 0 terminated. */ +/* Prepare a parser object to be re-used. This is particularly + valuable when memory allocation overhead is disproportionatly high, + such as when a large number of small documnents need to be parsed. + All handlers are cleared from the parser, except for the + unknownEncodingHandler. The parser's external state is re-initialized + except for the values of ns and ns_triplets. + + Added in Expat 1.95.3. +*/ +XMLPARSEAPI(XML_Bool) +XML_ParserReset(XML_Parser parser, const XML_Char *encoding); +/* atts is array of name/value pairs, terminated by 0; + names and values are 0 terminated. +*/ typedef void (*XML_StartElementHandler)(void *userData, const XML_Char *name, const XML_Char **atts); @@ -192,26 +261,26 @@ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data); typedef void (*XML_StartCdataSectionHandler)(void *userData); typedef void (*XML_EndCdataSectionHandler)(void *userData); -/* This is called for any characters in the XML document for -which there is no applicable handler. This includes both -characters that are part of markup which is of a kind that is -not reported (comments, markup declarations), or characters -that are part of a construct which could be reported but -for which no handler has been supplied. The characters are passed -exactly as they were in the XML document except that -they will be encoded in UTF-8. Line boundaries are not normalized. -Note that a byte order mark character is not passed to the default handler. -There are no guarantees about how characters are divided between calls -to the default handler: for example, a comment might be split between -multiple calls. */ - +/* This is called for any characters in the XML document for which + there is no applicable handler. This includes both characters that + are part of markup which is of a kind that is not reported + (comments, markup declarations), or characters that are part of a + construct which could be reported but for which no handler has been + supplied. The characters are passed exactly as they were in the XML + document except that they will be encoded in UTF-8 or UTF-16. + Line boundaries are not normalized. Note that a byte order mark + character is not passed to the default handler. There are no + guarantees about how characters are divided between calls to the + default handler: for example, a comment might be split between + multiple calls. +*/ typedef void (*XML_DefaultHandler)(void *userData, const XML_Char *s, int len); /* This is called for the start of the DOCTYPE declaration, before - any DTD or internal subset is parsed. */ - + any DTD or internal subset is parsed. +*/ typedef void (*XML_StartDoctypeDeclHandler)(void *userData, const XML_Char *doctypeName, const XML_Char *sysid, @@ -219,7 +288,9 @@ typedef void (*XML_StartDoctypeDeclHandler)(void *userData, int has_internal_subset); /* This is called for the start of the DOCTYPE declaration when the -closing > is encountered, but after processing any external subset. */ + closing > is encountered, but after processing any external + subset. +*/ typedef void (*XML_EndDoctypeDeclHandler)(void *userData); /* This is called for entity declarations. The is_parameter_entity @@ -227,17 +298,19 @@ typedef void (*XML_EndDoctypeDeclHandler)(void *userData); otherwise. For internal entities (<!ENTITY foo "bar">), value will - be non-null and systemId, publicID, and notationName will be null. - The value string is NOT null terminated; the length is provided in + be non-NULL and systemId, publicID, and notationName will be NULL. + The value string is NOT nul-terminated; the length is provided in the value_length argument. Since it is legal to have zero-length values, do not use this argument to test for internal entities. - For external entities, value will be null and systemId will be non-null. - The publicId argument will be null unless a public identifier was - provided. The notationName argument will have a non-null value only - for unparsed entity declarations. -*/ + For external entities, value will be NULL and systemId will be + non-NULL. The publicId argument will be NULL unless a public + identifier was provided. The notationName argument will have a + non-NULL value only for unparsed entity declarations. + Note that is_parameter_entity can't be changed to XML_Bool, since + that would break binary compatibility. +*/ typedef void (*XML_EntityDeclHandler) (void *userData, const XML_Char *entityName, int is_parameter_entity, @@ -247,19 +320,20 @@ typedef void (*XML_EntityDeclHandler) (void *userData, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); - + XMLPARSEAPI(void) XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler); + XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE This handler has been superceded by the EntityDeclHandler above. It is provided here for backward compatibility. -This is called for a declaration of an unparsed (NDATA) -entity. The base argument is whatever was set by XML_SetBase. -The entityName, systemId and notationName arguments will never be null. -The other arguments may be. */ + This is called for a declaration of an unparsed (NDATA) entity. + The base argument is whatever was set by XML_SetBase. The + entityName, systemId and notationName arguments will never be + NULL. The other arguments may be. +*/ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, const XML_Char *entityName, const XML_Char *base, @@ -267,10 +341,10 @@ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, const XML_Char *publicId, const XML_Char *notationName); -/* This is called for a declaration of notation. -The base argument is whatever was set by XML_SetBase. -The notationName will never be null. The other arguments can be. */ - +/* This is called for a declaration of notation. The base argument is + whatever was set by XML_SetBase. The notationName will never be + NULL. The other arguments can be. +*/ typedef void (*XML_NotationDeclHandler)(void *userData, const XML_Char *notationName, const XML_Char *base, @@ -278,11 +352,11 @@ typedef void (*XML_NotationDeclHandler)(void *userData, const XML_Char *publicId); /* When namespace processing is enabled, these are called once for -each namespace declaration. The call to the start and end element -handlers occur between the calls to the start and end namespace -declaration handlers. For an xmlns attribute, prefix will be null. -For an xmlns="" attribute, uri will be null. */ - + each namespace declaration. The call to the start and end element + handlers occur between the calls to the start and end namespace + declaration handlers. For an xmlns attribute, prefix will be + NULL. For an xmlns="" attribute, uri will be NULL. +*/ typedef void (*XML_StartNamespaceDeclHandler)(void *userData, const XML_Char *prefix, const XML_Char *uri); @@ -290,87 +364,123 @@ typedef void (*XML_StartNamespaceDeclHandler)(void *userData, typedef void (*XML_EndNamespaceDeclHandler)(void *userData, const XML_Char *prefix); -/* This is called if the document is not standalone (it has an -external subset or a reference to a parameter entity, but does not -have standalone="yes"). If this handler returns 0, then processing -will not continue, and the parser will return a -XML_ERROR_NOT_STANDALONE error. */ - +/* This is called if the document is not standalone, that is, it has an + external subset or a reference to a parameter entity, but does not + have standalone="yes". If this handler returns XML_STATUS_ERROR, + then processing will not continue, and the parser will return a + XML_ERROR_NOT_STANDALONE error. + If parameter entity parsing is enabled, then in addition to the + conditions above this handler will only be called if the referenced + entity was actually read. +*/ typedef int (*XML_NotStandaloneHandler)(void *userData); -/* This is called for a reference to an external parsed general entity. -The referenced entity is not automatically parsed. -The application can parse it immediately or later using -XML_ExternalEntityParserCreate. -The parser argument is the parser parsing the entity containing the reference; -it can be passed as the parser argument to XML_ExternalEntityParserCreate. -The systemId argument is the system identifier as specified in the entity -declaration; it will not be null. -The base argument is the system identifier that should be used as the base for -resolving systemId if systemId was relative; this is set by XML_SetBase; -it may be null. -The publicId argument is the public identifier as specified in the entity -declaration, or null if none was specified; the whitespace in the public -identifier will have been normalized as required by the XML spec. -The context argument specifies the parsing context in the format -expected by the context argument to -XML_ExternalEntityParserCreate; context is valid only until the handler -returns, so if the referenced entity is to be parsed later, it must be copied. -The handler should return 0 if processing should not continue because of -a fatal error in the handling of the external entity. -In this case the calling parser will return an -XML_ERROR_EXTERNAL_ENTITY_HANDLING error. -Note that unlike other handlers the first argument is the parser, not -userData. */ - +/* This is called for a reference to an external parsed general + entity. The referenced entity is not automatically parsed. The + application can parse it immediately or later using + XML_ExternalEntityParserCreate. + + The parser argument is the parser parsing the entity containing the + reference; it can be passed as the parser argument to + XML_ExternalEntityParserCreate. The systemId argument is the + system identifier as specified in the entity declaration; it will + not be NULL. + + The base argument is the system identifier that should be used as + the base for resolving systemId if systemId was relative; this is + set by XML_SetBase; it may be NULL. + + The publicId argument is the public identifier as specified in the + entity declaration, or NULL if none was specified; the whitespace + in the public identifier will have been normalized as required by + the XML spec. + + The context argument specifies the parsing context in the format + expected by the context argument to XML_ExternalEntityParserCreate; + context is valid only until the handler returns, so if the + referenced entity is to be parsed later, it must be copied. + context is NULL only when the entity is a parameter entity. + + The handler should return XML_STATUS_ERROR if processing should not + continue because of a fatal error in the handling of the external + entity. In this case the calling parser will return an + XML_ERROR_EXTERNAL_ENTITY_HANDLING error. + + Note that unlike other handlers the first argument is the parser, + not userData. +*/ typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); -/* This structure is filled in by the XML_UnknownEncodingHandler -to provide information to the parser about encodings that are unknown -to the parser. -The map[b] member gives information about byte sequences -whose first byte is b. -If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar -value c. -If map[b] is -1, then the byte sequence is malformed. -If map[b] is -n, where n >= 2, then b is the first byte of an n-byte -sequence that encodes a single Unicode scalar value. -The data member will be passed as the first argument to the convert function. -The convert function is used to convert multibyte sequences; -s will point to a n-byte sequence where map[(unsigned char)*s] == -n. -The convert function must return the Unicode scalar value -represented by this byte sequence or -1 if the byte sequence is malformed. -The convert function may be null if the encoding is a single-byte encoding, -that is if map[b] >= -1 for all bytes b. -When the parser is finished with the encoding, then if release is not null, -it will call release passing it the data member; -once release has been called, the convert function will not be called again. - -Expat places certain restrictions on the encodings that are supported -using this mechanism. - -1. Every ASCII character that can appear in a well-formed XML document, -other than the characters - - $@\^`{}~ - -must be represented by a single byte, and that byte must be the -same byte that represents that character in ASCII. - -2. No character may require more than 4 bytes to encode. - -3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e., -characters that would be encoded by surrogates in UTF-16 are not -allowed). Note that this restriction doesn't apply to the built-in -support for UTF-8 and UTF-16. - -4. No Unicode character may be encoded by more than one distinct sequence -of bytes. */ +/* This is called in two situations: + 1) An entity reference is encountered for which no declaration + has been read *and* this is not an error. + 2) An internal entity reference is read, but not expanded, because + XML_SetDefaultHandler has been called. + Note: skipped parameter entities in declarations and skipped general + entities in attribute values cannot be reported, because + the event would be out of sync with the reporting of the + declarations or attribute values +*/ +typedef void (*XML_SkippedEntityHandler)(void *userData, + const XML_Char *entityName, + int is_parameter_entity); + +/* This structure is filled in by the XML_UnknownEncodingHandler to + provide information to the parser about encodings that are unknown + to the parser. + + The map[b] member gives information about byte sequences whose + first byte is b. + + If map[b] is c where c is >= 0, then b by itself encodes the + Unicode scalar value c. + + If map[b] is -1, then the byte sequence is malformed. + + If map[b] is -n, where n >= 2, then b is the first byte of an + n-byte sequence that encodes a single Unicode scalar value. + + The data member will be passed as the first argument to the convert + function. + + The convert function is used to convert multibyte sequences; s will + point to a n-byte sequence where map[(unsigned char)*s] == -n. The + convert function must return the Unicode scalar value represented + by this byte sequence or -1 if the byte sequence is malformed. + + The convert function may be NULL if the encoding is a single-byte + encoding, that is if map[b] >= -1 for all bytes b. + + When the parser is finished with the encoding, then if release is + not NULL, it will call release passing it the data member; once + release has been called, the convert function will not be called + again. + Expat places certain restrictions on the encodings that are supported + using this mechanism. + + 1. Every ASCII character that can appear in a well-formed XML document, + other than the characters + + $@\^`{}~ + + must be represented by a single byte, and that byte must be the + same byte that represents that character in ASCII. + + 2. No character may require more than 4 bytes to encode. + + 3. All characters encoded must have Unicode scalar values <= + 0xFFFF, (i.e., characters that would be encoded by surrogates in + UTF-16 are not allowed). Note that this restriction doesn't + apply to the built-in support for UTF-8 and UTF-16. + + 4. No Unicode character may be encoded by more than one distinct + sequence of bytes. +*/ typedef struct { int map[256]; void *data; @@ -379,24 +489,28 @@ typedef struct { } XML_Encoding; /* This is called for an encoding that is unknown to the parser. -The encodingHandlerData argument is that which was passed as the -second argument to XML_SetUnknownEncodingHandler. -The name argument gives the name of the encoding as specified in -the encoding declaration. -If the callback can provide information about the encoding, -it must fill in the XML_Encoding structure, and return 1. -Otherwise it must return 0. -If info does not describe a suitable encoding, -then the parser will return an XML_UNKNOWN_ENCODING error. */ + The encodingHandlerData argument is that which was passed as the + second argument to XML_SetUnknownEncodingHandler. + + The name argument gives the name of the encoding as specified in + the encoding declaration. + + If the callback can provide information about the encoding, it must + fill in the XML_Encoding structure, and return XML_STATUS_OK. + Otherwise it must return XML_STATUS_ERROR. + + If info does not describe a suitable encoding, then the parser will + return an XML_UNKNOWN_ENCODING error. +*/ typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); XMLPARSEAPI(void) XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end); + XML_StartElementHandler start, + XML_EndElementHandler end); XMLPARSEAPI(void) XML_SetStartElementHandler(XML_Parser, XML_StartElementHandler); @@ -406,19 +520,19 @@ XML_SetEndElementHandler(XML_Parser, XML_EndElementHandler); XMLPARSEAPI(void) XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler); + XML_CharacterDataHandler handler); XMLPARSEAPI(void) XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler); + XML_ProcessingInstructionHandler handler); XMLPARSEAPI(void) XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); XMLPARSEAPI(void) XML_SetCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end); + XML_StartCdataSectionHandler start, + XML_EndCdataSectionHandler end); XMLPARSEAPI(void) XML_SetStartCdataSectionHandler(XML_Parser parser, @@ -429,89 +543,98 @@ XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end); /* This sets the default handler and also inhibits expansion of -internal entities. The entity reference will be passed to the default -handler. */ - + internal entities. These entity references will be passed to the + default handler, or to the skipped entity handler, if one is set. +*/ XMLPARSEAPI(void) XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler); + XML_DefaultHandler handler); /* This sets the default handler but does not inhibit expansion of -internal entities. The entity reference will not be passed to the -default handler. */ - + internal entities. The entity reference will not be passed to the + default handler. +*/ XMLPARSEAPI(void) XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler); + XML_DefaultHandler handler); XMLPARSEAPI(void) XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, - XML_EndDoctypeDeclHandler end); + XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetStartDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start); + XML_StartDoctypeDeclHandler start); XMLPARSEAPI(void) XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end); + XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler); + XML_UnparsedEntityDeclHandler handler); XMLPARSEAPI(void) XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler); + XML_NotationDeclHandler handler); XMLPARSEAPI(void) XML_SetNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end); + XML_StartNamespaceDeclHandler start, + XML_EndNamespaceDeclHandler end); XMLPARSEAPI(void) XML_SetStartNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start); + XML_StartNamespaceDeclHandler start); XMLPARSEAPI(void) XML_SetEndNamespaceDeclHandler(XML_Parser parser, - XML_EndNamespaceDeclHandler end); + XML_EndNamespaceDeclHandler end); XMLPARSEAPI(void) XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler); + XML_NotStandaloneHandler handler); XMLPARSEAPI(void) XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler); + XML_ExternalEntityRefHandler handler); -/* If a non-null value for arg is specified here, then it will be passed -as the first argument to the external entity ref handler instead -of the parser object. */ +/* If a non-NULL value for arg is specified here, then it will be + passed as the first argument to the external entity ref handler + instead of the parser object. +*/ XMLPARSEAPI(void) XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg); XMLPARSEAPI(void) +XML_SetSkippedEntityHandler(XML_Parser parser, + XML_SkippedEntityHandler handler); + +XMLPARSEAPI(void) XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *encodingHandlerData); + XML_UnknownEncodingHandler handler, + void *encodingHandlerData); -/* This can be called within a handler for a start element, end element, -processing instruction or character data. It causes the corresponding -markup to be passed to the default handler. */ +/* This can be called within a handler for a start element, end + element, processing instruction or character data. It causes the + corresponding markup to be passed to the default handler. +*/ XMLPARSEAPI(void) XML_DefaultCurrent(XML_Parser parser); /* If do_nst is non-zero, and namespace processing is in effect, and a name has a prefix (i.e. an explicit namespace qualifier) then - that name is returned as a triplet in a single - string separated by the separator character specified when the parser - was created: URI + sep + local_name + sep + prefix. + that name is returned as a triplet in a single string separated by + the separator character specified when the parser was created: URI + + sep + local_name + sep + prefix. If do_nst is zero, then namespace information is returned in the - default manner (URI + sep + local_name) whether or not the names + default manner (URI + sep + local_name) whether or not the name has a prefix. + + Note: Calling XML_SetReturnNSTriplet after XML_Parse or + XML_ParseBuffer has no effect. */ XMLPARSEAPI(void) @@ -521,84 +644,130 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); XMLPARSEAPI(void) XML_SetUserData(XML_Parser parser, void *userData); -/* Returns the last value set by XML_SetUserData or null. */ +/* Returns the last value set by XML_SetUserData or NULL. */ #define XML_GetUserData(parser) (*(void **)(parser)) -/* This is equivalent to supplying an encoding argument -to XML_ParserCreate. It must not be called after XML_Parse -or XML_ParseBuffer. */ - -XMLPARSEAPI(int) +/* This is equivalent to supplying an encoding argument to + XML_ParserCreate. On success XML_SetEncoding returns non-zero, + zero otherwise. + Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer + has no effect and returns XML_STATUS_ERROR. +*/ +XMLPARSEAPI(enum XML_Status) XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); -/* If this function is called, then the parser will be passed -as the first argument to callbacks instead of userData. -The userData will still be accessible using XML_GetUserData. */ - +/* If this function is called, then the parser will be passed as the + first argument to callbacks instead of userData. The userData will + still be accessible using XML_GetUserData. +*/ XMLPARSEAPI(void) XML_UseParserAsHandlerArg(XML_Parser parser); -/* Sets the base to be used for resolving relative URIs in system -identifiers in declarations. Resolving relative identifiers is left -to the application: this value will be passed through as the base -argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler -and XML_UnparsedEntityDeclHandler. The base argument will be copied. -Returns zero if out of memory, non-zero otherwise. */ +/* If useDTD == XML_TRUE is passed to this function, then the parser + will assume that there is an external subset, even if none is + specified in the document. In such a case the parser will call the + externalEntityRefHandler with a value of NULL for the systemId + argument (the publicId and context arguments will be NULL as well). + Note: If this function is called, then this must be done before + the first call to XML_Parse or XML_ParseBuffer, since it will + have no effect after that. Returns + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING. + Note: If the document does not have a DOCTYPE declaration at all, + then startDoctypeDeclHandler and endDoctypeDeclHandler will not + be called, despite an external subset being parsed. + Note: If XML_DTD is not defined when Expat is compiled, returns + XML_ERROR_FEATURE_REQUIRES_XML_DTD. +*/ +XMLPARSEAPI(enum XML_Error) +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); -XMLPARSEAPI(int) + +/* Sets the base to be used for resolving relative URIs in system + identifiers in declarations. Resolving relative identifiers is + left to the application: this value will be passed through as the + base argument to the XML_ExternalEntityRefHandler, + XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base + argument will be copied. Returns XML_STATUS_ERROR if out of memory, + XML_STATUS_OK otherwise. +*/ +XMLPARSEAPI(enum XML_Status) XML_SetBase(XML_Parser parser, const XML_Char *base); XMLPARSEAPI(const XML_Char *) XML_GetBase(XML_Parser parser); /* Returns the number of the attribute/value pairs passed in last call -to the XML_StartElementHandler that were specified in the start-tag -rather than defaulted. Each attribute/value pair counts as 2; thus -this correspondds to an index into the atts array passed to the -XML_StartElementHandler. */ - + to the XML_StartElementHandler that were specified in the start-tag + rather than defaulted. Each attribute/value pair counts as 2; thus + this correspondds to an index into the atts array passed to the + XML_StartElementHandler. +*/ XMLPARSEAPI(int) XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to -XML_StartElementHandler, or -1 if there is no ID attribute. Each -attribute/value pair counts as 2; thus this correspondds to an index -into the atts array passed to the XML_StartElementHandler. */ - + XML_StartElementHandler, or -1 if there is no ID attribute. Each + attribute/value pair counts as 2; thus this correspondds to an + index into the atts array passed to the XML_StartElementHandler. +*/ XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); -/* Parses some input. Returns 0 if a fatal error is detected. -The last call to XML_Parse must have isFinal true; -len may be zero for this call (or any other). */ -XMLPARSEAPI(int) +/* Parses some input. Returns XML_STATUS_ERROR if a fatal error is + detected. The last call to XML_Parse must have isFinal true; len + may be zero for this call (or any other). + + The XML_Status enum gives the possible return values for the + XML_Parse and XML_ParseBuffer functions. Though the return values + for these functions has always been described as a Boolean value, + the implementation, at least for the 1.95.x series, has always + returned exactly one of these values. The preprocessor #defines + are included so this stanza can be added to code that still needs + to support older versions of Expat 1.95.x: + + #ifndef XML_STATUS_OK + #define XML_STATUS_OK 1 + #define XML_STATUS_ERROR 0 + #endif + + Otherwise, the #define hackery is quite ugly and would have been dropped. +*/ +enum XML_Status { + XML_STATUS_ERROR = 0, +#define XML_STATUS_ERROR XML_STATUS_ERROR + XML_STATUS_OK = 1 +#define XML_STATUS_OK XML_STATUS_OK +}; + +XMLPARSEAPI(enum XML_Status) XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); XMLPARSEAPI(void *) XML_GetBuffer(XML_Parser parser, int len); -XMLPARSEAPI(int) +XMLPARSEAPI(enum XML_Status) XML_ParseBuffer(XML_Parser parser, int len, int isFinal); /* Creates an XML_Parser object that can parse an external general -entity; context is a '\0'-terminated string specifying the parse -context; encoding is a '\0'-terminated string giving the name of the -externally specified encoding, or null if there is no externally -specified encoding. The context string consists of a sequence of -tokens separated by formfeeds (\f); a token consisting of a name -specifies that the general entity of the name is open; a token of the -form prefix=uri specifies the namespace for a particular prefix; a -token of the form =uri specifies the default namespace. This can be -called at any point after the first call to an -ExternalEntityRefHandler so longer as the parser has not yet been -freed. The new parser is completely independent and may safely be -used in a separate thread. The handlers and userData are initialized -from the parser argument. Returns 0 if out of memory. Otherwise -returns a new XML_Parser object. */ + entity; context is a '\0'-terminated string specifying the parse + context; encoding is a '\0'-terminated string giving the name of + the externally specified encoding, or NULL if there is no + externally specified encoding. The context string consists of a + sequence of tokens separated by formfeeds (\f); a token consisting + of a name specifies that the general entity of the name is open; a + token of the form prefix=uri specifies the namespace for a + particular prefix; a token of the form =uri specifies the default + namespace. This can be called at any point after the first call to + an ExternalEntityRefHandler so longer as the parser has not yet + been freed. The new parser is completely independent and may + safely be used in a separate thread. The handlers and userData are + initialized from the parser argument. Returns NULL if out of memory. + Otherwise returns a new XML_Parser object. +*/ XMLPARSEAPI(XML_Parser) XML_ExternalEntityParserCreate(XML_Parser parser, - const XML_Char *context, - const XML_Char *encoding); + const XML_Char *context, + const XML_Char *encoding); enum XML_ParamEntityParsing { XML_PARAM_ENTITY_PARSING_NEVER, @@ -607,76 +776,56 @@ enum XML_ParamEntityParsing { }; /* Controls parsing of parameter entities (including the external DTD -subset). If parsing of parameter entities is enabled, then references -to external parameter entities (including the external DTD subset) -will be passed to the handler set with -XML_SetExternalEntityRefHandler. The context passed will be 0. -Unlike external general entities, external parameter entities can only -be parsed synchronously. If the external parameter entity is to be -parsed, it must be parsed during the call to the external entity ref -handler: the complete sequence of XML_ExternalEntityParserCreate, -XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during -this call. After XML_ExternalEntityParserCreate has been called to -create the parser for the external parameter entity (context must be 0 -for this call), it is illegal to make any calls on the old parser -until XML_ParserFree has been called on the newly created parser. If -the library has been compiled without support for parameter entity -parsing (ie without XML_DTD being defined), then -XML_SetParamEntityParsing will return 0 if parsing of parameter -entities is requested; otherwise it will return non-zero. */ - + subset). If parsing of parameter entities is enabled, then + references to external parameter entities (including the external + DTD subset) will be passed to the handler set with + XML_SetExternalEntityRefHandler. The context passed will be 0. + + Unlike external general entities, external parameter entities can + only be parsed synchronously. If the external parameter entity is + to be parsed, it must be parsed during the call to the external + entity ref handler: the complete sequence of + XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and + XML_ParserFree calls must be made during this call. After + XML_ExternalEntityParserCreate has been called to create the parser + for the external parameter entity (context must be 0 for this + call), it is illegal to make any calls on the old parser until + XML_ParserFree has been called on the newly created parser. + If the library has been compiled without support for parameter + entity parsing (ie without XML_DTD being defined), then + XML_SetParamEntityParsing will return 0 if parsing of parameter + entities is requested; otherwise it will return non-zero. + Note: If XML_SetParamEntityParsing is called after XML_Parse or + XML_ParseBuffer, then it has no effect and will always return 0. +*/ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, - enum XML_ParamEntityParsing parsing); - -enum XML_Error { - XML_ERROR_NONE, - XML_ERROR_NO_MEMORY, - XML_ERROR_SYNTAX, - XML_ERROR_NO_ELEMENTS, - XML_ERROR_INVALID_TOKEN, - XML_ERROR_UNCLOSED_TOKEN, - XML_ERROR_PARTIAL_CHAR, - XML_ERROR_TAG_MISMATCH, - XML_ERROR_DUPLICATE_ATTRIBUTE, - XML_ERROR_JUNK_AFTER_DOC_ELEMENT, - XML_ERROR_PARAM_ENTITY_REF, - XML_ERROR_UNDEFINED_ENTITY, - XML_ERROR_RECURSIVE_ENTITY_REF, - XML_ERROR_ASYNC_ENTITY, - XML_ERROR_BAD_CHAR_REF, - XML_ERROR_BINARY_ENTITY_REF, - XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, - XML_ERROR_MISPLACED_XML_PI, - XML_ERROR_UNKNOWN_ENCODING, - XML_ERROR_INCORRECT_ENCODING, - XML_ERROR_UNCLOSED_CDATA_SECTION, - XML_ERROR_EXTERNAL_ENTITY_HANDLING, - XML_ERROR_NOT_STANDALONE, - XML_ERROR_UNEXPECTED_STATE -}; - -/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode -returns information about the error. */ + enum XML_ParamEntityParsing parsing); +/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then + XML_GetErrorCode returns information about the error. +*/ XMLPARSEAPI(enum XML_Error) XML_GetErrorCode(XML_Parser parser); -/* These functions return information about the current parse location. -They may be called when XML_Parse or XML_ParseBuffer return 0; -in this case the location is the location of the character at which -the error was detected. -They may also be called from any other callback called to report -some parse event; in this the location is the location of the first -of the sequence of characters that generated the event. */ - +/* These functions return information about the current parse + location. They may be called from any callback called to report + some parse event; in this case the location is the location of + the first of the sequence of characters that generated the event. + + They may also be called after returning from a call to XML_Parse + or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then + the location is the location of the character at which the error + was detected; otherwise the location is the location of the last + parse event, as described above. +*/ XMLPARSEAPI(int) XML_GetCurrentLineNumber(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentColumnNumber(XML_Parser parser); XMLPARSEAPI(long) XML_GetCurrentByteIndex(XML_Parser parser); /* Return the number of bytes in the current event. -Returns 0 if the event is in an internal entity. */ - + Returns 0 if the event is in an internal entity. +*/ XMLPARSEAPI(int) XML_GetCurrentByteCount(XML_Parser parser); @@ -684,21 +833,35 @@ XML_GetCurrentByteCount(XML_Parser parser); the integer pointed to by offset to the offset within this buffer of the current parse position, and sets the integer pointed to by size to the size of this buffer (the number of input bytes). Otherwise - returns a null pointer. Also returns a null pointer if a parse isn't + returns a NULL pointer. Also returns a NULL pointer if a parse isn't active. NOTE: The character pointer returned should not be used outside - the handler that makes the call. */ - + the handler that makes the call. +*/ XMLPARSEAPI(const char *) XML_GetInputContext(XML_Parser parser, - int *offset, - int *size); + int *offset, + int *size); /* For backwards compatibility with previous versions. */ -#define XML_GetErrorLineNumber XML_GetCurrentLineNumber +#define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber -#define XML_GetErrorByteIndex XML_GetCurrentByteIndex +#define XML_GetErrorByteIndex XML_GetCurrentByteIndex + +/* Frees the content model passed to the element declaration handler */ +XMLPARSEAPI(void) +XML_FreeContentModel(XML_Parser parser, XML_Content *model); + +/* Exposing the memory handling functions used in Expat */ +XMLPARSEAPI(void *) +XML_MemMalloc(XML_Parser parser, size_t size); + +XMLPARSEAPI(void *) +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); + +XMLPARSEAPI(void) +XML_MemFree(XML_Parser parser, void *ptr); /* Frees memory used by the parser. */ XMLPARSEAPI(void) @@ -706,7 +869,7 @@ XML_ParserFree(XML_Parser parser); /* Returns a string describing the error. */ XMLPARSEAPI(const XML_LChar *) -XML_ErrorString(int code); +XML_ErrorString(enum XML_Error code); /* Return a string containing the version number of this expat */ XMLPARSEAPI(const XML_LChar *) @@ -719,18 +882,42 @@ typedef struct { } XML_Expat_Version; /* Return an XML_Expat_Version structure containing numeric version - number information for this version of expat */ - + number information for this version of expat. +*/ XMLPARSEAPI(XML_Expat_Version) XML_ExpatVersionInfo(void); -/* VERSION is not defined in expat.h.in, but it really belongs here, - and defining it on the command line gives difficulties with MSVC. */ -#define VERSION "1.95.2" +/* Added in Expat 1.95.5. */ +enum XML_FeatureEnum { + XML_FEATURE_END = 0, + XML_FEATURE_UNICODE, + XML_FEATURE_UNICODE_WCHAR_T, + XML_FEATURE_DTD, + XML_FEATURE_CONTEXT_BYTES, + XML_FEATURE_MIN_SIZE, + XML_FEATURE_SIZEOF_XML_CHAR, + XML_FEATURE_SIZEOF_XML_LCHAR + /* Additional features must be added to the end of this enum. */ +}; + +typedef struct { + enum XML_FeatureEnum feature; + const XML_LChar *name; + long int value; +} XML_Feature; + +XMLPARSEAPI(const XML_Feature *) +XML_GetFeatureList(void); + +/* Expat follows the GNU/Linux convention of odd number minor version for + beta/development releases and even number minor version for stable + releases. Micro is bumped with each release, and set to 0 with each + change to major or minor version. +*/ #define XML_MAJOR_VERSION 1 #define XML_MINOR_VERSION 95 -#define XML_MICRO_VERSION 2 +#define XML_MICRO_VERSION 6 #ifdef __cplusplus } diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h new file mode 100644 index 0000000..1bf6baa --- /dev/null +++ b/Modules/expat/internal.h @@ -0,0 +1,73 @@ +/* internal.h + + Internal definitions used by Expat. This is not needed to compile + client code. + + The following calling convention macros are defined for frequently + called functions: + + FASTCALL - Used for those internal functions that have a simple + body and a low number of arguments and local variables. + + PTRCALL - Used for functions called though function pointers. + + PTRFASTCALL - Like PTRCALL, but for low number of arguments. + + inline - Used for selected internal functions for which inlining + may improve performance on some platforms. + + Note: Use of these macros is based on judgement, not hard rules, + and therefore subject to change. +*/ + +#if defined(__GNUC__) +/* Instability reported with egcs on a RedHat Linux 7.3. + Let's comment it out: + #define FASTCALL __attribute__((stdcall, regparm(3))) + and let's try this: +*/ +#define FASTCALL __attribute__((regparm(3))) +#define PTRCALL +#define PTRFASTCALL __attribute__((regparm(3))) + +#elif defined(WIN32) +/* Using __fastcall seems to have an unexpected negative effect under + MS VC++, especially for function pointers, so we won't use it for + now on that platform. It may be reconsidered for a future release + if it can be made more effective. + Likely reason: __fastcall on Windows is like stdcall, therefore + the compiler cannot perform stack optimizations for call clusters. +*/ +#define FASTCALL +#define PTRCALL +#define PTRFASTCALL + +#endif + +#ifndef FASTCALL +#define FASTCALL +#endif + +#ifndef PTRCALL +#define PTRCALL +#endif + +#ifndef PTRFASTCALL +#define PTRFASTCALL +#endif + +#ifndef XML_MIN_SIZE +#if !defined(__cplusplus) && !defined(inline) +#ifdef __GNUC__ +#define inline __inline +#endif /* __GNUC__ */ +#endif +#endif /* XML_MIN_SIZE */ + +#ifdef __cplusplus +#define inline inline +#else +#ifndef inline +#define inline +#endif +#endif diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index c46b601..c465446 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,32 +1,42 @@ -/* -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ +/* Added to look for memcpy */ +#include <Python.h> + +#include <stddef.h> +#include <string.h> /* memset(), memcpy() */ + #ifdef COMPILED_FROM_DSP -# include "winconfig.h" -# define XMLPARSEAPI(type) __declspec(dllexport) type __cdecl -# include "expat.h" -# undef XMLPARSEAPI + +#include "winconfig.h" +#define XMLPARSEAPI(type) type __cdecl +#include "expat.h" +#undef XMLPARSEAPI + +#elif defined(MACOS_CLASSIC) + +#include "macconfig.h" +#include "expat.h" + #else -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif + +/* Unused - MvL +#include <expat_config.h> +*/ #ifdef __declspec -# define XMLPARSEAPI(type) __declspec(dllexport) type __cdecl +#define XMLPARSEAPI(type) type __cdecl #endif #include "expat.h" #ifdef __declspec -# undef XMLPARSEAPI +#undef XMLPARSEAPI #endif #endif /* ndef COMPILED_FROM_DSP */ -#include <stddef.h> -#include <string.h> - #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX #define XmlConvert XmlUtf16Convert @@ -56,15 +66,36 @@ typedef char ICHAR; #endif +#ifdef XML_UNICODE + #ifdef XML_UNICODE_WCHAR_T -#define XML_T(x) L ## x +#define XML_T(x) (const wchar_t)x +#define XML_L(x) L ## x #else +#define XML_T(x) (const unsigned short)x +#define XML_L(x) x +#endif + +#else + #define XML_T(x) x +#define XML_L(x) x + #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) +/* Handle the case where memmove() doesn't exist. */ +#ifndef HAVE_MEMMOVE +#ifdef HAVE_BCOPY +#define memmove(d,s,l) bcopy((s),(d),(l)) +#else +#error memmove does not exist on this platform, nor is a substitute available +#endif /* HAVE_BCOPY */ +#endif /* HAVE_MEMMOVE */ + +#include "internal.h" #include "xmltok.h" #include "xmlrole.h" @@ -79,7 +110,7 @@ typedef struct { size_t size; size_t used; size_t usedLim; - XML_Memory_Handling_Suite *mem; + const XML_Memory_Handling_Suite *mem; } HASH_TABLE; typedef struct { @@ -113,16 +144,32 @@ typedef struct prefix { typedef struct { const XML_Char *str; const XML_Char *localPart; + const XML_Char *prefix; + int strLen; int uriLen; + int prefixLen; } TAG_NAME; +/* TAG represents an open element. + The name of the element is stored in both the document and API + encodings. The memory buffer 'buf' is a separately-allocated + memory area which stores the name. During the XML_Parse()/ + XMLParseBuffer() when the element is open, the memory for the 'raw' + version of the name (in the document encoding) is shared with the + document buffer. If the element is open across calls to + XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to + contain the 'raw' name as well. + + A parser re-uses these structures, maintaining a list of allocated + TAG objects in a free list. +*/ typedef struct tag { - struct tag *parent; - const char *rawName; + struct tag *parent; /* parent of this element */ + const char *rawName; /* tagName in the original encoding */ int rawNameLength; - TAG_NAME name; - char *buf; - char *bufEnd; + TAG_NAME name; /* tagName in the API encoding */ + char *buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; @@ -134,20 +181,23 @@ typedef struct { const XML_Char *base; const XML_Char *publicId; const XML_Char *notation; - char open; - char is_param; + XML_Bool open; + XML_Bool is_param; + XML_Bool is_internal; /* true if declared in internal subset outside PE */ } ENTITY; typedef struct { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - const XML_Char * name; - int firstchild; - int lastchild; - int childcnt; - int nextsib; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char * name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; } CONTENT_SCAFFOLD; +#define INIT_SCAFFOLD_ELEMENTS 32 + typedef struct block { struct block *next; int size; @@ -160,21 +210,21 @@ typedef struct { const XML_Char *end; XML_Char *ptr; XML_Char *start; - XML_Memory_Handling_Suite *mem; + const XML_Memory_Handling_Suite *mem; } STRING_POOL; /* The XML_Char before the name is used to determine whether -an attribute has been specified. */ + an attribute has been specified. */ typedef struct attribute_id { XML_Char *name; PREFIX *prefix; - char maybeTokenized; - char xmlns; + XML_Bool maybeTokenized; + XML_Bool xmlns; } ATTRIBUTE_ID; typedef struct { const ATTRIBUTE_ID *id; - char isCdata; + XML_Bool isCdata; const XML_Char *value; } DEFAULT_ATTRIBUTE; @@ -193,14 +243,21 @@ typedef struct { HASH_TABLE attributeIds; HASH_TABLE prefixes; STRING_POOL pool; - int complete; - int standalone; + STRING_POOL entityValuePool; + /* false once a parameter entity reference has been skipped */ + XML_Bool keepProcessing; + /* true once an internal or external PE reference has been encountered; + this includes the reference to an external subset */ + XML_Bool hasParamEntityRefs; + XML_Bool standalone; #ifdef XML_DTD + /* indicates if external PE has been read */ + XML_Bool paramEntityRead; HASH_TABLE paramEntities; #endif /* XML_DTD */ PREFIX defaultPrefix; /* === scaffolding for building content model === */ - int in_eldecl; + XML_Bool in_eldecl; CONTENT_SCAFFOLD *scaffold; unsigned contentStringLen; unsigned scaffSize; @@ -216,10 +273,10 @@ typedef struct open_internal_entity { ENTITY *entity; } OPEN_INTERNAL_ENTITY; -typedef enum XML_Error Processor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr); +typedef enum XML_Error PTRCALL Processor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; @@ -227,6 +284,10 @@ static Processor contentProcessor; static Processor cdataSectionProcessor; #ifdef XML_DTD static Processor ignoreSectionProcessor; +static Processor externalParEntProcessor; +static Processor externalParEntInitProcessor; +static Processor entityValueProcessor; +static Processor entityValueInitProcessor; #endif /* XML_DTD */ static Processor epilogProcessor; static Processor errorProcessor; @@ -238,94 +299,118 @@ static Processor externalEntityContentProcessor; static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); static enum XML_Error -processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *); +processXmlDecl(XML_Parser parser, int isGeneralTextEntity, + const char *, const char *); static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, - const char *end, int tok, const char *next, const char **nextPtr); + const char *end, int tok, const char *next, const char **nextPtr); static enum XML_Error processInternalParamEntity(XML_Parser parser, ENTITY *entity); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, - const char *start, const char *end, const char **endPtr); + const char *start, const char *end, const char **endPtr); static enum XML_Error -doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, + const char *end, const char **nextPtr); #ifdef XML_DTD static enum XML_Error -doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, + const char *end, const char **nextPtr); #endif /* XML_DTD */ -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, - TAG_NAME *tagNamePtr, BINDING **bindingsPtr); -static -int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); +static enum XML_Error +storeAtts(XML_Parser parser, const ENCODING *, + const char *s, TAG_NAME *tagNamePtr, BINDING **bindingsPtr); +static enum XML_Error +addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + const XML_Char *uri, BINDING **bindingsPtr); static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, - int isCdata, int isId, const XML_Char *dfltValue, - XML_Parser parser); - + XML_Bool isCdata, XML_Bool isId, const XML_Char *dfltValue, + XML_Parser parser); static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, - STRING_POOL *); +storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, + const char *, const char *, STRING_POOL *); static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, - STRING_POOL *); +appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, + const char *, const char *, STRING_POOL *); static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); +getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); +static int +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error -storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); +reportComment(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); - -static const XML_Char *getContext(XML_Parser parser); -static int setContext(XML_Parser parser, const XML_Char *context); -static void normalizePublicId(XML_Char *s); -static int dtdInit(DTD *, XML_Parser parser); - -static void dtdDestroy(DTD *, XML_Parser parser); - -static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser); - -static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *, - XML_Parser parser); - -#ifdef XML_DTD -static void dtdSwap(DTD *, DTD *); -#endif /* XML_DTD */ - -static NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize); - -static void hashTableInit(HASH_TABLE *, XML_Memory_Handling_Suite *ms); +reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, + const char *end); -static void hashTableDestroy(HASH_TABLE *); -static void hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -static NAMED *hashTableIterNext(HASH_TABLE_ITER *); -static void poolInit(STRING_POOL *, XML_Memory_Handling_Suite *ms); -static void poolClear(STRING_POOL *); -static void poolDestroy(STRING_POOL *); -static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); -static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end); +static const XML_Char * getContext(XML_Parser parser); +static XML_Bool +setContext(XML_Parser parser, const XML_Char *context); -static int poolGrow(STRING_POOL *pool); +static void FASTCALL normalizePublicId(XML_Char *s); -static int nextScaffoldPart(XML_Parser parser); -static XML_Content *build_model(XML_Parser parser); - -static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); -static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); -static const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s); -static ELEMENT_TYPE * getElementType(XML_Parser Paraser, - const ENCODING *enc, - const char *ptr, - const char *end); +static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms); +/* do not call if parentParser != NULL */ +static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); +static void +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); +static int +dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); +static int +copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); + +static NAMED * +lookup(HASH_TABLE *table, KEY name, size_t createSize); +static void FASTCALL +hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); +static void FASTCALL hashTableClear(HASH_TABLE *); +static void FASTCALL hashTableDestroy(HASH_TABLE *); +static void FASTCALL +hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); +static NAMED * FASTCALL hashTableIterNext(HASH_TABLE_ITER *); + +static void FASTCALL +poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolClear(STRING_POOL *); +static void FASTCALL poolDestroy(STRING_POOL *); +static XML_Char * +poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Char * +poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end); +static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); +static const XML_Char * FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s); +static const XML_Char * +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); +static const XML_Char * FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s); + +static int FASTCALL nextScaffoldPart(XML_Parser parser); +static XML_Content * build_model(XML_Parser parser); +static ELEMENT_TYPE * +getElementType(XML_Parser parser, const ENCODING *enc, + const char *ptr, const char *end); + +static XML_Parser +parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, + DTD *dtd); +static void +parserInit(XML_Parser parser, const XML_Char *encodingName); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -339,12 +424,13 @@ static ELEMENT_TYPE * getElementType(XML_Parser Paraser, ? 0 \ : ((*((pool)->ptr)++ = c), 1)) -typedef struct { - /* The first member must be userData so that the XML_GetUserData macro works. */ +struct XML_ParserStruct { + /* The first member must be userData so that the XML_GetUserData + macro works. */ void *m_userData; void *m_handlerArg; char *m_buffer; - XML_Memory_Handling_Suite m_mem; + const XML_Memory_Handling_Suite m_mem; /* first character to be parsed */ const char *m_bufferPtr; /* past last character to be parsed */ @@ -371,7 +457,8 @@ typedef struct { XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; XML_NotStandaloneHandler m_notStandaloneHandler; XML_ExternalEntityRefHandler m_externalEntityRefHandler; - void *m_externalEntityRefHandlerArg; + XML_Parser m_externalEntityRefHandlerArg; + XML_SkippedEntityHandler m_skippedEntityHandler; XML_UnknownEncodingHandler m_unknownEncodingHandler; XML_ElementDeclHandler m_elementDeclHandler; XML_AttlistDeclHandler m_attlistDeclHandler; @@ -381,8 +468,8 @@ typedef struct { INIT_ENCODING m_initEncoding; const ENCODING *m_internalEncoding; const XML_Char *m_protocolEncodingName; - int m_ns; - int m_ns_triplets; + XML_Bool m_ns; + XML_Bool m_ns_triplets; void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; @@ -394,7 +481,7 @@ typedef struct { const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; - int m_defaultExpandInternalEntities; + XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; const XML_Char *m_doctypeName; @@ -405,9 +492,9 @@ typedef struct { const XML_Char *m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; ATTRIBUTE_ID *m_declAttributeId; - char m_declAttributeIsCdata; - char m_declAttributeIsId; - DTD m_dtd; + XML_Bool m_declAttributeIsCdata; + XML_Bool m_declAttributeIsId; + DTD *m_dtd; const XML_Char *m_curBase; TAG *m_tagStack; TAG *m_freeTagList; @@ -422,304 +509,423 @@ typedef struct { STRING_POOL m_temp2Pool; char *m_groupConnector; unsigned m_groupSize; - int m_hadExternalDoctype; XML_Char m_namespaceSeparator; + XML_Parser m_parentParser; #ifdef XML_DTD + XML_Bool m_isParamEntity; + XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; - XML_Parser m_parentParser; #endif -} Parser; - -#define MALLOC(s) (((Parser *)parser)->m_mem.malloc_fcn((s))) -#define REALLOC(p,s) (((Parser *)parser)->m_mem.realloc_fcn((p),(s))) -#define FREE(p) (((Parser *)parser)->m_mem.free_fcn((p))) - -#define userData (((Parser *)parser)->m_userData) -#define handlerArg (((Parser *)parser)->m_handlerArg) -#define startElementHandler (((Parser *)parser)->m_startElementHandler) -#define endElementHandler (((Parser *)parser)->m_endElementHandler) -#define characterDataHandler (((Parser *)parser)->m_characterDataHandler) -#define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler) -#define commentHandler (((Parser *)parser)->m_commentHandler) -#define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler) -#define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler) -#define defaultHandler (((Parser *)parser)->m_defaultHandler) -#define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler) -#define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler) -#define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler) -#define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler) -#define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler) -#define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler) -#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler) -#define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler) -#define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg) -#define internalEntityRefHandler (((Parser *)parser)->m_internalEntityRefHandler) -#define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler) -#define elementDeclHandler (((Parser *)parser)->m_elementDeclHandler) -#define attlistDeclHandler (((Parser *)parser)->m_attlistDeclHandler) -#define entityDeclHandler (((Parser *)parser)->m_entityDeclHandler) -#define xmlDeclHandler (((Parser *)parser)->m_xmlDeclHandler) -#define encoding (((Parser *)parser)->m_encoding) -#define initEncoding (((Parser *)parser)->m_initEncoding) -#define internalEncoding (((Parser *)parser)->m_internalEncoding) -#define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem) -#define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData) +}; + +#define MALLOC(s) (parser->m_mem.malloc_fcn((s))) +#define REALLOC(p,s) (parser->m_mem.realloc_fcn((p),(s))) +#define FREE(p) (parser->m_mem.free_fcn((p))) + +#define userData (parser->m_userData) +#define handlerArg (parser->m_handlerArg) +#define startElementHandler (parser->m_startElementHandler) +#define endElementHandler (parser->m_endElementHandler) +#define characterDataHandler (parser->m_characterDataHandler) +#define processingInstructionHandler \ + (parser->m_processingInstructionHandler) +#define commentHandler (parser->m_commentHandler) +#define startCdataSectionHandler \ + (parser->m_startCdataSectionHandler) +#define endCdataSectionHandler (parser->m_endCdataSectionHandler) +#define defaultHandler (parser->m_defaultHandler) +#define startDoctypeDeclHandler (parser->m_startDoctypeDeclHandler) +#define endDoctypeDeclHandler (parser->m_endDoctypeDeclHandler) +#define unparsedEntityDeclHandler \ + (parser->m_unparsedEntityDeclHandler) +#define notationDeclHandler (parser->m_notationDeclHandler) +#define startNamespaceDeclHandler \ + (parser->m_startNamespaceDeclHandler) +#define endNamespaceDeclHandler (parser->m_endNamespaceDeclHandler) +#define notStandaloneHandler (parser->m_notStandaloneHandler) +#define externalEntityRefHandler \ + (parser->m_externalEntityRefHandler) +#define externalEntityRefHandlerArg \ + (parser->m_externalEntityRefHandlerArg) +#define internalEntityRefHandler \ + (parser->m_internalEntityRefHandler) +#define skippedEntityHandler (parser->m_skippedEntityHandler) +#define unknownEncodingHandler (parser->m_unknownEncodingHandler) +#define elementDeclHandler (parser->m_elementDeclHandler) +#define attlistDeclHandler (parser->m_attlistDeclHandler) +#define entityDeclHandler (parser->m_entityDeclHandler) +#define xmlDeclHandler (parser->m_xmlDeclHandler) +#define encoding (parser->m_encoding) +#define initEncoding (parser->m_initEncoding) +#define internalEncoding (parser->m_internalEncoding) +#define unknownEncodingMem (parser->m_unknownEncodingMem) +#define unknownEncodingData (parser->m_unknownEncodingData) #define unknownEncodingHandlerData \ - (((Parser *)parser)->m_unknownEncodingHandlerData) -#define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease) -#define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName) -#define ns (((Parser *)parser)->m_ns) -#define ns_triplets (((Parser *)parser)->m_ns_triplets) -#define prologState (((Parser *)parser)->m_prologState) -#define processor (((Parser *)parser)->m_processor) -#define errorCode (((Parser *)parser)->m_errorCode) -#define eventPtr (((Parser *)parser)->m_eventPtr) -#define eventEndPtr (((Parser *)parser)->m_eventEndPtr) -#define positionPtr (((Parser *)parser)->m_positionPtr) -#define position (((Parser *)parser)->m_position) -#define openInternalEntities (((Parser *)parser)->m_openInternalEntities) -#define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities) -#define tagLevel (((Parser *)parser)->m_tagLevel) -#define buffer (((Parser *)parser)->m_buffer) -#define bufferPtr (((Parser *)parser)->m_bufferPtr) -#define bufferEnd (((Parser *)parser)->m_bufferEnd) -#define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex) -#define parseEndPtr (((Parser *)parser)->m_parseEndPtr) -#define bufferLim (((Parser *)parser)->m_bufferLim) -#define dataBuf (((Parser *)parser)->m_dataBuf) -#define dataBufEnd (((Parser *)parser)->m_dataBufEnd) -#define dtd (((Parser *)parser)->m_dtd) -#define curBase (((Parser *)parser)->m_curBase) -#define declEntity (((Parser *)parser)->m_declEntity) -#define doctypeName (((Parser *)parser)->m_doctypeName) -#define doctypeSysid (((Parser *)parser)->m_doctypeSysid) -#define doctypePubid (((Parser *)parser)->m_doctypePubid) -#define declAttributeType (((Parser *)parser)->m_declAttributeType) -#define declNotationName (((Parser *)parser)->m_declNotationName) -#define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId) -#define declElementType (((Parser *)parser)->m_declElementType) -#define declAttributeId (((Parser *)parser)->m_declAttributeId) -#define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata) -#define declAttributeIsId (((Parser *)parser)->m_declAttributeIsId) -#define freeTagList (((Parser *)parser)->m_freeTagList) -#define freeBindingList (((Parser *)parser)->m_freeBindingList) -#define inheritedBindings (((Parser *)parser)->m_inheritedBindings) -#define tagStack (((Parser *)parser)->m_tagStack) -#define atts (((Parser *)parser)->m_atts) -#define attsSize (((Parser *)parser)->m_attsSize) -#define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts) -#define idAttIndex (((Parser *)parser)->m_idAttIndex) -#define tempPool (((Parser *)parser)->m_tempPool) -#define temp2Pool (((Parser *)parser)->m_temp2Pool) -#define groupConnector (((Parser *)parser)->m_groupConnector) -#define groupSize (((Parser *)parser)->m_groupSize) -#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype) -#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator) + (parser->m_unknownEncodingHandlerData) +#define unknownEncodingRelease (parser->m_unknownEncodingRelease) +#define protocolEncodingName (parser->m_protocolEncodingName) +#define ns (parser->m_ns) +#define ns_triplets (parser->m_ns_triplets) +#define prologState (parser->m_prologState) +#define processor (parser->m_processor) +#define errorCode (parser->m_errorCode) +#define eventPtr (parser->m_eventPtr) +#define eventEndPtr (parser->m_eventEndPtr) +#define positionPtr (parser->m_positionPtr) +#define position (parser->m_position) +#define openInternalEntities (parser->m_openInternalEntities) +#define defaultExpandInternalEntities \ + (parser->m_defaultExpandInternalEntities) +#define tagLevel (parser->m_tagLevel) +#define buffer (parser->m_buffer) +#define bufferPtr (parser->m_bufferPtr) +#define bufferEnd (parser->m_bufferEnd) +#define parseEndByteIndex (parser->m_parseEndByteIndex) +#define parseEndPtr (parser->m_parseEndPtr) +#define bufferLim (parser->m_bufferLim) +#define dataBuf (parser->m_dataBuf) +#define dataBufEnd (parser->m_dataBufEnd) +#define _dtd (parser->m_dtd) +#define curBase (parser->m_curBase) +#define declEntity (parser->m_declEntity) +#define doctypeName (parser->m_doctypeName) +#define doctypeSysid (parser->m_doctypeSysid) +#define doctypePubid (parser->m_doctypePubid) +#define declAttributeType (parser->m_declAttributeType) +#define declNotationName (parser->m_declNotationName) +#define declNotationPublicId (parser->m_declNotationPublicId) +#define declElementType (parser->m_declElementType) +#define declAttributeId (parser->m_declAttributeId) +#define declAttributeIsCdata (parser->m_declAttributeIsCdata) +#define declAttributeIsId (parser->m_declAttributeIsId) +#define freeTagList (parser->m_freeTagList) +#define freeBindingList (parser->m_freeBindingList) +#define inheritedBindings (parser->m_inheritedBindings) +#define tagStack (parser->m_tagStack) +#define atts (parser->m_atts) +#define attsSize (parser->m_attsSize) +#define nSpecifiedAtts (parser->m_nSpecifiedAtts) +#define idAttIndex (parser->m_idAttIndex) +#define tempPool (parser->m_tempPool) +#define temp2Pool (parser->m_temp2Pool) +#define groupConnector (parser->m_groupConnector) +#define groupSize (parser->m_groupSize) +#define namespaceSeparator (parser->m_namespaceSeparator) +#define parentParser (parser->m_parentParser) #ifdef XML_DTD -#define parentParser (((Parser *)parser)->m_parentParser) -#define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing) +#define isParamEntity (parser->m_isParamEntity) +#define useForeignDTD (parser->m_useForeignDTD) +#define paramEntityParsing (parser->m_paramEntityParsing) #endif /* XML_DTD */ -#ifdef COMPILED_FROM_DSP -BOOL WINAPI DllMain(HINSTANCE h, DWORD r, LPVOID p) { - return TRUE; -} -#endif /* def COMPILED_FROM_DSP */ - -#ifdef _MSC_VER -#ifdef _DEBUG -Parser *asParser(XML_Parser parser) -{ - return parser; -} -#endif -#endif +#define parsing \ + (parentParser \ + ? \ + (isParamEntity \ + ? \ + (processor != externalParEntInitProcessor) \ + : \ + (processor != externalEntityInitProcessor)) \ + : \ + (processor != prologInitProcessor)) -XML_Parser XML_ParserCreate(const XML_Char *encodingName) +XML_Parser +XML_ParserCreate(const XML_Char *encodingName) { return XML_ParserCreate_MM(encodingName, NULL, NULL); } -XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) +XML_Parser +XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { XML_Char tmp[2]; *tmp = nsSep; return XML_ParserCreate_MM(encodingName, NULL, tmp); } +static const XML_Char implicitContext[] = { + 'x', 'm', 'l', '=', 'h', 't', 't', 'p', ':', '/', '/', + 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', + 'X', 'M', 'L', '/', '1', '9', '9', '8', '/', + 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0' +}; + XML_Parser XML_ParserCreate_MM(const XML_Char *encodingName, - const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep) { - - XML_Parser parser; - static - const XML_Char implicitContext[] = { - XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='), - XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'), - XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'), - XML_T('.'), XML_T('w'), XML_T('3'), - XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'), - XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'), - XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'), - XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'), - XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'), - XML_T('\0') - }; + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep) +{ + XML_Parser parser = parserCreate(encodingName, memsuite, nameSep, NULL); + if (parser != NULL && ns) { + /* implicit context only set for root parser, since child + parsers (i.e. external entity parsers) will inherit it + */ + if (!setContext(parser, implicitContext)) { + XML_ParserFree(parser); + return NULL; + } + } + return parser; +} +static XML_Parser +parserCreate(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep, + DTD *dtd) +{ + XML_Parser parser; if (memsuite) { XML_Memory_Handling_Suite *mtemp; - parser = memsuite->malloc_fcn(sizeof(Parser)); - mtemp = &(((Parser *) parser)->m_mem); - mtemp->malloc_fcn = memsuite->malloc_fcn; - mtemp->realloc_fcn = memsuite->realloc_fcn; - mtemp->free_fcn = memsuite->free_fcn; + parser = (XML_Parser) + memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); + if (parser != NULL) { + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = memsuite->malloc_fcn; + mtemp->realloc_fcn = memsuite->realloc_fcn; + mtemp->free_fcn = memsuite->free_fcn; + } } else { XML_Memory_Handling_Suite *mtemp; - parser = malloc(sizeof(Parser)); - mtemp = &(((Parser *) parser)->m_mem); - mtemp->malloc_fcn = malloc; - mtemp->realloc_fcn = realloc; - mtemp->free_fcn = free; + parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); + if (parser != NULL) { + mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); + mtemp->malloc_fcn = malloc; + mtemp->realloc_fcn = realloc; + mtemp->free_fcn = free; + } } if (!parser) return parser; - processor = prologInitProcessor; - XmlPrologStateInit(&prologState); - userData = 0; - handlerArg = 0; - startElementHandler = 0; - endElementHandler = 0; - characterDataHandler = 0; - processingInstructionHandler = 0; - commentHandler = 0; - startCdataSectionHandler = 0; - endCdataSectionHandler = 0; - defaultHandler = 0; - startDoctypeDeclHandler = 0; - endDoctypeDeclHandler = 0; - unparsedEntityDeclHandler = 0; - notationDeclHandler = 0; - startNamespaceDeclHandler = 0; - endNamespaceDeclHandler = 0; - notStandaloneHandler = 0; - externalEntityRefHandler = 0; - externalEntityRefHandlerArg = parser; - unknownEncodingHandler = 0; - elementDeclHandler = 0; - attlistDeclHandler = 0; - entityDeclHandler = 0; - xmlDeclHandler = 0; - buffer = 0; - bufferPtr = 0; - bufferEnd = 0; - parseEndByteIndex = 0; - parseEndPtr = 0; - bufferLim = 0; - declElementType = 0; - declAttributeId = 0; - declEntity = 0; - doctypeName = 0; - doctypeSysid = 0; - doctypePubid = 0; - declAttributeType = 0; - declNotationName = 0; - declNotationPublicId = 0; - memset(&position, 0, sizeof(POSITION)); - errorCode = XML_ERROR_NONE; - eventPtr = 0; - eventEndPtr = 0; - positionPtr = 0; - openInternalEntities = 0; - tagLevel = 0; - tagStack = 0; - freeTagList = 0; - freeBindingList = 0; - inheritedBindings = 0; + + buffer = NULL; + bufferLim = NULL; + attsSize = INIT_ATTS_SIZE; - atts = MALLOC(attsSize * sizeof(ATTRIBUTE)); - nSpecifiedAtts = 0; - dataBuf = MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE)); + if (atts == NULL) { + FREE(parser); + return NULL; + } + dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + if (dataBuf == NULL) { + FREE(atts); + FREE(parser); + return NULL; + } + dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; + + if (dtd) + _dtd = dtd; + else { + _dtd = dtdCreate(&parser->m_mem); + if (_dtd == NULL) { + FREE(dataBuf); + FREE(atts); + FREE(parser); + return NULL; + } + } + + freeBindingList = NULL; + freeTagList = NULL; + groupSize = 0; - groupConnector = 0; - hadExternalDoctype = 0; - unknownEncodingMem = 0; - unknownEncodingRelease = 0; - unknownEncodingData = 0; - unknownEncodingHandlerData = 0; + groupConnector = NULL; + + unknownEncodingHandler = NULL; + unknownEncodingHandlerData = NULL; + namespaceSeparator = '!'; -#ifdef XML_DTD - parentParser = 0; - paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; -#endif - ns = 0; - ns_triplets = 0; - poolInit(&tempPool, &(((Parser *) parser)->m_mem)); - poolInit(&temp2Pool, &(((Parser *) parser)->m_mem)); - protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0; - curBase = 0; - if (!dtdInit(&dtd, parser) || !atts || !dataBuf - || (encodingName && !protocolEncodingName)) { + ns = XML_FALSE; + ns_triplets = XML_FALSE; + + poolInit(&tempPool, &(parser->m_mem)); + poolInit(&temp2Pool, &(parser->m_mem)); + parserInit(parser, encodingName); + + if (encodingName && !protocolEncodingName) { XML_ParserFree(parser); - return 0; + return NULL; } - dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; if (nameSep) { - XmlInitEncodingNS(&initEncoding, &encoding, 0); - ns = 1; + ns = XML_TRUE; internalEncoding = XmlGetInternalEncodingNS(); namespaceSeparator = *nameSep; - - if (! setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return 0; - } } else { - XmlInitEncoding(&initEncoding, &encoding, 0); internalEncoding = XmlGetInternalEncoding(); } return parser; -} /* End XML_ParserCreate_MM */ +} -int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) +static void +parserInit(XML_Parser parser, const XML_Char *encodingName) { - if (!encodingName) - protocolEncodingName = 0; + processor = prologInitProcessor; + XmlPrologStateInit(&prologState); + protocolEncodingName = (encodingName != NULL + ? poolCopyString(&tempPool, encodingName) + : NULL); + curBase = NULL; + XmlInitEncoding(&initEncoding, &encoding, 0); + userData = NULL; + handlerArg = NULL; + startElementHandler = NULL; + endElementHandler = NULL; + characterDataHandler = NULL; + processingInstructionHandler = NULL; + commentHandler = NULL; + startCdataSectionHandler = NULL; + endCdataSectionHandler = NULL; + defaultHandler = NULL; + startDoctypeDeclHandler = NULL; + endDoctypeDeclHandler = NULL; + unparsedEntityDeclHandler = NULL; + notationDeclHandler = NULL; + startNamespaceDeclHandler = NULL; + endNamespaceDeclHandler = NULL; + notStandaloneHandler = NULL; + externalEntityRefHandler = NULL; + externalEntityRefHandlerArg = parser; + skippedEntityHandler = NULL; + elementDeclHandler = NULL; + attlistDeclHandler = NULL; + entityDeclHandler = NULL; + xmlDeclHandler = NULL; + bufferPtr = buffer; + bufferEnd = buffer; + parseEndByteIndex = 0; + parseEndPtr = NULL; + declElementType = NULL; + declAttributeId = NULL; + declEntity = NULL; + doctypeName = NULL; + doctypeSysid = NULL; + doctypePubid = NULL; + declAttributeType = NULL; + declNotationName = NULL; + declNotationPublicId = NULL; + declAttributeIsCdata = XML_FALSE; + declAttributeIsId = XML_FALSE; + memset(&position, 0, sizeof(POSITION)); + errorCode = XML_ERROR_NONE; + eventPtr = NULL; + eventEndPtr = NULL; + positionPtr = NULL; + openInternalEntities = 0; + defaultExpandInternalEntities = XML_TRUE; + tagLevel = 0; + tagStack = NULL; + inheritedBindings = NULL; + nSpecifiedAtts = 0; + unknownEncodingMem = NULL; + unknownEncodingRelease = NULL; + unknownEncodingData = NULL; + parentParser = NULL; +#ifdef XML_DTD + isParamEntity = XML_FALSE; + useForeignDTD = XML_FALSE; + paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif +} + +/* moves list of bindings to freeBindingList */ +static void FASTCALL +moveToFreeBindingList(XML_Parser parser, BINDING *bindings) +{ + while (bindings) { + BINDING *b = bindings; + bindings = bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + } +} + +XML_Bool +XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) +{ + TAG *tStk; + if (parentParser) + return XML_FALSE; + /* move tagStack to freeTagList */ + tStk = tagStack; + while (tStk) { + TAG *tag = tStk; + tStk = tStk->parent; + tag->parent = freeTagList; + moveToFreeBindingList(parser, tag->bindings); + tag->bindings = NULL; + freeTagList = tag; + } + moveToFreeBindingList(parser, inheritedBindings); + if (unknownEncodingMem) + FREE(unknownEncodingMem); + if (unknownEncodingRelease) + unknownEncodingRelease(unknownEncodingData); + poolClear(&tempPool); + poolClear(&temp2Pool); + parserInit(parser, encodingName); + dtdReset(_dtd, &parser->m_mem); + return setContext(parser, implicitContext); +} + +enum XML_Status +XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) +{ + /* Block after XML_Parse()/XML_ParseBuffer() has been called. + XXX There's no way for the caller to determine which of the + XXX possible error cases caused the XML_STATUS_ERROR return. + */ + if (parsing) + return XML_STATUS_ERROR; + if (encodingName == NULL) + protocolEncodingName = NULL; else { protocolEncodingName = poolCopyString(&tempPool, encodingName); if (!protocolEncodingName) - return 0; + return XML_STATUS_ERROR; } - return 1; + return XML_STATUS_OK; } -XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, - const XML_Char *context, - const XML_Char *encodingName) +XML_Parser +XML_ExternalEntityParserCreate(XML_Parser oldParser, + const XML_Char *context, + const XML_Char *encodingName) { XML_Parser parser = oldParser; - DTD *oldDtd = &dtd; + DTD *newDtd = NULL; + DTD *oldDtd = _dtd; XML_StartElementHandler oldStartElementHandler = startElementHandler; XML_EndElementHandler oldEndElementHandler = endElementHandler; XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; - XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler; + XML_ProcessingInstructionHandler oldProcessingInstructionHandler + = processingInstructionHandler; XML_CommentHandler oldCommentHandler = commentHandler; - XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler; - XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler; + XML_StartCdataSectionHandler oldStartCdataSectionHandler + = startCdataSectionHandler; + XML_EndCdataSectionHandler oldEndCdataSectionHandler + = endCdataSectionHandler; XML_DefaultHandler oldDefaultHandler = defaultHandler; - XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler + = unparsedEntityDeclHandler; XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; - XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler; - XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler; + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler + = startNamespaceDeclHandler; + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler + = endNamespaceDeclHandler; XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; - XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; - XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; + XML_ExternalEntityRefHandler oldExternalEntityRefHandler + = externalEntityRefHandler; + XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler; + XML_UnknownEncodingHandler oldUnknownEncodingHandler + = unknownEncodingHandler; XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; @@ -728,27 +934,35 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, void *oldUserData = userData; void *oldHandlerArg = handlerArg; - int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; - void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; + XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities; + XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; #ifdef XML_DTD - int oldParamEntityParsing = paramEntityParsing; + enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing; + int oldInEntityValue = prologState.inEntityValue; #endif - int oldns_triplets = ns_triplets; + XML_Bool oldns_triplets = ns_triplets; + +#ifdef XML_DTD + if (!context) + newDtd = oldDtd; +#endif /* XML_DTD */ + /* Note that the magical uses of the pre-processor to make field + access look more like C++ require that `parser' be overwritten + here. This makes this function more painful to follow than it + would be otherwise. + */ if (ns) { XML_Char tmp[2]; - *tmp = namespaceSeparator; - parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem, - tmp); + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); } else { - parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem, - NULL); + parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); } if (!parser) - return 0; + return NULL; startElementHandler = oldStartElementHandler; endElementHandler = oldEndElementHandler; @@ -764,6 +978,7 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, endNamespaceDeclHandler = oldEndNamespaceDeclHandler; notStandaloneHandler = oldNotStandaloneHandler; externalEntityRefHandler = oldExternalEntityRefHandler; + skippedEntityHandler = oldSkippedEntityHandler; unknownEncodingHandler = oldUnknownEncodingHandler; elementDeclHandler = oldElementDeclHandler; attlistDeclHandler = oldAttlistDeclHandler; @@ -779,30 +994,38 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; ns_triplets = oldns_triplets; + parentParser = oldParser; #ifdef XML_DTD paramEntityParsing = oldParamEntityParsing; + prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(&dtd, oldDtd, parser) || !setContext(parser, context)) { + if (!dtdCopy(_dtd, oldDtd, &parser->m_mem) + || !setContext(parser, context)) { XML_ParserFree(parser); - return 0; + return NULL; } processor = externalEntityInitProcessor; #ifdef XML_DTD } else { - dtdSwap(&dtd, oldDtd); - parentParser = oldParser; + /* The DTD instance referenced by _dtd is shared between the document's + root parser and external PE parsers, therefore one does not need to + call setContext. In addition, one also *must* not call setContext, + because this would overwrite existing prefix->binding pointers in + _dtd with ones that get destroyed with the external PE parser. + This would leave those prefixes with dangling pointers. + */ + isParamEntity = XML_TRUE; XmlPrologStateInitExternalEntity(&prologState); - dtd.complete = 1; - hadExternalDoctype = 1; + processor = externalParEntInitProcessor; } #endif /* XML_DTD */ return parser; } -static -void destroyBindings(BINDING *bindings, XML_Parser parser) +static void FASTCALL +destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; @@ -814,15 +1037,16 @@ void destroyBindings(BINDING *bindings, XML_Parser parser) } } -void XML_ParserFree(XML_Parser parser) +void +XML_ParserFree(XML_Parser parser) { for (;;) { TAG *p; - if (tagStack == 0) { - if (freeTagList == 0) - break; + if (tagStack == NULL) { + if (freeTagList == NULL) + break; tagStack = freeTagList; - freeTagList = 0; + freeTagList = NULL; } p = tagStack; tagStack = tagStack->parent; @@ -835,13 +1059,14 @@ void XML_ParserFree(XML_Parser parser) poolDestroy(&tempPool); poolDestroy(&temp2Pool); #ifdef XML_DTD - if (parentParser) { - if (hadExternalDoctype) - dtd.complete = 0; - dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd); - } + /* external parameter entity parsers share the DTD structure + parser->m_dtd with the root parser, so we must not destroy it + */ + if (!isParamEntity && _dtd) +#else + if (_dtd) #endif /* XML_DTD */ - dtdDestroy(&dtd, parser); + dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem); FREE((void *)atts); if (groupConnector) FREE(groupConnector); @@ -855,17 +1080,37 @@ void XML_ParserFree(XML_Parser parser) FREE(parser); } -void XML_UseParserAsHandlerArg(XML_Parser parser) +void +XML_UseParserAsHandlerArg(XML_Parser parser) { handlerArg = parser; } +enum XML_Error +XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) +{ +#ifdef XML_DTD + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parsing) + return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; + useForeignDTD = useDTD; + return XML_ERROR_NONE; +#else + return XML_ERROR_FEATURE_REQUIRES_XML_DTD; +#endif +} + void -XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { - ns_triplets = do_nst; +XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parsing) + return; + ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } -void XML_SetUserData(XML_Parser parser, void *p) +void +XML_SetUserData(XML_Parser parser, void *p) { if (handlerArg == userData) handlerArg = userData = p; @@ -873,225 +1118,267 @@ void XML_SetUserData(XML_Parser parser, void *p) userData = p; } -int XML_SetBase(XML_Parser parser, const XML_Char *p) +enum XML_Status +XML_SetBase(XML_Parser parser, const XML_Char *p) { if (p) { - p = poolCopyString(&dtd.pool, p); + p = poolCopyString(&_dtd->pool, p); if (!p) - return 0; + return XML_STATUS_ERROR; curBase = p; } else - curBase = 0; - return 1; + curBase = NULL; + return XML_STATUS_OK; } -const XML_Char *XML_GetBase(XML_Parser parser) +const XML_Char * +XML_GetBase(XML_Parser parser) { return curBase; } -int XML_GetSpecifiedAttributeCount(XML_Parser parser) +int +XML_GetSpecifiedAttributeCount(XML_Parser parser) { return nSpecifiedAtts; } -int XML_GetIdAttributeIndex(XML_Parser parser) +int +XML_GetIdAttributeIndex(XML_Parser parser) { return idAttIndex; } -void XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end) +void +XML_SetElementHandler(XML_Parser parser, + XML_StartElementHandler start, + XML_EndElementHandler end) { startElementHandler = start; endElementHandler = end; } -void XML_SetStartElementHandler(XML_Parser parser, - XML_StartElementHandler start) { +void +XML_SetStartElementHandler(XML_Parser parser, + XML_StartElementHandler start) { startElementHandler = start; } -void XML_SetEndElementHandler(XML_Parser parser, - XML_EndElementHandler end) { +void +XML_SetEndElementHandler(XML_Parser parser, + XML_EndElementHandler end) { endElementHandler = end; } -void XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler) +void +XML_SetCharacterDataHandler(XML_Parser parser, + XML_CharacterDataHandler handler) { characterDataHandler = handler; } -void XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler) +void +XML_SetProcessingInstructionHandler(XML_Parser parser, + XML_ProcessingInstructionHandler handler) { processingInstructionHandler = handler; } -void XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler) +void +XML_SetCommentHandler(XML_Parser parser, + XML_CommentHandler handler) { commentHandler = handler; } -void XML_SetCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end) +void +XML_SetCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start, + XML_EndCdataSectionHandler end) { startCdataSectionHandler = start; endCdataSectionHandler = end; } -void XML_SetStartCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start) { +void +XML_SetStartCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start) { startCdataSectionHandler = start; } -void XML_SetEndCdataSectionHandler(XML_Parser parser, - XML_EndCdataSectionHandler end) { +void +XML_SetEndCdataSectionHandler(XML_Parser parser, + XML_EndCdataSectionHandler end) { endCdataSectionHandler = end; } -void XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler) +void +XML_SetDefaultHandler(XML_Parser parser, + XML_DefaultHandler handler) { defaultHandler = handler; - defaultExpandInternalEntities = 0; + defaultExpandInternalEntities = XML_FALSE; } -void XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler) +void +XML_SetDefaultHandlerExpand(XML_Parser parser, + XML_DefaultHandler handler) { defaultHandler = handler; - defaultExpandInternalEntities = 1; + defaultExpandInternalEntities = XML_TRUE; } -void XML_SetDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start, - XML_EndDoctypeDeclHandler end) +void +XML_SetDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end) { startDoctypeDeclHandler = start; endDoctypeDeclHandler = end; } -void XML_SetStartDoctypeDeclHandler(XML_Parser parser, - XML_StartDoctypeDeclHandler start) { +void +XML_SetStartDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start) { startDoctypeDeclHandler = start; } -void XML_SetEndDoctypeDeclHandler(XML_Parser parser, - XML_EndDoctypeDeclHandler end) { +void +XML_SetEndDoctypeDeclHandler(XML_Parser parser, + XML_EndDoctypeDeclHandler end) { endDoctypeDeclHandler = end; } -void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler) +void +XML_SetUnparsedEntityDeclHandler(XML_Parser parser, + XML_UnparsedEntityDeclHandler handler) { unparsedEntityDeclHandler = handler; } -void XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler) +void +XML_SetNotationDeclHandler(XML_Parser parser, + XML_NotationDeclHandler handler) { notationDeclHandler = handler; } -void XML_SetNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end) +void +XML_SetNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start, + XML_EndNamespaceDeclHandler end) { startNamespaceDeclHandler = start; endNamespaceDeclHandler = end; } -void XML_SetStartNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start) { +void +XML_SetStartNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start) { startNamespaceDeclHandler = start; } -void XML_SetEndNamespaceDeclHandler(XML_Parser parser, - XML_EndNamespaceDeclHandler end) { +void +XML_SetEndNamespaceDeclHandler(XML_Parser parser, + XML_EndNamespaceDeclHandler end) { endNamespaceDeclHandler = end; } - -void XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler) +void +XML_SetNotStandaloneHandler(XML_Parser parser, + XML_NotStandaloneHandler handler) { notStandaloneHandler = handler; } -void XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler) +void +XML_SetExternalEntityRefHandler(XML_Parser parser, + XML_ExternalEntityRefHandler handler) { externalEntityRefHandler = handler; } -void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) +void +XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { if (arg) - externalEntityRefHandlerArg = arg; + externalEntityRefHandlerArg = (XML_Parser)arg; else externalEntityRefHandlerArg = parser; } -void XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *data) +void +XML_SetSkippedEntityHandler(XML_Parser parser, + XML_SkippedEntityHandler handler) +{ + skippedEntityHandler = handler; +} + +void +XML_SetUnknownEncodingHandler(XML_Parser parser, + XML_UnknownEncodingHandler handler, + void *data) { unknownEncodingHandler = handler; unknownEncodingHandlerData = data; } -void XML_SetElementDeclHandler(XML_Parser parser, - XML_ElementDeclHandler eldecl) +void +XML_SetElementDeclHandler(XML_Parser parser, + XML_ElementDeclHandler eldecl) { elementDeclHandler = eldecl; } -void XML_SetAttlistDeclHandler(XML_Parser parser, - XML_AttlistDeclHandler attdecl) +void +XML_SetAttlistDeclHandler(XML_Parser parser, + XML_AttlistDeclHandler attdecl) { attlistDeclHandler = attdecl; } -void XML_SetEntityDeclHandler(XML_Parser parser, - XML_EntityDeclHandler handler) +void +XML_SetEntityDeclHandler(XML_Parser parser, + XML_EntityDeclHandler handler) { entityDeclHandler = handler; } -void XML_SetXmlDeclHandler(XML_Parser parser, - XML_XmlDeclHandler handler) { +void +XML_SetXmlDeclHandler(XML_Parser parser, + XML_XmlDeclHandler handler) { xmlDeclHandler = handler; } -int XML_SetParamEntityParsing(XML_Parser parser, - enum XML_ParamEntityParsing parsing) +int +XML_SetParamEntityParsing(XML_Parser parser, + enum XML_ParamEntityParsing peParsing) { + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parsing) + return 0; #ifdef XML_DTD - paramEntityParsing = parsing; + paramEntityParsing = peParsing; return 1; #else - return parsing == XML_PARAM_ENTITY_PARSING_NEVER; + return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; #endif } -int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) +enum XML_Status +XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { if (len == 0) { if (!isFinal) - return 1; + return XML_STATUS_OK; positionPtr = bufferPtr; errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0); if (errorCode == XML_ERROR_NONE) - return 1; + return XML_STATUS_OK; eventEndPtr = eventPtr; processor = errorProcessor; - return 0; + return XML_STATUS_ERROR; } #ifndef XML_CONTEXT_BYTES else if (bufferPtr == bufferEnd) { @@ -1102,66 +1389,83 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) if (isFinal) { errorCode = processor(parser, s, parseEndPtr = s + len, 0); if (errorCode == XML_ERROR_NONE) - return 1; + return XML_STATUS_OK; eventEndPtr = eventPtr; processor = errorProcessor; - return 0; + return XML_STATUS_ERROR; } errorCode = processor(parser, s, parseEndPtr = s + len, &end); if (errorCode != XML_ERROR_NONE) { eventEndPtr = eventPtr; processor = errorProcessor; - return 0; + return XML_STATUS_ERROR; } XmlUpdatePosition(encoding, positionPtr, end, &position); + positionPtr = end; nLeftOver = s + len - end; if (nLeftOver) { - if (buffer == 0 || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - buffer = buffer == 0 ? MALLOC(len * 2) : REALLOC(buffer, len * 2); - /* FIXME storage leak if realloc fails */ - if (!buffer) { - errorCode = XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = 0; - processor = errorProcessor; - return 0; - } - bufferLim = buffer + len * 2; + if (buffer == NULL || nLeftOver > bufferLim - buffer) { + /* FIXME avoid integer overflow */ + char *temp; + temp = (buffer == NULL + ? (char *)MALLOC(len * 2) + : (char *)REALLOC(buffer, len * 2)); + if (temp == NULL) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + buffer = temp; + if (!buffer) { + errorCode = XML_ERROR_NO_MEMORY; + eventPtr = eventEndPtr = NULL; + processor = errorProcessor; + return XML_STATUS_ERROR; + } + bufferLim = buffer + len * 2; } memcpy(buffer, end, nLeftOver); bufferPtr = buffer; bufferEnd = buffer + nLeftOver; } - return 1; + return XML_STATUS_OK; } #endif /* not defined XML_CONTEXT_BYTES */ else { - memcpy(XML_GetBuffer(parser, len), s, len); - return XML_ParseBuffer(parser, len, isFinal); + void *buff = XML_GetBuffer(parser, len); + if (buff == NULL) + return XML_STATUS_ERROR; + else { + memcpy(buff, s, len); + return XML_ParseBuffer(parser, len, isFinal); + } } } -int XML_ParseBuffer(XML_Parser parser, int len, int isFinal) +enum XML_Status +XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start = bufferPtr; positionPtr = start; bufferEnd += len; parseEndByteIndex += len; errorCode = processor(parser, start, parseEndPtr = bufferEnd, - isFinal ? (const char **)0 : &bufferPtr); + isFinal ? (const char **)NULL : &bufferPtr); if (errorCode == XML_ERROR_NONE) { - if (!isFinal) + if (!isFinal) { XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - return 1; + positionPtr = bufferPtr; + } + return XML_STATUS_OK; } else { eventEndPtr = eventPtr; processor = errorProcessor; - return 0; + return XML_STATUS_ERROR; } } -void *XML_GetBuffer(XML_Parser parser, int len) +void * +XML_GetBuffer(XML_Parser parser, int len) { if (len > bufferLim - bufferEnd) { /* FIXME avoid integer overflow */ @@ -1176,10 +1480,10 @@ void *XML_GetBuffer(XML_Parser parser, int len) if (neededSize <= bufferLim - buffer) { #ifdef XML_CONTEXT_BYTES if (keep < bufferPtr - buffer) { - int offset = (bufferPtr - buffer) - keep; - memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); - bufferEnd -= offset; - bufferPtr -= offset; + int offset = (bufferPtr - buffer) - keep; + memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); + bufferEnd -= offset; + bufferPtr -= offset; } #else memmove(buffer, bufferPtr, bufferEnd - bufferPtr); @@ -1191,35 +1495,35 @@ void *XML_GetBuffer(XML_Parser parser, int len) char *newBuf; int bufferSize = bufferLim - bufferPtr; if (bufferSize == 0) - bufferSize = INIT_BUFFER_SIZE; + bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; + bufferSize *= 2; } while (bufferSize < neededSize); - newBuf = MALLOC(bufferSize); + newBuf = (char *)MALLOC(bufferSize); if (newBuf == 0) { - errorCode = XML_ERROR_NO_MEMORY; - return 0; + errorCode = XML_ERROR_NO_MEMORY; + return NULL; } bufferLim = newBuf + bufferSize; #ifdef XML_CONTEXT_BYTES if (bufferPtr) { - int keep = bufferPtr - buffer; - if (keep > XML_CONTEXT_BYTES) - keep = XML_CONTEXT_BYTES; - memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); - FREE(buffer); - buffer = newBuf; - bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; - bufferPtr = buffer + keep; + int keep = bufferPtr - buffer; + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); + FREE(buffer); + buffer = newBuf; + bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; + bufferPtr = buffer + keep; } else { - bufferEnd = newBuf + (bufferEnd - bufferPtr); - bufferPtr = buffer = newBuf; + bufferEnd = newBuf + (bufferEnd - bufferPtr); + bufferPtr = buffer = newBuf; } #else if (bufferPtr) { - memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); - FREE(buffer); + memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); + FREE(buffer); } bufferEnd = newBuf + (bufferEnd - bufferPtr); bufferPtr = buffer = newBuf; @@ -1229,26 +1533,30 @@ void *XML_GetBuffer(XML_Parser parser, int len) return bufferEnd; } -enum XML_Error XML_GetErrorCode(XML_Parser parser) +enum XML_Error +XML_GetErrorCode(XML_Parser parser) { return errorCode; } -long XML_GetCurrentByteIndex(XML_Parser parser) +long +XML_GetCurrentByteIndex(XML_Parser parser) { if (eventPtr) return parseEndByteIndex - (parseEndPtr - eventPtr); return -1; } -int XML_GetCurrentByteCount(XML_Parser parser) +int +XML_GetCurrentByteCount(XML_Parser parser) { if (eventEndPtr && eventPtr) return eventEndPtr - eventPtr; return 0; } -const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size) +const char * +XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES if (eventPtr && buffer) { @@ -1260,7 +1568,8 @@ const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size) return (char *) 0; } -int XML_GetCurrentLineNumber(XML_Parser parser) +int +XML_GetCurrentLineNumber(XML_Parser parser) { if (eventPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); @@ -1269,7 +1578,8 @@ int XML_GetCurrentLineNumber(XML_Parser parser) return position.lineNumber + 1; } -int XML_GetCurrentColumnNumber(XML_Parser parser) +int +XML_GetCurrentColumnNumber(XML_Parser parser) { if (eventPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); @@ -1278,59 +1588,104 @@ int XML_GetCurrentColumnNumber(XML_Parser parser) return position.columnNumber; } -void XML_DefaultCurrent(XML_Parser parser) +void +XML_FreeContentModel(XML_Parser parser, XML_Content *model) +{ + FREE(model); +} + +void * +XML_MemMalloc(XML_Parser parser, size_t size) +{ + return MALLOC(size); +} + +void * +XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) +{ + return REALLOC(ptr, size); +} + +void +XML_MemFree(XML_Parser parser, void *ptr) +{ + FREE(ptr); +} + +void +XML_DefaultCurrent(XML_Parser parser) { if (defaultHandler) { if (openInternalEntities) reportDefault(parser, - internalEncoding, - openInternalEntities->internalEventPtr, - openInternalEntities->internalEventEndPtr); + internalEncoding, + openInternalEntities->internalEventPtr, + openInternalEntities->internalEventEndPtr); else reportDefault(parser, encoding, eventPtr, eventEndPtr); } } -const XML_LChar *XML_ErrorString(int code) +const XML_LChar * +XML_ErrorString(enum XML_Error code) { static const XML_LChar *message[] = { 0, - XML_T("out of memory"), - XML_T("syntax error"), - XML_T("no element found"), - XML_T("not well-formed (invalid token)"), - XML_T("unclosed token"), - XML_T("unclosed token"), - XML_T("mismatched tag"), - XML_T("duplicate attribute"), - XML_T("junk after document element"), - XML_T("illegal parameter entity reference"), - XML_T("undefined entity"), - XML_T("recursive entity reference"), - XML_T("asynchronous entity"), - XML_T("reference to invalid character number"), - XML_T("reference to binary entity"), - XML_T("reference to external entity in attribute"), - XML_T("xml processing instruction not at start of external entity"), - XML_T("unknown encoding"), - XML_T("encoding specified in XML declaration is incorrect"), - XML_T("unclosed CDATA section"), - XML_T("error in processing external entity reference"), - XML_T("document is not standalone"), - XML_T("unexpected parser state - please send a bug report") + XML_L("out of memory"), + XML_L("syntax error"), + XML_L("no element found"), + XML_L("not well-formed (invalid token)"), + XML_L("unclosed token"), + XML_L("partial character"), + XML_L("mismatched tag"), + XML_L("duplicate attribute"), + XML_L("junk after document element"), + XML_L("illegal parameter entity reference"), + XML_L("undefined entity"), + XML_L("recursive entity reference"), + XML_L("asynchronous entity"), + XML_L("reference to invalid character number"), + XML_L("reference to binary entity"), + XML_L("reference to external entity in attribute"), + XML_L("xml declaration not at start of external entity"), + XML_L("unknown encoding"), + XML_L("encoding specified in XML declaration is incorrect"), + XML_L("unclosed CDATA section"), + XML_L("error in processing external entity reference"), + XML_L("document is not standalone"), + XML_L("unexpected parser state - please send a bug report"), + XML_L("entity declared in parameter entity"), + XML_L("requested feature requires XML_DTD support in Expat"), + XML_L("cannot change setting once parsing has begun") }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; - return 0; + return NULL; } const XML_LChar * XML_ExpatVersion(void) { - return VERSION; + + /* V1 is used to string-ize the version number. However, it would + string-ize the actual version macro *names* unless we get them + substituted before being passed to V1. CPP is defined to expand + a macro, then rescan for more expansions. Thus, we use V2 to expand + the version macros, then CPP will expand the resulting V1() macro + with the correct numerals. */ + /* ### I'm assuming cpp is portable in this respect... */ + +#define V1(a,b,c) XML_L(#a)XML_L(".")XML_L(#b)XML_L(".")XML_L(#c) +#define V2(a,b,c) XML_L("expat_")V1(a,b,c) + + return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); + +#undef V1 +#undef V2 } XML_Expat_Version -XML_ExpatVersionInfo(void) { +XML_ExpatVersionInfo(void) +{ XML_Expat_Version version; version.major = XML_MAJOR_VERSION; @@ -1340,20 +1695,106 @@ XML_ExpatVersionInfo(void) { return version; } -static -enum XML_Error contentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +const XML_Feature * +XML_GetFeatureList(void) { - return doContent(parser, 0, encoding, start, end, endPtr); + static XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)")}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)")}, +#ifdef XML_UNICODE + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE")}, +#endif +#ifdef XML_UNICODE_WCHAR_T + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T")}, +#endif +#ifdef XML_DTD + {XML_FEATURE_DTD, XML_L("XML_DTD")}, +#endif +#ifdef XML_CONTEXT_BYTES + {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), + XML_CONTEXT_BYTES}, +#endif +#ifdef XML_MIN_SIZE + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE")}, +#endif + {XML_FEATURE_END, NULL} + }; + + features[0].value = sizeof(XML_Char); + features[1].value = sizeof(XML_LChar); + return features; +} + +/* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more + permanent location, since the parse buffer is about to be discarded. +*/ +static XML_Bool +storeRawNames(XML_Parser parser) +{ + TAG *tag = tagStack; + while (tag) { + int bufSize; + int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + char *rawNameBuf = tag->buf + nameLen; + /* Stop if already stored. Since tagStack is a stack, we can stop + at the first entry that has already been copied; everything + below it in the stack is already been accounted for in a + previous call to this function. + */ + if (tag->rawName == rawNameBuf) + break; + /* For re-use purposes we need to ensure that the + size of tag->buf is a multiple of sizeof(XML_Char). + */ + bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); + if (bufSize > tag->bufEnd - tag->buf) { + char *temp = (char *)REALLOC(tag->buf, bufSize); + if (temp == NULL) + return XML_FALSE; + /* if tag->name.str points to tag->buf (only when namespace + processing is off) then we have to update it + */ + if (tag->name.str == (XML_Char *)tag->buf) + tag->name.str = (XML_Char *)temp; + /* if tag->name.localPart is set (when namespace processing is on) + then update it as well, since it will always point into tag->buf + */ + if (tag->name.localPart) + tag->name.localPart = (XML_Char *)temp + (tag->name.localPart - + (XML_Char *)tag->buf); + tag->buf = temp; + tag->bufEnd = temp + bufSize; + rawNameBuf = temp + nameLen; + } + memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); + tag->rawName = rawNameBuf; + tag = tag->parent; + } + return XML_TRUE; +} + +static enum XML_Error PTRCALL +contentProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) +{ + enum XML_Error result = + doContent(parser, 0, encoding, start, end, endPtr); + if (result != XML_ERROR_NONE) + return result; + if (!storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + return result; } -static -enum XML_Error externalEntityInitProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityInitProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) @@ -1362,16 +1803,25 @@ enum XML_Error externalEntityInitProcessor(XML_Parser parser, return externalEntityInitProcessor2(parser, start, end, endPtr); } -static -enum XML_Error externalEntityInitProcessor2(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityInitProcessor2(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - const char *next; + const char *next = start; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: + /* If we are at the end of the buffer, this would cause the next stage, + i.e. externalEntityInitProcessor3, to pass control directly to + doContent (by detecting XML_TOK_NONE) without processing any xml text + declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. + */ + if (next == end && endPtr) { + *endPtr = next; + return XML_ERROR_NONE; + } start = next; break; case XML_TOK_PARTIAL: @@ -1393,20 +1843,20 @@ enum XML_Error externalEntityInitProcessor2(XML_Parser parser, return externalEntityInitProcessor3(parser, start, end, endPtr); } -static -enum XML_Error externalEntityInitProcessor3(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityInitProcessor3(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - const char *next; + const char *next = start; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(encoding, start, end, &next); switch (tok) { case XML_TOK_XML_DECL: { enum XML_Error result = processXmlDecl(parser, 1, start, next); if (result != XML_ERROR_NONE) - return result; + return result; start = next; } break; @@ -1427,26 +1877,33 @@ enum XML_Error externalEntityInitProcessor3(XML_Parser parser, } processor = externalEntityContentProcessor; tagLevel = 1; - return doContent(parser, 1, encoding, start, end, endPtr); + return externalEntityContentProcessor(parser, start, end, endPtr); } -static -enum XML_Error externalEntityContentProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) +static enum XML_Error PTRCALL +externalEntityContentProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - return doContent(parser, 1, encoding, start, end, endPtr); + enum XML_Error result = + doContent(parser, 1, encoding, start, end, endPtr); + if (result != XML_ERROR_NONE) + return result; + if (!storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + return result; } static enum XML_Error doContent(XML_Parser parser, - int startTagLevel, - const ENCODING *enc, - const char *s, - const char *end, - const char **nextPtr) + int startTagLevel, + const ENCODING *enc, + const char *s, + const char *end, + const char **nextPtr) { + DTD * const dtd = _dtd; /* save one level of indirection */ const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -1465,30 +1922,30 @@ doContent(XML_Parser parser, switch (tok) { case XML_TOK_TRAILING_CR: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } *eventEndPP = end; if (characterDataHandler) { - XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); + XML_Char c = 0xA; + characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) - reportDefault(parser, enc, s, end); + reportDefault(parser, enc, s, end); if (startTagLevel == 0) - return XML_ERROR_NO_ELEMENTS; + return XML_ERROR_NO_ELEMENTS; if (tagLevel != startTagLevel) - return XML_ERROR_ASYNC_ENTITY; + return XML_ERROR_ASYNC_ENTITY; return XML_ERROR_NONE; case XML_TOK_NONE: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } if (startTagLevel > 0) { - if (tagLevel != startTagLevel) - return XML_ERROR_ASYNC_ENTITY; - return XML_ERROR_NONE; + if (tagLevel != startTagLevel) + return XML_ERROR_ASYNC_ENTITY; + return XML_ERROR_NONE; } return XML_ERROR_NO_ELEMENTS; case XML_TOK_INVALID: @@ -1496,374 +1953,401 @@ doContent(XML_Parser parser, return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_ENTITY_REF: { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = XmlPredefinedEntityName(enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (characterDataHandler) - characterDataHandler(handlerArg, &ch, 1); - else if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - name = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); - poolDiscard(&dtd.pool); - if (!entity) { - if (dtd.complete || dtd.standalone) - return XML_ERROR_UNDEFINED_ENTITY; - if (defaultHandler) - reportDefault(parser, enc, s, next); - break; - } - if (entity->open) - return XML_ERROR_RECURSIVE_ENTITY_REF; - if (entity->notation) - return XML_ERROR_BINARY_ENTITY_REF; - if (entity) { - if (entity->textPtr) { - enum XML_Error result; - OPEN_INTERNAL_ENTITY openEntity; - if (defaultHandler && !defaultExpandInternalEntities) { - reportDefault(parser, enc, s, next); - break; - } - entity->open = 1; - openEntity.next = openInternalEntities; - openInternalEntities = &openEntity; - openEntity.entity = entity; - openEntity.internalEventPtr = 0; - openEntity.internalEventEndPtr = 0; - result = doContent(parser, - tagLevel, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr + entity->textLen), - 0); - entity->open = 0; - openInternalEntities = openEntity.next; - if (result) - return result; - } - else if (externalEntityRefHandler) { - const XML_Char *context; - entity->open = 1; - context = getContext(parser); - entity->open = 0; - if (!context) - return XML_ERROR_NO_MEMORY; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - context, - entity->base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - poolDiscard(&tempPool); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; - } - case XML_TOK_START_TAG_WITH_ATTS: - if (!startElementHandler) { - enum XML_Error result = storeAtts(parser, enc, s, 0, 0); - if (result) - return result; + const XML_Char *name; + ENTITY *entity; + XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (ch) { + if (characterDataHandler) + characterDataHandler(handlerArg, &ch, 1); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + name = poolStoreString(&dtd->pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + poolDiscard(&dtd->pool); + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity or default handler. + */ + if (!dtd->hasParamEntityRefs || dtd->standalone) { + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (!entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } + else if (!entity) { + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, name, 0); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->notation) + return XML_ERROR_BINARY_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + OPEN_INTERNAL_ENTITY openEntity; + if (!defaultExpandInternalEntities) { + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, entity->name, 0); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + entity->open = XML_TRUE; + openEntity.next = openInternalEntities; + openInternalEntities = &openEntity; + openEntity.entity = entity; + openEntity.internalEventPtr = NULL; + openEntity.internalEventEndPtr = NULL; + result = doContent(parser, + tagLevel, + internalEncoding, + (char *)entity->textPtr, + (char *)(entity->textPtr + entity->textLen), + 0); + entity->open = XML_FALSE; + openInternalEntities = openEntity.next; + if (result) + return result; + } + else if (externalEntityRefHandler) { + const XML_Char *context; + entity->open = XML_TRUE; + context = getContext(parser); + entity->open = XML_FALSE; + if (!context) + return XML_ERROR_NO_MEMORY; + if (!externalEntityRefHandler((XML_Parser)externalEntityRefHandlerArg, + context, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + poolDiscard(&tempPool); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); + break; } - /* fall through */ case XML_TOK_START_TAG_NO_ATTS: + /* fall through */ + case XML_TOK_START_TAG_WITH_ATTS: { - TAG *tag; - if (freeTagList) { - tag = freeTagList; - freeTagList = freeTagList->parent; - } - else { - tag = MALLOC(sizeof(TAG)); - if (!tag) - return XML_ERROR_NO_MEMORY; - tag->buf = MALLOC(INIT_TAG_BUF_SIZE); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; - } - tag->bindings = 0; - tag->parent = tagStack; - tagStack = tag; - tag->name.localPart = 0; - tag->rawName = s + enc->minBytesPerChar; - tag->rawNameLength = XmlNameLength(enc, tag->rawName); - if (nextPtr) { - /* Need to guarantee that: - tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */ - if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) { - int bufSize = tag->rawNameLength * 4; - bufSize = ROUND_UP(bufSize, sizeof(XML_Char)); - tag->buf = REALLOC(tag->buf, bufSize); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + bufSize; - } - memcpy(tag->buf, tag->rawName, tag->rawNameLength); - tag->rawName = tag->buf; - } - ++tagLevel; - if (startElementHandler) { - enum XML_Error result; - XML_Char *toPtr; - for (;;) { - const char *rawNameEnd = tag->rawName + tag->rawNameLength; - const char *fromPtr = tag->rawName; - int bufSize; - if (nextPtr) - toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char))); - else - toPtr = (XML_Char *)tag->buf; - tag->name.str = toPtr; - XmlConvert(enc, - &fromPtr, rawNameEnd, - (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - if (fromPtr == rawNameEnd) - break; - bufSize = (tag->bufEnd - tag->buf) << 1; - tag->buf = REALLOC(tag->buf, bufSize); - if (!tag->buf) - return XML_ERROR_NO_MEMORY; - tag->bufEnd = tag->buf + bufSize; - if (nextPtr) - tag->rawName = tag->buf; - } - *toPtr = XML_T('\0'); - result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); - if (result) - return result; - startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts); - poolClear(&tempPool); - } - else { - tag->name.str = 0; - if (defaultHandler) - reportDefault(parser, enc, s, next); - } - break; + TAG *tag; + enum XML_Error result; + XML_Char *toPtr; + if (freeTagList) { + tag = freeTagList; + freeTagList = freeTagList->parent; + } + else { + tag = (TAG *)MALLOC(sizeof(TAG)); + if (!tag) + return XML_ERROR_NO_MEMORY; + tag->buf = (char *)MALLOC(INIT_TAG_BUF_SIZE); + if (!tag->buf) { + FREE(tag); + return XML_ERROR_NO_MEMORY; + } + tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + } + tag->bindings = NULL; + tag->parent = tagStack; + tagStack = tag; + tag->name.localPart = NULL; + tag->name.prefix = NULL; + tag->rawName = s + enc->minBytesPerChar; + tag->rawNameLength = XmlNameLength(enc, tag->rawName); + ++tagLevel; + { + const char *rawNameEnd = tag->rawName + tag->rawNameLength; + const char *fromPtr = tag->rawName; + toPtr = (XML_Char *)tag->buf; + for (;;) { + int bufSize; + int convLen; + XmlConvert(enc, + &fromPtr, rawNameEnd, + (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); + convLen = toPtr - (XML_Char *)tag->buf; + if (fromPtr == rawNameEnd) { + tag->name.strLen = convLen; + break; + } + bufSize = (tag->bufEnd - tag->buf) << 1; + { + char *temp = (char *)REALLOC(tag->buf, bufSize); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + tag->buf = temp; + tag->bufEnd = temp + bufSize; + toPtr = (XML_Char *)temp + convLen; + } + } + } + tag->name.str = (XML_Char *)tag->buf; + *toPtr = XML_T('\0'); + if (!startElementHandler && (tok == XML_TOK_START_TAG_NO_ATTS)) { + if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + } + result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); + if (result) + return result; + if (startElementHandler) + startElementHandler(handlerArg, tag->name.str, + (const XML_Char **)atts); + else if (defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&tempPool); + break; } - case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: - if (!startElementHandler) { - enum XML_Error result = storeAtts(parser, enc, s, 0, 0); - if (result) - return result; + case XML_TOK_EMPTY_ELEMENT_NO_ATTS: + if (!startElementHandler && !endElementHandler) { + if (defaultHandler) + reportDefault(parser, enc, s, next); + if (tagLevel == 0) + return epilogProcessor(parser, next, end, nextPtr); + break; } /* fall through */ - case XML_TOK_EMPTY_ELEMENT_NO_ATTS: - if (startElementHandler || endElementHandler) { - const char *rawName = s + enc->minBytesPerChar; - enum XML_Error result; - BINDING *bindings = 0; - TAG_NAME name; - name.str = poolStoreString(&tempPool, enc, rawName, - rawName + XmlNameLength(enc, rawName)); - if (!name.str) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - result = storeAtts(parser, enc, s, &name, &bindings); - if (result) - return result; - poolFinish(&tempPool); - if (startElementHandler) - startElementHandler(handlerArg, name.str, (const XML_Char **)atts); - if (endElementHandler) { - if (startElementHandler) - *eventPP = *eventEndPP; - endElementHandler(handlerArg, name.str); - } - poolClear(&tempPool); - while (bindings) { - BINDING *b = bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } + case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: + { + const char *rawName = s + enc->minBytesPerChar; + enum XML_Error result; + BINDING *bindings = NULL; + XML_Bool noElmHandlers = XML_TRUE; + TAG_NAME name; + name.str = poolStoreString(&tempPool, enc, rawName, + rawName + XmlNameLength(enc, rawName)); + if (!name.str) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + if (startElementHandler || + (tok == XML_TOK_EMPTY_ELEMENT_WITH_ATTS)) { + result = storeAtts(parser, enc, s, &name, &bindings); + if (result) + return result; + poolFinish(&tempPool); + } + if (startElementHandler) { + startElementHandler(handlerArg, name.str, (const XML_Char **)atts); + noElmHandlers = XML_FALSE; + } + if (endElementHandler) { + if (startElementHandler) + *eventPP = *eventEndPP; + endElementHandler(handlerArg, name.str); + noElmHandlers = XML_FALSE; + } + if (noElmHandlers && defaultHandler) + reportDefault(parser, enc, s, next); + poolClear(&tempPool); + while (bindings) { + BINDING *b = bindings; + if (endNamespaceDeclHandler) + endNamespaceDeclHandler(handlerArg, b->prefix->name); + bindings = bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } } - else if (defaultHandler) - reportDefault(parser, enc, s, next); if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); + return epilogProcessor(parser, next, end, nextPtr); break; case XML_TOK_END_TAG: if (tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { - int len; - const char *rawName; - TAG *tag = tagStack; - tagStack = tag->parent; - tag->parent = freeTagList; - freeTagList = tag; - rawName = s + enc->minBytesPerChar*2; - len = XmlNameLength(enc, rawName); - if (len != tag->rawNameLength - || memcmp(tag->rawName, rawName, len) != 0) { - *eventPP = rawName; - return XML_ERROR_TAG_MISMATCH; - } - --tagLevel; - if (endElementHandler && tag->name.str) { - if (tag->name.localPart) { - XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen; - const XML_Char *from = tag->name.localPart; - while ((*to++ = *from++) != 0) - ; - } - endElementHandler(handlerArg, tag->name.str); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); - while (tag->bindings) { - BINDING *b = tag->bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - tag->bindings = tag->bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } - if (tagLevel == 0) - return epilogProcessor(parser, next, end, nextPtr); + int len; + const char *rawName; + TAG *tag = tagStack; + tagStack = tag->parent; + tag->parent = freeTagList; + freeTagList = tag; + rawName = s + enc->minBytesPerChar*2; + len = XmlNameLength(enc, rawName); + if (len != tag->rawNameLength + || memcmp(tag->rawName, rawName, len) != 0) { + *eventPP = rawName; + return XML_ERROR_TAG_MISMATCH; + } + --tagLevel; + if (endElementHandler) { + const XML_Char *localPart; + const XML_Char *prefix; + XML_Char *uri; + localPart = tag->name.localPart; + if (ns && localPart) { + /* localPart and prefix may have been overwritten in + tag->name.str, since this points to the binding->uri + buffer which gets re-used; so we have to add them again + */ + uri = (XML_Char *)tag->name.str + tag->name.uriLen; + /* don't need to check for space - already done in storeAtts() */ + while (*localPart) *uri++ = *localPart++; + prefix = (XML_Char *)tag->name.prefix; + if (ns_triplets && prefix) { + *uri++ = namespaceSeparator; + while (*prefix) *uri++ = *prefix++; + } + *uri = XML_T('\0'); + } + endElementHandler(handlerArg, tag->name.str); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); + while (tag->bindings) { + BINDING *b = tag->bindings; + if (endNamespaceDeclHandler) + endNamespaceDeclHandler(handlerArg, b->prefix->name); + tag->bindings = tag->bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } + if (tagLevel == 0) + return epilogProcessor(parser, next, end, nextPtr); } break; case XML_TOK_CHAR_REF: { - int n = XmlCharRefNumber(enc, s); - if (n < 0) - return XML_ERROR_BAD_CHAR_REF; - if (characterDataHandler) { - XML_Char buf[XML_ENCODE_MAX]; - characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); - } - else if (defaultHandler) - reportDefault(parser, enc, s, next); + int n = XmlCharRefNumber(enc, s); + if (n < 0) + return XML_ERROR_BAD_CHAR_REF; + if (characterDataHandler) { + XML_Char buf[XML_ENCODE_MAX]; + characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); + } + else if (defaultHandler) + reportDefault(parser, enc, s, next); } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { - XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); + XML_Char c = 0xA; + characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; case XML_TOK_CDATA_SECT_OPEN: { - enum XML_Error result; - if (startCdataSectionHandler) - startCdataSectionHandler(handlerArg); + enum XML_Error result; + if (startCdataSectionHandler) + startCdataSectionHandler(handlerArg); #if 0 - /* Suppose you doing a transformation on a document that involves - changing only the character data. You set up a defaultHandler - and a characterDataHandler. The defaultHandler simply copies - characters through. The characterDataHandler does the transformation - and writes the characters out escaping them as necessary. This case - will fail to work if we leave out the following two lines (because & - and < inside CDATA sections will be incorrectly escaped). - - However, now we have a start/endCdataSectionHandler, so it seems - easier to let the user deal with this. */ - - else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); + /* Suppose you doing a transformation on a document that involves + changing only the character data. You set up a defaultHandler + and a characterDataHandler. The defaultHandler simply copies + characters through. The characterDataHandler does the + transformation and writes the characters out escaping them as + necessary. This case will fail to work if we leave out the + following two lines (because & and < inside CDATA sections will + be incorrectly escaped). + + However, now we have a start/endCdataSectionHandler, so it seems + easier to let the user deal with this. + */ + else if (characterDataHandler) + characterDataHandler(handlerArg, dataBuf, 0); #endif - else if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr); - if (!next) { - processor = cdataSectionProcessor; - return result; - } + else if (defaultHandler) + reportDefault(parser, enc, s, next); + result = doCdataSection(parser, enc, &next, end, nextPtr); + if (!next) { + processor = cdataSectionProcessor; + return result; + } } break; case XML_TOK_TRAILING_RSQB: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)end - (XML_Char *)s); + if (MUST_CONVERT(enc, s)) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + characterDataHandler(handlerArg, dataBuf, + dataPtr - (ICHAR *)dataBuf); + } + else + characterDataHandler(handlerArg, + (XML_Char *)s, + (XML_Char *)end - (XML_Char *)s); } else if (defaultHandler) - reportDefault(parser, enc, s, end); + reportDefault(parser, enc, s, end); if (startTagLevel == 0) { *eventPP = end; - return XML_ERROR_NO_ELEMENTS; + return XML_ERROR_NO_ELEMENTS; } if (tagLevel != startTagLevel) { - *eventPP = end; - return XML_ERROR_ASYNC_ENTITY; + *eventPP = end; + return XML_ERROR_ASYNC_ENTITY; } return XML_ERROR_NONE; case XML_TOK_DATA_CHARS: if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = s; - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - if (s == next) - break; - *eventPP = s; - } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = s; + characterDataHandler(handlerArg, dataBuf, + dataPtr - (ICHAR *)dataBuf); + if (s == next) + break; + *eventPP = s; + } + } + else + characterDataHandler(handlerArg, + (XML_Char *)s, + (XML_Char *)next - (XML_Char *)s); } else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; default: if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; } *eventPP = s = next; @@ -1872,30 +2356,35 @@ doContent(XML_Parser parser, } /* If tagNamePtr is non-null, build a real list of attributes, -otherwise just check the attributes for well-formedness. */ - -static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, - const char *attStr, TAG_NAME *tagNamePtr, - BINDING **bindingsPtr) + otherwise just check the attributes for well-formedness. +*/ +static enum XML_Error +storeAtts(XML_Parser parser, const ENCODING *enc, + const char *attStr, TAG_NAME *tagNamePtr, + BINDING **bindingsPtr) { - ELEMENT_TYPE *elementType = 0; + DTD * const dtd = _dtd; /* save one level of indirection */ + ELEMENT_TYPE *elementType = NULL; int nDefaultAtts = 0; - const XML_Char **appAtts; /* the attribute list to pass to the application */ + const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; + int prefixLen; int i; int n; + XML_Char *uri; int nPrefixes = 0; BINDING *binding; const XML_Char *localPart; /* lookup the element type name */ if (tagNamePtr) { - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str,0); + elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, tagNamePtr->str,0); if (!elementType) { - tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str); - if (!tagNamePtr->str) - return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE)); + const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); + if (!name) + return XML_ERROR_NO_MEMORY; + elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, name, + sizeof(ELEMENT_TYPE)); if (!elementType) return XML_ERROR_NO_MEMORY; if (ns && !setElementTypePrefix(parser, elementType)) @@ -1907,10 +2396,12 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, n = XmlGetAttributes(enc, attStr, attsSize, atts); if (n + nDefaultAtts > attsSize) { int oldAttsSize = attsSize; + ATTRIBUTE *temp; attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - atts = REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (!atts) + temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); + if (temp == NULL) return XML_ERROR_NO_MEMORY; + atts = temp; if (n > oldAttsSize) XmlGetAttributes(enc, attStr, n, atts); } @@ -1918,63 +2409,66 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, for (i = 0; i < n; i++) { /* add the name and value to the attribute list */ ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, - atts[i].name - + XmlNameLength(enc, atts[i].name)); + atts[i].name + + XmlNameLength(enc, atts[i].name)); if (!attId) return XML_ERROR_NO_MEMORY; /* detect duplicate attributes */ if ((attId->name)[-1]) { if (enc == encoding) - eventPtr = atts[i].name; + eventPtr = atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; if (!atts[i].normalized) { enum XML_Error result; - int isCdata = 1; + XML_Bool isCdata = XML_TRUE; /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { - int j; - for (j = 0; j < nDefaultAtts; j++) { - if (attId == elementType->defaultAtts[j].id) { - isCdata = elementType->defaultAtts[j].isCdata; - break; - } - } + int j; + for (j = 0; j < nDefaultAtts; j++) { + if (attId == elementType->defaultAtts[j].id) { + isCdata = elementType->defaultAtts[j].isCdata; + break; + } + } } /* normalize the attribute value */ result = storeAttributeValue(parser, enc, isCdata, - atts[i].valuePtr, atts[i].valueEnd, - &tempPool); + atts[i].valuePtr, atts[i].valueEnd, + &tempPool); if (result) - return result; + return result; if (tagNamePtr) { - appAtts[attIndex] = poolStart(&tempPool); - poolFinish(&tempPool); + appAtts[attIndex] = poolStart(&tempPool); + poolFinish(&tempPool); } else - poolDiscard(&tempPool); + poolDiscard(&tempPool); } else if (tagNamePtr) { /* the value did not need normalizing */ - appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); + appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, + atts[i].valueEnd); if (appAtts[attIndex] == 0) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); } /* handle prefixed attribute names */ if (attId->prefix && tagNamePtr) { if (attId->xmlns) { - /* deal with namespace declarations here */ - if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr)) - return XML_ERROR_NO_MEMORY; + /* deal with namespace declarations here */ + enum XML_Error result = addBinding(parser, attId->prefix, attId, + appAtts[attIndex], bindingsPtr); + if (result) + return result; --attIndex; } else { - /* deal with other prefixed names later */ + /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; @@ -1988,10 +2482,10 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, nSpecifiedAtts = attIndex; if (elementType->idAtt && (elementType->idAtt->name)[-1]) { for (i = 0; i < attIndex; i += 2) - if (appAtts[i] == elementType->idAtt->name) { - idAttIndex = i; - break; - } + if (appAtts[i] == elementType->idAtt->name) { + idAttIndex = i; + break; + } } else idAttIndex = -1; @@ -2001,21 +2495,23 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, if (!(da->id->name)[-1] && da->value) { if (da->id->prefix) { if (da->id->xmlns) { - if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr)) - return XML_ERROR_NO_MEMORY; - } + enum XML_Error result = addBinding(parser, da->id->prefix, da->id, + da->value, bindingsPtr); + if (result) + return result; + } else { - (da->id->name)[-1] = 2; - nPrefixes++; - appAtts[attIndex++] = da->id->name; - appAtts[attIndex++] = da->value; - } - } - else { - (da->id->name)[-1] = 1; - appAtts[attIndex++] = da->id->name; - appAtts[attIndex++] = da->value; - } + (da->id->name)[-1] = 2; + nPrefixes++; + appAtts[attIndex++] = da->id->name; + appAtts[attIndex++] = da->value; + } + } + else { + (da->id->name)[-1] = 1; + appAtts[attIndex++] = da->id->name; + appAtts[attIndex++] = da->value; + } } } appAtts[attIndex] = 0; @@ -2027,38 +2523,38 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, if (appAtts[i][-1] == 2) { ATTRIBUTE_ID *id; ((XML_Char *)(appAtts[i]))[-1] = 0; - id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0); - if (id->prefix->binding) { - int j; - const BINDING *b = id->prefix->binding; - const XML_Char *s = appAtts[i]; - for (j = 0; j < b->uriLen; j++) { - if (!poolAppendChar(&tempPool, b->uri[j])) - return XML_ERROR_NO_MEMORY; - } - while (*s++ != ':') - ; - do { - if (!poolAppendChar(&tempPool, *s)) - return XML_ERROR_NO_MEMORY; - } while (*s++); - if (ns_triplets) { - tempPool.ptr[-1] = namespaceSeparator; - s = b->prefix->name; - do { - if (!poolAppendChar(&tempPool, *s)) - return XML_ERROR_NO_MEMORY; - } while (*s++); - } - - appAtts[i] = poolStart(&tempPool); - poolFinish(&tempPool); - } - if (!--nPrefixes) - break; + id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, appAtts[i], 0); + if (id->prefix->binding) { + int j; + const BINDING *b = id->prefix->binding; + const XML_Char *s = appAtts[i]; + for (j = 0; j < b->uriLen; j++) { + if (!poolAppendChar(&tempPool, b->uri[j])) + return XML_ERROR_NO_MEMORY; + } + while (*s++ != XML_T(':')) + ; + do { + if (!poolAppendChar(&tempPool, *s)) + return XML_ERROR_NO_MEMORY; + } while (*s++); + if (ns_triplets) { + tempPool.ptr[-1] = namespaceSeparator; + s = b->prefix->name; + do { + if (!poolAppendChar(&tempPool, *s)) + return XML_ERROR_NO_MEMORY; + } while (*s++); + } + + appAtts[i] = poolStart(&tempPool); + poolFinish(&tempPool); + } + if (!--nPrefixes) + break; } else - ((XML_Char *)(appAtts[i]))[-1] = 0; + ((XML_Char *)(appAtts[i]))[-1] = 0; } } /* clear the flags that say whether attributes were specified */ @@ -2077,40 +2573,62 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, while (*localPart++ != XML_T(':')) ; } - else if (dtd.defaultPrefix.binding) { - binding = dtd.defaultPrefix.binding; + else if (dtd->defaultPrefix.binding) { + binding = dtd->defaultPrefix.binding; localPart = tagNamePtr->str; } else return XML_ERROR_NONE; + prefixLen = 0; + if (ns && ns_triplets && binding->prefix->name) { + for (; binding->prefix->name[prefixLen++];) + ; + } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; + tagNamePtr->prefix = binding->prefix->name; + tagNamePtr->prefixLen = prefixLen; for (i = 0; localPart[i++];) ; - n = i + binding->uriLen; + n = i + binding->uriLen + prefixLen; if (n > binding->uriAlloc) { TAG *p; - XML_Char *uri = MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); + uri = (XML_Char *)MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); if (!uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); for (p = tagStack; p; p = p->parent) if (p->name.str == binding->uri) - p->name.str = uri; + p->name.str = uri; FREE(binding->uri); binding->uri = uri; } - memcpy(binding->uri + binding->uriLen, localPart, i * sizeof(XML_Char)); + uri = binding->uri + binding->uriLen; + memcpy(uri, localPart, i * sizeof(XML_Char)); + if (prefixLen) { + uri = uri + (i - 1); + if (namespaceSeparator) { *(uri) = namespaceSeparator; } + memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); + } tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } -static -int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) +/* addBinding() overwrites the value of prefix->binding without checking. + Therefore one must keep track of the old value outside of addBinding(). +*/ +static enum XML_Error +addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + const XML_Char *uri, BINDING **bindingsPtr) { BINDING *b; int len; + + /* empty string is only valid when there is no prefix per XML NS 1.0 */ + if (*uri == XML_T('\0') && prefix->name) + return XML_ERROR_SYNTAX; + for (len = 0; uri[len]; len++) ; if (namespaceSeparator) @@ -2118,21 +2636,23 @@ int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, con if (freeBindingList) { b = freeBindingList; if (len > b->uriAlloc) { - b->uri = REALLOC(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); - if (!b->uri) - return 0; + XML_Char *temp = (XML_Char *)REALLOC(b->uri, + sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + b->uri = temp; b->uriAlloc = len + EXPAND_SPARE; } freeBindingList = b->nextTagBinding; } else { - b = MALLOC(sizeof(BINDING)); + b = (BINDING *)MALLOC(sizeof(BINDING)); if (!b) - return 0; - b->uri = MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); + return XML_ERROR_NO_MEMORY; + b->uri = (XML_Char *)MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); if (!b->uri) { FREE(b); - return 0; + return XML_ERROR_NO_MEMORY; } b->uriAlloc = len + EXPAND_SPARE; } @@ -2143,44 +2663,51 @@ int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, con b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; - if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix) - prefix->binding = 0; + if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix) + prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; if (startNamespaceDeclHandler) startNamespaceDeclHandler(handlerArg, prefix->name, - prefix->binding ? uri : 0); - return 1; + prefix->binding ? uri : 0); + return XML_ERROR_NONE; } /* The idea here is to avoid using stack for each CDATA section when -the whole file is parsed with one call. */ - -static -enum XML_Error cdataSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) + the whole file is parsed with one call. +*/ +static enum XML_Error PTRCALL +cdataSectionProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr); + enum XML_Error result = doCdataSection(parser, encoding, &start, + end, endPtr); if (start) { - processor = contentProcessor; - return contentProcessor(parser, start, end, endPtr); + if (parentParser) { /* we are parsing an external entity */ + processor = externalEntityContentProcessor; + return externalEntityContentProcessor(parser, start, end, endPtr); + } + else { + processor = contentProcessor; + return contentProcessor(parser, start, end, endPtr); + } } return result; } /* startPtr gets set to non-null is the section is closed, and to null if -the section is not yet closed. */ - -static -enum XML_Error doCdataSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr) + the section is not yet closed. +*/ +static enum XML_Error +doCdataSection(XML_Parser parser, + const ENCODING *enc, + const char **startPtr, + const char *end, + const char **nextPtr) { const char *s = *startPtr; const char **eventPP; @@ -2195,7 +2722,7 @@ enum XML_Error doCdataSection(XML_Parser parser, eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; - *startPtr = 0; + *startPtr = NULL; for (;;) { const char *next; int tok = XmlCdataSectionTok(enc, s, end, &next); @@ -2203,59 +2730,60 @@ enum XML_Error doCdataSection(XML_Parser parser, switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: if (endCdataSectionHandler) - endCdataSectionHandler(handlerArg); + endCdataSectionHandler(handlerArg); #if 0 /* see comment under XML_TOK_CDATA_SECT_OPEN */ else if (characterDataHandler) - characterDataHandler(handlerArg, dataBuf, 0); + characterDataHandler(handlerArg, dataBuf, 0); #endif else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); *startPtr = next; return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { - XML_Char c = 0xA; - characterDataHandler(handlerArg, &c, 1); + XML_Char c = 0xA; + characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = next; - characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); - if (s == next) - break; - *eventPP = s; - } - } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = next; + characterDataHandler(handlerArg, dataBuf, + dataPtr - (ICHAR *)dataBuf); + if (s == next) + break; + *eventPP = s; + } + } + else + characterDataHandler(handlerArg, + (XML_Char *)s, + (XML_Char *)next - (XML_Char *)s); } else if (defaultHandler) - reportDefault(parser, enc, s, next); + reportDefault(parser, enc, s, next); break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: @@ -2270,15 +2798,16 @@ enum XML_Error doCdataSection(XML_Parser parser, #ifdef XML_DTD /* The idea here is to avoid using stack for each IGNORE section when -the whole file is parsed with one call. */ - -static -enum XML_Error ignoreSectionProcessor(XML_Parser parser, - const char *start, - const char *end, - const char **endPtr) + the whole file is parsed with one call. +*/ +static enum XML_Error PTRCALL +ignoreSectionProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) { - enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr); + enum XML_Error result = doIgnoreSection(parser, encoding, &start, + end, endPtr); if (start) { processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); @@ -2286,15 +2815,15 @@ enum XML_Error ignoreSectionProcessor(XML_Parser parser, return result; } -/* startPtr gets set to non-null is the section is closed, and to null if -the section is not yet closed. */ - -static -enum XML_Error doIgnoreSection(XML_Parser parser, - const ENCODING *enc, - const char **startPtr, - const char *end, - const char **nextPtr) +/* startPtr gets set to non-null is the section is closed, and to null + if the section is not yet closed. +*/ +static enum XML_Error +doIgnoreSection(XML_Parser parser, + const ENCODING *enc, + const char **startPtr, + const char *end, + const char **nextPtr) { const char *next; int tok; @@ -2311,7 +2840,7 @@ enum XML_Error doIgnoreSection(XML_Parser parser, eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; - *startPtr = 0; + *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); *eventEndPP = next; switch (tok) { @@ -2352,14 +2881,14 @@ initializeEncoding(XML_Parser parser) #ifdef XML_UNICODE char encodingBuf[128]; if (!protocolEncodingName) - s = 0; + s = NULL; else { int i; for (i = 0; protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 - || (protocolEncodingName[i] & ~0x7f) != 0) { - encodingBuf[0] = '\0'; - break; + || (protocolEncodingName[i] & ~0x7f) != 0) { + encodingBuf[0] = '\0'; + break; } encodingBuf[i] = (char)protocolEncodingName[i]; } @@ -2376,81 +2905,79 @@ initializeEncoding(XML_Parser parser) static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *s, const char *next) + const char *s, const char *next) { - const char *encodingName = 0; - const char *storedEncName = 0; - const ENCODING *newEncoding = 0; - const char *version = 0; + const char *encodingName = NULL; + const XML_Char *storedEncName = NULL; + const ENCODING *newEncoding = NULL; + const char *version = NULL; const char *versionend; - const char *storedversion = 0; + const XML_Char *storedversion = NULL; int standalone = -1; if (!(ns ? XmlParseXmlDeclNS - : XmlParseXmlDecl)(isGeneralTextEntity, - encoding, - s, - next, - &eventPtr, - &version, - &versionend, - &encodingName, - &newEncoding, - &standalone)) + : XmlParseXmlDecl)(isGeneralTextEntity, + encoding, + s, + next, + &eventPtr, + &version, + &versionend, + &encodingName, + &newEncoding, + &standalone)) return XML_ERROR_SYNTAX; if (!isGeneralTextEntity && standalone == 1) { - dtd.standalone = 1; + _dtd->standalone = XML_TRUE; #ifdef XML_DTD if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } if (xmlDeclHandler) { - if (encodingName) { + if (encodingName != NULL) { storedEncName = poolStoreString(&temp2Pool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (! storedEncName) - return XML_ERROR_NO_MEMORY; + encoding, + encodingName, + encodingName + + XmlNameLength(encoding, encodingName)); + if (!storedEncName) + return XML_ERROR_NO_MEMORY; poolFinish(&temp2Pool); } if (version) { storedversion = poolStoreString(&temp2Pool, - encoding, - version, - versionend - encoding->minBytesPerChar); - if (! storedversion) - return XML_ERROR_NO_MEMORY; + encoding, + version, + versionend - encoding->minBytesPerChar); + if (!storedversion) + return XML_ERROR_NO_MEMORY; } xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); } else if (defaultHandler) reportDefault(parser, encoding, s, next); - if (!protocolEncodingName) { + if (protocolEncodingName == NULL) { if (newEncoding) { if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { - eventPtr = encodingName; - return XML_ERROR_INCORRECT_ENCODING; + eventPtr = encodingName; + return XML_ERROR_INCORRECT_ENCODING; } encoding = newEncoding; } else if (encodingName) { enum XML_Error result; - if (! storedEncName) { - storedEncName = poolStoreString(&temp2Pool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (! storedEncName) - return XML_ERROR_NO_MEMORY; + if (!storedEncName) { + storedEncName = poolStoreString( + &temp2Pool, encoding, encodingName, + encodingName + XmlNameLength(encoding, encodingName)); + if (!storedEncName) + return XML_ERROR_NO_MEMORY; } result = handleUnknownEncoding(parser, storedEncName); - poolClear(&tempPool); + poolClear(&temp2Pool); if (result == XML_ERROR_UNKNOWN_ENCODING) - eventPtr = encodingName; + eventPtr = encodingName; return result; } } @@ -2469,41 +2996,42 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) int i; for (i = 0; i < 256; i++) info.map[i] = -1; - info.convert = 0; - info.data = 0; - info.release = 0; - if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { + info.convert = NULL; + info.data = NULL; + info.release = NULL; + if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, + &info)) { ENCODING *enc; unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding()); if (!unknownEncodingMem) { - if (info.release) - info.release(info.data); - return XML_ERROR_NO_MEMORY; + if (info.release) + info.release(info.data); + return XML_ERROR_NO_MEMORY; } enc = (ns - ? XmlInitUnknownEncodingNS - : XmlInitUnknownEncoding)(unknownEncodingMem, - info.map, - info.convert, - info.data); + ? XmlInitUnknownEncodingNS + : XmlInitUnknownEncoding)(unknownEncodingMem, + info.map, + info.convert, + info.data); if (enc) { - unknownEncodingData = info.data; - unknownEncodingRelease = info.release; - encoding = enc; - return XML_ERROR_NONE; + unknownEncodingData = info.data; + unknownEncodingRelease = info.release; + encoding = enc; + return XML_ERROR_NONE; } } - if (info.release) + if (info.release != NULL) info.release(info.data); } return XML_ERROR_UNKNOWN_ENCODING; } -static enum XML_Error +static enum XML_Error PTRCALL prologInitProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) + const char *s, + const char *end, + const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) @@ -2512,29 +3040,204 @@ prologInitProcessor(XML_Parser parser, return prologProcessor(parser, s, end, nextPtr); } -static enum XML_Error +#ifdef XML_DTD + +static enum XML_Error PTRCALL +externalParEntInitProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + enum XML_Error result = initializeEncoding(parser); + if (result != XML_ERROR_NONE) + return result; + + /* we know now that XML_Parse(Buffer) has been called, + so we consider the external parameter entity read */ + _dtd->paramEntityRead = XML_TRUE; + + if (prologState.inEntityValue) { + processor = entityValueInitProcessor; + return entityValueInitProcessor(parser, s, end, nextPtr); + } + else { + processor = externalParEntProcessor; + return externalParEntProcessor(parser, s, end, nextPtr); + } +} + +static enum XML_Error PTRCALL +entityValueInitProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + const char *start = s; + const char *next = s; + int tok; + + for (;;) { + tok = XmlPrologTok(encoding, start, end, &next); + if (tok <= 0) { + if (nextPtr != 0 && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + return storeEntityValue(parser, encoding, s, end); + } + else if (tok == XML_TOK_XML_DECL) { + enum XML_Error result = processXmlDecl(parser, 0, start, next); + if (result != XML_ERROR_NONE) + return result; + if (nextPtr) *nextPtr = next; + /* stop scanning for text declaration - we found one */ + processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } + /* If we are at the end of the buffer, this would cause XmlPrologTok to + return XML_TOK_NONE on the next call, which would then cause the + function to exit with *nextPtr set to s - that is what we want for other + tokens, but not for the BOM - we would rather like to skip it; + then, when this routine is entered the next time, XmlPrologTok will + return XML_TOK_INVALID, since the BOM is still in the buffer + */ + else if (tok == XML_TOK_BOM && next == end && nextPtr) { + *nextPtr = next; + return XML_ERROR_NONE; + } + start = next; + } +} + +static enum XML_Error PTRCALL +externalParEntProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + const char *start = s; + const char *next = s; + int tok; + + tok = XmlPrologTok(encoding, start, end, &next); + if (tok <= 0) { + if (nextPtr != 0 && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + } + /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. + However, when parsing an external subset, doProlog will not accept a BOM + as valid, and report a syntax error, so we have to skip the BOM + */ + else if (tok == XML_TOK_BOM) { + s = next; + tok = XmlPrologTok(encoding, s, end, &next); + } + + processor = prologProcessor; + return doProlog(parser, encoding, s, end, tok, next, nextPtr); +} + +static enum XML_Error PTRCALL +entityValueProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + const char *start = s; + const char *next = s; + const ENCODING *enc = encoding; + int tok; + + for (;;) { + tok = XmlPrologTok(enc, start, end, &next); + if (tok <= 0) { + if (nextPtr != 0 && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; + } + switch (tok) { + case XML_TOK_INVALID: + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL: + return XML_ERROR_UNCLOSED_TOKEN; + case XML_TOK_PARTIAL_CHAR: + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_NONE: /* start == end */ + default: + break; + } + return storeEntityValue(parser, enc, s, end); + } + start = next; + } +} + +#endif /* XML_DTD */ + +static enum XML_Error PTRCALL prologProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) + const char *s, + const char *end, + const char **nextPtr) { - const char *next; + const char *next = s; int tok = XmlPrologTok(encoding, s, end, &next); return doProlog(parser, encoding, s, end, tok, next, nextPtr); } static enum XML_Error doProlog(XML_Parser parser, - const ENCODING *enc, - const char *s, - const char *end, - int tok, - const char *next, - const char **nextPtr) + const ENCODING *enc, + const char *s, + const char *end, + int tok, + const char *next, + const char **nextPtr) { #ifdef XML_DTD static const XML_Char externalSubsetName[] = { '#' , '\0' }; #endif /* XML_DTD */ + static const XML_Char atypeCDATA[] = { 'C', 'D', 'A', 'T', 'A', '\0' }; + static const XML_Char atypeID[] = { 'I', 'D', '\0' }; + static const XML_Char atypeIDREF[] = { 'I', 'D', 'R', 'E', 'F', '\0' }; + static const XML_Char atypeIDREFS[] = { 'I', 'D', 'R', 'E', 'F', 'S', '\0' }; + static const XML_Char atypeENTITY[] = { 'E', 'N', 'T', 'I', 'T', 'Y', '\0' }; + static const XML_Char atypeENTITIES[] = + { 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S', '\0' }; + static const XML_Char atypeNMTOKEN[] = { + 'N', 'M', 'T', 'O', 'K', 'E', 'N', '\0' }; + static const XML_Char atypeNMTOKENS[] = { + 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S', '\0' }; + static const XML_Char notationPrefix[] = { + 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N', '(', '\0' }; + static const XML_Char enumValueSep[] = { '|', '\0' }; + static const XML_Char enumValueStart[] = { '(', '\0' }; + + DTD * const dtd = _dtd; /* save one level of indirection */ const char **eventPP; const char **eventEndPP; @@ -2550,624 +3253,777 @@ doProlog(XML_Parser parser, } for (;;) { int role; + XML_Bool handleDefault = XML_TRUE; *eventPP = s; *eventEndPP = next; if (tok <= 0) { if (nextPtr != 0 && tok != XML_TOK_INVALID) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: - *eventPP = next; - return XML_ERROR_INVALID_TOKEN; + *eventPP = next; + return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - return XML_ERROR_UNCLOSED_TOKEN; + return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - return XML_ERROR_PARTIAL_CHAR; + return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: #ifdef XML_DTD - if (enc != encoding) - return XML_ERROR_NONE; - if (parentParser) { - if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) - == XML_ROLE_ERROR) - return XML_ERROR_SYNTAX; - hadExternalDoctype = 0; - return XML_ERROR_NONE; - } + if (enc != encoding) + return XML_ERROR_NONE; + if (isParamEntity) { + if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) + == XML_ROLE_ERROR) + return XML_ERROR_SYNTAX; + return XML_ERROR_NONE; + } #endif /* XML_DTD */ - return XML_ERROR_NO_ELEMENTS; + return XML_ERROR_NO_ELEMENTS; default: - tok = -tok; - next = end; - break; + tok = -tok; + next = end; + break; } } role = XmlTokenRole(&prologState, tok, s, next, enc); switch (role) { case XML_ROLE_XML_DECL: { - enum XML_Error result = processXmlDecl(parser, 0, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; + enum XML_Error result = processXmlDecl(parser, 0, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = encoding; + handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_NAME: if (startDoctypeDeclHandler) { - doctypeName = poolStoreString(&tempPool, enc, s, next); - if (! doctypeName) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); - doctypeSysid = 0; - doctypePubid = 0; + doctypeName = poolStoreString(&tempPool, enc, s, next); + if (!doctypeName) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + doctypePubid = NULL; + handleDefault = XML_FALSE; } + doctypeSysid = NULL; /* always initialize to NULL */ break; case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: if (startDoctypeDeclHandler) { - startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, - doctypePubid, 1); - doctypeName = 0; - poolClear(&tempPool); + startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, + doctypePubid, 1); + doctypeName = NULL; + poolClear(&tempPool); + handleDefault = XML_FALSE; } break; #ifdef XML_DTD case XML_ROLE_TEXT_DECL: { - enum XML_Error result = processXmlDecl(parser, 1, s, next); - if (result != XML_ERROR_NONE) - return result; - enc = encoding; + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = encoding; + handleDefault = XML_FALSE; } break; #endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: +#ifdef XML_DTD + useForeignDTD = XML_FALSE; +#endif /* XML_DTD */ + dtd->hasParamEntityRefs = XML_TRUE; if (startDoctypeDeclHandler) { - doctypePubid = poolStoreString(&tempPool, enc, s + 1, next - 1); - if (! doctypePubid) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + doctypePubid = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!doctypePubid) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + handleDefault = XML_FALSE; } #ifdef XML_DTD - declEntity = (ENTITY *)lookup(&dtd.paramEntities, - externalSubsetName, - sizeof(ENTITY)); + declEntity = (ENTITY *)lookup(&dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); if (!declEntity) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; #endif /* XML_DTD */ /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: if (!XmlIsPublicId(enc, s, next, eventPP)) - return XML_ERROR_SYNTAX; - if (declEntity) { - XML_Char *tem = poolStoreString(&dtd.pool, - enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!tem) - return XML_ERROR_NO_MEMORY; - normalizePublicId(tem); - declEntity->publicId = tem; - poolFinish(&dtd.pool); + return XML_ERROR_SYNTAX; + if (dtd->keepProcessing && declEntity) { + XML_Char *tem = poolStoreString(&dtd->pool, + enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!tem) + return XML_ERROR_NO_MEMORY; + normalizePublicId(tem); + declEntity->publicId = tem; + poolFinish(&dtd->pool); + if (entityDeclHandler) + handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_CLOSE: if (doctypeName) { - startDoctypeDeclHandler(handlerArg, doctypeName, - doctypeSysid, doctypePubid, 0); - poolClear(&tempPool); + startDoctypeDeclHandler(handlerArg, doctypeName, + doctypeSysid, doctypePubid, 0); + poolClear(&tempPool); + handleDefault = XML_FALSE; } - if (dtd.complete && hadExternalDoctype) { - dtd.complete = 0; + /* doctypeSysid will be non-NULL in the case of a previous + XML_ROLE_DOCTYPE_SYSTEM_ID, even if startDoctypeDeclHandler + was not set, indicating an external subset + */ #ifdef XML_DTD - if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities, - externalSubsetName, - 0); - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - } + if (doctypeSysid || useForeignDTD) { + dtd->hasParamEntityRefs = XML_TRUE; /* when docTypeSysid == NULL */ + if (paramEntityParsing && externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!entity) + return XML_ERROR_NO_MEMORY; + if (useForeignDTD) + entity->base = curBase; + dtd->paramEntityRead = XML_FALSE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (dtd->paramEntityRead && + !dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + /* end of DTD - no need to update dtd->keepProcessing */ + } + useForeignDTD = XML_FALSE; + } #endif /* XML_DTD */ - if (!dtd.complete - && !dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (endDoctypeDeclHandler) { + endDoctypeDeclHandler(handlerArg); + handleDefault = XML_FALSE; } - if (endDoctypeDeclHandler) - endDoctypeDeclHandler(handlerArg); break; case XML_ROLE_INSTANCE_START: +#ifdef XML_DTD + /* if there is no DOCTYPE declaration then now is the + last chance to read the foreign DTD + */ + if (useForeignDTD) { + dtd->hasParamEntityRefs = XML_TRUE; + if (paramEntityParsing && externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!entity) + return XML_ERROR_NO_MEMORY; + entity->base = curBase; + dtd->paramEntityRead = XML_FALSE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + if (dtd->paramEntityRead && + !dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + /* end of DTD - no need to update dtd->keepProcessing */ + } + } +#endif /* XML_DTD */ processor = contentProcessor; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: declElementType = getElementType(parser, enc, s, next); if (!declElementType) - return XML_ERROR_NO_MEMORY; - break; + return XML_ERROR_NO_MEMORY; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_NAME: declAttributeId = getAttributeId(parser, enc, s, next); if (!declAttributeId) - return XML_ERROR_NO_MEMORY; - declAttributeIsCdata = 0; - declAttributeType = 0; - declAttributeIsId = 0; - break; + return XML_ERROR_NO_MEMORY; + declAttributeIsCdata = XML_FALSE; + declAttributeType = NULL; + declAttributeIsId = XML_FALSE; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: - declAttributeIsCdata = 1; - declAttributeType = "CDATA"; - break; + declAttributeIsCdata = XML_TRUE; + declAttributeType = atypeCDATA; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ID: - declAttributeIsId = 1; - declAttributeType = "ID"; - break; + declAttributeIsId = XML_TRUE; + declAttributeType = atypeID; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREF: - declAttributeType = "IDREF"; - break; + declAttributeType = atypeIDREF; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: - declAttributeType = "IDREFS"; - break; + declAttributeType = atypeIDREFS; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: - declAttributeType = "ENTITY"; - break; + declAttributeType = atypeENTITY; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: - declAttributeType = "ENTITIES"; - break; + declAttributeType = atypeENTITIES; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: - declAttributeType = "NMTOKEN"; - break; + declAttributeType = atypeNMTOKEN; + goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: - declAttributeType = "NMTOKENS"; + declAttributeType = atypeNMTOKENS; + checkAttListDeclHandler: + if (dtd->keepProcessing && attlistDeclHandler) + handleDefault = XML_FALSE; break; - case XML_ROLE_ATTRIBUTE_ENUM_VALUE: case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: - if (attlistDeclHandler) - { - char *prefix; - if (declAttributeType) { - prefix = "|"; - } - else { - prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE - ? "NOTATION(" - : "("); - } - if (! poolAppendString(&tempPool, prefix)) - return XML_ERROR_NO_MEMORY; - if (! poolAppend(&tempPool, enc, s, next)) - return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; + if (dtd->keepProcessing && attlistDeclHandler) { + const XML_Char *prefix; + if (declAttributeType) { + prefix = enumValueSep; + } + else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE + ? notationPrefix + : enumValueStart); + } + if (!poolAppendString(&tempPool, prefix)) + return XML_ERROR_NO_MEMORY; + if (!poolAppend(&tempPool, enc, s, next)) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + handleDefault = XML_FALSE; } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: - if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, declAttributeIsId, 0, - parser)) - return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == '(' - || (*declAttributeType == 'N' && declAttributeType[1] == 'O')) { - /* Enumerated or Notation type */ - if (! poolAppendChar(&tempPool, ')') - || ! poolAppendChar(&tempPool, '\0')) - return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); - } - *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); - poolClear(&tempPool); + if (dtd->keepProcessing) { + if (!defineAttribute(declElementType, declAttributeId, + declAttributeIsCdata, declAttributeIsId, 0, + parser)) + return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == XML_T('(') + || (*declAttributeType == XML_T('N') + && declAttributeType[1] == XML_T('O'))) { + /* Enumerated or Notation type */ + if (!poolAppendChar(&tempPool, XML_T(')')) + || !poolAppendChar(&tempPool, XML_T('\0'))) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + handleDefault = XML_FALSE; + } } break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: - { - const XML_Char *attVal; - enum XML_Error result - = storeAttributeValue(parser, enc, declAttributeIsCdata, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar, - &dtd.pool); - if (result) - return result; - attVal = poolStart(&dtd.pool); - poolFinish(&dtd.pool); - if (dtd.complete - /* ID attributes aren't allowed to have a default */ - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0, attVal, parser)) - return XML_ERROR_NO_MEMORY; - if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == '(' - || (*declAttributeType == 'N' && declAttributeType[1] == 'O')) { - /* Enumerated or Notation type */ - if (! poolAppendChar(&tempPool, ')') - || ! poolAppendChar(&tempPool, '\0')) - return XML_ERROR_NO_MEMORY; - declAttributeType = tempPool.start; - poolFinish(&tempPool); - } - *eventEndPP = s; - attlistDeclHandler(handlerArg, declElementType->name, - declAttributeId->name, declAttributeType, - attVal, - role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); - poolClear(&tempPool); - } - break; + if (dtd->keepProcessing) { + const XML_Char *attVal; + enum XML_Error result + = storeAttributeValue(parser, enc, declAttributeIsCdata, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar, + &dtd->pool); + if (result) + return result; + attVal = poolStart(&dtd->pool); + poolFinish(&dtd->pool); + /* ID attributes aren't allowed to have a default */ + if (!defineAttribute(declElementType, declAttributeId, + declAttributeIsCdata, XML_FALSE, attVal, parser)) + return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == XML_T('(') + || (*declAttributeType == XML_T('N') + && declAttributeType[1] == XML_T('O'))) { + /* Enumerated or Notation type */ + if (!poolAppendChar(&tempPool, XML_T(')')) + || !poolAppendChar(&tempPool, XML_T('\0'))) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + attVal, + role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + handleDefault = XML_FALSE; + } } + break; case XML_ROLE_ENTITY_VALUE: - { - enum XML_Error result = storeEntityValue(parser, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (declEntity) { - declEntity->textPtr = poolStart(&dtd.pool); - declEntity->textLen = poolLength(&dtd.pool); - poolFinish(&dtd.pool); - if (entityDeclHandler) { - *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - declEntity->is_param, - declEntity->textPtr, - declEntity->textLen, - curBase, 0, 0, 0); - } - } - else - poolDiscard(&dtd.pool); - if (result != XML_ERROR_NONE) - return result; + if (dtd->keepProcessing) { + enum XML_Error result = storeEntityValue(parser, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (declEntity) { + declEntity->textPtr = poolStart(&dtd->entityValuePool); + declEntity->textLen = poolLength(&dtd->entityValuePool); + poolFinish(&dtd->entityValuePool); + if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + declEntity->is_param, + declEntity->textPtr, + declEntity->textLen, + curBase, 0, 0, 0); + handleDefault = XML_FALSE; + } + } + else + poolDiscard(&dtd->entityValuePool); + if (result != XML_ERROR_NONE) + return result; } break; case XML_ROLE_DOCTYPE_SYSTEM_ID: +#ifdef XML_DTD + useForeignDTD = XML_FALSE; +#endif /* XML_DTD */ + dtd->hasParamEntityRefs = XML_TRUE; if (startDoctypeDeclHandler) { - doctypeSysid = poolStoreString(&tempPool, enc, s + 1, next - 1); - if (! doctypeSysid) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + doctypeSysid = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (doctypeSysid == NULL) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + handleDefault = XML_FALSE; } - if (!dtd.standalone #ifdef XML_DTD - && !paramEntityParsing + else + /* use externalSubsetName to make doctypeSysid non-NULL + for the case where no startDoctypeDeclHandler is set */ + doctypeSysid = externalSubsetName; +#endif /* XML_DTD */ + if (!dtd->standalone +#ifdef XML_DTD + && !paramEntityParsing #endif /* XML_DTD */ - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; - hadExternalDoctype = 1; + && notStandaloneHandler + && !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; #ifndef XML_DTD break; #else /* XML_DTD */ if (!declEntity) { - declEntity = (ENTITY *)lookup(&dtd.paramEntities, - externalSubsetName, - sizeof(ENTITY)); - declEntity->publicId = 0; - if (!declEntity) - return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(&dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + declEntity->publicId = NULL; } /* fall through */ #endif /* XML_DTD */ case XML_ROLE_ENTITY_SYSTEM_ID: - if (declEntity) { - declEntity->systemId = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!declEntity->systemId) - return XML_ERROR_NO_MEMORY; - declEntity->base = curBase; - poolFinish(&dtd.pool); + if (dtd->keepProcessing && declEntity) { + declEntity->systemId = poolStoreString(&dtd->pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!declEntity->systemId) + return XML_ERROR_NO_MEMORY; + declEntity->base = curBase; + poolFinish(&dtd->pool); + if (entityDeclHandler) + handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_COMPLETE: - if (declEntity && entityDeclHandler) { - *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - 0,0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - 0); + if (dtd->keepProcessing && declEntity && entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + declEntity->is_param, + 0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + 0); + handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_NOTATION_NAME: - if (declEntity) { - declEntity->notation = poolStoreString(&dtd.pool, enc, s, next); - if (!declEntity->notation) - return XML_ERROR_NO_MEMORY; - poolFinish(&dtd.pool); - if (unparsedEntityDeclHandler) { - *eventEndPP = s; - unparsedEntityDeclHandler(handlerArg, - declEntity->name, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); - } - else if (entityDeclHandler) { - *eventEndPP = s; - entityDeclHandler(handlerArg, - declEntity->name, - 0,0,0, - declEntity->base, - declEntity->systemId, - declEntity->publicId, - declEntity->notation); - } + if (dtd->keepProcessing && declEntity) { + declEntity->notation = poolStoreString(&dtd->pool, enc, s, next); + if (!declEntity->notation) + return XML_ERROR_NO_MEMORY; + poolFinish(&dtd->pool); + if (unparsedEntityDeclHandler) { + *eventEndPP = s; + unparsedEntityDeclHandler(handlerArg, + declEntity->name, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + declEntity->notation); + handleDefault = XML_FALSE; + } + else if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + 0,0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + declEntity->notation); + handleDefault = XML_FALSE; + } } break; case XML_ROLE_GENERAL_ENTITY_NAME: { - const XML_Char *name; - if (XmlPredefinedEntityName(enc, s, next)) { - declEntity = 0; - break; - } - name = poolStoreString(&dtd.pool, enc, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - if (dtd.complete) { - declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd.pool); - declEntity = 0; - } - else { - poolFinish(&dtd.pool); - declEntity->publicId = 0; - declEntity->is_param = 0; - } - } - else { - poolDiscard(&dtd.pool); - declEntity = 0; - } + if (XmlPredefinedEntityName(enc, s, next)) { + declEntity = NULL; + break; + } + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(&dtd->generalEntities, name, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + if (declEntity->name != name) { + poolDiscard(&dtd->pool); + declEntity = NULL; + } + else { + poolFinish(&dtd->pool); + declEntity->publicId = NULL; + declEntity->is_param = XML_FALSE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + declEntity->is_internal = !(parentParser || openInternalEntities); + if (entityDeclHandler) + handleDefault = XML_FALSE; + } + } + else { + poolDiscard(&dtd->pool); + declEntity = NULL; + } } break; case XML_ROLE_PARAM_ENTITY_NAME: #ifdef XML_DTD - if (dtd.complete) { - const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd.paramEntities, - name, sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; - if (declEntity->name != name) { - poolDiscard(&dtd.pool); - declEntity = 0; - } - else { - poolFinish(&dtd.pool); - declEntity->publicId = 0; - declEntity->is_param = 1; - } + if (dtd->keepProcessing) { + const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(&dtd->paramEntities, + name, sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + if (declEntity->name != name) { + poolDiscard(&dtd->pool); + declEntity = NULL; + } + else { + poolFinish(&dtd->pool); + declEntity->publicId = NULL; + declEntity->is_param = XML_TRUE; + /* if we have a parent parser or are reading an internal parameter + entity, then the entity declaration is not considered "internal" + */ + declEntity->is_internal = !(parentParser || openInternalEntities); + if (entityDeclHandler) + handleDefault = XML_FALSE; + } + } + else { + poolDiscard(&dtd->pool); + declEntity = NULL; } #else /* not XML_DTD */ - declEntity = 0; -#endif /* not XML_DTD */ + declEntity = NULL; +#endif /* XML_DTD */ break; case XML_ROLE_NOTATION_NAME: - declNotationPublicId = 0; - declNotationName = 0; + declNotationPublicId = NULL; + declNotationName = NULL; if (notationDeclHandler) { - declNotationName = poolStoreString(&tempPool, enc, s, next); - if (!declNotationName) - return XML_ERROR_NO_MEMORY; - poolFinish(&tempPool); + declNotationName = poolStoreString(&tempPool, enc, s, next); + if (!declNotationName) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_PUBLIC_ID: if (!XmlIsPublicId(enc, s, next, eventPP)) - return XML_ERROR_SYNTAX; - if (declNotationName) { - XML_Char *tem = poolStoreString(&tempPool, - enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!tem) - return XML_ERROR_NO_MEMORY; - normalizePublicId(tem); - declNotationPublicId = tem; - poolFinish(&tempPool); + return XML_ERROR_SYNTAX; + if (declNotationName) { /* means notationDeclHandler != NULL */ + XML_Char *tem = poolStoreString(&tempPool, + enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!tem) + return XML_ERROR_NO_MEMORY; + normalizePublicId(tem); + declNotationPublicId = tem; + poolFinish(&tempPool); + handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_SYSTEM_ID: if (declNotationName && notationDeclHandler) { - const XML_Char *systemId - = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!systemId) - return XML_ERROR_NO_MEMORY; - *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - systemId, - declNotationPublicId); + const XML_Char *systemId + = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!systemId) + return XML_ERROR_NO_MEMORY; + *eventEndPP = s; + notationDeclHandler(handlerArg, + declNotationName, + curBase, + systemId, + declNotationPublicId); + handleDefault = XML_FALSE; } poolClear(&tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: if (declNotationPublicId && notationDeclHandler) { - *eventEndPP = s; - notationDeclHandler(handlerArg, - declNotationName, - curBase, - 0, - declNotationPublicId); + *eventEndPP = s; + notationDeclHandler(handlerArg, + declNotationName, + curBase, + 0, + declNotationPublicId); + handleDefault = XML_FALSE; } poolClear(&tempPool); break; case XML_ROLE_ERROR: switch (tok) { case XML_TOK_PARAM_ENTITY_REF: - return XML_ERROR_PARAM_ENTITY_REF; + return XML_ERROR_PARAM_ENTITY_REF; case XML_TOK_XML_DECL: - return XML_ERROR_MISPLACED_XML_PI; + return XML_ERROR_MISPLACED_XML_PI; default: - return XML_ERROR_SYNTAX; + return XML_ERROR_SYNTAX; } #ifdef XML_DTD case XML_ROLE_IGNORE_SECT: { - enum XML_Error result; - if (defaultHandler) - reportDefault(parser, enc, s, next); - result = doIgnoreSection(parser, enc, &next, end, nextPtr); - if (!next) { - processor = ignoreSectionProcessor; - return result; - } + enum XML_Error result; + if (defaultHandler) + reportDefault(parser, enc, s, next); + handleDefault = XML_FALSE; + result = doIgnoreSection(parser, enc, &next, end, nextPtr); + if (!next) { + processor = ignoreSectionProcessor; + return result; + } } break; #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (prologState.level >= groupSize) { - if (groupSize) { - groupConnector = REALLOC(groupConnector, groupSize *= 2); - if (dtd.scaffIndex) - dtd.scaffIndex = REALLOC(dtd.scaffIndex, groupSize * sizeof(int)); - } - else - groupConnector = MALLOC(groupSize = 32); - if (!groupConnector) - return XML_ERROR_NO_MEMORY; + if (groupSize) { + char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + groupConnector = temp; + if (dtd->scaffIndex) { + int *temp = (int *)REALLOC(dtd->scaffIndex, + groupSize * sizeof(int)); + if (temp == NULL) + return XML_ERROR_NO_MEMORY; + dtd->scaffIndex = temp; + } + } + else { + groupConnector = (char *)MALLOC(groupSize = 32); + if (!groupConnector) + return XML_ERROR_NO_MEMORY; + } } groupConnector[prologState.level] = 0; - if (dtd.in_eldecl) { - int myindex = nextScaffoldPart(parser); - if (myindex < 0) - return XML_ERROR_NO_MEMORY; - dtd.scaffIndex[dtd.scaffLevel] = myindex; - dtd.scaffLevel++; - dtd.scaffold[myindex].type = XML_CTYPE_SEQ; + if (dtd->in_eldecl) { + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd->scaffIndex[dtd->scaffLevel] = myindex; + dtd->scaffLevel++; + dtd->scaffold[myindex].type = XML_CTYPE_SEQ; + if (elementDeclHandler) + handleDefault = XML_FALSE; } break; case XML_ROLE_GROUP_SEQUENCE: if (groupConnector[prologState.level] == '|') - return XML_ERROR_SYNTAX; + return XML_ERROR_SYNTAX; groupConnector[prologState.level] = ','; + if (dtd->in_eldecl && elementDeclHandler) + handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: if (groupConnector[prologState.level] == ',') - return XML_ERROR_SYNTAX; - if (dtd.in_eldecl - && ! groupConnector[prologState.level] - && dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type != XML_CTYPE_MIXED - ) { - dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_CHOICE; + return XML_ERROR_SYNTAX; + if (dtd->in_eldecl + && !groupConnector[prologState.level] + && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + != XML_CTYPE_MIXED) + ) { + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + = XML_CTYPE_CHOICE; + if (elementDeclHandler) + handleDefault = XML_FALSE; } groupConnector[prologState.level] = '|'; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: - if (paramEntityParsing - && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) { - const XML_Char *name; - ENTITY *entity; - name = poolStoreString(&dtd.pool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); - poolDiscard(&dtd.pool); - if (!entity) { - /* FIXME what to do if !dtd.complete? */ - return XML_ERROR_UNDEFINED_ENTITY; - } - if (entity->open) - return XML_ERROR_RECURSIVE_ENTITY_REF; - if (entity->textPtr) { - enum XML_Error result; - result = processInternalParamEntity(parser, entity); - if (result != XML_ERROR_NONE) - return result; - break; - } - if (role == XML_ROLE_INNER_PARAM_ENTITY_REF) - return XML_ERROR_PARAM_ENTITY_REF; - if (externalEntityRefHandler) { - dtd.complete = 0; - entity->open = 1; - if (!externalEntityRefHandler(externalEntityRefHandlerArg, - 0, - entity->base, - entity->systemId, - entity->publicId)) { - entity->open = 0; - return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - } - entity->open = 0; - if (dtd.complete) - break; - } + /* PE references in internal subset are + not allowed within declarations */ + if (prologState.documentEntity && + role == XML_ROLE_INNER_PARAM_ENTITY_REF) + return XML_ERROR_PARAM_ENTITY_REF; + dtd->hasParamEntityRefs = XML_TRUE; + if (!paramEntityParsing) + dtd->keepProcessing = dtd->standalone; + else { + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&dtd->pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + poolDiscard(&dtd->pool); + /* first, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the skipped entity handler + */ + if (prologState.documentEntity && + (dtd->standalone + ? !openInternalEntities + : !dtd->hasParamEntityRefs)) { + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (!entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } + else if (!entity) { + dtd->keepProcessing = dtd->standalone; + /* cannot report skipped entities in declarations */ + if ((role == XML_ROLE_PARAM_ENTITY_REF) && skippedEntityHandler) { + skippedEntityHandler(handlerArg, name, 1); + handleDefault = XML_FALSE; + } + break; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + result = processInternalParamEntity(parser, entity); + if (result != XML_ERROR_NONE) + return result; + handleDefault = XML_FALSE; + break; + } + if (externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) { + entity->open = XML_FALSE; + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } + entity->open = XML_FALSE; + handleDefault = XML_FALSE; + if (!dtd->paramEntityRead) { + dtd->keepProcessing = dtd->standalone; + break; + } + } + else { + dtd->keepProcessing = dtd->standalone; + break; + } } #endif /* XML_DTD */ - if (!dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; - dtd.complete = 0; - if (defaultHandler) - reportDefault(parser, enc, s, next); + if (!dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; break; - /* Element declaration stuff */ + /* Element declaration stuff */ case XML_ROLE_ELEMENT_NAME: if (elementDeclHandler) { - declElementType = getElementType(parser, enc, s, next); - if (! declElementType) - return XML_ERROR_NO_MEMORY; - dtd.scaffLevel = 0; - dtd.scaffCount = 0; - dtd.in_eldecl = 1; + declElementType = getElementType(parser, enc, s, next); + if (!declElementType) + return XML_ERROR_NO_MEMORY; + dtd->scaffLevel = 0; + dtd->scaffCount = 0; + dtd->in_eldecl = XML_TRUE; + handleDefault = XML_FALSE; } break; case XML_ROLE_CONTENT_ANY: case XML_ROLE_CONTENT_EMPTY: - if (dtd.in_eldecl) { - if (elementDeclHandler) { - XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); - if (! content) - return XML_ERROR_NO_MEMORY; - content->quant = XML_CQUANT_NONE; - content->name = 0; - content->numchildren = 0; - content->children = 0; - content->type = ((role == XML_ROLE_CONTENT_ANY) ? - XML_CTYPE_ANY : - XML_CTYPE_EMPTY); - *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, content); - } - dtd.in_eldecl = 0; + if (dtd->in_eldecl) { + if (elementDeclHandler) { + XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); + if (!content) + return XML_ERROR_NO_MEMORY; + content->quant = XML_CQUANT_NONE; + content->name = NULL; + content->numchildren = 0; + content->children = NULL; + content->type = ((role == XML_ROLE_CONTENT_ANY) ? + XML_CTYPE_ANY : + XML_CTYPE_EMPTY); + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, content); + handleDefault = XML_FALSE; + } + dtd->in_eldecl = XML_FALSE; } break; - + case XML_ROLE_CONTENT_PCDATA: - if (dtd.in_eldecl) { - dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_MIXED; + if (dtd->in_eldecl) { + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type + = XML_CTYPE_MIXED; + if (elementDeclHandler) + handleDefault = XML_FALSE; } break; @@ -3183,21 +4039,29 @@ doProlog(XML_Parser parser, case XML_ROLE_CONTENT_ELEMENT_PLUS: quant = XML_CQUANT_PLUS; elementContent: - if (dtd.in_eldecl) - { - ELEMENT_TYPE *el; - const char *nxt = quant == XML_CQUANT_NONE ? next : next - 1; - int myindex = nextScaffoldPart(parser); - if (myindex < 0) - return XML_ERROR_NO_MEMORY; - dtd.scaffold[myindex].type = XML_CTYPE_NAME; - dtd.scaffold[myindex].quant = quant; - el = getElementType(parser, enc, s, nxt); - if (! el) - return XML_ERROR_NO_MEMORY; - dtd.scaffold[myindex].name = el->name; - dtd.contentStringLen += nxt - s + 1; - } + if (dtd->in_eldecl) { + ELEMENT_TYPE *el; + const XML_Char *name; + int nameLen; + const char *nxt = (quant == XML_CQUANT_NONE + ? next + : next - enc->minBytesPerChar); + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd->scaffold[myindex].type = XML_CTYPE_NAME; + dtd->scaffold[myindex].quant = quant; + el = getElementType(parser, enc, s, nxt); + if (!el) + return XML_ERROR_NO_MEMORY; + name = el->name; + dtd->scaffold[myindex].name = name; + nameLen = 0; + for (; name[nameLen++]; ); + dtd->contentStringLen += nameLen; + if (elementDeclHandler) + handleDefault = XML_FALSE; + } break; case XML_ROLE_GROUP_CLOSE: @@ -3212,109 +4076,125 @@ doProlog(XML_Parser parser, case XML_ROLE_GROUP_CLOSE_PLUS: quant = XML_CQUANT_PLUS; closeGroup: - if (dtd.in_eldecl) { - dtd.scaffLevel--; - dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel]].quant = quant; - if (dtd.scaffLevel == 0) { - if (elementDeclHandler) { - XML_Content *model = build_model(parser); - if (! model) - return XML_ERROR_NO_MEMORY; - *eventEndPP = s; - elementDeclHandler(handlerArg, declElementType->name, model); - } - dtd.in_eldecl = 0; - dtd.contentStringLen = 0; - } + if (dtd->in_eldecl) { + if (elementDeclHandler) + handleDefault = XML_FALSE; + dtd->scaffLevel--; + dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; + if (dtd->scaffLevel == 0) { + if (!handleDefault) { + XML_Content *model = build_model(parser); + if (!model) + return XML_ERROR_NO_MEMORY; + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, model); + } + dtd->in_eldecl = XML_FALSE; + dtd->contentStringLen = 0; + } } break; /* End element declaration stuff */ - case XML_ROLE_NONE: - switch (tok) { - case XML_TOK_PI: - if (!reportProcessingInstruction(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; - break; - case XML_TOK_COMMENT: - if (!reportComment(parser, enc, s, next)) - return XML_ERROR_NO_MEMORY; - break; - } + case XML_ROLE_PI: + if (!reportProcessingInstruction(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + handleDefault = XML_FALSE; break; - } - if (defaultHandler) { + case XML_ROLE_COMMENT: + if (!reportComment(parser, enc, s, next)) + return XML_ERROR_NO_MEMORY; + handleDefault = XML_FALSE; + break; + case XML_ROLE_NONE: switch (tok) { - case XML_TOK_PI: - case XML_TOK_COMMENT: case XML_TOK_BOM: - case XML_TOK_XML_DECL: -#ifdef XML_DTD - case XML_TOK_IGNORE_SECT: -#endif /* XML_DTD */ - case XML_TOK_PARAM_ENTITY_REF: - break; - default: -#ifdef XML_DTD - if (role != XML_ROLE_IGNORE_SECT) -#endif /* XML_DTD */ - reportDefault(parser, enc, s, next); + handleDefault = XML_FALSE; + break; } - } + break; + case XML_ROLE_DOCTYPE_NONE: + if (startDoctypeDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ENTITY_NONE: + if (dtd->keepProcessing && entityDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_NOTATION_NONE: + if (notationDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ATTLIST_NONE: + if (dtd->keepProcessing && attlistDeclHandler) + handleDefault = XML_FALSE; + break; + case XML_ROLE_ELEMENT_NONE: + if (elementDeclHandler) + handleDefault = XML_FALSE; + break; + } /* end of big switch */ + + if (handleDefault && defaultHandler) + reportDefault(parser, enc, s, next); + s = next; tok = XmlPrologTok(enc, s, end, &next); } /* not reached */ } -static -enum XML_Error epilogProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) +static enum XML_Error PTRCALL +epilogProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) { processor = epilogProcessor; eventPtr = s; for (;;) { - const char *next; + const char *next = NULL; int tok = XmlPrologTok(encoding, s, end, &next); eventEndPtr = next; switch (tok) { + /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: if (defaultHandler) { - eventEndPtr = end; - reportDefault(parser, encoding, s, end); + eventEndPtr = next; + reportDefault(parser, encoding, s, next); } - /* fall through */ + if (nextPtr) + *nextPtr = next; + return XML_ERROR_NONE; case XML_TOK_NONE: if (nextPtr) - *nextPtr = end; + *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: if (defaultHandler) - reportDefault(parser, encoding, s, next); + reportDefault(parser, encoding, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, encoding, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, encoding, s, next)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { - *nextPtr = s; - return XML_ERROR_NONE; + *nextPtr = s; + return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; default: @@ -3333,38 +4213,39 @@ processInternalParamEntity(XML_Parser parser, ENTITY *entity) int tok; enum XML_Error result; OPEN_INTERNAL_ENTITY openEntity; - entity->open = 1; + entity->open = XML_TRUE; openEntity.next = openInternalEntities; openInternalEntities = &openEntity; openEntity.entity = entity; - openEntity.internalEventPtr = 0; - openEntity.internalEventEndPtr = 0; + openEntity.internalEventPtr = NULL; + openEntity.internalEventEndPtr = NULL; s = (char *)entity->textPtr; end = (char *)(entity->textPtr + entity->textLen); tok = XmlPrologTok(internalEncoding, s, end, &next); result = doProlog(parser, internalEncoding, s, end, tok, next, 0); - entity->open = 0; + entity->open = XML_FALSE; openInternalEntities = openEntity.next; return result; } #endif /* XML_DTD */ -static -enum XML_Error errorProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) +static enum XML_Error PTRCALL +errorProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) { return errorCode; } static enum XML_Error -storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) +storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, + STRING_POOL *pool) { - enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); + enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, + end, pool); if (result) return result; if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) @@ -3375,10 +4256,11 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, } static enum XML_Error -appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, - const char *ptr, const char *end, - STRING_POOL *pool) +appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, + STRING_POOL *pool) { + DTD * const dtd = _dtd; /* save one level of indirection */ for (;;) { const char *next; int tok = XmlAttributeValueTok(enc, ptr, end, &next); @@ -3387,42 +4269,41 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == encoding) - eventPtr = next; + eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (enc == encoding) - eventPtr = ptr; + eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; case XML_TOK_CHAR_REF: { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, ptr); - if (n < 0) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - if (!isCdata - && n == 0x20 /* space */ - && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (!poolAppendChar(pool, buf[i])) - return XML_ERROR_NO_MEMORY; - } + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, ptr); + if (n < 0) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + if (!isCdata + && n == 0x20 /* space */ + && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) + break; + n = XmlEncode(n, (ICHAR *)buf); + if (!n) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_BAD_CHAR_REF; + } + for (i = 0; i < n; i++) { + if (!poolAppendChar(pool, buf[i])) + return XML_ERROR_NO_MEMORY; + } } break; case XML_TOK_DATA_CHARS: if (!poolAppend(pool, enc, ptr, next)) - return XML_ERROR_NO_MEMORY; - break; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: next = ptr + enc->minBytesPerChar; @@ -3430,65 +4311,91 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) - break; + break; if (!poolAppendChar(pool, 0x20)) - return XML_ERROR_NO_MEMORY; + return XML_ERROR_NO_MEMORY; break; case XML_TOK_ENTITY_REF: { - const XML_Char *name; - ENTITY *entity; - XML_Char ch = XmlPredefinedEntityName(enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (ch) { - if (!poolAppendChar(pool, ch)) - return XML_ERROR_NO_MEMORY; - break; - } - name = poolStoreString(&temp2Pool, enc, - ptr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); - poolDiscard(&temp2Pool); - if (!entity) { - if (dtd.complete) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_UNDEFINED_ENTITY; - } - } - else if (entity->open) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_RECURSIVE_ENTITY_REF; - } - else if (entity->notation) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BINARY_ENTITY_REF; - } - else if (!entity->textPtr) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; - } - else { - enum XML_Error result; - const XML_Char *textEnd = entity->textPtr + entity->textLen; - entity->open = 1; - result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); - entity->open = 0; - if (result) - return result; - } + const XML_Char *name; + ENTITY *entity; + char checkEntityDecl; + XML_Char ch = (XML_Char) XmlPredefinedEntityName(enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (ch) { + if (!poolAppendChar(pool, ch)) + return XML_ERROR_NO_MEMORY; + break; + } + name = poolStoreString(&temp2Pool, enc, + ptr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + poolDiscard(&temp2Pool); + /* first, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal, + otherwise call the default handler (if called from content) + */ + if (pool == &dtd->pool) /* are we called from prolog? */ + checkEntityDecl = +#ifdef XML_DTD + prologState.documentEntity && +#endif /* XML_DTD */ + (dtd->standalone + ? !openInternalEntities + : !dtd->hasParamEntityRefs); + else /* if (pool == &tempPool): we are called from content */ + checkEntityDecl = !dtd->hasParamEntityRefs || dtd->standalone; + if (checkEntityDecl) { + if (!entity) + return XML_ERROR_UNDEFINED_ENTITY; + else if (!entity->is_internal) + return XML_ERROR_ENTITY_DECLARED_IN_PE; + } + else if (!entity) { + /* cannot report skipped entity here - see comments on + skippedEntityHandler + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, name, 0); + */ + if ((pool == &tempPool) && defaultHandler) + reportDefault(parser, enc, ptr, next); + break; + } + if (entity->open) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_RECURSIVE_ENTITY_REF; + } + if (entity->notation) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_BINARY_ENTITY_REF; + } + if (!entity->textPtr) { + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + } + else { + enum XML_Error result; + const XML_Char *textEnd = entity->textPtr + entity->textLen; + entity->open = XML_TRUE; + result = appendAttributeValue(parser, internalEncoding, isCdata, + (char *)entity->textPtr, + (char *)textEnd, pool); + entity->open = XML_FALSE; + if (result) + return result; + } } break; default: if (enc == encoding) - eventPtr = ptr; + eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; } ptr = next; @@ -3496,115 +4403,173 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, /* not reached */ } -static -enum XML_Error storeEntityValue(XML_Parser parser, - const ENCODING *enc, - const char *entityTextPtr, - const char *entityTextEnd) +static enum XML_Error +storeEntityValue(XML_Parser parser, + const ENCODING *enc, + const char *entityTextPtr, + const char *entityTextEnd) { - STRING_POOL *pool = &(dtd.pool); + DTD * const dtd = _dtd; /* save one level of indirection */ + STRING_POOL *pool = &(dtd->entityValuePool); + enum XML_Error result = XML_ERROR_NONE; +#ifdef XML_DTD + int oldInEntityValue = prologState.inEntityValue; + prologState.inEntityValue = 1; +#endif /* XML_DTD */ + /* never return Null for the value argument in EntityDeclHandler, + since this would indicate an external entity; therefore we + have to make sure that entityValuePool.start is not null */ + if (!pool->blocks) { + if (!poolGrow(pool)) + return XML_ERROR_NO_MEMORY; + } + for (;;) { const char *next; int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD - if (parentParser || enc != encoding) { - enum XML_Error result; - const XML_Char *name; - ENTITY *entity; - name = poolStoreString(&tempPool, enc, - entityTextPtr + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!name) - return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); - poolDiscard(&tempPool); - if (!entity) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_UNDEFINED_ENTITY; - } - if (entity->open) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_RECURSIVE_ENTITY_REF; - } - if (entity->systemId) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_PARAM_ENTITY_REF; - } - entity->open = 1; - result = storeEntityValue(parser, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr + entity->textLen)); - entity->open = 0; - if (result) - return result; - break; + if (isParamEntity || enc != encoding) { + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&tempPool, enc, + entityTextPtr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } + entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + poolDiscard(&tempPool); + if (!entity) { + /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ + /* cannot report skipped entity here - see comments on + skippedEntityHandler + if (skippedEntityHandler) + skippedEntityHandler(handlerArg, name, 0); + */ + dtd->keepProcessing = dtd->standalone; + goto endEntityValue; + } + if (entity->open) { + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_RECURSIVE_ENTITY_REF; + goto endEntityValue; + } + if (entity->systemId) { + if (externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) { + entity->open = XML_FALSE; + result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; + goto endEntityValue; + } + entity->open = XML_FALSE; + if (!dtd->paramEntityRead) + dtd->keepProcessing = dtd->standalone; + } + else + dtd->keepProcessing = dtd->standalone; + } + else { + entity->open = XML_TRUE; + result = storeEntityValue(parser, + internalEncoding, + (char *)entity->textPtr, + (char *)(entity->textPtr + + entity->textLen)); + entity->open = XML_FALSE; + if (result) + goto endEntityValue; + } + break; } #endif /* XML_DTD */ + /* in the internal subset, PE references are not legal + within markup declarations, e.g entity values in this case */ eventPtr = entityTextPtr; - return XML_ERROR_SYNTAX; + result = XML_ERROR_PARAM_ENTITY_REF; + goto endEntityValue; case XML_TOK_NONE: - return XML_ERROR_NONE; + result = XML_ERROR_NONE; + goto endEntityValue; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, enc, entityTextPtr, next)) - return XML_ERROR_NO_MEMORY; + if (!poolAppend(pool, enc, entityTextPtr, next)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } break; case XML_TOK_TRAILING_CR: next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: - if (pool->end == pool->ptr && !poolGrow(pool)) - return XML_ERROR_NO_MEMORY; + if (pool->end == pool->ptr && !poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } *(pool->ptr)++ = 0xA; break; case XML_TOK_CHAR_REF: { - XML_Char buf[XML_ENCODE_MAX]; - int i; - int n = XmlCharRefNumber(enc, entityTextPtr); - if (n < 0) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_BAD_CHAR_REF; - } - n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_BAD_CHAR_REF; - } - for (i = 0; i < n; i++) { - if (pool->end == pool->ptr && !poolGrow(pool)) - return XML_ERROR_NO_MEMORY; - *(pool->ptr)++ = buf[i]; - } + XML_Char buf[XML_ENCODE_MAX]; + int i; + int n = XmlCharRefNumber(enc, entityTextPtr); + if (n < 0) { + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + n = XmlEncode(n, (ICHAR *)buf); + if (!n) { + if (enc == encoding) + eventPtr = entityTextPtr; + result = XML_ERROR_BAD_CHAR_REF; + goto endEntityValue; + } + for (i = 0; i < n; i++) { + if (pool->end == pool->ptr && !poolGrow(pool)) { + result = XML_ERROR_NO_MEMORY; + goto endEntityValue; + } + *(pool->ptr)++ = buf[i]; + } } break; case XML_TOK_PARTIAL: if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_INVALID_TOKEN; + eventPtr = entityTextPtr; + result = XML_ERROR_INVALID_TOKEN; + goto endEntityValue; case XML_TOK_INVALID: if (enc == encoding) - eventPtr = next; - return XML_ERROR_INVALID_TOKEN; + eventPtr = next; + result = XML_ERROR_INVALID_TOKEN; + goto endEntityValue; default: if (enc == encoding) - eventPtr = entityTextPtr; - return XML_ERROR_UNEXPECTED_STATE; + eventPtr = entityTextPtr; + result = XML_ERROR_UNEXPECTED_STATE; + goto endEntityValue; } entityTextPtr = next; } - /* not reached */ +endEntityValue: +#ifdef XML_DTD + prologState.inEntityValue = oldInEntityValue; +#endif /* XML_DTD */ + return result; } -static void +static void FASTCALL normalizeLines(XML_Char *s) { XML_Char *p; @@ -3628,7 +4593,8 @@ normalizeLines(XML_Char *s) } static int -reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { const XML_Char *target; XML_Char *data; @@ -3645,8 +4611,8 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char * return 0; poolFinish(&tempPool); data = poolStoreString(&tempPool, enc, - XmlSkipS(enc, tem), - end - enc->minBytesPerChar*2); + XmlSkipS(enc, tem), + end - enc->minBytesPerChar*2); if (!data) return 0; normalizeLines(data); @@ -3656,7 +4622,8 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char * } static int -reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +reportComment(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { XML_Char *data; if (!commentHandler) { @@ -3666,8 +4633,8 @@ reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const c } data = poolStoreString(&tempPool, enc, - start + enc->minBytesPerChar * 4, - end - enc->minBytesPerChar * 3); + start + enc->minBytesPerChar * 4, + end - enc->minBytesPerChar * 3); if (!data) return 0; normalizeLines(data); @@ -3677,7 +4644,8 @@ reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const c } static void -reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) +reportDefault(XML_Parser parser, const ENCODING *enc, + const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { const char **eventPP; @@ -3704,8 +4672,8 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, - int isId, const XML_Char *value, XML_Parser parser) +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, + XML_Bool isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; if (value || isId) { @@ -3714,53 +4682,62 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, int i; for (i = 0; i < type->nDefaultAtts; i++) if (attId == type->defaultAtts[i].id) - return 1; + return 1; if (isId && !type->idAtt && !attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = MALLOC(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); + type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts + * sizeof(DEFAULT_ATTRIBUTE)); + if (!type->defaultAtts) + return 0; } else { - type->allocDefaultAtts *= 2; - type->defaultAtts = REALLOC(type->defaultAtts, - type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); + DEFAULT_ATTRIBUTE *temp; + int count = type->allocDefaultAtts * 2; + temp = (DEFAULT_ATTRIBUTE *) + REALLOC(type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); + if (temp == NULL) + return 0; + type->allocDefaultAtts = count; + type->defaultAtts = temp; } - if (!type->defaultAtts) - return 0; } att = type->defaultAtts + type->nDefaultAtts; att->id = attId; att->value = value; att->isCdata = isCdata; if (!isCdata) - attId->maybeTokenized = 1; + attId->maybeTokenized = XML_TRUE; type->nDefaultAtts += 1; return 1; } -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) +static int +setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { + DTD * const dtd = _dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { if (*name == XML_T(':')) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { - if (!poolAppendChar(&dtd.pool, *s)) - return 0; + if (!poolAppendChar(&dtd->pool, *s)) + return 0; } - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + return 0; + prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + sizeof(PREFIX)); if (!prefix) - return 0; - if (prefix->name == poolStart(&dtd.pool)) - poolFinish(&dtd.pool); + return 0; + if (prefix->name == poolStart(&dtd->pool)) + poolFinish(&dtd->pool); else - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); elementType->prefix = prefix; } @@ -3769,55 +4746,58 @@ static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) } static ATTRIBUTE_ID * -getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) +getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end) { + DTD * const dtd = _dtd; /* save one level of indirection */ ATTRIBUTE_ID *id; const XML_Char *name; - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - name = poolStoreString(&dtd.pool, enc, start, end); + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + name = poolStoreString(&dtd->pool, enc, start, end); if (!name) - return 0; + return NULL; ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID)); + id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) - return 0; + return NULL; if (id->name != name) - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); else { - poolFinish(&dtd.pool); + poolFinish(&dtd->pool); if (!ns) ; - else if (name[0] == 'x' - && name[1] == 'm' - && name[2] == 'l' - && name[3] == 'n' - && name[4] == 's' - && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { - if (name[5] == '\0') - id->prefix = &dtd.defaultPrefix; + else if (name[0] == XML_T('x') + && name[1] == XML_T('m') + && name[2] == XML_T('l') + && name[3] == XML_T('n') + && name[4] == XML_T('s') + && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { + if (name[5] == XML_T('\0')) + id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX)); - id->xmlns = 1; + id->prefix = (PREFIX *)lookup(&dtd->prefixes, name + 6, sizeof(PREFIX)); + id->xmlns = XML_TRUE; } else { int i; for (i = 0; name[i]; i++) { - if (name[i] == XML_T(':')) { - int j; - for (j = 0; j < i; j++) { - if (!poolAppendChar(&dtd.pool, name[j])) - return 0; - } - if (!poolAppendChar(&dtd.pool, XML_T('\0'))) - return 0; - id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); - if (id->prefix->name == poolStart(&dtd.pool)) - poolFinish(&dtd.pool); - else - poolDiscard(&dtd.pool); - break; - } + if (name[i] == XML_T(':')) { + int j; + for (j = 0; j < i; j++) { + if (!poolAppendChar(&dtd->pool, name[j])) + return NULL; + } + if (!poolAppendChar(&dtd->pool, XML_T('\0'))) + return NULL; + id->prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + sizeof(PREFIX)); + if (id->prefix->name == poolStart(&dtd->pool)) + poolFinish(&dtd->pool); + else + poolDiscard(&dtd->pool); + break; + } } } } @@ -3826,27 +4806,28 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const #define CONTEXT_SEP XML_T('\f') -static -const XML_Char *getContext(XML_Parser parser) +static const XML_Char * +getContext(XML_Parser parser) { + DTD * const dtd = _dtd; /* save one level of indirection */ HASH_TABLE_ITER iter; - int needSep = 0; + XML_Bool needSep = XML_FALSE; - if (dtd.defaultPrefix.binding) { + if (dtd->defaultPrefix.binding) { int i; int len; if (!poolAppendChar(&tempPool, XML_T('='))) - return 0; - len = dtd.defaultPrefix.binding->uriLen; + return NULL; + len = dtd->defaultPrefix.binding->uriLen; if (namespaceSeparator != XML_T('\0')) len--; for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i])) - return 0; - needSep = 1; + if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) + return NULL; + needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd.prefixes)); + hashTableIterInit(&iter, &(dtd->prefixes)); for (;;) { int i; int len; @@ -3857,23 +4838,23 @@ const XML_Char *getContext(XML_Parser parser) if (!prefix->binding) continue; if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) - return 0; + return NULL; for (s = prefix->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) - return 0; + return NULL; if (!poolAppendChar(&tempPool, XML_T('='))) - return 0; + return NULL; len = prefix->binding->uriLen; if (namespaceSeparator != XML_T('\0')) len--; for (i = 0; i < len; i++) if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) - return 0; - needSep = 1; + return NULL; + needSep = XML_TRUE; } - hashTableIterInit(&iter, &(dtd.generalEntities)); + hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); @@ -3882,77 +4863,81 @@ const XML_Char *getContext(XML_Parser parser) if (!e->open) continue; if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) - return 0; + return NULL; for (s = e->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) return 0; - needSep = 1; + needSep = XML_TRUE; } if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; + return NULL; return tempPool.start; } -static -int setContext(XML_Parser parser, const XML_Char *context) +static XML_Bool +setContext(XML_Parser parser, const XML_Char *context) { + DTD * const dtd = _dtd; /* save one level of indirection */ const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0); + return XML_FALSE; + e = (ENTITY *)lookup(&dtd->generalEntities, poolStart(&tempPool), 0); if (e) - e->open = 1; + e->open = XML_TRUE; if (*s != XML_T('\0')) - s++; + s++; context = s; poolDiscard(&tempPool); } - else if (*s == '=') { + else if (*s == XML_T('=')) { PREFIX *prefix; if (poolLength(&tempPool) == 0) - prefix = &dtd.defaultPrefix; + prefix = &dtd->defaultPrefix; else { - if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX)); - if (!prefix) - return 0; + if (!poolAppendChar(&tempPool, XML_T('\0'))) + return XML_FALSE; + prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&tempPool), + sizeof(PREFIX)); + if (!prefix) + return XML_FALSE; if (prefix->name == poolStart(&tempPool)) { - prefix->name = poolCopyString(&dtd.pool, prefix->name); - if (!prefix->name) - return 0; - } - poolDiscard(&tempPool); + prefix->name = poolCopyString(&dtd->pool, prefix->name); + if (!prefix->name) + return XML_FALSE; + } + poolDiscard(&tempPool); } - for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) + for (context = s + 1; + *context != CONTEXT_SEP && *context != XML_T('\0'); + context++) if (!poolAppendChar(&tempPool, *context)) - return 0; + return XML_FALSE; if (!poolAppendChar(&tempPool, XML_T('\0'))) - return 0; - if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings)) - return 0; + return XML_FALSE; + if (addBinding(parser, prefix, 0, poolStart(&tempPool), + &inheritedBindings) != XML_ERROR_NONE) + return XML_FALSE; poolDiscard(&tempPool); if (*context != XML_T('\0')) - ++context; + ++context; s = context; } else { if (!poolAppendChar(&tempPool, *s)) - return 0; + return XML_FALSE; s++; } } - return 1; + return XML_TRUE; } - -static -void normalizePublicId(XML_Char *publicId) +static void FASTCALL +normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; @@ -3962,7 +4947,7 @@ void normalizePublicId(XML_Char *publicId) case 0xD: case 0xA: if (p != publicId && p[-1] != 0x20) - *p++ = 0x20; + *p++ = 0x20; break; default: *p++ = *s; @@ -3973,46 +4958,89 @@ void normalizePublicId(XML_Char *publicId) *p = XML_T('\0'); } -static int dtdInit(DTD *p, XML_Parser parser) +static DTD * +dtdCreate(const XML_Memory_Handling_Suite *ms) { - XML_Memory_Handling_Suite *ms = &((Parser *) parser)->m_mem; + DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD)); + if (p == NULL) + return p; poolInit(&(p->pool), ms); +#ifdef XML_DTD + poolInit(&(p->entityValuePool), ms); +#endif /* XML_DTD */ hashTableInit(&(p->generalEntities), ms); hashTableInit(&(p->elementTypes), ms); hashTableInit(&(p->attributeIds), ms); hashTableInit(&(p->prefixes), ms); - p->complete = 1; - p->standalone = 0; #ifdef XML_DTD + p->paramEntityRead = XML_FALSE; hashTableInit(&(p->paramEntities), ms); #endif /* XML_DTD */ - p->defaultPrefix.name = 0; - p->defaultPrefix.binding = 0; + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; - p->in_eldecl = 0; - p->scaffIndex = 0; + p->in_eldecl = XML_FALSE; + p->scaffIndex = NULL; + p->scaffold = NULL; p->scaffLevel = 0; - p->scaffold = 0; - p->contentStringLen = 0; p->scaffSize = 0; p->scaffCount = 0; + p->contentStringLen = 0; - return 1; + p->keepProcessing = XML_TRUE; + p->hasParamEntityRefs = XML_FALSE; + p->standalone = XML_FALSE; + return p; } +static void +dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) +{ + HASH_TABLE_ITER iter; + hashTableIterInit(&iter, &(p->elementTypes)); + for (;;) { + ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); + if (!e) + break; + if (e->allocDefaultAtts != 0) + ms->free_fcn(e->defaultAtts); + } + hashTableClear(&(p->generalEntities)); #ifdef XML_DTD + p->paramEntityRead = XML_FALSE; + hashTableClear(&(p->paramEntities)); +#endif /* XML_DTD */ + hashTableClear(&(p->elementTypes)); + hashTableClear(&(p->attributeIds)); + hashTableClear(&(p->prefixes)); + poolClear(&(p->pool)); +#ifdef XML_DTD + poolClear(&(p->entityValuePool)); +#endif /* XML_DTD */ + p->defaultPrefix.name = NULL; + p->defaultPrefix.binding = NULL; -static void dtdSwap(DTD *p1, DTD *p2) -{ - DTD tem; - memcpy(&tem, p1, sizeof(DTD)); - memcpy(p1, p2, sizeof(DTD)); - memcpy(p2, &tem, sizeof(DTD)); -} + p->in_eldecl = XML_FALSE; + if (p->scaffIndex) { + ms->free_fcn(p->scaffIndex); + p->scaffIndex = NULL; + } + if (p->scaffold) { + ms->free_fcn(p->scaffold); + p->scaffold = NULL; + } + p->scaffLevel = 0; + p->scaffSize = 0; + p->scaffCount = 0; + p->contentStringLen = 0; -#endif /* XML_DTD */ + p->keepProcessing = XML_TRUE; + p->hasParamEntityRefs = XML_FALSE; + p->standalone = XML_FALSE; +} -static void dtdDestroy(DTD *p, XML_Parser parser) +static void +dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); @@ -4021,7 +5049,7 @@ static void dtdDestroy(DTD *p, XML_Parser parser) if (!e) break; if (e->allocDefaultAtts != 0) - FREE(e->defaultAtts); + ms->free_fcn(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD @@ -4031,16 +5059,23 @@ static void dtdDestroy(DTD *p, XML_Parser parser) hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); - if (p->scaffIndex) - FREE(p->scaffIndex); - if (p->scaffold) - FREE(p->scaffold); +#ifdef XML_DTD + poolDestroy(&(p->entityValuePool)); +#endif /* XML_DTD */ + if (isDocEntity) { + if (p->scaffIndex) + ms->free_fcn(p->scaffIndex); + if (p->scaffold) + ms->free_fcn(p->scaffold); + } + ms->free_fcn(p); } -/* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. -The new DTD has already been initialized. */ - -static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) +/* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. + The new DTD has already been initialized. +*/ +static int +dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; @@ -4077,16 +5112,18 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) if (!name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); + newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, + sizeof(ATTRIBUTE_ID)); if (!newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { newA->xmlns = oldA->xmlns; if (oldA->prefix == &oldDtd->defaultPrefix) - newA->prefix = &newDtd->defaultPrefix; + newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0); + newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + oldA->prefix->name, 0); } } @@ -4104,46 +5141,56 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); + newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, + sizeof(ELEMENT_TYPE)); if (!newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (!newE->defaultAtts) - return 0; + newE->defaultAtts = (DEFAULT_ATTRIBUTE *) + ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + if (!newE->defaultAtts) { + ms->free_fcn(newE); + return 0; + } } if (oldE->idAtt) - newE->idAtt = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); + newE->idAtt = (ATTRIBUTE_ID *) + lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0); + newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { - newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + newE->defaultAtts[i].id = (ATTRIBUTE_ID *) + lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { - newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); - if (!newE->defaultAtts[i].value) - return 0; + newE->defaultAtts[i].value + = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); + if (!newE->defaultAtts[i].value) + return 0; } else - newE->defaultAtts[i].value = 0; + newE->defaultAtts[i].value = NULL; } } /* Copy the entity tables. */ if (!copyEntityTable(&(newDtd->generalEntities), - &(newDtd->pool), - &(oldDtd->generalEntities), parser)) + &(newDtd->pool), + &(oldDtd->generalEntities))) return 0; #ifdef XML_DTD if (!copyEntityTable(&(newDtd->paramEntities), - &(newDtd->pool), - &(oldDtd->paramEntities), parser)) + &(newDtd->pool), + &(oldDtd->paramEntities))) return 0; + newDtd->paramEntityRead = oldDtd->paramEntityRead; #endif /* XML_DTD */ - newDtd->complete = oldDtd->complete; + newDtd->keepProcessing = oldDtd->keepProcessing; + newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; newDtd->standalone = oldDtd->standalone; /* Don't want deep copying for scaffolding */ @@ -4157,14 +5204,14 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) return 1; } /* End dtdCopy */ -static int copyEntityTable(HASH_TABLE *newTable, - STRING_POOL *newPool, - const HASH_TABLE *oldTable, - XML_Parser parser) +static int +copyEntityTable(HASH_TABLE *newTable, + STRING_POOL *newPool, + const HASH_TABLE *oldTable) { HASH_TABLE_ITER iter; - const XML_Char *cachedOldBase = 0; - const XML_Char *cachedNewBase = 0; + const XML_Char *cachedOldBase = NULL; + const XML_Char *cachedNewBase = NULL; hashTableIterInit(&iter, oldTable); @@ -4183,41 +5230,50 @@ static int copyEntityTable(HASH_TABLE *newTable, if (oldE->systemId) { const XML_Char *tem = poolCopyString(newPool, oldE->systemId); if (!tem) - return 0; + return 0; newE->systemId = tem; if (oldE->base) { - if (oldE->base == cachedOldBase) - newE->base = cachedNewBase; - else { - cachedOldBase = oldE->base; - tem = poolCopyString(newPool, cachedOldBase); - if (!tem) - return 0; - cachedNewBase = newE->base = tem; - } + if (oldE->base == cachedOldBase) + newE->base = cachedNewBase; + else { + cachedOldBase = oldE->base; + tem = poolCopyString(newPool, cachedOldBase); + if (!tem) + return 0; + cachedNewBase = newE->base = tem; + } + } + if (oldE->publicId) { + tem = poolCopyString(newPool, oldE->publicId); + if (!tem) + return 0; + newE->publicId = tem; } } else { - const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); + const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, + oldE->textLen); if (!tem) - return 0; + return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { const XML_Char *tem = poolCopyString(newPool, oldE->notation); if (!tem) - return 0; + return 0; newE->notation = tem; } + newE->is_param = oldE->is_param; + newE->is_internal = oldE->is_internal; } return 1; } #define INIT_SIZE 64 -static -int keyeq(KEY s1, KEY s2) +static int FASTCALL +keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) @@ -4225,8 +5281,8 @@ int keyeq(KEY s1, KEY s2) return 0; } -static -unsigned long hash(KEY s) +static unsigned long FASTCALL +hash(KEY s) { unsigned long h = 0; while (*s) @@ -4234,19 +5290,19 @@ unsigned long hash(KEY s) return h; } -static -NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) +static NAMED * +lookup(HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { size_t tsize; if (!createSize) - return 0; + return NULL; tsize = INIT_SIZE * sizeof(NAMED *); - table->v = table->mem->malloc_fcn(tsize); + table->v = (NAMED **)table->mem->malloc_fcn(tsize); if (!table->v) - return 0; + return NULL; memset(table->v, 0, tsize); table->size = INIT_SIZE; table->usedLim = INIT_SIZE / 2; @@ -4258,48 +5314,63 @@ NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) table->v[i]; i == 0 ? i = table->size - 1 : --i) { if (keyeq(name, table->v[i]->name)) - return table->v[i]; + return table->v[i]; } if (!createSize) - return 0; + return NULL; if (table->used == table->usedLim) { /* check for overflow */ size_t newSize = table->size * 2; size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = table->mem->malloc_fcn(tsize); + NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); if (!newV) - return 0; + return NULL; memset(newV, 0, tsize); for (i = 0; i < table->size; i++) - if (table->v[i]) { - size_t j; - for (j = hash(table->v[i]->name) & (newSize - 1); - newV[j]; - j == 0 ? j = newSize - 1 : --j) - ; - newV[j] = table->v[i]; - } + if (table->v[i]) { + size_t j; + for (j = hash(table->v[i]->name) & (newSize - 1); + newV[j]; + j == 0 ? j = newSize - 1 : --j) + ; + newV[j] = table->v[i]; + } table->mem->free_fcn(table->v); table->v = newV; table->size = newSize; table->usedLim = newSize/2; for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? i = table->size - 1 : --i) - ; + table->v[i]; + i == 0 ? i = table->size - 1 : --i) + ; } } - table->v[i] = table->mem->malloc_fcn(createSize); + table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); if (!table->v[i]) - return 0; + return NULL; memset(table->v[i], 0, createSize); table->v[i]->name = name; (table->used)++; return table->v[i]; } -static -void hashTableDestroy(HASH_TABLE *table) +static void FASTCALL +hashTableClear(HASH_TABLE *table) +{ + size_t i; + for (i = 0; i < table->size; i++) { + NAMED *p = table->v[i]; + if (p) { + table->mem->free_fcn(p); + table->v[i] = NULL; + } + } + table->usedLim = table->size / 2; + table->used = 0; +} + +static void FASTCALL +hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { @@ -4311,48 +5382,47 @@ void hashTableDestroy(HASH_TABLE *table) table->mem->free_fcn(table->v); } -static -void hashTableInit(HASH_TABLE *p, XML_Memory_Handling_Suite *ms) +static void FASTCALL +hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { p->size = 0; p->usedLim = 0; p->used = 0; - p->v = 0; + p->v = NULL; p->mem = ms; } -static -void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) +static void FASTCALL +hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { iter->p = table->v; iter->end = iter->p + table->size; } -static -NAMED *hashTableIterNext(HASH_TABLE_ITER *iter) +static NAMED * FASTCALL +hashTableIterNext(HASH_TABLE_ITER *iter) { while (iter->p != iter->end) { NAMED *tem = *(iter->p)++; if (tem) return tem; } - return 0; + return NULL; } - -static -void poolInit(STRING_POOL *pool, XML_Memory_Handling_Suite *ms) +static void FASTCALL +poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { - pool->blocks = 0; - pool->freeBlocks = 0; - pool->start = 0; - pool->ptr = 0; - pool->end = 0; + pool->blocks = NULL; + pool->freeBlocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; pool->mem = ms; } -static -void poolClear(STRING_POOL *pool) +static void FASTCALL +poolClear(STRING_POOL *pool) { if (!pool->freeBlocks) pool->freeBlocks = pool->blocks; @@ -4365,14 +5435,14 @@ void poolClear(STRING_POOL *pool) p = tem; } } - pool->blocks = 0; - pool->start = 0; - pool->ptr = 0; - pool->end = 0; + pool->blocks = NULL; + pool->start = NULL; + pool->ptr = NULL; + pool->end = NULL; } -static -void poolDestroy(STRING_POOL *pool) +static void FASTCALL +poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { @@ -4380,113 +5450,113 @@ void poolDestroy(STRING_POOL *pool) pool->mem->free_fcn(p); p = tem; } - pool->blocks = 0; p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; pool->mem->free_fcn(p); p = tem; } - pool->freeBlocks = 0; - pool->ptr = 0; - pool->start = 0; - pool->end = 0; } -static -XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) +static XML_Char * +poolAppend(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) { if (!pool->ptr && !poolGrow(pool)) - return 0; + return NULL; for (;;) { XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); if (ptr == end) break; if (!poolGrow(pool)) - return 0; + return NULL; } return pool->start; } -static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s) +static const XML_Char * FASTCALL +poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { if (!poolAppendChar(pool, *s)) - return 0; + return NULL; } while (*s++); s = pool->start; poolFinish(pool); return s; } -static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) +static const XML_Char * +poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { if (!pool->ptr && !poolGrow(pool)) - return 0; + return NULL; for (; n > 0; --n, s++) { if (!poolAppendChar(pool, *s)) - return 0; - + return NULL; } s = pool->start; poolFinish(pool); return s; } -static -const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s) +static const XML_Char * FASTCALL +poolAppendString(STRING_POOL *pool, const XML_Char *s) { while (*s) { if (!poolAppendChar(pool, *s)) - return 0; + return NULL; s++; - } + } return pool->start; -} /* End poolAppendString */ +} -static -XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, - const char *ptr, const char *end) +static XML_Char * +poolStoreString(STRING_POOL *pool, const ENCODING *enc, + const char *ptr, const char *end) { if (!poolAppend(pool, enc, ptr, end)) - return 0; + return NULL; if (pool->ptr == pool->end && !poolGrow(pool)) - return 0; + return NULL; *(pool->ptr)++ = 0; return pool->start; } -static -int poolGrow(STRING_POOL *pool) +static XML_Bool FASTCALL +poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; - pool->blocks->next = 0; + pool->blocks->next = NULL; pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; pool->ptr = pool->start; - return 1; + return XML_TRUE; } if (pool->end - pool->start < pool->freeBlocks->size) { BLOCK *tem = pool->freeBlocks->next; pool->freeBlocks->next = pool->blocks; pool->blocks = pool->freeBlocks; pool->freeBlocks = tem; - memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); + memcpy(pool->blocks->s, pool->start, + (pool->end - pool->start) * sizeof(XML_Char)); pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; - return 1; + return XML_TRUE; } } if (pool->blocks && pool->start == pool->blocks->s) { int blockSize = (pool->end - pool->start)*2; - pool->blocks = pool->mem->realloc_fcn(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); - if (!pool->blocks) - return 0; + pool->blocks = (BLOCK *) + pool->mem->realloc_fcn(pool->blocks, + (offsetof(BLOCK, s) + + blockSize * sizeof(XML_Char))); + if (pool->blocks == NULL) + return XML_FALSE; pool->blocks->size = blockSize; pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; @@ -4499,139 +5569,151 @@ int poolGrow(STRING_POOL *pool) blockSize = INIT_BLOCK_SIZE; else blockSize *= 2; - tem = pool->mem->malloc_fcn(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); + tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s) + + blockSize * sizeof(XML_Char)); if (!tem) - return 0; + return XML_FALSE; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; if (pool->ptr != pool->start) - memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); + memcpy(tem->s, pool->start, + (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; } - return 1; + return XML_TRUE; } -static int +static int FASTCALL nextScaffoldPart(XML_Parser parser) { + DTD * const dtd = _dtd; /* save one level of indirection */ CONTENT_SCAFFOLD * me; int next; - if (! dtd.scaffIndex) { - dtd.scaffIndex = MALLOC(groupSize * sizeof(int)); - if (! dtd.scaffIndex) + if (!dtd->scaffIndex) { + dtd->scaffIndex = (int *)MALLOC(groupSize * sizeof(int)); + if (!dtd->scaffIndex) return -1; - dtd.scaffIndex[0] = 0; + dtd->scaffIndex[0] = 0; } - if (dtd.scaffCount >= dtd.scaffSize) { - if (dtd.scaffold) { - dtd.scaffSize *= 2; - dtd.scaffold = (CONTENT_SCAFFOLD *) REALLOC(dtd.scaffold, - dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + if (dtd->scaffCount >= dtd->scaffSize) { + CONTENT_SCAFFOLD *temp; + if (dtd->scaffold) { + temp = (CONTENT_SCAFFOLD *) + REALLOC(dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) + return -1; + dtd->scaffSize *= 2; } else { - dtd.scaffSize = 32; - dtd.scaffold = (CONTENT_SCAFFOLD *) MALLOC(dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + temp = (CONTENT_SCAFFOLD *)MALLOC(INIT_SCAFFOLD_ELEMENTS + * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) + return -1; + dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; } - if (! dtd.scaffold) - return -1; + dtd->scaffold = temp; } - next = dtd.scaffCount++; - me = &dtd.scaffold[next]; - if (dtd.scaffLevel) { - CONTENT_SCAFFOLD *parent = &dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]]; + next = dtd->scaffCount++; + me = &dtd->scaffold[next]; + if (dtd->scaffLevel) { + CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel-1]]; if (parent->lastchild) { - dtd.scaffold[parent->lastchild].nextsib = next; + dtd->scaffold[parent->lastchild].nextsib = next; } - if (! parent->childcnt) + if (!parent->childcnt) parent->firstchild = next; parent->lastchild = next; parent->childcnt++; } me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; return next; -} /* End nextScaffoldPart */ +} static void -build_node (XML_Parser parser, - int src_node, - XML_Content *dest, - XML_Content **contpos, - char **strpos) +build_node(XML_Parser parser, + int src_node, + XML_Content *dest, + XML_Content **contpos, + XML_Char **strpos) { - dest->type = dtd.scaffold[src_node].type; - dest->quant = dtd.scaffold[src_node].quant; + DTD * const dtd = _dtd; /* save one level of indirection */ + dest->type = dtd->scaffold[src_node].type; + dest->quant = dtd->scaffold[src_node].quant; if (dest->type == XML_CTYPE_NAME) { - const char *src; + const XML_Char *src; dest->name = *strpos; - src = dtd.scaffold[src_node].name; + src = dtd->scaffold[src_node].name; for (;;) { *(*strpos)++ = *src; - if (! *src) - break; + if (!*src) + break; src++; } dest->numchildren = 0; - dest->children = 0; + dest->children = NULL; } else { unsigned int i; int cn; - dest->numchildren = dtd.scaffold[src_node].childcnt; + dest->numchildren = dtd->scaffold[src_node].childcnt; dest->children = *contpos; *contpos += dest->numchildren; - for (i = 0, cn = dtd.scaffold[src_node].firstchild; - i < dest->numchildren; - i++, cn = dtd.scaffold[cn].nextsib) { + for (i = 0, cn = dtd->scaffold[src_node].firstchild; + i < dest->numchildren; + i++, cn = dtd->scaffold[cn].nextsib) { build_node(parser, cn, &(dest->children[i]), contpos, strpos); } - dest->name = 0; + dest->name = NULL; } -} /* End build_node */ +} static XML_Content * build_model (XML_Parser parser) { + DTD * const dtd = _dtd; /* save one level of indirection */ XML_Content *ret; XML_Content *cpos; - char * str; - int allocsize = dtd.scaffCount * sizeof(XML_Content) + dtd.contentStringLen; - - ret = MALLOC(allocsize); - if (! ret) - return 0; + XML_Char * str; + int allocsize = (dtd->scaffCount * sizeof(XML_Content) + + (dtd->contentStringLen * sizeof(XML_Char))); + + ret = (XML_Content *)MALLOC(allocsize); + if (!ret) + return NULL; - str = (char *) (&ret[dtd.scaffCount]); + str = (XML_Char *) (&ret[dtd->scaffCount]); cpos = &ret[1]; build_node(parser, 0, ret, &cpos, &str); return ret; -} /* End build_model */ +} static ELEMENT_TYPE * getElementType(XML_Parser parser, - const ENCODING *enc, - const char *ptr, - const char *end) + const ENCODING *enc, + const char *ptr, + const char *end) { - const XML_Char *name = poolStoreString(&dtd.pool, enc, ptr, end); + DTD * const dtd = _dtd; /* save one level of indirection */ + const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); ELEMENT_TYPE *ret; - if (! name) - return 0; - ret = (ELEMENT_TYPE *) lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); - if (! ret) - return 0; + if (!name) + return NULL; + ret = (ELEMENT_TYPE *) lookup(&dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); + if (!ret) + return NULL; if (ret->name != name) - poolDiscard(&dtd.pool); + poolDiscard(&dtd->pool); else { - poolFinish(&dtd.pool); + poolFinish(&dtd->pool); if (!setElementTypePrefix(parser, ret)) - return 0; + return NULL; } return ret; -} /* End getElementType */ +} diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c index b9f7f6c..8ef185d 100644 --- a/Modules/expat/xmlrole.c +++ b/Modules/expat/xmlrole.c @@ -1,16 +1,16 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifdef COMPILED_FROM_DSP -# include "winconfig.h" +#include "winconfig.h" +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" #else -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif +#include <expat_config.h> #endif /* ndef COMPILED_FROM_DSP */ +#include "internal.h" #include "xmlrole.h" #include "ascii.h" @@ -21,29 +21,56 @@ See the file COPYING for copying permission. */ -static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' }; -static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; -static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; -static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; -static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; -static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; -static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; -static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; -static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' }; -static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; -static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; -static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; -static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; -static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; -static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; -static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; -static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' }; -static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; -static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; -static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' }; -static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; +static const char KW_ANY[] = { + ASCII_A, ASCII_N, ASCII_Y, '\0' }; +static const char KW_ATTLIST[] = { + ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; +static const char KW_CDATA[] = { + ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_DOCTYPE[] = { + ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; +static const char KW_ELEMENT[] = { + ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; +static const char KW_EMPTY[] = { + ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; +static const char KW_ENTITIES[] = { + ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, + '\0' }; +static const char KW_ENTITY[] = { + ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; +static const char KW_FIXED[] = { + ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; +static const char KW_ID[] = { + ASCII_I, ASCII_D, '\0' }; +static const char KW_IDREF[] = { + ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; +static const char KW_IDREFS[] = { + ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; +static const char KW_IGNORE[] = { + ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; +static const char KW_IMPLIED[] = { + ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; +static const char KW_INCLUDE[] = { + ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; +static const char KW_NDATA[] = { + ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_NMTOKEN[] = { + ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; +static const char KW_NMTOKENS[] = { + ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, + '\0' }; +static const char KW_NOTATION[] = + { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, + '\0' }; +static const char KW_PCDATA[] = { + ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_PUBLIC[] = { + ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; +static const char KW_REQUIRED[] = { + ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, + '\0' }; +static const char KW_SYSTEM[] = { + ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; #ifndef MIN_BYTES_PER_CHAR #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) @@ -58,18 +85,18 @@ static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, A #define setTopLevel(state) ((state)->handler = internalSubset) #endif /* not XML_DTD */ -typedef int PROLOG_HANDLER(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); +typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc); static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, entity3, entity4, entity5, entity6, - entity7, entity8, entity9, + entity7, entity8, entity9, entity10, notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, attlist9, @@ -82,15 +109,14 @@ static PROLOG_HANDLER declClose, error; -static -int common(PROLOG_STATE *state, int tok); +static int FASTCALL common(PROLOG_STATE *state, int tok); -static -int prolog0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +prolog0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: @@ -101,19 +127,20 @@ int prolog0(PROLOG_STATE *state, return XML_ROLE_XML_DECL; case XML_TOK_PI: state->handler = prolog1; - return XML_ROLE_NONE; + return XML_ROLE_PI; case XML_TOK_COMMENT: state->handler = prolog1; + return XML_ROLE_COMMENT; case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_DOCTYPE)) break; state->handler = doctype0; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; @@ -121,28 +148,30 @@ int prolog0(PROLOG_STATE *state, return common(state, tok); } -static -int prolog1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +prolog1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: + return XML_ROLE_COMMENT; case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_DOCTYPE)) + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_DOCTYPE)) break; state->handler = doctype0; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; @@ -150,19 +179,20 @@ int prolog1(PROLOG_STATE *state, return common(state, tok); } -static -int prolog2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +prolog2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: - return XML_ROLE_NONE; + return XML_ROLE_COMMENT; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; @@ -170,16 +200,16 @@ int prolog2(PROLOG_STATE *state, return common(state, tok); } -static -int doctype0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = doctype1; @@ -188,16 +218,16 @@ int doctype0(PROLOG_STATE *state, return common(state, tok); } -static -int doctype1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; @@ -207,27 +237,27 @@ int doctype1(PROLOG_STATE *state, case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = doctype3; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = doctype2; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } break; } return common(state, tok); } -static -int doctype2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_LITERAL: state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; @@ -235,16 +265,16 @@ int doctype2(PROLOG_STATE *state, return common(state, tok); } -static -int doctype3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_LITERAL: state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; @@ -252,16 +282,16 @@ int doctype3(PROLOG_STATE *state, return common(state, tok); } -static -int doctype4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; @@ -272,16 +302,16 @@ int doctype4(PROLOG_STATE *state, return common(state, tok); } -static -int doctype5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +doctype5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; @@ -289,66 +319,67 @@ int doctype5(PROLOG_STATE *state, return common(state, tok); } -static -int internalSubset(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +internalSubset(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_ENTITY)) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_ENTITY)) { state->handler = entity0; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_ATTLIST)) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_ATTLIST)) { state->handler = attlist0; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_ELEMENT)) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_ELEMENT)) { state->handler = element0; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } if (XmlNameMatchesAscii(enc, - ptr + 2 * MIN_BYTES_PER_CHAR(enc), - end, - KW_NOTATION)) { + ptr + 2 * MIN_BYTES_PER_CHAR(enc), + end, + KW_NOTATION)) { state->handler = notation0; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } break; case XML_TOK_PI: + return XML_ROLE_PI; case XML_TOK_COMMENT: - return XML_ROLE_NONE; + return XML_ROLE_COMMENT; case XML_TOK_PARAM_ENTITY_REF: return XML_ROLE_PARAM_ENTITY_REF; case XML_TOK_CLOSE_BRACKET: state->handler = doctype5; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_NONE; } return common(state, tok); } #ifdef XML_DTD -static -int externalSubset0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +externalSubset0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { state->handler = externalSubset1; if (tok == XML_TOK_XML_DECL) @@ -356,12 +387,12 @@ int externalSubset0(PROLOG_STATE *state, return externalSubset1(state, tok, ptr, end, enc); } -static -int externalSubset1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +externalSubset1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_COND_SECT_OPEN: @@ -388,19 +419,19 @@ int externalSubset1(PROLOG_STATE *state, #endif /* XML_DTD */ -static -int entity0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_PERCENT: state->handler = entity1; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = entity2; return XML_ROLE_GENERAL_ENTITY_NAME; @@ -408,16 +439,16 @@ int entity0(PROLOG_STATE *state, return common(state, tok); } -static -int entity1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; @@ -425,43 +456,44 @@ int entity1(PROLOG_STATE *state, return common(state, tok); } -static -int entity2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity4; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity3; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_VALUE; } return common(state, tok); } -static -int entity3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; @@ -469,17 +501,16 @@ int entity3(PROLOG_STATE *state, return common(state, tok); } - -static -int entity4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; @@ -487,83 +518,85 @@ int entity4(PROLOG_STATE *state, return common(state, tok); } -static -int entity5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_DECL_CLOSE: setTopLevel(state); return XML_ROLE_ENTITY_COMPLETE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = entity6; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; } return common(state, tok); } -static -int entity6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity6(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_NOTATION_NAME; } return common(state, tok); } -static -int entity7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity9; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity8; - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_ENTITY_NONE; return XML_ROLE_ENTITY_VALUE; } return common(state, tok); } -static -int entity8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity8(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: state->handler = entity9; return XML_ROLE_ENTITY_PUBLIC_ID; @@ -571,33 +604,50 @@ int entity8(PROLOG_STATE *state, return common(state, tok); } -static -int entity9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity9(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; case XML_TOK_LITERAL: - state->handler = declClose; + state->handler = entity10; return XML_ROLE_ENTITY_SYSTEM_ID; } return common(state, tok); } -static -int notation0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +entity10(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ENTITY_NONE; + case XML_TOK_DECL_CLOSE: + setTopLevel(state); + return XML_ROLE_ENTITY_COMPLETE; + } + return common(state, tok); +} + +static int PTRCALL +notation0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NOTATION_NONE; case XML_TOK_NAME: state->handler = notation1; return XML_ROLE_NOTATION_NAME; @@ -605,40 +655,40 @@ int notation0(PROLOG_STATE *state, return common(state, tok); } -static -int notation1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = notation3; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = notation2; - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; } break; } return common(state, tok); } -static -int notation2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = notation4; return XML_ROLE_NOTATION_PUBLIC_ID; @@ -646,35 +696,37 @@ int notation2(PROLOG_STATE *state, return common(state, tok); } -static -int notation3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_NOTATION_NONE; return XML_ROLE_NOTATION_SYSTEM_ID; } return common(state, tok); } -static -int notation4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +notation4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_NOTATION_NONE; case XML_TOK_LITERAL: state->handler = declClose; + state->role_none = XML_ROLE_NOTATION_NONE; return XML_ROLE_NOTATION_SYSTEM_ID; case XML_TOK_DECL_CLOSE: setTopLevel(state); @@ -683,16 +735,16 @@ int notation4(PROLOG_STATE *state, return common(state, tok); } -static -int attlist0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist1; @@ -701,19 +753,19 @@ int attlist0(PROLOG_STATE *state, return common(state, tok); } -static -int attlist1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_DECL_CLOSE: setTopLevel(state); - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist2; @@ -722,20 +774,20 @@ int attlist1(PROLOG_STATE *state, return common(state, tok); } -static -int attlist2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: { static const char *types[] = { - KW_CDATA, + KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, @@ -746,33 +798,33 @@ int attlist2(PROLOG_STATE *state, }; int i; for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) - if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { - state->handler = attlist8; - return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; - } + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { + state->handler = attlist8; + return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; + } } if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } break; case XML_TOK_OPEN_PAREN: state->handler = attlist3; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } -static -int attlist3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NMTOKEN: case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: @@ -782,54 +834,53 @@ int attlist3(PROLOG_STATE *state, return common(state, tok); } -static -int attlist4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OR: state->handler = attlist3; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } -static -int attlist5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OPEN_PAREN: state->handler = attlist6; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } - -static -int attlist6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist6(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_NAME: state->handler = attlist7; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; @@ -837,58 +888,58 @@ int attlist6(PROLOG_STATE *state, return common(state, tok); } -static -int attlist7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_CLOSE_PAREN: state->handler = attlist8; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_OR: state->handler = attlist6; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } return common(state, tok); } /* default value */ -static -int attlist8(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist8(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, - KW_IMPLIED)) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, - KW_REQUIRED)) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, - KW_FIXED)) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_FIXED)) { state->handler = attlist9; - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; } break; case XML_TOK_LITERAL: @@ -898,16 +949,16 @@ int attlist8(PROLOG_STATE *state, return common(state, tok); } -static -int attlist9(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +attlist9(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ATTLIST_NONE; case XML_TOK_LITERAL: state->handler = attlist1; return XML_ROLE_FIXED_ATTRIBUTE_VALUE; @@ -915,16 +966,16 @@ int attlist9(PROLOG_STATE *state, return common(state, tok); } -static -int element0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = element1; @@ -933,23 +984,25 @@ int element0(PROLOG_STATE *state, return common(state, tok); } -static -int element1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_CONTENT_EMPTY; } if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_CONTENT_ANY; } break; @@ -961,21 +1014,21 @@ int element1(PROLOG_STATE *state, return common(state, tok); } -static -int element2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, - ptr + MIN_BYTES_PER_CHAR(enc), - end, - KW_PCDATA)) { + ptr + MIN_BYTES_PER_CHAR(enc), + end, + KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; } @@ -1001,39 +1054,41 @@ int element2(PROLOG_STATE *state, return common(state, tok); } -static -int element3(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element3(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } return common(state, tok); } -static -int element4(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element4(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = element5; @@ -1042,36 +1097,37 @@ int element4(PROLOG_STATE *state, return common(state, tok); } -static -int element5(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element5(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_OR: state->handler = element4; - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; } return common(state, tok); } -static -int element6(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element6(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_OPEN_PAREN: state->level += 1; return XML_ROLE_GROUP_OPEN; @@ -1092,35 +1148,43 @@ int element6(PROLOG_STATE *state, return common(state, tok); } -static -int element7(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +element7(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return XML_ROLE_ELEMENT_NONE; case XML_TOK_CLOSE_PAREN: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_REP; case XML_TOK_CLOSE_PAREN_QUESTION: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_OPT; case XML_TOK_CLOSE_PAREN_PLUS: state->level -= 1; - if (state->level == 0) + if (state->level == 0) { state->handler = declClose; + state->role_none = XML_ROLE_ELEMENT_NONE; + } return XML_ROLE_GROUP_CLOSE_PLUS; case XML_TOK_COMMA: state->handler = element6; @@ -1134,12 +1198,12 @@ int element7(PROLOG_STATE *state, #ifdef XML_DTD -static -int condSect0(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +condSect0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1158,12 +1222,12 @@ int condSect0(PROLOG_STATE *state, return common(state, tok); } -static -int condSect1(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +condSect1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1176,12 +1240,12 @@ int condSect1(PROLOG_STATE *state, return common(state, tok); } -static -int condSect2(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +condSect2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1195,55 +1259,35 @@ int condSect2(PROLOG_STATE *state, #endif /* XML_DTD */ -static -int declClose(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +declClose(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: - return XML_ROLE_NONE; + return state->role_none; case XML_TOK_DECL_CLOSE: setTopLevel(state); - return XML_ROLE_NONE; + return state->role_none; } return common(state, tok); } -#if 0 - -static -int ignore(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) -{ - switch (tok) { - case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; - return 0; - default: - return XML_ROLE_NONE; - } - return common(state, tok); -} -#endif - -static -int error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +static int PTRCALL +error(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { return XML_ROLE_NONE; } -static -int common(PROLOG_STATE *state, int tok) +static int FASTCALL +common(PROLOG_STATE *state, int tok) { #ifdef XML_DTD if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) @@ -1253,18 +1297,21 @@ int common(PROLOG_STATE *state, int tok) return XML_ROLE_ERROR; } -void XmlPrologStateInit(PROLOG_STATE *state) +void +XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; #ifdef XML_DTD state->documentEntity = 1; state->includeLevel = 0; + state->inEntityValue = 0; #endif /* XML_DTD */ } #ifdef XML_DTD -void XmlPrologStateInitExternalEntity(PROLOG_STATE *state) +void +XmlPrologStateInitExternalEntity(PROLOG_STATE *state) { state->handler = externalSubset0; state->documentEntity = 0; diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h index db3ebc8..4dd9f06 100644 --- a/Modules/expat/xmlrole.h +++ b/Modules/expat/xmlrole.h @@ -1,11 +1,16 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifndef XmlRole_INCLUDED #define XmlRole_INCLUDED 1 +#ifdef __VMS +/* 0 1 2 3 0 1 2 3 + 1234567890123456789012345678901 1234567890123456789012345678901 */ +#define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt +#endif + #include "xmltok.h" #ifdef __cplusplus @@ -17,6 +22,7 @@ enum { XML_ROLE_NONE = 0, XML_ROLE_XML_DECL, XML_ROLE_INSTANCE_START, + XML_ROLE_DOCTYPE_NONE, XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_PUBLIC_ID, @@ -24,11 +30,13 @@ enum { XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME, + XML_ROLE_ENTITY_NONE, XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_PUBLIC_ID, XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_NOTATION_NAME, + XML_ROLE_NOTATION_NONE, XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_SYSTEM_ID, XML_ROLE_NOTATION_NO_SYSTEM_ID, @@ -44,11 +52,13 @@ enum { XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS, XML_ROLE_ATTRIBUTE_ENUM_VALUE, XML_ROLE_ATTRIBUTE_NOTATION_VALUE, + XML_ROLE_ATTLIST_NONE, XML_ROLE_ATTLIST_ELEMENT_NAME, XML_ROLE_IMPLIED_ATTRIBUTE_VALUE, XML_ROLE_REQUIRED_ATTRIBUTE_VALUE, XML_ROLE_DEFAULT_ATTRIBUTE_VALUE, XML_ROLE_FIXED_ATTRIBUTE_VALUE, + XML_ROLE_ELEMENT_NONE, XML_ROLE_ELEMENT_NAME, XML_ROLE_CONTENT_ANY, XML_ROLE_CONTENT_EMPTY, @@ -64,6 +74,8 @@ enum { XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_PLUS, + XML_ROLE_PI, + XML_ROLE_COMMENT, #ifdef XML_DTD XML_ROLE_TEXT_DECL, XML_ROLE_IGNORE_SECT, @@ -73,15 +85,17 @@ enum { }; typedef struct prolog_state { - int (*handler)(struct prolog_state *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc); + int (PTRCALL *handler) (struct prolog_state *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc); unsigned level; + int role_none; #ifdef XML_DTD unsigned includeLevel; int documentEntity; + int inEntityValue; #endif /* XML_DTD */ } PROLOG_STATE; diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index 46be9b0..1248583 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -1,16 +1,18 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifdef COMPILED_FROM_DSP -# include "winconfig.h" +#include "winconfig.h" +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" #else -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif +/* Unused - MvL +#include <expat_config.h> +*/ #endif /* ndef COMPILED_FROM_DSP */ +#include "internal.h" #include "xmltok.h" #include "nametab.h" @@ -39,24 +41,25 @@ See the file COPYING for copying permission. #define UCS2_GET_NAMING(pages, hi, lo) \ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) -/* A 2 byte UTF-8 representation splits the characters 11 bits -between the bottom 5 and 6 bits of the bytes. -We need 8 bits to index into pages, 3 bits to add to that index and -5 bits to generate the mask. */ +/* A 2 byte UTF-8 representation splits the characters 11 bits between + the bottom 5 and 6 bits of the bytes. We need 8 bits to index into + pages, 3 bits to add to that index and 5 bits to generate the mask. +*/ #define UTF8_GET_NAMING2(pages, byte) \ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + ((((byte)[0]) & 3) << 1) \ + ((((byte)[1]) >> 5) & 1)] \ & (1 << (((byte)[1]) & 0x1F))) -/* A 3 byte UTF-8 representation splits the characters 16 bits -between the bottom 4, 6 and 6 bits of the bytes. -We need 8 bits to index into pages, 3 bits to add to that index and -5 bits to generate the mask. */ +/* A 3 byte UTF-8 representation splits the characters 16 bits between + the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index + into pages, 3 bits to add to that index and 5 bits to generate the + mask. +*/ #define UTF8_GET_NAMING3(pages, byte) \ (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ + ((((byte)[1]) >> 2) & 0xF)] \ - << 3) \ + << 3) \ + ((((byte)[1]) & 3) << 1) \ + ((((byte)[2]) >> 5) & 1)] \ & (1 << (((byte)[2]) & 0x1F))) @@ -68,59 +71,97 @@ We need 8 bits to index into pages, 3 bits to add to that index and ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ : 0)) -#define UTF8_INVALID3(p) \ - ((*p) == 0xED \ - ? (((p)[1] & 0x20) != 0) \ - : ((*p) == 0xEF \ - ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \ - : 0)) +/* Detection of invalid UTF-8 sequences is based on Table 3.1B + of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ + with the additional restriction of not allowing the Unicode + code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). + Implementation details: + (A & 0x80) == 0 means A < 0x80 + and + (A & 0xC0) == 0xC0 means A > 0xBF +*/ -#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0) +#define UTF8_INVALID2(p) \ + ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) -static -int isNever(const ENCODING *enc, const char *p) +#define UTF8_INVALID3(p) \ + (((p)[2] & 0x80) == 0 \ + || \ + ((*p) == 0xEF && (p)[1] == 0xBF \ + ? \ + (p)[2] > 0xBD \ + : \ + ((p)[2] & 0xC0) == 0xC0) \ + || \ + ((*p) == 0xE0 \ + ? \ + (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ + : \ + ((p)[1] & 0x80) == 0 \ + || \ + ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) + +#define UTF8_INVALID4(p) \ + (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \ + || \ + ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \ + || \ + ((*p) == 0xF0 \ + ? \ + (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ + : \ + ((p)[1] & 0x80) == 0 \ + || \ + ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) + +static int PTRFASTCALL +isNever(const ENCODING *enc, const char *p) { return 0; } -static -int utf8_isName2(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isName2(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } -static -int utf8_isName3(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isName3(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } #define utf8_isName4 isNever -static -int utf8_isNmstrt2(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isNmstrt2(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } -static -int utf8_isNmstrt3(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isNmstrt3(const ENCODING *enc, const char *p) { return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } #define utf8_isNmstrt4 isNever -#define utf8_isInvalid2 isNever +static int PTRFASTCALL +utf8_isInvalid2(const ENCODING *enc, const char *p) +{ + return UTF8_INVALID2((const unsigned char *)p); +} -static -int utf8_isInvalid3(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isInvalid3(const ENCODING *enc, const char *p) { return UTF8_INVALID3((const unsigned char *)p); } -static -int utf8_isInvalid4(const ENCODING *enc, const char *p) +static int PTRFASTCALL +utf8_isInvalid4(const ENCODING *enc, const char *p) { return UTF8_INVALID4((const unsigned char *)p); } @@ -129,23 +170,25 @@ struct normal_encoding { ENCODING enc; unsigned char type[256]; #ifdef XML_MIN_SIZE - int (*byteType)(const ENCODING *, const char *); - int (*isNameMin)(const ENCODING *, const char *); - int (*isNmstrtMin)(const ENCODING *, const char *); - int (*byteToAscii)(const ENCODING *, const char *); - int (*charMatches)(const ENCODING *, const char *, int); + int (PTRFASTCALL *byteType)(const ENCODING *, const char *); + int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); + int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); + int (PTRCALL *charMatches)(const ENCODING *, const char *, int); #endif /* XML_MIN_SIZE */ - int (*isName2)(const ENCODING *, const char *); - int (*isName3)(const ENCODING *, const char *); - int (*isName4)(const ENCODING *, const char *); - int (*isNmstrt2)(const ENCODING *, const char *); - int (*isNmstrt3)(const ENCODING *, const char *); - int (*isNmstrt4)(const ENCODING *, const char *); - int (*isInvalid2)(const ENCODING *, const char *); - int (*isInvalid3)(const ENCODING *, const char *); - int (*isInvalid4)(const ENCODING *, const char *); + int (PTRFASTCALL *isName2)(const ENCODING *, const char *); + int (PTRFASTCALL *isName3)(const ENCODING *, const char *); + int (PTRFASTCALL *isName4)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); + int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); + int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); + int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); + int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); }; +#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc)) + #ifdef XML_MIN_SIZE #define STANDARD_VTABLE(E) \ @@ -172,7 +215,7 @@ struct normal_encoding { E ## isInvalid3, \ E ## isInvalid4 -static int checkCharRefNumber(int); +static int FASTCALL checkCharRefNumber(int); #include "xmltok_impl.h" #include "ascii.h" @@ -193,22 +236,22 @@ static int checkCharRefNumber(int); (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE -static -int sb_byteType(const ENCODING *enc, const char *p) +static int PTRFASTCALL +sb_byteType(const ENCODING *enc, const char *p) { return SB_BYTE_TYPE(enc, p); } #define BYTE_TYPE(enc, p) \ - (((const struct normal_encoding *)(enc))->byteType(enc, p)) + (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) #else #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) #endif #ifdef XML_MIN_SIZE #define BYTE_TO_ASCII(enc, p) \ - (((const struct normal_encoding *)(enc))->byteToAscii(enc, p)) -static -int sb_byteToAscii(const ENCODING *enc, const char *p) + (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) +static int PTRFASTCALL +sb_byteToAscii(const ENCODING *enc, const char *p) { return *p; } @@ -217,17 +260,17 @@ int sb_byteToAscii(const ENCODING *enc, const char *p) #endif #define IS_NAME_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isName ## n(enc, p)) + (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p)) #define IS_NMSTRT_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p)) + (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p)) #define IS_INVALID_CHAR(enc, p, n) \ - (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p)) + (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p)) #ifdef XML_MIN_SIZE #define IS_NAME_CHAR_MINBPC(enc, p) \ - (((const struct normal_encoding *)(enc))->isNameMin(enc, p)) + (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) #define IS_NMSTRT_CHAR_MINBPC(enc, p) \ - (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p)) + (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) #else #define IS_NAME_CHAR_MINBPC(enc, p) (0) #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) @@ -235,9 +278,9 @@ int sb_byteToAscii(const ENCODING *enc, const char *p) #ifdef XML_MIN_SIZE #define CHAR_MATCHES(enc, p, c) \ - (((const struct normal_encoding *)(enc))->charMatches(enc, p, c)) -static -int sb_charMatches(const ENCODING *enc, const char *p, int c) + (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) +static int PTRCALL +sb_charMatches(const ENCODING *enc, const char *p, int c) { return *p == c; } @@ -266,10 +309,10 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval4 = 0xf0 }; -static -void utf8_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +utf8_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { char *to; const char *from; @@ -277,7 +320,7 @@ void utf8_toUtf8(const ENCODING *enc, /* Avoid copying partial characters. */ for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) - break; + break; } for (to = *toP, from = *fromP; from != fromLim; from++, to++) *to = *from; @@ -285,34 +328,36 @@ void utf8_toUtf8(const ENCODING *enc, *toP = to; } -static -void utf8_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) +static void PTRCALL +utf8_toUtf16(const ENCODING *enc, + const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { unsigned short *to = *toP; const char *from = *fromP; while (from != fromLim && to != toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: - *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); + *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: - *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); + *to++ = (unsigned short)(((from[0] & 0xf) << 12) + | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); from += 3; break; case BT_LEAD4: { - unsigned long n; - if (to + 1 == toLim) - break; - n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); - n -= 0x10000; - to[0] = (unsigned short)((n >> 10) | 0xD800); - to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); - to += 2; - from += 4; + unsigned long n; + if (to + 1 == toLim) + goto after; + n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) + | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); + n -= 0x10000; + to[0] = (unsigned short)((n >> 10) | 0xD800); + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); + to += 2; + from += 4; } break; default: @@ -320,6 +365,7 @@ void utf8_toUtf16(const ENCODING *enc, break; } } +after: *fromP = from; *toP = to; } @@ -370,10 +416,10 @@ static const struct normal_encoding internal_utf8_encoding = { STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; -static -void latin1_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +latin1_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { for (;;) { unsigned char c; @@ -382,23 +428,23 @@ void latin1_toUtf8(const ENCODING *enc, c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; - *(*toP)++ = ((c >> 6) | UTF8_cval2); - *(*toP)++ = ((c & 0x3f) | 0x80); + break; + *(*toP)++ = (char)((c >> 6) | UTF8_cval2); + *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) - break; + break; *(*toP)++ = *(*fromP)++; } } } -static -void latin1_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) +static void PTRCALL +latin1_toUtf16(const ENCODING *enc, + const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { while (*fromP != fromLim && *toP != toLim) *(*toP)++ = (unsigned char)*(*fromP)++; @@ -428,10 +474,10 @@ static const struct normal_encoding latin1_encoding = { STANDARD_VTABLE(sb_) }; -static -void ascii_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +ascii_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { while (*fromP != fromLim && *toP != toLim) *(*toP)++ = *(*fromP)++; @@ -461,7 +507,8 @@ static const struct normal_encoding ascii_encoding = { STANDARD_VTABLE(sb_) }; -static int unicode_byte_type(char hi, char lo) +static int PTRFASTCALL +unicode_byte_type(char hi, char lo) { switch ((unsigned char)hi) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: @@ -480,10 +527,10 @@ static int unicode_byte_type(char hi, char lo) } #define DEFINE_UTF16_TO_UTF8(E) \ -static \ -void E ## toUtf8(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - char **toP, const char *toLim) \ +static void PTRCALL \ +E ## toUtf8(const ENCODING *enc, \ + const char **fromP, const char *fromLim, \ + char **toP, const char *toLim) \ { \ const char *from; \ for (from = *fromP; from != fromLim; from += 2) { \ @@ -496,7 +543,7 @@ void E ## toUtf8(const ENCODING *enc, \ if (lo < 0x80) { \ if (*toP == toLim) { \ *fromP = from; \ - return; \ + return; \ } \ *(*toP)++ = lo; \ break; \ @@ -506,7 +553,7 @@ void E ## toUtf8(const ENCODING *enc, \ case 0x4: case 0x5: case 0x6: case 0x7: \ if (toLim - *toP < 2) { \ *fromP = from; \ - return; \ + return; \ } \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ @@ -514,7 +561,7 @@ void E ## toUtf8(const ENCODING *enc, \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ - return; \ + return; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ @@ -523,8 +570,8 @@ void E ## toUtf8(const ENCODING *enc, \ break; \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ if (toLim - *toP < 4) { \ - *fromP = from; \ - return; \ + *fromP = from; \ + return; \ } \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ @@ -532,9 +579,9 @@ void E ## toUtf8(const ENCODING *enc, \ from += 2; \ lo2 = GET_LO(from); \ *(*toP)++ = (((lo & 0x3) << 4) \ - | ((GET_HI(from) & 0x3) << 2) \ - | (lo2 >> 6) \ - | 0x80); \ + | ((GET_HI(from) & 0x3) << 2) \ + | (lo2 >> 6) \ + | 0x80); \ *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ break; \ } \ @@ -543,10 +590,10 @@ void E ## toUtf8(const ENCODING *enc, \ } #define DEFINE_UTF16_TO_UTF16(E) \ -static \ -void E ## toUtf16(const ENCODING *enc, \ - const char **fromP, const char *fromLim, \ - unsigned short **toP, const unsigned short *toLim) \ +static void PTRCALL \ +E ## toUtf16(const ENCODING *enc, \ + const char **fromP, const char *fromLim, \ + unsigned short **toP, const unsigned short *toLim) \ { \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ @@ -593,32 +640,32 @@ DEFINE_UTF16_TO_UTF16(big2_) #ifdef XML_MIN_SIZE -static -int little2_byteType(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_byteType(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TYPE(enc, p); } -static -int little2_byteToAscii(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_byteToAscii(const ENCODING *enc, const char *p) { return LITTLE2_BYTE_TO_ASCII(enc, p); } -static -int little2_charMatches(const ENCODING *enc, const char *p, int c) +static int PTRCALL +little2_charMatches(const ENCODING *enc, const char *p, int c) { return LITTLE2_CHAR_MATCHES(enc, p, c); } -static -int little2_isNameMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_isNameMin(const ENCODING *enc, const char *p) { return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p); } -static -int little2_isNmstrtMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +little2_isNmstrtMin(const ENCODING *enc, const char *p) { return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p); } @@ -633,7 +680,7 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p) #define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) +#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c) #define IS_NAME_CHAR(enc, p, n) 0 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) @@ -656,9 +703,9 @@ int little2_isNmstrtMin(const ENCODING *enc, const char *p) #ifdef XML_NS -static const struct normal_encoding little2_encoding_ns = { +static const struct normal_encoding little2_encoding_ns = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 12 +#if BYTEORDER == 1234 1 #else 0 @@ -673,9 +720,9 @@ static const struct normal_encoding little2_encoding_ns = { #endif -static const struct normal_encoding little2_encoding = { +static const struct normal_encoding little2_encoding = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 12 +#if BYTEORDER == 1234 1 #else 0 @@ -690,11 +737,11 @@ static const struct normal_encoding little2_encoding = { STANDARD_VTABLE(little2_) }; -#if XML_BYTE_ORDER != 21 +#if BYTEORDER != 4321 #ifdef XML_NS -static const struct normal_encoding internal_little2_encoding_ns = { +static const struct normal_encoding internal_little2_encoding_ns = { { VTABLE, 2, 0, 1 }, { #include "iasciitab.h" @@ -705,7 +752,7 @@ static const struct normal_encoding internal_little2_encoding_ns = { #endif -static const struct normal_encoding internal_little2_encoding = { +static const struct normal_encoding internal_little2_encoding = { { VTABLE, 2, 0, 1 }, { #define BT_COLON BT_NMSTRT @@ -732,32 +779,32 @@ static const struct normal_encoding internal_little2_encoding = { #ifdef XML_MIN_SIZE -static -int big2_byteType(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_byteType(const ENCODING *enc, const char *p) { return BIG2_BYTE_TYPE(enc, p); } -static -int big2_byteToAscii(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_byteToAscii(const ENCODING *enc, const char *p) { return BIG2_BYTE_TO_ASCII(enc, p); } -static -int big2_charMatches(const ENCODING *enc, const char *p, int c) +static int PTRCALL +big2_charMatches(const ENCODING *enc, const char *p, int c) { return BIG2_CHAR_MATCHES(enc, p, c); } -static -int big2_isNameMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_isNameMin(const ENCODING *enc, const char *p) { return BIG2_IS_NAME_CHAR_MINBPC(enc, p); } -static -int big2_isNmstrtMin(const ENCODING *enc, const char *p) +static int PTRFASTCALL +big2_isNmstrtMin(const ENCODING *enc, const char *p) { return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p); } @@ -772,7 +819,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p) #define MINBPC(enc) 2 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) -#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) +#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c) #define IS_NAME_CHAR(enc, p, n) 0 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p) @@ -797,7 +844,7 @@ int big2_isNmstrtMin(const ENCODING *enc, const char *p) static const struct normal_encoding big2_encoding_ns = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 21 +#if BYTEORDER == 4321 1 #else 0 @@ -814,7 +861,7 @@ static const struct normal_encoding big2_encoding_ns = { static const struct normal_encoding big2_encoding = { { VTABLE, 2, 0, -#if XML_BYTE_ORDER == 21 +#if BYTEORDER == 4321 1 #else 0 @@ -829,7 +876,7 @@ static const struct normal_encoding big2_encoding = { STANDARD_VTABLE(big2_) }; -#if XML_BYTE_ORDER != 12 +#if BYTEORDER != 1234 #ifdef XML_NS @@ -859,8 +906,8 @@ static const struct normal_encoding internal_big2_encoding = { #undef PREFIX -static -int streqci(const char *s1, const char *s2) +static int FASTCALL +streqci(const char *s1, const char *s2) { for (;;) { char c1 = *s1++; @@ -877,15 +924,15 @@ int streqci(const char *s1, const char *s2) return 1; } -static -void initUpdatePosition(const ENCODING *enc, const char *ptr, - const char *end, POSITION *pos) +static void PTRCALL +initUpdatePosition(const ENCODING *enc, const char *ptr, + const char *end, POSITION *pos) { normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); } -static -int toAscii(const ENCODING *enc, const char *ptr, const char *end) +static int +toAscii(const ENCODING *enc, const char *ptr, const char *end) { char buf[1]; char *p = buf; @@ -896,34 +943,35 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end) return buf[0]; } -static -int isSpace(int c) +static int FASTCALL +isSpace(int c) { switch (c) { case 0x20: case 0xD: case 0xA: - case 0x9: + case 0x9: return 1; } return 0; } -/* Return 1 if there's just optional white space -or there's an S followed by name=val. */ -static -int parsePseudoAttribute(const ENCODING *enc, - const char *ptr, - const char *end, - const char **namePtr, - const char **nameEndPtr, - const char **valPtr, - const char **nextTokPtr) +/* Return 1 if there's just optional white space or there's an S + followed by name=val. +*/ +static int +parsePseudoAttribute(const ENCODING *enc, + const char *ptr, + const char *end, + const char **namePtr, + const char **nameEndPtr, + const char **valPtr, + const char **nextTokPtr) { int c; char open; if (ptr == end) { - *namePtr = 0; + *namePtr = NULL; return 1; } if (!isSpace(toAscii(enc, ptr, end))) { @@ -934,7 +982,7 @@ int parsePseudoAttribute(const ENCODING *enc, ptr += enc->minBytesPerChar; } while (isSpace(toAscii(enc, ptr, end))); if (ptr == end) { - *namePtr = 0; + *namePtr = NULL; return 1; } *namePtr = ptr; @@ -951,11 +999,11 @@ int parsePseudoAttribute(const ENCODING *enc, if (isSpace(c)) { *nameEndPtr = ptr; do { - ptr += enc->minBytesPerChar; + ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); if (c != ASCII_EQUALS) { - *nextTokPtr = ptr; - return 0; + *nextTokPtr = ptr; + return 0; } break; } @@ -975,7 +1023,7 @@ int parsePseudoAttribute(const ENCODING *enc, *nextTokPtr = ptr; return 0; } - open = c; + open = (char)c; ptr += enc->minBytesPerChar; *valPtr = ptr; for (;; ptr += enc->minBytesPerChar) { @@ -983,11 +1031,11 @@ int parsePseudoAttribute(const ENCODING *enc, if (c == open) break; if (!(ASCII_a <= c && c <= ASCII_z) - && !(ASCII_A <= c && c <= ASCII_Z) - && !(ASCII_0 <= c && c <= ASCII_9) - && c != ASCII_PERIOD - && c != ASCII_MINUS - && c != ASCII_UNDERSCORE) { + && !(ASCII_A <= c && c <= ASCII_Z) + && !(ASCII_0 <= c && c <= ASCII_9) + && c != ASCII_PERIOD + && c != ASCII_MINUS + && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1005,7 +1053,8 @@ static const char KW_encoding[] = { }; static const char KW_standalone[] = { - ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0' + ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, + ASCII_n, ASCII_e, '\0' }; static const char KW_yes[] = { @@ -1016,27 +1065,28 @@ static const char KW_no[] = { ASCII_n, ASCII_o, '\0' }; -static -int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, - const char *, - const char *), - int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) +static int +doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, + const char *, + const char *), + int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) { - const char *val = 0; - const char *name = 0; - const char *nameEnd = 0; + const char *val = NULL; + const char *name = NULL; + const char *nameEnd = NULL; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; - if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) { + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) + || !name) { *badPtr = ptr; return 0; } @@ -1057,9 +1107,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, } if (!name) { if (isGeneralTextEntity) { - /* a TextDecl must have an EncodingDecl */ - *badPtr = ptr; - return 0; + /* a TextDecl must have an EncodingDecl */ + *badPtr = ptr; + return 0; } return 1; } @@ -1081,7 +1131,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, if (!name) return 1; } - if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) + || isGeneralTextEntity) { *badPtr = name; return 0; } @@ -1106,8 +1157,8 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, return 1; } -static -int checkCharRefNumber(int result) +static int FASTCALL +checkCharRefNumber(int result) { switch (result >> 8) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: @@ -1125,7 +1176,8 @@ int checkCharRefNumber(int result) return result; } -int XmlUtf8Encode(int c, char *buf) +int FASTCALL +XmlUtf8Encode(int c, char *buf) { enum { /* minN is minimum legal resulting value for N byte sequence */ @@ -1137,42 +1189,43 @@ int XmlUtf8Encode(int c, char *buf) if (c < 0) return 0; if (c < min2) { - buf[0] = (c | UTF8_cval1); + buf[0] = (char)(c | UTF8_cval1); return 1; } if (c < min3) { - buf[0] = ((c >> 6) | UTF8_cval2); - buf[1] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 6) | UTF8_cval2); + buf[1] = (char)((c & 0x3f) | 0x80); return 2; } if (c < min4) { - buf[0] = ((c >> 12) | UTF8_cval3); - buf[1] = (((c >> 6) & 0x3f) | 0x80); - buf[2] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 12) | UTF8_cval3); + buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); + buf[2] = (char)((c & 0x3f) | 0x80); return 3; } if (c < 0x110000) { - buf[0] = ((c >> 18) | UTF8_cval4); - buf[1] = (((c >> 12) & 0x3f) | 0x80); - buf[2] = (((c >> 6) & 0x3f) | 0x80); - buf[3] = ((c & 0x3f) | 0x80); + buf[0] = (char)((c >> 18) | UTF8_cval4); + buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); + buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); + buf[3] = (char)((c & 0x3f) | 0x80); return 4; } return 0; } -int XmlUtf16Encode(int charNum, unsigned short *buf) +int FASTCALL +XmlUtf16Encode(int charNum, unsigned short *buf) { if (charNum < 0) return 0; if (charNum < 0x10000) { - buf[0] = charNum; + buf[0] = (unsigned short)charNum; return 1; } if (charNum < 0x110000) { charNum -= 0x10000; - buf[0] = (charNum >> 10) + 0xD800; - buf[1] = (charNum & 0x3FF) + 0xDC00; + buf[0] = (unsigned short)((charNum >> 10) + 0xD800); + buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); return 2; } return 0; @@ -1186,65 +1239,68 @@ struct unknown_encoding { char utf8[256][4]; }; -int XmlSizeOfUnknownEncoding(void) +#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) + +int +XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } -static -int unknown_isName(const ENCODING *enc, const char *p) +static int PTRFASTCALL +unknown_isName(const ENCODING *enc, const char *p) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); } -static -int unknown_isNmstrt(const ENCODING *enc, const char *p) +static int PTRFASTCALL +unknown_isNmstrt(const ENCODING *enc, const char *p) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); } -static -int unknown_isInvalid(const ENCODING *enc, const char *p) +static int PTRFASTCALL +unknown_isInvalid(const ENCODING *enc, const char *p) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, p); + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); + int c = uenc->convert(uenc->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static -void unknown_toUtf8(const ENCODING *enc, - const char **fromP, const char *fromLim, - char **toP, const char *toLim) +static void PTRCALL +unknown_toUtf8(const ENCODING *enc, + const char **fromP, const char *fromLim, + char **toP, const char *toLim) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) break; - utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; + utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { - int c = ((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); + int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + break; utf8 = buf; - *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - - (BT_LEAD2 - 2); + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] + - (BT_LEAD2 - 2)); } else { if (n > toLim - *toP) - break; + break; (*fromP)++; } do { @@ -1253,19 +1309,19 @@ void unknown_toUtf8(const ENCODING *enc, } } -static -void unknown_toUtf16(const ENCODING *enc, - const char **fromP, const char *fromLim, - unsigned short **toP, const unsigned short *toLim) +static void PTRCALL +unknown_toUtf16(const ENCODING *enc, + const char **fromP, const char *fromLim, + unsigned short **toP, const unsigned short *toLim) { + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); while (*fromP != fromLim && *toP != toLim) { - unsigned short c - = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; + unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { - c = (unsigned short)((const struct unknown_encoding *)enc) - ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); - *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - - (BT_LEAD2 - 2); + c = (unsigned short) + uenc->convert(uenc->userData, *fromP); + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] + - (BT_LEAD2 - 2)); } else (*fromP)++; @@ -1275,18 +1331,18 @@ void unknown_toUtf16(const ENCODING *enc, ENCODING * XmlInitUnknownEncoding(void *mem, - int *table, - int (*convert)(void *userData, const char *p), - void *userData) + int *table, + CONVERTER convert, + void *userData) { int i; - struct unknown_encoding *e = mem; + struct unknown_encoding *e = (struct unknown_encoding *)mem; for (i = 0; i < (int)sizeof(struct normal_encoding); i++) ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER && latin1_encoding.type[i] != BT_NONXML - && table[i] != i) + && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; @@ -1299,20 +1355,20 @@ XmlInitUnknownEncoding(void *mem, } else if (c < 0) { if (c < -4) - return 0; - e->normal.type[i] = BT_LEAD2 - (c + 2); + return 0; + e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER - && latin1_encoding.type[c] != BT_NONXML - && c != i) - return 0; + && latin1_encoding.type[c] != BT_NONXML + && c != i) + return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; - e->utf16[i] = c == 0 ? 0xFFFF : c; + e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; @@ -1323,15 +1379,15 @@ XmlInitUnknownEncoding(void *mem, } else { if (c > 0xFFFF) - return 0; + return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) - e->normal.type[i] = BT_NMSTRT; + e->normal.type[i] = BT_NMSTRT; else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) - e->normal.type[i] = BT_NAME; + e->normal.type[i] = BT_NAME; else - e->normal.type[i] = BT_OTHER; + e->normal.type[i] = BT_OTHER; e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); - e->utf16[i] = c; + e->utf16[i] = (unsigned short)c; } } e->userData = userData; @@ -1367,26 +1423,30 @@ enum { }; static const char KW_ISO_8859_1[] = { - ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0' + ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, + ASCII_MINUS, ASCII_1, '\0' }; static const char KW_US_ASCII[] = { - ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0' + ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, + '\0' }; -static const char KW_UTF_8[] = { +static const char KW_UTF_8[] = { ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' }; -static const char KW_UTF_16[] = { +static const char KW_UTF_16[] = { ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' }; static const char KW_UTF_16BE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0' + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, + '\0' }; static const char KW_UTF_16LE[] = { - ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0' + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, + '\0' }; -static -int getEncodingIndex(const char *name) +static int FASTCALL +getEncodingIndex(const char *name) { static const char *encodingNames[] = { KW_ISO_8859_1, @@ -1397,7 +1457,7 @@ int getEncodingIndex(const char *name) KW_UTF_16LE, }; int i; - if (name == 0) + if (name == NULL) return NO_ENC; for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) @@ -1405,27 +1465,28 @@ int getEncodingIndex(const char *name) return UNKNOWN_ENC; } -/* For binary compatibility, we store the index of the encoding specified -at initialization in the isUtf16 member. */ +/* For binary compatibility, we store the index of the encoding + specified at initialization in the isUtf16 member. +*/ #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) -/* This is what detects the encoding. -encodingTable maps from encoding indices to encodings; -INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding; -state is XML_CONTENT_STATE if we're parsing an external text entity, -and XML_PROLOG_STATE otherwise. +/* This is what detects the encoding. encodingTable maps from + encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of + the external (protocol) specified encoding; state is + XML_CONTENT_STATE if we're parsing an external text entity, and + XML_PROLOG_STATE otherwise. */ -static -int initScan(const ENCODING **encodingTable, - const INIT_ENCODING *enc, - int state, - const char *ptr, - const char *end, - const char **nextTokPtr) +static int +initScan(const ENCODING **encodingTable, + const INIT_ENCODING *enc, + int state, + const char *ptr, + const char *end, + const char **nextTokPtr) { const ENCODING **encPtr; @@ -1452,8 +1513,8 @@ int initScan(const ENCODING **encodingTable, case 0xFF: case 0xEF: /* possibly first byte of UTF-8 BOM */ if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) - break; + && state == XML_CONTENT_STATE) + break; /* fall through */ case 0x00: case 0x3C: @@ -1464,23 +1525,23 @@ int initScan(const ENCODING **encodingTable, switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { case 0xFEFF: if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) - break; + && state == XML_CONTENT_STATE) + break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16BE_ENC]; return XML_TOK_BOM; /* 00 3C is handled in the default case */ case 0x3C00: if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC - || INIT_ENC_INDEX(enc) == UTF_16_ENC) - && state == XML_CONTENT_STATE) - break; + || INIT_ENC_INDEX(enc) == UTF_16_ENC) + && state == XML_CONTENT_STATE) + break; *encPtr = encodingTable[UTF_16LE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFFFE: if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC - && state == XML_CONTENT_STATE) - break; + && state == XML_CONTENT_STATE) + break; *nextTokPtr = ptr + 2; *encPtr = encodingTable[UTF_16LE_ENC]; return XML_TOK_BOM; @@ -1489,45 +1550,50 @@ int initScan(const ENCODING **encodingTable, /* If there's an explicitly specified (external) encoding of ISO-8859-1 or some flavour of UTF-16 and this is an external text entity, - don't look for the BOM, - because it might be a legal data. */ + don't look for the BOM, + because it might be a legal data. + */ if (state == XML_CONTENT_STATE) { - int e = INIT_ENC_INDEX(enc); - if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC) - break; + int e = INIT_ENC_INDEX(enc); + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC + || e == UTF_16LE_ENC || e == UTF_16_ENC) + break; } if (ptr + 2 == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if ((unsigned char)ptr[2] == 0xBF) { - *nextTokPtr = ptr + 3; - *encPtr = encodingTable[UTF_8_ENC]; - return XML_TOK_BOM; + *nextTokPtr = ptr + 3; + *encPtr = encodingTable[UTF_8_ENC]; + return XML_TOK_BOM; } break; default: if (ptr[0] == '\0') { - /* 0 isn't a legal data character. Furthermore a document entity can only - start with ASCII characters. So the only way this can fail to be big-endian - UTF-16 if it it's an external parsed general entity that's labelled as - UTF-16LE. */ - if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) - break; - *encPtr = encodingTable[UTF_16BE_ENC]; - return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + /* 0 isn't a legal data character. Furthermore a document + entity can only start with ASCII characters. So the only + way this can fail to be big-endian UTF-16 if it it's an + external parsed general entity that's labelled as + UTF-16LE. + */ + if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) + break; + *encPtr = encodingTable[UTF_16BE_ENC]; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } else if (ptr[1] == '\0') { - /* We could recover here in the case: - - parsing an external entity - - second byte is 0 - - no externally specified encoding - - no encoding declaration - by assuming UTF-16LE. But we don't, because this would mean when - presented just with a single byte, we couldn't reliably determine - whether we needed further bytes. */ - if (state == XML_CONTENT_STATE) - break; - *encPtr = encodingTable[UTF_16LE_ENC]; - return XmlTok(*encPtr, state, ptr, end, nextTokPtr); + /* We could recover here in the case: + - parsing an external entity + - second byte is 0 + - no externally specified encoding + - no encoding declaration + by assuming UTF-16LE. But we don't, because this would mean when + presented just with a single byte, we couldn't reliably determine + whether we needed further bytes. + */ + if (state == XML_CONTENT_STATE) + break; + *encPtr = encodingTable[UTF_16LE_ENC]; + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } break; } @@ -1555,9 +1621,9 @@ int initScan(const ENCODING **encodingTable, ENCODING * XmlInitUnknownEncodingNS(void *mem, - int *table, - int (*convert)(void *userData, const char *p), - void *userData) + int *table, + CONVERTER convert, + void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h index 8b02324..3d776be 100644 --- a/Modules/expat/xmltok.h +++ b/Modules/expat/xmltok.h @@ -1,6 +1,5 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifndef XmlTok_INCLUDED @@ -11,19 +10,21 @@ extern "C" { #endif /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of - illegal ]]> sequence */ -/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; - might be part of CRLF sequence */ -#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ +#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be + start of illegal ]]> sequence */ +/* The following tokens may be returned by both XmlPrologTok and + XmlContentTok. +*/ +#define XML_TOK_NONE -4 /* The string to be scanned is empty */ +#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; + might be part of CRLF sequence */ +#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +#define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also - returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */ - + returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. +*/ #define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ @@ -33,22 +34,24 @@ extern "C" { #define XML_TOK_DATA_NEWLINE 7 #define XML_TOK_CDATA_SECT_OPEN 8 #define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +#define XML_TOK_CHAR_REF 10 /* numeric character reference */ -/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ -#define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +/* The following tokens may be returned by both XmlPrologTok and + XmlContentTok. +*/ +#define XML_TOK_PI 11 /* processing instruction */ +#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +#define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ #define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* <!foo */ -#define XML_TOK_DECL_CLOSE 17 /* > */ +#define XML_TOK_DECL_OPEN 16 /* <!foo */ +#define XML_TOK_DECL_CLOSE 17 /* > */ #define XML_TOK_NAME 18 #define XML_TOK_NMTOKEN 19 -#define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ +#define XML_TOK_POUND_NAME 20 /* #name */ +#define XML_TOK_OR 21 /* | */ #define XML_TOK_PERCENT 22 #define XML_TOK_OPEN_PAREN 23 #define XML_TOK_CLOSE_PAREN 24 @@ -59,14 +62,14 @@ extern "C" { #define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* <![ */ -#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */ -#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +#define XML_TOK_NAME_QUESTION 30 /* name? */ +#define XML_TOK_NAME_ASTERISK 31 /* name* */ +#define XML_TOK_NAME_PLUS 32 /* name+ */ +#define XML_TOK_COND_SECT_OPEN 33 /* <![ */ +#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */ +#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ @@ -75,8 +78,9 @@ extern "C" { /* The following token is returned only by XmlCdataSectionTok */ #define XML_TOK_CDATA_SECT_CLOSE 40 -/* With namespace processing this is returned by XmlPrologTok - for a name with a colon. */ +/* With namespace processing this is returned by XmlPrologTok for a + name with a colon. +*/ #define XML_TOK_PREFIXED_NAME 41 #ifdef XML_DTD @@ -121,64 +125,73 @@ typedef struct { struct encoding; typedef struct encoding ENCODING; +typedef int (PTRCALL *SCANNER)(const ENCODING *, + const char *, + const char *, + const char **); + struct encoding { - int (*scanners[XML_N_STATES])(const ENCODING *, - const char *, - const char *, - const char **); - int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *, - const char *, - const char *, - const char **); - int (*sameName)(const ENCODING *, - const char *, const char *); - int (*nameMatchesAscii)(const ENCODING *, - const char *, const char *, const char *); - int (*nameLength)(const ENCODING *, const char *); - const char *(*skipS)(const ENCODING *, const char *); - int (*getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts); - int (*charRefNumber)(const ENCODING *enc, const char *ptr); - int (*predefinedEntityName)(const ENCODING *, const char *, const char *); - void (*updatePosition)(const ENCODING *, - const char *ptr, - const char *end, - POSITION *); - int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr); - void (*utf8Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - char **toP, - const char *toLim); - void (*utf16Convert)(const ENCODING *enc, - const char **fromP, - const char *fromLim, - unsigned short **toP, - const unsigned short *toLim); + SCANNER scanners[XML_N_STATES]; + SCANNER literalScanners[XML_N_LITERAL_TYPES]; + int (PTRCALL *sameName)(const ENCODING *, + const char *, + const char *); + int (PTRCALL *nameMatchesAscii)(const ENCODING *, + const char *, + const char *, + const char *); + int (PTRFASTCALL *nameLength)(const ENCODING *, const char *); + const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); + int (PTRCALL *getAtts)(const ENCODING *enc, + const char *ptr, + int attsMax, + ATTRIBUTE *atts); + int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); + int (PTRCALL *predefinedEntityName)(const ENCODING *, + const char *, + const char *); + void (PTRCALL *updatePosition)(const ENCODING *, + const char *ptr, + const char *end, + POSITION *); + int (PTRCALL *isPublicId)(const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr); + void (PTRCALL *utf8Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + char **toP, + const char *toLim); + void (PTRCALL *utf16Convert)(const ENCODING *enc, + const char **fromP, + const char *fromLim, + unsigned short **toP, + const unsigned short *toLim); int minBytesPerChar; char isUtf8; char isUtf16; }; -/* -Scan the string starting at ptr until the end of the next complete token, -but do not scan past eptr. Return an integer giving the type of token. +/* Scan the string starting at ptr until the end of the next complete + token, but do not scan past eptr. Return an integer giving the + type of token. -Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. + Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. -Return XML_TOK_PARTIAL when the string does not contain a complete token; -nextTokPtr will not be set. + Return XML_TOK_PARTIAL when the string does not contain a complete + token; nextTokPtr will not be set. -Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr -will be set to point to the character which made the token invalid. + Return XML_TOK_INVALID when the string does not start a valid + token; nextTokPtr will be set to point to the character which made + the token invalid. -Otherwise the string starts with a valid token; nextTokPtr will be set to point -to the character following the end of that token. + Otherwise the string starts with a valid token; nextTokPtr will be + set to point to the character following the end of that token. -Each data character counts as a single token, but adjacent data characters -may be returned together. Similarly for characters in the prolog outside -literals, comments and processing instructions. + Each data character counts as a single token, but adjacent data + characters may be returned together. Similarly for characters in + the prolog outside literals, comments and processing instructions. */ @@ -201,9 +214,9 @@ literals, comments and processing instructions. #endif /* XML_DTD */ -/* This is used for performing a 2nd-level tokenization on -the content of a literal that has already been returned by XmlTok. */ - +/* This is used for performing a 2nd-level tokenization on the content + of a literal that has already been returned by XmlTok. +*/ #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) @@ -250,48 +263,51 @@ typedef struct { const ENCODING **encPtr; } INIT_ENCODING; -int XmlParseXmlDecl(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); - -int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING *XmlGetUtf8InternalEncoding(void); -const ENCODING *XmlGetUtf16InternalEncoding(void); -int XmlUtf8Encode(int charNumber, char *buf); -int XmlUtf16Encode(int charNumber, unsigned short *buf); - -int XmlSizeOfUnknownEncoding(void); -ENCODING * +int XmlParseXmlDecl(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, + int *standalonePtr); + +int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncoding(void); +const ENCODING *XmlGetUtf16InternalEncoding(void); +int FASTCALL XmlUtf8Encode(int charNumber, char *buf); +int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); +int XmlSizeOfUnknownEncoding(void); + +typedef int (*CONVERTER)(void *userData, const char *p); + +ENCODING * XmlInitUnknownEncoding(void *mem, - int *table, - int (*conv)(void *userData, const char *p), - void *userData); - -int XmlParseXmlDeclNS(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingNamePtr, - const ENCODING **namedEncodingPtr, - int *standalonePtr); -int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING *XmlGetUtf8InternalEncodingNS(void); -const ENCODING *XmlGetUtf16InternalEncodingNS(void); -ENCODING * + int *table, + CONVERTER convert, + void *userData); + +int XmlParseXmlDeclNS(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingNamePtr, + const ENCODING **namedEncodingPtr, + int *standalonePtr); + +int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncodingNS(void); +const ENCODING *XmlGetUtf16InternalEncodingNS(void); +ENCODING * XmlInitUnknownEncodingNS(void *mem, - int *table, - int (*conv)(void *userData, const char *p), - void *userData); + int *table, + CONVERTER convert, + void *userData); #ifdef __cplusplus } #endif diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c index 36d2065..84a3267 100644 --- a/Modules/expat/xmltok_impl.c +++ b/Modules/expat/xmltok_impl.c @@ -1,6 +1,5 @@ -/* -Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd -See the file COPYING for copying permission. +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd + See the file COPYING for copying permission. */ #ifndef IS_INVALID_CHAR @@ -10,7 +9,7 @@ See the file COPYING for copying permission. #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ + return XML_TOK_PARTIAL_CHAR; \ if (IS_INVALID_CHAR(enc, ptr, n)) { \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; \ @@ -87,9 +86,9 @@ See the file COPYING for copying permission. /* ptr points to character following "<!-" */ -static -int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanComment)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr != end) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { @@ -101,22 +100,22 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_COMMENT; - } - break; + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_COMMENT; + } + break; default: - ptr += MINBPC(enc); - break; + ptr += MINBPC(enc); + break; } } } @@ -125,9 +124,9 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "<!" */ -static -int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -149,12 +148,12 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } /* fall through */ case BT_S: case BT_CR: case BT_LF: @@ -172,8 +171,9 @@ int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) +static int PTRCALL +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, + const char *end, int *tokPtr) { int upper = 0; *tokPtr = XML_TOK_PI; @@ -216,9 +216,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "<?" */ -static -int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanPi)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { int tok; const char *target = ptr; @@ -235,39 +235,39 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } ptr += MINBPC(enc); while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) - case BT_QUEST: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr + MINBPC(enc); - return tok; - } - break; - default: - ptr += MINBPC(enc); - break; - } + case BT_QUEST: + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr + MINBPC(enc); + return tok; + } + break; + default: + ptr += MINBPC(enc); + break; + } } return XML_TOK_PARTIAL; case BT_QUEST: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr + MINBPC(enc); - return tok; + *nextTokPtr = ptr + MINBPC(enc); + return tok; } /* fall through */ default: @@ -278,12 +278,12 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } - -static -int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { - static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; + static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, + ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) @@ -298,9 +298,9 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e return XML_TOK_CDATA_SECT_OPEN; } -static -int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_NONE; @@ -309,7 +309,7 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; end = ptr + n; } } @@ -350,8 +350,8 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; @@ -376,9 +376,9 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en /* ptr points to character following "</" */ -static -int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -393,21 +393,22 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - break; - case BT_GT: - *nextTokPtr = ptr + MINBPC(enc); + switch (BYTE_TYPE(enc, ptr)) { + case BT_S: case BT_CR: case BT_LF: + break; + case BT_GT: + *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_END_TAG; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } } return XML_TOK_PARTIAL; #ifdef XML_NS case BT_COLON: - /* no need to check qname syntax here, since end-tag must match exactly */ + /* no need to check qname syntax here, + since end-tag must match exactly */ ptr += MINBPC(enc); break; #endif @@ -424,9 +425,9 @@ int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "&#X" */ -static -int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { @@ -441,13 +442,13 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: - break; + break; case BT_SEMI: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CHAR_REF; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_CHAR_REF; default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } } } @@ -456,9 +457,9 @@ int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end /* ptr points to character following "&#" */ -static -int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { if (ptr != end) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) @@ -473,13 +474,13 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: - break; + break; case BT_SEMI: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CHAR_REF; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_CHAR_REF; default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } } } @@ -488,9 +489,9 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "&" */ -static -int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -518,9 +519,9 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following first character of attribute name */ -static -int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { #ifdef XML_NS int hadColon = 0; @@ -531,142 +532,141 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS case BT_COLON: if (hadColon) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } hadColon = 1; ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } break; #endif case BT_S: case BT_CR: case BT_LF: for (;;) { - int t; + int t; - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - t = BYTE_TYPE(enc, ptr); - if (t == BT_EQUALS) - break; - switch (t) { - case BT_S: - case BT_LF: - case BT_CR: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + t = BYTE_TYPE(enc, ptr); + if (t == BT_EQUALS) + break; + switch (t) { + case BT_S: + case BT_LF: + case BT_CR: + break; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } } /* fall through */ case BT_EQUALS: { - int open; + int open; #ifdef XML_NS - hadColon = 0; + hadColon = 0; #endif - for (;;) { - - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - open = BYTE_TYPE(enc, ptr); - if (open == BT_QUOT || open == BT_APOS) - break; - switch (open) { - case BT_S: - case BT_LF: - case BT_CR: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - ptr += MINBPC(enc); - /* in attribute value */ - for (;;) { - int t; - if (ptr == end) - return XML_TOK_PARTIAL; - t = BYTE_TYPE(enc, ptr); - if (t == open) - break; - switch (t) { - INVALID_CASES(ptr, nextTokPtr) - case BT_AMP: - { - int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); - if (tok <= 0) { - if (tok == XML_TOK_INVALID) - *nextTokPtr = ptr; - return tok; - } - break; - } - case BT_LT: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - default: - ptr += MINBPC(enc); - break; - } - } - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_S: - case BT_CR: - case BT_LF: - break; - case BT_SOL: - goto sol; - case BT_GT: - goto gt; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - /* ptr points to closing quote */ - for (;;) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: - continue; - case BT_GT: + for (;;) { + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + open = BYTE_TYPE(enc, ptr); + if (open == BT_QUOT || open == BT_APOS) + break; + switch (open) { + case BT_S: + case BT_LF: + case BT_CR: + break; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + } + ptr += MINBPC(enc); + /* in attribute value */ + for (;;) { + int t; + if (ptr == end) + return XML_TOK_PARTIAL; + t = BYTE_TYPE(enc, ptr); + if (t == open) + break; + switch (t) { + INVALID_CASES(ptr, nextTokPtr) + case BT_AMP: + { + int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); + if (tok <= 0) { + if (tok == XML_TOK_INVALID) + *nextTokPtr = ptr; + return tok; + } + break; + } + case BT_LT: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + default: + ptr += MINBPC(enc); + break; + } + } + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + switch (BYTE_TYPE(enc, ptr)) { + case BT_S: + case BT_CR: + case BT_LF: + break; + case BT_SOL: + goto sol; + case BT_GT: + goto gt; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + /* ptr points to closing quote */ + for (;;) { + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_S: case BT_CR: case BT_LF: + continue; + case BT_GT: gt: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_START_TAG_WITH_ATTS; - case BT_SOL: + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_START_TAG_WITH_ATTS; + case BT_SOL: sol: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - break; - } - break; + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_PARTIAL; + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + break; + } + break; } default: *nextTokPtr = ptr; @@ -678,9 +678,9 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "<" */ -static -int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { #ifdef XML_NS int hadColon; @@ -696,7 +696,8 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LSQB: - return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), + end, nextTokPtr); } *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -718,13 +719,13 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS case BT_COLON: if (hadColon) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } hadColon = 1; ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -736,23 +737,23 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: - goto gt; - case BT_SOL: - goto sol; - case BT_S: case BT_CR: case BT_LF: - ptr += MINBPC(enc); - continue; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); - } - return XML_TOK_PARTIAL; + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_GT: + goto gt; + case BT_SOL: + goto sol; + case BT_S: case BT_CR: case BT_LF: + ptr += MINBPC(enc); + continue; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); + } + return XML_TOK_PARTIAL; } case BT_GT: gt: @@ -762,10 +763,10 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, sol: ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_EMPTY_ELEMENT_NO_ATTS; @@ -777,9 +778,9 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_NONE; @@ -788,7 +789,7 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; end = ptr + n; } } @@ -833,8 +834,8 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; @@ -842,18 +843,18 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, #undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { - ptr += MINBPC(enc); - break; - } - if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } + if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { + ptr += MINBPC(enc); + break; + } + if (ptr + 2*MINBPC(enc) != end) { + if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { + ptr += MINBPC(enc); + break; + } + *nextTokPtr = ptr + 2*MINBPC(enc); + return XML_TOK_INVALID; + } } /* fall through */ case BT_AMP: @@ -876,9 +877,9 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to character following "%" */ -static -int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -905,9 +906,9 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_PARTIAL; } -static -int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; @@ -932,10 +933,10 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, return -XML_TOK_POUND_NAME; } -static -int PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(scanLit)(int open, const ENCODING *enc, + const char *ptr, const char *end, + const char **nextTokPtr) { while (ptr != end) { int t = BYTE_TYPE(enc, ptr); @@ -945,16 +946,16 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, case BT_APOS: ptr += MINBPC(enc); if (t != open) - break; + break; if (ptr == end) - return -XML_TOK_LITERAL; + return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: case BT_GT: case BT_PERCNT: case BT_LSQB: - return XML_TOK_LITERAL; + return XML_TOK_LITERAL; default: - return XML_TOK_INVALID; + return XML_TOK_INVALID; } default: ptr += MINBPC(enc); @@ -964,9 +965,9 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, return XML_TOK_PARTIAL; } -static -int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int tok; if (ptr == end) @@ -976,7 +977,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; end = ptr + n; } } @@ -989,44 +990,47 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_NMSTRT: case BT_HEX: case BT_NONASCII: case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; + *nextTokPtr = ptr - MINBPC(enc); + return XML_TOK_INSTANCE_START; } *nextTokPtr = ptr; return XML_TOK_INVALID; } case BT_CR: - if (ptr + MINBPC(enc) == end) + if (ptr + MINBPC(enc) == end) { + *nextTokPtr = end; + /* indicate that this might be part of a CR/LF pair */ return -XML_TOK_PROLOG_S; + } /* fall through */ case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); if (ptr == end) - break; + break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: - break; + break; case BT_CR: - /* don't split CR/LF pair */ - if (ptr + MINBPC(enc) != end) - break; - /* fall through */ + /* don't split CR/LF pair */ + if (ptr + MINBPC(enc) != end) + break; + /* fall through */ default: - *nextTokPtr = ptr; - return XML_TOK_PROLOG_S; + *nextTokPtr = ptr; + return XML_TOK_PROLOG_S; } } *nextTokPtr = ptr; @@ -1045,10 +1049,10 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_COND_SECT_CLOSE; + *nextTokPtr = ptr + 2*MINBPC(enc); + return XML_TOK_COND_SECT_CLOSE; } } *nextTokPtr = ptr; @@ -1147,40 +1151,40 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; - tok = XML_TOK_PREFIXED_NAME; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - default: - tok = XML_TOK_NMTOKEN; - break; - } - break; + if (ptr == end) + return XML_TOK_PARTIAL; + tok = XML_TOK_PREFIXED_NAME; + switch (BYTE_TYPE(enc, ptr)) { + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + default: + tok = XML_TOK_NMTOKEN; + break; + } + break; case XML_TOK_PREFIXED_NAME: - tok = XML_TOK_NMTOKEN; - break; + tok = XML_TOK_NMTOKEN; + break; } break; #endif case BT_PLUS: if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; + *nextTokPtr = ptr; + return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_QUESTION; @@ -1192,9 +1196,9 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return -tok; } -static -int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) @@ -1208,7 +1212,7 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * #undef LEAD_CASE case BT_AMP: if (ptr == start) - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LT: @@ -1217,27 +1221,27 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * return XML_TOK_INVALID; case BT_LF: if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC(enc); + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_S: if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_ATTRIBUTE_VALUE_S; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_ATTRIBUTE_VALUE_S; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; @@ -1250,9 +1254,9 @@ int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char * return XML_TOK_DATA_CHARS; } -static -int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) @@ -1266,33 +1270,33 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end #undef LEAD_CASE case BT_AMP: if (ptr == start) - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { - int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), - end, nextTokPtr); - return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), + end, nextTokPtr); + return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; + ptr += MINBPC(enc); + if (ptr == end) + return XML_TOK_TRAILING_CR; + if (BYTE_TYPE(enc, ptr) == BT_LF) + ptr += MINBPC(enc); + *nextTokPtr = ptr; + return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; @@ -1307,9 +1311,9 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end #ifdef XML_DTD -static -int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, + const char *end, const char **nextTokPtr) { int level = 0; if (MINBPC(enc) > 1) { @@ -1324,30 +1328,30 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e INVALID_CASES(ptr, nextTokPtr) case BT_LT: if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { - ++level; - ptr += MINBPC(enc); - } + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { + ++level; + ptr += MINBPC(enc); + } } break; case BT_RSQB: if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { - ptr += MINBPC(enc); - if (level == 0) { - *nextTokPtr = ptr; - return XML_TOK_IGNORE_SECT; - } - --level; - } + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { + ptr += MINBPC(enc); + if (level == 0) { + *nextTokPtr = ptr; + return XML_TOK_IGNORE_SECT; + } + --level; + } } break; default: @@ -1360,9 +1364,9 @@ int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *e #endif /* XML_DTD */ -static -int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) +static int PTRCALL +PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, + const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); @@ -1392,22 +1396,22 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { - *badPtr = ptr; - return 0; + *badPtr = ptr; + return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) - break; + break; default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ - break; + break; default: - *badPtr = ptr; - return 0; + *badPtr = ptr; + return 0; } break; } @@ -1415,28 +1419,29 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, return 1; } -/* This must only be called for a well-formed start-tag or empty element tag. -Returns the number of attributes. Pointers to the first attsMax attributes -are stored in atts. */ +/* This must only be called for a well-formed start-tag or empty + element tag. Returns the number of attributes. Pointers to the + first attsMax attributes are stored in atts. +*/ -static -int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) +static int PTRCALL +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, + int attsMax, ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; - initialization just to shut up compilers */ + initialization just to shut up compilers */ for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { #define START_NAME \ if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].normalized = 1; \ + } \ + state = inName; \ } #define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; @@ -1450,47 +1455,47 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, #undef START_NAME case BT_QUOT: if (state != inValue) { - if (nAtts < attsMax) - atts[nAtts].valuePtr = ptr + MINBPC(enc); + if (nAtts < attsMax) + atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; - if (nAtts < attsMax) - atts[nAtts].valueEnd = ptr; - nAtts++; + if (nAtts < attsMax) + atts[nAtts].valueEnd = ptr; + nAtts++; } break; case BT_APOS: if (state != inValue) { - if (nAtts < attsMax) - atts[nAtts].valuePtr = ptr + MINBPC(enc); + if (nAtts < attsMax) + atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; - if (nAtts < attsMax) - atts[nAtts].valueEnd = ptr; - nAtts++; + if (nAtts < attsMax) + atts[nAtts].valueEnd = ptr; + nAtts++; } break; case BT_AMP: if (nAtts < attsMax) - atts[nAtts].normalized = 0; + atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized - && (ptr == atts[nAtts].valuePtr - || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE - || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE - || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) - atts[nAtts].normalized = 0; + && nAtts < attsMax + && atts[nAtts].normalized + && (ptr == atts[nAtts].valuePtr + || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE + || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE + || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) + atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted @@ -1498,12 +1503,12 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) - atts[nAtts].normalized = 0; + atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) - return nAtts; + return nAtts; break; default: break; @@ -1512,32 +1517,36 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, /* not reached */ } -static -int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) +static int PTRFASTCALL +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { - for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); + !CHAR_MATCHES(enc, ptr, ASCII_SEMI); + ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: - result <<= 4; - result |= (c - ASCII_0); - break; - case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: - result <<= 4; - result += 10 + (c - ASCII_A); - break; - case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: - result <<= 4; - result += 10 + (c - ASCII_a); - break; + result <<= 4; + result |= (c - ASCII_0); + break; + case ASCII_A: case ASCII_B: case ASCII_C: + case ASCII_D: case ASCII_E: case ASCII_F: + result <<= 4; + result += 10 + (c - ASCII_A); + break; + case ASCII_a: case ASCII_b: case ASCII_c: + case ASCII_d: case ASCII_e: case ASCII_f: + result <<= 4; + result += 10 + (c - ASCII_a); + break; } if (result >= 0x110000) - return -1; + return -1; } } else { @@ -1546,23 +1555,24 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) result *= 10; result += (c - ASCII_0); if (result >= 0x110000) - return -1; + return -1; } } return checkCharRefNumber(result); } -static -int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) +static int PTRCALL +PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, + const char *end) { switch ((end - ptr)/MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: - return ASCII_LT; + return ASCII_LT; case ASCII_g: - return ASCII_GT; + return ASCII_GT; } } break; @@ -1570,9 +1580,9 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha if (CHAR_MATCHES(enc, ptr, ASCII_a)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_m)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_p)) - return ASCII_AMP; + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_p)) + return ASCII_AMP; } } break; @@ -1581,23 +1591,23 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha case ASCII_q: ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_u)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_o)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_t)) - return ASCII_QUOT; - } + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_t)) + return ASCII_QUOT; + } } break; case ASCII_a: ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_p)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_o)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_s)) - return ASCII_APOS; - } + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { + ptr += MINBPC(enc); + if (CHAR_MATCHES(enc, ptr, ASCII_s)) + return ASCII_APOS; + } } break; } @@ -1605,20 +1615,20 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha return 0; } -static -int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) +static int PTRCALL +PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { for (;;) { switch (BYTE_TYPE(enc, ptr1)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ - return 0; + return 0; LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) #undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) - return 0; + return 0; break; case BT_NONASCII: case BT_NMSTRT: @@ -1630,23 +1640,23 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_NAME: case BT_MINUS: if (*ptr2++ != *ptr1++) - return 0; + return 0; if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; + if (*ptr2++ != *ptr1++) + return 0; + if (MINBPC(enc) > 2) { + if (*ptr2++ != *ptr1++) + return 0; if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } + if (*ptr2++ != *ptr1++) + return 0; + } + } } break; default: if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; + return 1; switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: @@ -1660,18 +1670,18 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) case BT_DIGIT: case BT_NAME: case BT_MINUS: - return 0; + return 0; default: - return 1; + return 1; } } } /* not reached */ } -static -int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, - const char *end1, const char *ptr2) +static int PTRCALL +PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, + const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (ptr1 == end1) @@ -1682,8 +1692,8 @@ int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, return ptr1 == end1; } -static -int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) +static int PTRFASTCALL +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { @@ -1709,8 +1719,8 @@ int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) } } -static -const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) +static const char * PTRFASTCALL +PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { @@ -1725,11 +1735,11 @@ const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) } } -static -void PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) +static void PTRCALL +PREFIX(updatePosition)(const ENCODING *enc, + const char *ptr, + const char *end, + POSITION *pos) { while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { @@ -1748,7 +1758,7 @@ void PREFIX(updatePosition)(const ENCODING *enc, pos->lineNumber++; ptr += MINBPC(enc); if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); + ptr += MINBPC(enc); pos->columnNumber = (unsigned)-1; break; default: @@ -1766,3 +1776,4 @@ void PREFIX(updatePosition)(const ENCODING *enc, #undef CHECK_NAME_CASES #undef CHECK_NMSTRT_CASE #undef CHECK_NMSTRT_CASES + diff --git a/Modules/expat/xmltok_ns.c b/Modules/expat/xmltok_ns.c index 2185973..5610eb9 100644 --- a/Modules/expat/xmltok_ns.c +++ b/Modules/expat/xmltok_ns.c @@ -1,22 +1,25 @@ -const ENCODING *NS(XmlGetUtf8InternalEncoding)(void) +const ENCODING * +NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } -const ENCODING *NS(XmlGetUtf16InternalEncoding)(void) +const ENCODING * +NS(XmlGetUtf16InternalEncoding)(void) { -#if XML_BYTE_ORDER == 12 +#if BYTEORDER == 1234 return &ns(internal_little2_encoding).enc; -#elif XML_BYTE_ORDER == 21 +#elif BYTEORDER == 4321 return &ns(internal_big2_encoding).enc; #else const short n = 1; - return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc; + return (*(const char *)&n + ? &ns(internal_little2_encoding).enc + : &ns(internal_big2_encoding).enc); #endif } -static -const ENCODING *NS(encodings)[] = { +static const ENCODING *NS(encodings)[] = { &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, &ns(utf8_encoding).enc, @@ -26,21 +29,25 @@ const ENCODING *NS(encodings)[] = { &ns(utf8_encoding).enc /* NO_ENC */ }; -static -int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { - return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); + return initScan(NS(encodings), (const INIT_ENCODING *)enc, + XML_PROLOG_STATE, ptr, end, nextTokPtr); } -static -int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) +static int PTRCALL +NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { - return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); + return initScan(NS(encodings), (const INIT_ENCODING *)enc, + XML_CONTENT_STATE, ptr, end, nextTokPtr); } -int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) +int +NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name) { int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) @@ -54,8 +61,8 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n return 1; } -static -const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) +static const ENCODING * +NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { #define ENCODING_MAX 128 char buf[ENCODING_MAX]; @@ -73,26 +80,27 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha return NS(encodings)[i]; } -int NS(XmlParseXmlDecl)(int isGeneralTextEntity, - const ENCODING *enc, - const char *ptr, - const char *end, - const char **badPtr, - const char **versionPtr, - const char **versionEndPtr, - const char **encodingName, - const ENCODING **encoding, - int *standalone) +int +NS(XmlParseXmlDecl)(int isGeneralTextEntity, + const ENCODING *enc, + const char *ptr, + const char *end, + const char **badPtr, + const char **versionPtr, + const char **versionEndPtr, + const char **encodingName, + const ENCODING **encoding, + int *standalone) { return doParseXmlDecl(NS(findEncoding), - isGeneralTextEntity, - enc, - ptr, - end, - badPtr, - versionPtr, - versionEndPtr, - encodingName, - encoding, - standalone); + isGeneralTextEntity, + enc, + ptr, + end, + badPtr, + versionPtr, + versionEndPtr, + encodingName, + encoding, + standalone); } @@ -690,9 +690,9 @@ class PyBuildExt(build_ext): # More information on Expat can be found at www.libexpat.org. # if sys.byteorder == "little": - xmlbo = "12" + xmlbo = "1234" else: - xmlbo = "21" + xmlbo = "4321" expatinc = os.path.join(os.getcwd(), srcdir, 'Modules', 'expat') exts.append(Extension('pyexpat', sources = [ @@ -702,10 +702,9 @@ class PyBuildExt(build_ext): 'expat/xmltok.c', ], define_macros = [ - ('HAVE_EXPAT_H',None), ('XML_NS', '1'), ('XML_DTD', '1'), - ('XML_BYTE_ORDER', xmlbo), + ('BYTEORDER', xmlbo), ('XML_CONTEXT_BYTES','1024'), ], include_dirs = [expatinc] |