diff options
Diffstat (limited to 'Modules/expat/xmltok_impl.c')
-rw-r--r-- | Modules/expat/xmltok_impl.c | 1077 |
1 files changed, 524 insertions, 553 deletions
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c index 93328b8..c209221 100644 --- a/Modules/expat/xmltok_impl.c +++ b/Modules/expat/xmltok_impl.c @@ -32,128 +32,124 @@ #ifdef XML_TOK_IMPL_C -#ifndef IS_INVALID_CHAR -#define IS_INVALID_CHAR(enc, ptr, n) (0) -#endif - -#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_INVALID_CHAR(enc, ptr, n)) { \ - *(nextTokPtr) = (ptr); \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; +# ifndef IS_INVALID_CHAR +# define IS_INVALID_CHAR(enc, ptr, n) (0) +# endif + +# define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_INVALID_CHAR(enc, ptr, n)) { \ + *(nextTokPtr) = (ptr); \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define INVALID_CASES(ptr, nextTokPtr) \ - INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ - case BT_NONXML: \ - case BT_MALFORM: \ - case BT_TRAIL: \ - *(nextTokPtr) = (ptr); \ +# define INVALID_CASES(ptr, nextTokPtr) \ + INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ + INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ + case BT_NONXML: \ + case BT_MALFORM: \ + case BT_TRAIL: \ + *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; -#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NAME_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; - -#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - case BT_DIGIT: \ - case BT_NAME: \ - case BT_MINUS: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) - -#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; - -#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) - -#ifndef PREFIX -#define PREFIX(ident) ident -#endif - - -#define HAS_CHARS(enc, ptr, end, count) \ - (end - ptr >= count * MINBPC(enc)) +# define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (! IS_NAME_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define HAS_CHAR(enc, ptr, end) \ - HAS_CHARS(enc, ptr, end, 1) +# define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + case BT_DIGIT: \ + case BT_NAME: \ + case BT_MINUS: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) + +# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + ptr += n; \ + break; -#define REQUIRE_CHARS(enc, ptr, end, count) \ - { \ - if (! HAS_CHARS(enc, ptr, end, count)) { \ - return XML_TOK_PARTIAL; \ - } \ +# define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ + case BT_NONASCII: \ + if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ + /* fall through */ \ + case BT_NMSTRT: \ + case BT_HEX: \ + ptr += MINBPC(enc); \ + break; \ + CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) + +# ifndef PREFIX +# define PREFIX(ident) ident +# endif + +# define HAS_CHARS(enc, ptr, end, count) (end - ptr >= count * MINBPC(enc)) + +# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) + +# define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ } -#define REQUIRE_CHAR(enc, ptr, end) \ - REQUIRE_CHARS(enc, ptr, end, 1) - +# define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) /* ptr points to character following "<!-" */ static int PTRCALL -PREFIX(scanComment)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (HAS_CHAR(enc, ptr, end)) { - if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -173,9 +169,8 @@ PREFIX(scanComment)(const ENCODING *enc, const char *ptr, /* ptr points to character following "<!" */ static int PTRCALL -PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: @@ -197,12 +192,17 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, REQUIRE_CHARS(enc, ptr, end, 2); /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { - case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ - case BT_S: case BT_CR: case BT_LF: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: @@ -218,12 +218,12 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, - const char *end, int *tokPtr) -{ +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, + int *tokPtr) { int upper = 0; + UNUSED_P(enc); *tokPtr = XML_TOK_PI; - if (end - ptr != MINBPC(enc)*3) + if (end - ptr != MINBPC(enc) * 3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: @@ -263,30 +263,31 @@ PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, /* ptr points to character following "<?" */ static int PTRCALL -PREFIX(scanPi)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int tok; const char *target = ptr; REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: - if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_S: + case BT_CR: + case BT_LF: + if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); @@ -302,7 +303,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, } return XML_TOK_PARTIAL; case BT_QUEST: - if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { + if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -322,16 +323,16 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, - const char *end, const char **nextTokPtr) -{ - static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, - ASCII_T, ASCII_A, ASCII_LSQB }; +PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { + static const char CDATA_LSQB[] + = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; int i; + UNUSED_P(enc); /* CDATA[ */ REQUIRE_CHARS(enc, ptr, end, 6); for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { - if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { + if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -341,9 +342,8 @@ PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, } static int PTRCALL -PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { @@ -359,11 +359,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, case BT_RSQB: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -379,23 +379,25 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: @@ -416,23 +418,26 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, /* ptr points to character following "</" */ static int PTRCALL -PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_S: + case BT_CR: + case BT_LF: for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: + case BT_S: + case BT_CR: + case BT_LF: break; case BT_GT: *nextTokPtr = ptr + MINBPC(enc); @@ -443,13 +448,13 @@ PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, } } return XML_TOK_PARTIAL; -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: /* no need to check qname syntax here, since end-tag must match exactly */ ptr += MINBPC(enc); break; -#endif +# endif case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_END_TAG; @@ -464,9 +469,8 @@ PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, /* ptr points to character following "&#X" */ static int PTRCALL -PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: @@ -496,9 +500,8 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, /* ptr points to character following "&#" */ static int PTRCALL -PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { if (HAS_CHAR(enc, ptr, end)) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -529,11 +532,10 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, static int PTRCALL PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_NUM: return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); default: @@ -542,7 +544,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_ENTITY_REF; @@ -558,15 +560,14 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ -#ifdef XML_NS + const char **nextTokPtr) { +# ifdef XML_NS int hadColon = 0; -#endif +# endif while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) -#ifdef XML_NS + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) +# ifdef XML_NS case BT_COLON: if (hadColon) { *nextTokPtr = ptr; @@ -576,14 +577,16 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break; -#endif - case BT_S: case BT_CR: case BT_LF: +# endif + case BT_S: + case BT_CR: + case BT_LF: for (;;) { int t; @@ -602,102 +605,102 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - /* fall through */ - case BT_EQUALS: - { - int open; -#ifdef XML_NS - hadColon = 0; -#endif - for (;;) { - ptr += MINBPC(enc); - REQUIRE_CHAR(enc, ptr, end); - open = BYTE_TYPE(enc, ptr); - if (open == BT_QUOT || open == BT_APOS) - break; - switch (open) { - case BT_S: - case BT_LF: - case BT_CR: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } + /* fall through */ + case BT_EQUALS: { + int open; +# ifdef XML_NS + hadColon = 0; +# endif + for (;;) { ptr += MINBPC(enc); - /* in attribute value */ - for (;;) { - int t; - REQUIRE_CHAR(enc, ptr, end); - t = BYTE_TYPE(enc, ptr); - if (t == open) - break; - switch (t) { + REQUIRE_CHAR(enc, ptr, end); + open = BYTE_TYPE(enc, ptr); + if (open == BT_QUOT || open == BT_APOS) + break; + switch (open) { + case BT_S: + case BT_LF: + case BT_CR: + break; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + } + ptr += MINBPC(enc); + /* in attribute value */ + for (;;) { + int t; + REQUIRE_CHAR(enc, ptr, end); + t = BYTE_TYPE(enc, ptr); + if (t == open) + break; + switch (t) { INVALID_CASES(ptr, nextTokPtr) - case BT_AMP: - { - int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); - if (tok <= 0) { - if (tok == XML_TOK_INVALID) - *nextTokPtr = ptr; - return tok; - } - break; - } - case BT_LT: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - default: - ptr += MINBPC(enc); - break; + case BT_AMP: { + int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); + if (tok <= 0) { + if (tok == XML_TOK_INVALID) + *nextTokPtr = ptr; + return tok; } + break; + } + case BT_LT: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + default: + ptr += MINBPC(enc); + break; } + } + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); + switch (BYTE_TYPE(enc, ptr)) { + case BT_S: + case BT_CR: + case BT_LF: + break; + case BT_SOL: + goto sol; + case BT_GT: + goto gt; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } + /* ptr points to closing quote */ + for (;;) { ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: - break; - case BT_SOL: - goto sol; + continue; case BT_GT: - goto gt; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - /* ptr points to closing quote */ - for (;;) { + gt: + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_START_TAG_WITH_ATTS; + case BT_SOL: + sol: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: - continue; - case BT_GT: - gt: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_START_TAG_WITH_ATTS; - case BT_SOL: - sol: - ptr += MINBPC(enc); - REQUIRE_CHAR(enc, ptr, end); - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; - default: + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } - break; + *nextTokPtr = ptr + MINBPC(enc); + return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; } break; } + break; + } default: *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -710,14 +713,13 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ -#ifdef XML_NS + const char **nextTokPtr) { +# ifdef XML_NS int hadColon; -#endif +# endif REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_EXCL: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); @@ -725,8 +727,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LSQB: - return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), - end, nextTokPtr); + return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); } *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -738,14 +739,14 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } -#ifdef XML_NS +# ifdef XML_NS hadColon = 0; -#endif +# endif /* we have a start-tag */ while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) -#ifdef XML_NS + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) +# ifdef XML_NS case BT_COLON: if (hadColon) { *nextTokPtr = ptr; @@ -755,34 +756,37 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break; -#endif - case BT_S: case BT_CR: case BT_LF: - { - ptr += MINBPC(enc); - while (HAS_CHAR(enc, ptr, end)) { - switch (BYTE_TYPE(enc, ptr)) { +# endif + case BT_S: + case BT_CR: + case BT_LF: { + ptr += MINBPC(enc); + while (HAS_CHAR(enc, ptr, end)) { + switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: - goto gt; - case BT_SOL: - goto sol; - case BT_S: case BT_CR: case BT_LF: - ptr += MINBPC(enc); - continue; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); + case BT_GT: + goto gt; + case BT_SOL: + goto sol; + case BT_S: + case BT_CR: + case BT_LF: + ptr += MINBPC(enc); + continue; + default: + *nextTokPtr = ptr; + return XML_TOK_INVALID; } - return XML_TOK_PARTIAL; + return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); } + return XML_TOK_PARTIAL; + } case BT_GT: gt: *nextTokPtr = ptr + MINBPC(enc); @@ -791,7 +795,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, sol: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -807,8 +811,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { @@ -840,48 +843,50 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) + if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { + if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr; return XML_TOK_INVALID; - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_DATA_CHARS; \ + } \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_RSQB: if (HAS_CHARS(enc, ptr, end, 2)) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { - ptr += MINBPC(enc); - break; - } - if (HAS_CHARS(enc, ptr, end, 3)) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } + if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { + ptr += MINBPC(enc); + break; + } + if (HAS_CHARS(enc, ptr, end, 3)) { + if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { + ptr += MINBPC(enc); + break; + } + *nextTokPtr = ptr + 2 * MINBPC(enc); + return XML_TOK_INVALID; + } } /* fall through */ case BT_AMP: @@ -906,12 +911,14 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + case BT_S: + case BT_LF: + case BT_CR: + case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: @@ -920,7 +927,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_PARAM_ENTITY_REF; @@ -934,20 +941,24 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, static int PTRCALL PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_CR: case BT_LF: case BT_S: - case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_CR: + case BT_LF: + case BT_S: + case BT_RPAR: + case BT_GT: + case BT_PERCNT: + case BT_VERBAR: *nextTokPtr = ptr; return XML_TOK_POUND_NAME; default: @@ -959,14 +970,12 @@ PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, } static int PTRCALL -PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) -{ +PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_QUOT: case BT_APOS: ptr += MINBPC(enc); @@ -976,8 +985,12 @@ PREFIX(scanLit)(int open, const ENCODING *enc, return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - case BT_GT: case BT_PERCNT: case BT_LSQB: + case BT_S: + case BT_CR: + case BT_LF: + case BT_GT: + case BT_PERCNT: + case BT_LSQB: return XML_TOK_LITERAL; default: return XML_TOK_INVALID; @@ -992,8 +1005,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, static int PTRCALL PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ + const char **nextTokPtr) { int tok; if (ptr >= end) return XML_TOK_NONE; @@ -1011,27 +1023,26 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LT: - { - ptr += MINBPC(enc); - REQUIRE_CHAR(enc, ptr, end); - switch (BYTE_TYPE(enc, ptr)) { - case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_NMSTRT: - case BT_HEX: - case BT_NONASCII: - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; + case BT_LT: { + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); + switch (BYTE_TYPE(enc, ptr)) { + case BT_EXCL: + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_QUEST: + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); + case BT_NMSTRT: + case BT_HEX: + case BT_NONASCII: + case BT_LEAD2: + case BT_LEAD3: + case BT_LEAD4: + *nextTokPtr = ptr - MINBPC(enc); + return XML_TOK_INSTANCE_START; } + *nextTokPtr = ptr; + return XML_TOK_INVALID; + } case BT_CR: if (ptr + MINBPC(enc) == end) { *nextTokPtr = end; @@ -1039,13 +1050,15 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return -XML_TOK_PROLOG_S; } /* fall through */ - case BT_S: case BT_LF: + case BT_S: + case BT_LF: for (;;) { ptr += MINBPC(enc); if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_LF: + case BT_S: + case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ @@ -1074,7 +1087,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { - *nextTokPtr = ptr + 2*MINBPC(enc); + *nextTokPtr = ptr + 2 * MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } } @@ -1097,8 +1110,12 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_PLUS: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_PLUS; - case BT_CR: case BT_LF: case BT_S: - case BT_GT: case BT_COMMA: case BT_VERBAR: + case BT_CR: + case BT_LF: + case BT_S: + case BT_GT: + case BT_COMMA: + case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; @@ -1113,24 +1130,26 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NAME; \ - break; \ - } \ - if (IS_NAME_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NMTOKEN; \ - break; \ - } \ - *nextTokPtr = ptr; \ +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NAME; \ + break; \ + } \ + if (IS_NAME_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NMTOKEN; \ + break; \ + } \ + *nextTokPtr = ptr; \ return XML_TOK_INVALID; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; @@ -1139,9 +1158,9 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_DIGIT: case BT_NAME: case BT_MINUS: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif tok = XML_TOK_NMTOKEN; ptr += MINBPC(enc); break; @@ -1163,13 +1182,19 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: case BT_RPAR: case BT_COMMA: - case BT_VERBAR: case BT_LSQB: case BT_PERCNT: - case BT_S: case BT_CR: case BT_LF: + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + case BT_GT: + case BT_RPAR: + case BT_COMMA: + case BT_VERBAR: + case BT_LSQB: + case BT_PERCNT: + case BT_S: + case BT_CR: + case BT_LF: *nextTokPtr = ptr; return tok; -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { @@ -1177,7 +1202,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; @@ -1188,23 +1213,23 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, break; } break; -#endif +# endif case BT_PLUS: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: - if (tok == XML_TOK_NMTOKEN) { + if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -1219,9 +1244,8 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, } static int PTRCALL -PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; if (ptr >= end) return XML_TOK_NONE; @@ -1236,10 +1260,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1285,9 +1313,8 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { const char *start; if (ptr >= end) return XML_TOK_NONE; @@ -1302,10 +1329,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1313,8 +1344,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { - int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), - end, nextTokPtr); + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; } *nextTokPtr = ptr; @@ -1347,12 +1377,11 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, return XML_TOK_DATA_CHARS; } -#ifdef XML_DTD +# ifdef XML_DTD static int PTRCALL -PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) { int level = 0; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -1363,7 +1392,7 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) + INVALID_CASES(ptr, nextTokPtr) case BT_LT: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); @@ -1400,12 +1429,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_PARTIAL; } -#endif /* XML_DTD */ +# endif /* XML_DTD */ static int PTRCALL PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) -{ + const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { @@ -1428,9 +1456,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_AST: case BT_PERCNT: case BT_NUM: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { @@ -1440,8 +1468,9 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, break; case BT_NAME: case BT_NMSTRT: - if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) + if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; + /* fall through */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ @@ -1463,9 +1492,8 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, */ static int PTRCALL -PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) -{ +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; @@ -1473,32 +1501,35 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { -#define START_NAME \ - if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ - } -#define LEAD_CASE(n) \ - case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define START_NAME \ + if (state == other) { \ + if (nAtts < attsMax) { \ + atts[nAtts].name = ptr; \ + atts[nAtts].normalized = 1; \ + } \ + state = inName; \ + } +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + START_NAME ptr += (n - MINBPC(enc)); \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; -#undef START_NAME +# undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; - } - else if (open == BT_QUOT) { + } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1511,8 +1542,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; - } - else if (open == BT_APOS) { + } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; @@ -1526,16 +1556,15 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, case BT_S: if (state == inName) state = other; - else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized + else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; - case BT_CR: case BT_LF: + case BT_CR: + case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) @@ -1556,29 +1585,44 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, } static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) -{ +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ - ptr += 2*MINBPC(enc); + UNUSED_P(enc); + ptr += 2 * MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { - for (ptr += MINBPC(enc); - !CHAR_MATCHES(enc, ptr, ASCII_SEMI); + for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: - case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: + case ASCII_0: + case ASCII_1: + case ASCII_2: + case ASCII_3: + case ASCII_4: + case ASCII_5: + case ASCII_6: + case ASCII_7: + case ASCII_8: + case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; - case ASCII_A: case ASCII_B: case ASCII_C: - case ASCII_D: case ASCII_E: case ASCII_F: + case ASCII_A: + case ASCII_B: + case ASCII_C: + case ASCII_D: + case ASCII_E: + case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; - case ASCII_a: case ASCII_b: case ASCII_c: - case ASCII_d: case ASCII_e: case ASCII_f: + case ASCII_a: + case ASCII_b: + case ASCII_c: + case ASCII_d: + case ASCII_e: + case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; @@ -1586,9 +1630,8 @@ PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) if (result >= 0x110000) return -1; } - } - else { - for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { + } else { + for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); @@ -1600,10 +1643,10 @@ PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) } static int PTRCALL -PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, - const char *end) -{ - switch ((end - ptr)/MINBPC(enc)) { +PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, + const char *end) { + UNUSED_P(enc); + switch ((end - ptr) / MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { @@ -1653,113 +1696,43 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, return 0; } -/* This function does not appear to be called from anywhere within the - * library code. It is used via the macro XmlSameName(), which is - * defined but never used. Since it appears in the encoding function - * table, removing it is not a thing to be undertaken lightly. For - * the moment, we simply exclude it from coverage tests. - * - * LCOV_EXCL_START - */ -static int PTRCALL -PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr1)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (*ptr1++ != *ptr2++) \ - return 0; - LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) -#undef LEAD_CASE - /* fall through */ - if (*ptr1++ != *ptr2++) - return 0; - break; - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } - } - break; - default: - if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; - switch (BYTE_TYPE(enc, ptr2)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } - } - } - /* not reached */ -} -/* LCOV_EXCL_STOP */ - static int PTRCALL -PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, - const char *end1, const char *ptr2) -{ +PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, + const char *end1, const char *ptr2) { + UNUSED_P(enc); for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (end1 - ptr1 < MINBPC(enc)) { - /* This line cannot be executed. THe incoming data has already - * been tokenized once, so imcomplete characters like this have + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have * already been eliminated from the input. Retaining the * paranoia check is still valuable, however. */ return 0; /* LCOV_EXCL_LINE */ } - if (!CHAR_MATCHES(enc, ptr1, *ptr2)) + if (! CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; } static int PTRFASTCALL -PREFIX(nameLength)(const ENCODING *enc, const char *ptr) -{ +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: -#ifdef XML_NS +# ifdef XML_NS case BT_COLON: -#endif +# endif case BT_HEX: case BT_DIGIT: case BT_NAME: @@ -1772,9 +1745,8 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) } } -static const char * PTRFASTCALL -PREFIX(skipS)(const ENCODING *enc, const char *ptr) -{ +static const char *PTRFASTCALL +PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: @@ -1789,19 +1761,18 @@ PREFIX(skipS)(const ENCODING *enc, const char *ptr) } static void PTRCALL -PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) -{ +PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, + POSITION *pos) { while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE +# define LEAD_CASE(n) \ + case BT_LEAD##n: \ + ptr += n; \ + break; + LEAD_CASE(2) + LEAD_CASE(3) + LEAD_CASE(4) +# undef LEAD_CASE case BT_LF: pos->columnNumber = (XML_Size)-1; pos->lineNumber++; @@ -1822,12 +1793,12 @@ PREFIX(updatePosition)(const ENCODING *enc, } } -#undef DO_LEAD_CASE -#undef MULTIBYTE_CASES -#undef INVALID_CASES -#undef CHECK_NAME_CASE -#undef CHECK_NAME_CASES -#undef CHECK_NMSTRT_CASE -#undef CHECK_NMSTRT_CASES +# undef DO_LEAD_CASE +# undef MULTIBYTE_CASES +# undef INVALID_CASES +# undef CHECK_NAME_CASE +# undef CHECK_NAME_CASES +# undef CHECK_NMSTRT_CASE +# undef CHECK_NMSTRT_CASES #endif /* XML_TOK_IMPL_C */ |