From 7e08602c761cabe75c557eacf738b716a4a1e18a Mon Sep 17 00:00:00 2001 From: Expat Upstream Date: Wed, 2 Aug 2017 15:40:48 +0200 Subject: expat 2017-08-02 (97c6bd01) Code extracted from: https://github.com/libexpat/libexpat.git at commit 97c6bd01990090d4015364ae37dd141f3c39a30f (R_2_2_3). --- CMakeLists.txt | 153 --------------- ConfigureChecks.cmake | 2 +- README | 139 ------------- README.md | 126 ++++++++++++ lib/expat.h | 3 +- lib/loadlibrary.c | 141 ++++++++++++++ lib/siphash.h | 64 ++++-- lib/xmlparse.c | 530 +++++++++++++++++++++++++++++++++++++++++++------- lib/xmlrole.c | 30 ++- lib/xmltok.c | 13 +- lib/xmltok_impl.c | 39 +++- 11 files changed, 846 insertions(+), 394 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 README create mode 100644 README.md create mode 100644 lib/loadlibrary.c diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index cbb1981..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,153 +0,0 @@ -# This file is copyrighted under the BSD-license for buildsystem files of KDE -# copyright 2010, Patrick Spendrin - -project(expat) - -cmake_minimum_required(VERSION 2.6) -set(PACKAGE_BUGREPORT "expat-bugs@libexpat.org") -set(PACKAGE_NAME "expat") -set(PACKAGE_VERSION "2.2.1") -set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") -set(PACKAGE_TARNAME "${PACKAGE_NAME}") - -option(BUILD_tools "build the xmlwf tool for expat library" ON) -option(BUILD_examples "build the examples for expat library" ON) -option(BUILD_tests "build the tests for expat library" ON) -option(BUILD_shared "build a shared expat library" ON) -option(BUILD_doc "build man page for xmlwf" ON) -option(INSTALL "install expat files in cmake install target" ON) - -# configuration options -set(XML_CONTEXT_BYTES 1024 CACHE STRING "Define to specify how much context to retain around the current parse point") -option(XML_DTD "Define to make parameter entity parsing functionality available" ON) -option(XML_NS "Define to make XML Namespaces functionality available" ON) - -if(XML_DTD) - set(XML_DTD 1) -else(XML_DTD) - set(XML_DTD 0) -endif(XML_DTD) -if(XML_NS) - set(XML_NS 1) -else(XML_NS) - set(XML_NS 0) -endif(XML_NS) - -if(BUILD_tests) - enable_testing() -endif(BUILD_tests) - -include(ConfigureChecks.cmake) - -set(EXTRA_LINK_AND_COMPILE_FLAGS "-fno-strict-aliasing") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_LINK_AND_COMPILE_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_LINK_AND_COMPILE_FLAGS}") -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${EXTRA_LINK_AND_COMPILE_FLAGS}") -set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${EXTRA_LINK_AND_COMPILE_FLAGS}") - -include_directories(${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/lib) -if(MSVC) - add_definitions(-D_CRT_SECURE_NO_WARNINGS -wd4996) -endif(MSVC) -if(WIN32) - set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Add a suffix, usually d on Windows") -endif(WIN32) - -set(expat_SRCS - lib/xmlparse.c - lib/xmlrole.c - lib/xmltok.c - lib/xmltok_impl.c - lib/xmltok_ns.c -) - -if(BUILD_shared) - set(_SHARED SHARED) - if(WIN32) - set(expat_SRCS ${expat_SRCS} lib/libexpat.def) - endif(WIN32) -else(BUILD_shared) - set(_SHARED STATIC) - if(WIN32) - add_definitions(-DXML_STATIC) - endif(WIN32) -endif(BUILD_shared) - -add_library(expat ${_SHARED} ${expat_SRCS}) - -set(LIBCURRENT 7) # sync -set(LIBREVISION 3) # with -set(LIBAGE 6) # configure.ac! -math(EXPR LIBCURRENT_MINUS_AGE "${LIBCURRENT} - ${LIBAGE}") - -if(NOT WIN32) - set_property(TARGET expat PROPERTY VERSION ${LIBCURRENT_MINUS_AGE}.${LIBAGE}.${LIBREVISION}) - set_property(TARGET expat PROPERTY SOVERSION ${LIBCURRENT_MINUS_AGE}) - set_property(TARGET expat PROPERTY NO_SONAME ${NO_SONAME}) -endif(NOT WIN32) - -macro(expat_install) - if(INSTALL) - install(${ARGN}) - endif() -endmacro() - -expat_install(TARGETS expat RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) - -set(prefix ${CMAKE_INSTALL_PREFIX}) -set(exec_prefix "\${prefix}/bin") -set(libdir "\${prefix}/lib") -set(includedir "\${prefix}/include") -configure_file(expat.pc.in ${CMAKE_CURRENT_BINARY_DIR}/expat.pc) - -expat_install(FILES lib/expat.h lib/expat_external.h DESTINATION include) -expat_install(FILES ${CMAKE_CURRENT_BINARY_DIR}/expat.pc DESTINATION lib/pkgconfig) - -if(BUILD_tools AND NOT WINCE) - set(xmlwf_SRCS - xmlwf/xmlwf.c - xmlwf/xmlfile.c - xmlwf/codepage.c - xmlwf/readfilemap.c - ) - - add_executable(xmlwf ${xmlwf_SRCS}) - set_property(TARGET xmlwf PROPERTY RUNTIME_OUTPUT_DIRECTORY xmlwf) - target_link_libraries(xmlwf expat) - expat_install(TARGETS xmlwf DESTINATION bin) - if(BUILD_doc AND NOT MSVC) - if(CMAKE_GENERATOR STREQUAL "Unix Makefiles") - set(make_command "$(MAKE)") - else() - set(make_command "make") - endif() - - add_custom_command(TARGET expat PRE_BUILD COMMAND "${make_command}" -C "${PROJECT_SOURCE_DIR}/doc" xmlwf.1) - expat_install(FILES "${PROJECT_SOURCE_DIR}/doc/xmlwf.1" DESTINATION share/man/man1) - endif() -endif(BUILD_tools AND NOT WINCE) - -if(BUILD_examples) - add_executable(elements examples/elements.c) - set_property(TARGET elements PROPERTY RUNTIME_OUTPUT_DIRECTORY examples) - target_link_libraries(elements expat) - - add_executable(outline examples/outline.c) - set_property(TARGET outline PROPERTY RUNTIME_OUTPUT_DIRECTORY examples) - target_link_libraries(outline expat) -endif(BUILD_examples) - -if(BUILD_tests) - ## these are unittests that can be run on any platform - add_executable(runtests tests/runtests.c tests/chardata.c tests/minicheck.c tests/memcheck.c) - set_property(TARGET runtests PROPERTY RUNTIME_OUTPUT_DIRECTORY tests) - target_link_libraries(runtests expat) - add_test(runtests tests/runtests) - - add_executable(runtestspp tests/runtestspp.cpp tests/chardata.c tests/minicheck.c tests/memcheck.c) - set_property(TARGET runtestspp PROPERTY RUNTIME_OUTPUT_DIRECTORY tests) - target_link_libraries(runtestspp expat) - add_test(runtestspp tests/runtestspp) -endif(BUILD_tests) diff --git a/ConfigureChecks.cmake b/ConfigureChecks.cmake index f03faa6..057cfa5 100644 --- a/ConfigureChecks.cmake +++ b/ConfigureChecks.cmake @@ -40,5 +40,5 @@ else(HAVE_SYS_TYPES_H) set(SIZE_T "unsigned") endif(HAVE_SYS_TYPES_H) -configure_file(expat_config.h.cmake expat_config.h) +configure_file(expat_config.h.cmake "${CMAKE_CURRENT_BINARY_DIR}/expat_config.h") add_definitions(-DHAVE_EXPAT_CONFIG_H) diff --git a/README b/README deleted file mode 100644 index cd11a22..0000000 --- a/README +++ /dev/null @@ -1,139 +0,0 @@ - - Expat, Release 2.2.1 - -This is Expat, a C library for parsing XML, written by James Clark. -Expat is a stream-oriented XML parser. This means that you register -handlers with the parser before starting the parse. These handlers -are called when the parser discovers the associated structures in the -document being parsed. A start tag is an example of the kind of -structures for which you may register handlers. - -Windows users should use the expat_win32bin package, which includes -both precompiled libraries and executables, and source code for -developers. - -Expat is free software. You may copy, distribute, and modify it under -the terms of the License contained in the file COPYING distributed -with this package. This license is the same as the MIT/X Consortium -license. - -Versions of Expat that have an odd minor version (the middle number in -the release above), are development releases and should be considered -as beta software. Releases with even minor version numbers are -intended to be production grade software. - -If you are building Expat from a check-out from the CVS repository, -you need to run a script that generates the configure script using the -GNU autoconf and libtool tools. To do this, you need to have -autoconf 2.58 or newer. Run the script like this: - - ./buildconf.sh - -Once this has been done, follow the same instructions as for building -from a source distribution. - -To build Expat from a source distribution, you first run the -configuration shell script in the top level distribution directory: - - ./configure - -There are many options which you may provide to configure (which you -can discover by running configure with the --help option). But the -one of most interest is the one that sets the installation directory. -By default, the configure script will set things up to install -libexpat into /usr/local/lib, expat.h into /usr/local/include, and -xmlwf into /usr/local/bin. If, for example, you'd prefer to install -into /home/me/mystuff/lib, /home/me/mystuff/include, and -/home/me/mystuff/bin, you can tell configure about that with: - - ./configure --prefix=/home/me/mystuff - -Another interesting option is to enable 64-bit integer support for -line and column numbers and the over-all byte index: - - ./configure CPPFLAGS=-DXML_LARGE_SIZE - -However, such a modification would be a breaking change to the ABI -and is therefore not recommended for general use - e.g. as part of -a Linux distribution - but rather for builds with special requirements. - -After running the configure script, the "make" command will build -things and "make install" will install things into their proper -location. Have a look at the "Makefile" to learn about additional -"make" options. Note that you need to have write permission into -the directories into which things will be installed. - -If you are interested in building Expat to provide document -information in UTF-16 encoding rather than the default UTF-8, follow -these instructions (after having run "make distclean"): - - 1. For UTF-16 output as unsigned short (and version/error - strings as char), run: - - ./configure CPPFLAGS=-DXML_UNICODE - - For UTF-16 output as wchar_t (incl. version/error strings), - run: - - ./configure CFLAGS="-g -O2 -fshort-wchar" \ - CPPFLAGS=-DXML_UNICODE_WCHAR_T - - 2. Edit the MakeFile, changing: - - LIBRARY = libexpat.la - - to: - - LIBRARY = libexpatw.la - - (Note the additional "w" in the library name.) - - 3. Run "make buildlib" (which builds the library only). - Or, to save step 2, run "make buildlib LIBRARY=libexpatw.la". - - 4. Run "make installlib" (which installs the library only). - Or, if step 2 was omitted, run "make installlib LIBRARY=libexpatw.la". - -Using DESTDIR or INSTALL_ROOT is enabled, with INSTALL_ROOT being the default -value for DESTDIR, and the rest of the make file using only DESTDIR. -It works as follows: - $ make install DESTDIR=/path/to/image -overrides the in-makefile set DESTDIR, while both - $ INSTALL_ROOT=/path/to/image make install - $ make install INSTALL_ROOT=/path/to/image -use DESTDIR=$(INSTALL_ROOT), even if DESTDIR eventually is defined in the -environment, because variable-setting priority is -1) commandline -2) in-makefile -3) environment - -Note: This only applies to the Expat library itself, building UTF-16 versions -of xmlwf and the tests is currently not supported. - -Note for Solaris users: The "ar" command is usually located in -"/usr/ccs/bin", which is not in the default PATH. You will need to -add this to your path for the "make" command, and probably also switch -to GNU make (the "make" found in /usr/ccs/bin does not seem to work -properly -- apparently it does not understand .PHONY directives). If -you're using ksh or bash, use this command to build: - - PATH=/usr/ccs/bin:$PATH make - -When using Expat with a project using autoconf for configuration, you -can use the probing macro in conftools/expat.m4 to determine how to -include Expat. See the comments at the top of that file for more -information. - -A reference manual is available in the file doc/reference.html in this -distribution. - -The homepage for this project is http://www.libexpat.org/. There -are links there to connect you to the bug reports page. If you need -to report a bug when you don't have access to a browser, you may also -send a bug report by email to expat-bugs@mail.libexpat.org. - -Discussion related to the direction of future expat development takes -place on expat-discuss@mail.libexpat.org. Archives of this list and -other Expat-related lists may be found at: - - http://mail.libexpat.org/mailman/listinfo/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..0a1777e --- /dev/null +++ b/README.md @@ -0,0 +1,126 @@ +# Expat, Release 2.2.3 + +This is Expat, a C library for parsing XML, started by +[James Clark](https://en.wikipedia.org/wiki/James_Clark_(programmer)) in 1997. +Expat is a stream-oriented XML parser. This means that you register +handlers with the parser before starting the parse. These handlers +are called when the parser discovers the associated structures in the +document being parsed. A start tag is an example of the kind of +structures for which you may register handlers. + +Windows users should use the +[`expat_win32` package](https://sourceforge.net/projects/expat/files/expat_win32/), +which includes both precompiled libraries and executables, and source code for +developers. + +Expat is [free software](https://www.gnu.org/philosophy/free-sw.en.html). +You may copy, distribute, and modify it under the terms of the License +contained in the file +[`COPYING`](https://github.com/libexpat/libexpat/blob/master/expat/COPYING) +distributed with this package. +This license is the same as the MIT/X Consortium license. + +If you are building Expat from a check-out from the +[Git repository](https://github.com/libexpat/libexpat/), +you need to run a script that generates the configure script using the +GNU autoconf and libtool tools. To do this, you need to have +autoconf 2.58 or newer. Run the script like this: + +```console +./buildconf.sh +``` + +Once this has been done, follow the same instructions as for building +from a source distribution. + +To build Expat from a source distribution, you first run the +configuration shell script in the top level distribution directory: + +```console +./configure +``` + +There are many options which you may provide to configure (which you +can discover by running configure with the `--help` option). But the +one of most interest is the one that sets the installation directory. +By default, the configure script will set things up to install +libexpat into `/usr/local/lib`, `expat.h` into `/usr/local/include`, and +`xmlwf` into `/usr/local/bin`. If, for example, you'd prefer to install +into `/home/me/mystuff/lib`, `/home/me/mystuff/include`, and +`/home/me/mystuff/bin`, you can tell `configure` about that with: + +```console +./configure --prefix=/home/me/mystuff +``` + +Another interesting option is to enable 64-bit integer support for +line and column numbers and the over-all byte index: + +```console +./configure CPPFLAGS=-DXML_LARGE_SIZE +``` + +However, such a modification would be a breaking change to the ABI +and is therefore not recommended for general use — e.g. as part of +a Linux distribution — but rather for builds with special requirements. + +After running the configure script, the `make` command will build +things and `make install` will install things into their proper +location. Have a look at the `Makefile` to learn about additional +`make` options. Note that you need to have write permission into +the directories into which things will be installed. + +If you are interested in building Expat to provide document +information in UTF-16 encoding rather than the default UTF-8, follow +these instructions (after having run `make distclean`): + +1. For UTF-16 output as unsigned short (and version/error strings as char), + run:
+ `./configure CPPFLAGS=-DXML_UNICODE`
+ For UTF-16 output as `wchar_t` (incl. version/error strings), run:
+ `./configure CFLAGS="-g -O2 -fshort-wchar" CPPFLAGS=-DXML_UNICODE_WCHAR_T` +
Note: The latter requires libc compiled with `-fshort-wchar`, as well. + +1. Edit `Makefile`, changing:
+ `LIBRARY = libexpat.la`
+ to:
+ `LIBRARY = libexpatw.la`
+ (Note the additional "w" in the library name.) + +1. Run `make buildlib` (which builds the library only). + Or, to save step 2, run `make buildlib LIBRARY=libexpatw.la`. + +1. Run `make installlib` (which installs the library only). + Or, if step 2 was omitted, run `make installlib LIBRARY=libexpatw.la`. + +Using `DESTDIR` or `INSTALL_ROOT` is enabled, with `INSTALL_ROOT` being the +default value for `DESTDIR`, and the rest of the make file using only +`DESTDIR`. It works as follows: + +```console +make install DESTDIR=/path/to/image +``` + +overrides the in-makefile set `DESTDIR`, while both + +```console +INSTALL_ROOT=/path/to/image make install +make install INSTALL_ROOT=/path/to/image +``` + +use `DESTDIR=$(INSTALL_ROOT)`, even if `DESTDIR` eventually is defined in the +environment, because variable-setting priority is +1. commandline +2. in-makefile +3. environment + +Note: This only applies to the Expat library itself, building UTF-16 versions +of xmlwf and the tests is currently not supported. + +When using Expat with a project using autoconf for configuration, you +can use the probing macro in `conftools/expat.m4` to determine how to +include Expat. See the comments at the top of that file for more +information. + +A reference manual is available in the file `doc/reference.html` in this +distribution. diff --git a/lib/expat.h b/lib/expat.h index 28b0f95..7e5bbb7 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -24,7 +24,6 @@ extern "C" { struct XML_ParserStruct; typedef struct XML_ParserStruct *XML_Parser; -/* Should this be defined using stdbool.h when C99 is available? */ typedef unsigned char XML_Bool; #define XML_TRUE ((XML_Bool) 1) #define XML_FALSE ((XML_Bool) 0) @@ -1049,7 +1048,7 @@ XML_GetFeatureList(void); */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 2 -#define XML_MICRO_VERSION 1 +#define XML_MICRO_VERSION 3 #ifdef __cplusplus } diff --git a/lib/loadlibrary.c b/lib/loadlibrary.c new file mode 100644 index 0000000..ffce868 --- /dev/null +++ b/lib/loadlibrary.c @@ -0,0 +1,141 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 2016 - 2017, Steve Holme, . + * + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF + * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH + * THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall + * not be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization of the + * copyright holder. + * + ***************************************************************************/ + +#if defined(_WIN32) + +#include +#include + + +HMODULE _Expat_LoadLibrary(LPCTSTR filename); + + +#if !defined(LOAD_WITH_ALTERED_SEARCH_PATH) +#define LOAD_WITH_ALTERED_SEARCH_PATH 0x00000008 +#endif + +#if !defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +#define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif + +/* We use our own typedef here since some headers might lack these */ +typedef HMODULE (APIENTRY *LOADLIBRARYEX_FN)(LPCTSTR, HANDLE, DWORD); + +/* See function definitions in winbase.h */ +#ifdef UNICODE +# ifdef _WIN32_WCE +# define LOADLIBARYEX L"LoadLibraryExW" +# else +# define LOADLIBARYEX "LoadLibraryExW" +# endif +#else +# define LOADLIBARYEX "LoadLibraryExA" +#endif + + +/* + * _Expat_LoadLibrary() + * + * This is used to dynamically load DLLs using the most secure method available + * for the version of Windows that we are running on. + * + * Parameters: + * + * filename [in] - The filename or full path of the DLL to load. If only the + * filename is passed then the DLL will be loaded from the + * Windows system directory. + * + * Returns the handle of the module on success; otherwise NULL. + */ +HMODULE _Expat_LoadLibrary(LPCTSTR filename) +{ + HMODULE hModule = NULL; + LOADLIBRARYEX_FN pLoadLibraryEx = NULL; + + /* Get a handle to kernel32 so we can access it's functions at runtime */ + HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32")); + if(!hKernel32) + return NULL; + + /* Attempt to find LoadLibraryEx() which is only available on Windows 2000 + and above */ + pLoadLibraryEx = (LOADLIBRARYEX_FN) GetProcAddress(hKernel32, LOADLIBARYEX); + + /* Detect if there's already a path in the filename and load the library if + there is. Note: Both back slashes and forward slashes have been supported + since the earlier days of DOS at an API level although they are not + supported by command prompt */ + if(_tcspbrk(filename, TEXT("\\/"))) { + /** !checksrc! disable BANNEDFUNC 1 **/ + hModule = pLoadLibraryEx ? + pLoadLibraryEx(filename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) : + LoadLibrary(filename); + } + /* Detect if KB2533623 is installed, as LOAD_LIBARY_SEARCH_SYSTEM32 is only + supported on Windows Vista, Windows Server 2008, Windows 7 and Windows + Server 2008 R2 with this patch or natively on Windows 8 and above */ + else if(pLoadLibraryEx && GetProcAddress(hKernel32, "AddDllDirectory")) { + /* Load the DLL from the Windows system directory */ + hModule = pLoadLibraryEx(filename, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + } + else { + /* Attempt to get the Windows system path */ + UINT systemdirlen = GetSystemDirectory(NULL, 0); + if(systemdirlen) { + /* Allocate space for the full DLL path (Room for the null terminator + is included in systemdirlen) */ + size_t filenamelen = _tcslen(filename); + TCHAR *path = malloc(sizeof(TCHAR) * (systemdirlen + 1 + filenamelen)); + if(path && GetSystemDirectory(path, systemdirlen)) { + /* Calculate the full DLL path */ + _tcscpy(path + _tcslen(path), TEXT("\\")); + _tcscpy(path + _tcslen(path), filename); + + /* Load the DLL from the Windows system directory */ + /** !checksrc! disable BANNEDFUNC 1 **/ + hModule = pLoadLibraryEx ? + pLoadLibraryEx(path, NULL, LOAD_WITH_ALTERED_SEARCH_PATH) : + LoadLibrary(path); + + } + free(path); + } + } + + return hModule; +} + +#else /* defined(_WIN32) */ + +/* ISO C requires a translation unit to contain at least one declaration + [-Wempty-translation-unit] */ +typedef int _TRANSLATION_UNIT_LOAD_LIBRARY_C_NOT_EMTPY; + +#endif /* defined(_WIN32) */ diff --git a/lib/siphash.h b/lib/siphash.h index 23b56d2..581872d 100644 --- a/lib/siphash.h +++ b/lib/siphash.h @@ -2,9 +2,8 @@ * siphash.h - SipHash-2-4 in a single header file * -------------------------------------------------------------------------- * Derived by William Ahern from the reference implementation[1] published[2] - * by Jean-Philippe Aumasson and Daniel J. Berstein. Licensed in kind. * by Jean-Philippe Aumasson and Daniel J. Berstein. - * Minimal changes by Sebastian Pipping on top, details below. + * Minimal changes by Sebastian Pipping and Victor Stinner on top, see below. * Licensed under the CC0 Public Domain Dedication license. * * 1. https://www.131002.net/siphash/siphash24.c @@ -12,14 +11,25 @@ * -------------------------------------------------------------------------- * HISTORY: * - * 2017-06-10 (Sebastian Pipping) + * 2017-07-25 (Vadim Zeitlin) + * - Fix use of SIPHASH_MAIN macro + * + * 2017-07-05 (Sebastian Pipping) + * - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++ + * - Add const qualifiers at two places + * - Ensure <=80 characters line length (assuming tab width 4) + * + * 2017-06-23 (Victor Stinner) + * - Address Win64 compile warnings + * + * 2017-06-18 (Sebastian Pipping) * - Clarify license note in the header * - Address C89 issues: * - Stop using inline keyword (and let compiler decide) - * - Turn integer suffix ULL to UL * - Replace _Bool by int * - Turn macro siphash24 into a function * - Address invalid conversion (void pointer) by explicit cast + * - Address lack of stdint.h for Visual Studio 2003 to 2008 * - Always expose sip24_valid (for self-tests) * * 2012-11-04 - Born. (William Ahern) @@ -76,7 +86,23 @@ #define SIPHASH_H #include /* size_t */ -#include /* uint64_t uint32_t uint8_t */ + +#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600) + /* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */ + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +#else + #include /* uint64_t uint32_t uint8_t */ +#endif + + +/* + * Workaround to not require a C++11 compiler for using ULL suffix + * if this code is included and compiled as C++; related GCC warning is: + * warning: use of C++11 long long integer constant [-Wlong-long] + */ +#define _SIP_ULL(high, low) (((uint64_t)high << 32) | low) #define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ( (x) >> (64 - (b)))) @@ -158,11 +184,12 @@ static void sip_round(struct siphash *H, const int rounds) { } /* sip_round() */ -static struct siphash *sip24_init(struct siphash *H, const struct sipkey *key) { - H->v0 = 0x736f6d6570736575UL ^ key->k[0]; - H->v1 = 0x646f72616e646f6dUL ^ key->k[1]; - H->v2 = 0x6c7967656e657261UL ^ key->k[0]; - H->v3 = 0x7465646279746573UL ^ key->k[1]; +static struct siphash *sip24_init(struct siphash *H, + const struct sipkey *key) { + H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; + H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; + H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; + H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; H->p = H->buf; H->c = 0; @@ -173,7 +200,8 @@ static struct siphash *sip24_init(struct siphash *H, const struct sipkey *key) { #define sip_endof(a) (&(a)[sizeof (a) / sizeof *(a)]) -static struct siphash *sip24_update(struct siphash *H, const void *src, size_t len) { +static struct siphash *sip24_update(struct siphash *H, const void *src, + size_t len) { const unsigned char *p = (const unsigned char *)src, *pe = p + len; uint64_t m; @@ -198,7 +226,7 @@ static struct siphash *sip24_update(struct siphash *H, const void *src, size_t l static uint64_t sip24_final(struct siphash *H) { - char left = H->p - H->buf; + const char left = (char)(H->p - H->buf); uint64_t b = (H->c + left) << 56; switch (left) { @@ -222,7 +250,8 @@ static uint64_t sip24_final(struct siphash *H) { } /* sip24_final() */ -static uint64_t siphash24(const void *src, size_t len, const struct sipkey *key) { +static uint64_t siphash24(const void *src, size_t len, + const struct sipkey *key) { struct siphash state = SIPHASH_INITIALIZER; return sip24_final(sip24_update(sip24_init(&state, key), src, len)); } /* siphash24() */ @@ -310,10 +339,11 @@ static int sip24_valid(void) { struct sipkey k; size_t i; - sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"); + sip_tokey(&k, "\000\001\002\003\004\005\006\007\010\011" + "\012\013\014\015\016\017"); for (i = 0; i < sizeof in; ++i) { - in[i] = i; + in[i] = (unsigned char)i; if (siphash24(in, i, &k) != SIP_U8TO64_LE(vectors[i])) return 0; @@ -323,12 +353,12 @@ static int sip24_valid(void) { } /* sip24_valid() */ -#if SIPHASH_MAIN +#ifdef SIPHASH_MAIN #include int main(void) { - int ok = sip24_valid(); + const int ok = sip24_valid(); if (ok) puts("OK"); diff --git a/lib/xmlparse.c b/lib/xmlparse.c index 76f078e..b703e61 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -1,10 +1,12 @@ /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file COPYING for copying permission. - 77fea421d361dca90041d0040ecf1dca651167fadf2af79e990e35168d70d933 (2.2.1+) + 101bfd65d1ff3d1511cf6671e6aae65f82cd97df6f4da137d46d510731830ad9 (2.2.3+) */ -#define _GNU_SOURCE /* syscall prototype */ +#if !defined(_GNU_SOURCE) +# define _GNU_SOURCE 1 /* syscall prototype */ +#endif #include #include /* memset(), memcpy() */ @@ -19,6 +21,8 @@ #include /* gettimeofday() */ #include /* getpid() */ #include /* getpid() */ +#include /* O_RDONLY */ +#include #endif #define XML_BUILDING_EXPAT 1 @@ -33,6 +37,57 @@ #include "expat.h" #include "siphash.h" +#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) +# if defined(HAVE_GETRANDOM) +# include /* getrandom */ +# else +# include /* syscall */ +# include /* SYS_getrandom */ +# endif +# if ! defined(GRND_NONBLOCK) +# define GRND_NONBLOCK 0x0001 +# endif /* defined(GRND_NONBLOCK) */ +#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + +#if defined(HAVE_LIBBSD) \ + && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) +# include +#endif + +#if defined(_WIN32) && !defined(LOAD_LIBRARY_SEARCH_SYSTEM32) +# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 +#endif + +#if !defined(HAVE_GETRANDOM) && !defined(HAVE_SYSCALL_GETRANDOM) \ + && !defined(HAVE_ARC4RANDOM_BUF) && !defined(HAVE_ARC4RANDOM) \ + && !defined(XML_DEV_URANDOM) \ + && !defined(_WIN32) \ + && !defined(XML_POOR_ENTROPY) +# error \ + You do not have support for any sources of high quality entropy \ + enabled. For end user security, that is probably not what you want. \ + \ + Your options include: \ + * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ + * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ + * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \ + * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ + * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ + * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \ + * Windows (RtlGenRandom): _WIN32. \ + \ + If insist on not using any of these, bypass this error by defining \ + XML_POOR_ENTROPY; you have been warned. \ + \ + For CMake, one way to pass the define is: \ + cmake -DCMAKE_C_FLAGS="-pipe -O2 -DHAVE_SYSCALL_GETRANDOM" . \ + \ + If you have reasons to patch this detection code away or need changes \ + to the build system, please open a bug. Thank you! +#endif + + #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX #define XmlConvert XmlUtf16Convert @@ -436,6 +491,9 @@ static ELEMENT_TYPE * getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); +static XML_Char *copyString(const XML_Char *s, + const XML_Memory_Handling_Suite *memsuite); + static unsigned long generate_hash_secret_salt(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser); @@ -696,21 +754,13 @@ static const XML_Char implicitContext[] = { #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) -# include - -# if defined(HAVE_GETRANDOM) -# include /* getrandom */ -# else -# include /* syscall */ -# include /* SYS_getrandom */ -# endif /* Obtain entropy on Linux 3.17+ */ static int -writeRandomBytes_getrandom(void * target, size_t count) { +writeRandomBytes_getrandom_nonblock(void * target, size_t count) { int success = 0; /* full count bytes written? */ size_t bytesWrittenTotal = 0; - const unsigned int getrandomFlags = 0; + const unsigned int getrandomFlags = GRND_NONBLOCK; do { void * const currentTarget = (void*)((char*)target + bytesWrittenTotal); @@ -728,7 +778,7 @@ writeRandomBytes_getrandom(void * target, size_t count) { if (bytesWrittenTotal >= count) success = 1; } - } while (! success && (errno == EINTR || errno == EAGAIN)); + } while (! success && (errno == EINTR)); return success; } @@ -736,12 +786,67 @@ writeRandomBytes_getrandom(void * target, size_t count) { #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ +#if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + +/* Extract entropy from /dev/urandom */ +static int +writeRandomBytes_dev_urandom(void * target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + + const int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + return 0; + } + + do { + void * const currentTarget = (void*)((char*)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR)); + + close(fd); + return success; +} + +#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ + + +#if defined(HAVE_ARC4RANDOM) + +static void +writeRandomBytes_arc4random(void * target, size_t count) { + size_t bytesWrittenTotal = 0; + + while (bytesWrittenTotal < count) { + const uint32_t random32 = arc4random(); + size_t i = 0; + + for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); + i++, bytesWrittenTotal++) { + const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); + ((uint8_t *)target)[bytesWrittenTotal] = random8; + } + } +} + +#endif /* defined(HAVE_ARC4RANDOM) */ + + #ifdef _WIN32 typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG); +HMODULE _Expat_LoadLibrary(LPCTSTR filename); /* see loadlibrary.c */ /* Obtain entropy on Windows XP / Windows Server 2003 and later. - * Hint on RtlGenRandom and the following article from libsodioum. + * Hint on RtlGenRandom and the following article from libsodium. * * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/ @@ -749,7 +854,7 @@ typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG); static int writeRandomBytes_RtlGenRandom(void * target, size_t count) { int success = 0; /* full count bytes written? */ - const HMODULE advapi32 = LoadLibrary("ADVAPI32.DLL"); + const HMODULE advapi32 = _Expat_LoadLibrary(TEXT("ADVAPI32.DLL")); if (advapi32) { const RTLGENRANDOM_FUNC RtlGenRandom @@ -768,6 +873,8 @@ writeRandomBytes_RtlGenRandom(void * target, size_t count) { #endif /* _WIN32 */ +#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) + static unsigned long gather_time_entropy(void) { @@ -780,16 +887,20 @@ gather_time_entropy(void) int gettimeofday_res; gettimeofday_res = gettimeofday(&tv, NULL); + +#if defined(NDEBUG) + (void)gettimeofday_res; +#else assert (gettimeofday_res == 0); +#endif /* defined(NDEBUG) */ /* Microseconds time is <20 bits entropy */ return tv.tv_usec; #endif } -#if defined(HAVE_ARC4RANDOM_BUF) && defined(HAVE_LIBBSD) -# include -#endif +#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ + static unsigned long ENTROPY_DEBUG(const char * label, unsigned long entropy) { @@ -808,10 +919,12 @@ generate_hash_secret_salt(XML_Parser parser) { unsigned long entropy; (void)parser; -#if defined(HAVE_ARC4RANDOM_BUF) || defined(__CloudABI__) - (void)gather_time_entropy; +#if defined(HAVE_ARC4RANDOM_BUF) arc4random_buf(&entropy, sizeof(entropy)); return ENTROPY_DEBUG("arc4random_buf", entropy); +#elif defined(HAVE_ARC4RANDOM) + writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random", entropy); #else /* Try high quality providers first .. */ #ifdef _WIN32 @@ -819,10 +932,15 @@ generate_hash_secret_salt(XML_Parser parser) return ENTROPY_DEBUG("RtlGenRandom", entropy); } #elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) - if (writeRandomBytes_getrandom((void *)&entropy, sizeof(entropy))) { + if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("getrandom", entropy); } #endif +#if ! defined(_WIN32) && defined(XML_DEV_URANDOM) + if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("/dev/urandom", entropy); + } +#endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ /* Process ID is 0 bits entropy if attacker has local access */ @@ -833,7 +951,7 @@ generate_hash_secret_salt(XML_Parser parser) return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); } else { return ENTROPY_DEBUG("fallback(8)", - entropy * (unsigned long)2305843009213693951); + entropy * (unsigned long)2305843009213693951ULL); } #endif } @@ -962,6 +1080,8 @@ parserCreate(const XML_Char *encodingName, nsAttsVersion = 0; nsAttsPower = 0; + protocolEncodingName = NULL; + poolInit(&tempPool, &(parser->m_mem)); poolInit(&temp2Pool, &(parser->m_mem)); parserInit(parser, encodingName); @@ -988,9 +1108,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { processor = prologInitProcessor; XmlPrologStateInit(&prologState); - protocolEncodingName = (encodingName != NULL - ? poolCopyString(&tempPool, encodingName) - : NULL); + if (encodingName != NULL) { + protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + } curBase = NULL; XmlInitEncoding(&initEncoding, &encoding, 0); userData = NULL; @@ -1103,6 +1223,8 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) unknownEncodingRelease(unknownEncodingData); poolClear(&tempPool); poolClear(&temp2Pool); + FREE((void *)protocolEncodingName); + protocolEncodingName = NULL; parserInit(parser, encodingName); dtdReset(_dtd, &parser->m_mem); return XML_TRUE; @@ -1119,10 +1241,16 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) */ if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return XML_STATUS_ERROR; + + /* Get rid of any previous encoding name */ + FREE((void *)protocolEncodingName); + if (encodingName == NULL) + /* No new encoding name */ protocolEncodingName = NULL; else { - protocolEncodingName = poolCopyString(&tempPool, encodingName); + /* Copy the new encoding name into allocated memory */ + protocolEncodingName = copyString(encodingName, &(parser->m_mem)); if (!protocolEncodingName) return XML_STATUS_ERROR; } @@ -1357,6 +1485,7 @@ XML_ParserFree(XML_Parser parser) destroyBindings(inheritedBindings, parser); poolDestroy(&tempPool); poolDestroy(&temp2Pool); + FREE((void *)protocolEncodingName); #ifdef XML_DTD /* external parameter entity parsers share the DTD structure parser->m_dtd with the root parser, so we must not destroy it @@ -1748,7 +1877,8 @@ enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { - errorCode = XML_ERROR_INVALID_ARGUMENT; + if (parser != NULL) + parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; return XML_STATUS_ERROR; } switch (ps_parsing) { @@ -1783,9 +1913,22 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) if (errorCode == XML_ERROR_NONE) { switch (ps_parsing) { case XML_SUSPENDED: + /* It is hard to be certain, but it seems that this case + * cannot occur. This code is cleaning up a previous parse + * with no new data (since len == 0). Changing the parsing + * state requires getting to execute a handler function, and + * there doesn't seem to be an opportunity for that while in + * this circumstance. + * + * Given the uncertainty, we retain the code but exclude it + * from coverage tests. + * + * LCOV_EXCL_START + */ XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); positionPtr = bufferPtr; return XML_STATUS_SUSPENDED; + /* LCOV_EXCL_STOP */ case XML_INITIALIZED: case XML_PARSING: ps_parsing = XML_FINISHED; @@ -2974,9 +3117,17 @@ doContent(XML_Parser parser, return XML_ERROR_NO_MEMORY; break; default: + /* All of the tokens produced by XmlContentTok() have their own + * explicit cases, so this default is not strictly necessary. + * However it is a useful safety net, so we retain the code and + * simply exclude it from the coverage tests. + * + * LCOV_EXCL_START + */ if (defaultHandler) reportDefault(parser, enc, s, next); break; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; switch (ps_parsing) { @@ -3067,13 +3218,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc, #endif attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (temp == NULL) + if (temp == NULL) { + attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } atts = temp; #ifdef XML_ATTR_INFO temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); - if (temp2 == NULL) + if (temp2 == NULL) { + attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } attInfo = temp2; #endif if (n > oldAttsSize) @@ -3210,6 +3365,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, int j; /* hash table index */ unsigned long version = nsAttsVersion; int nsAttsSize = (int)1 << nsAttsPower; + unsigned char oldNsAttsPower = nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ NS_ATT *temp; @@ -3219,8 +3375,11 @@ storeAtts(XML_Parser parser, const ENCODING *enc, nsAttsPower = 3; nsAttsSize = (int)1 << nsAttsPower; temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); - if (!temp) + if (!temp) { + /* Restore actual size of memory in nsAtts */ + nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; + } nsAtts = temp; version = 0; /* force re-initialization of nsAtts hash table */ } @@ -3247,8 +3406,23 @@ storeAtts(XML_Parser parser, const ENCODING *enc, ((XML_Char *)s)[-1] = 0; /* clear flag */ id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); - if (!id || !id->prefix) - return XML_ERROR_NO_MEMORY; + if (!id || !id->prefix) { + /* This code is walking through the appAtts array, dealing + * with (in this case) a prefixed attribute name. To be in + * the array, the attribute must have already been bound, so + * has to have passed through the hash table lookup once + * already. That implies that an entry for it already + * exists, so the lookup above will return a pointer to + * already allocated memory. There is no opportunaity for + * the allocator to fail, so the condition above cannot be + * fulfilled. + * + * Since it is difficult to be certain that the above + * analysis is complete, we retain the test and merely + * remove the code from coverage tests. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } b = id->prefix->binding; if (!b) return XML_ERROR_UNBOUND_PREFIX; @@ -3625,8 +3799,16 @@ doCdataSection(XML_Parser parser, } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: + /* Every token returned by XmlCdataSectionTok() has its own + * explicit case, so this default case will never be executed. + * We retain it as a safety net and exclude it from the coverage + * statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; @@ -3686,8 +3868,20 @@ doIgnoreSection(XML_Parser parser, eventEndPP = &eventEndPtr; } else { + /* It's not entirely clear, but it seems the following two lines + * of code cannot be executed. The only occasions on which 'enc' + * is not 'parser->m_encoding' are when this function is called + * from the internal entity processing, and IGNORE sections are an + * error in internal entities. + * + * Since it really isn't clear that this is true, we keep the code + * and just remove it from our coverage tests. + * + * LCOV_EXCL_START + */ eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; @@ -3720,8 +3914,16 @@ doIgnoreSection(XML_Parser parser, } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: + /* All of the tokens that XmlIgnoreSectionTok() returns have + * explicit cases to handle them, so this default case is never + * executed. We keep it as a safety net anyway, and remove it + * from our test coverage statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } /* not reached */ } @@ -3734,6 +3936,7 @@ initializeEncoding(XML_Parser parser) const char *s; #ifdef XML_UNICODE char encodingBuf[128]; + /* See comments abount `protoclEncodingName` in parserInit() */ if (!protocolEncodingName) s = NULL; else { @@ -3817,7 +4020,14 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, reportDefault(parser, encoding, s, next); if (protocolEncodingName == NULL) { if (newEncoding) { - if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { + /* Check that the specified encoding does not conflict with what + * the parser has already deduced. Do we have the same number + * of bytes in the smallest representation of a character? If + * this is UTF-16, is it the same endianness? + */ + if (newEncoding->minBytesPerChar != encoding->minBytesPerChar + || (newEncoding->minBytesPerChar == 2 && + newEncoding != encoding)) { eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } @@ -3962,15 +4172,14 @@ entityValueInitProcessor(XML_Parser parser, result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; - switch (ps_parsing) { - case XML_SUSPENDED: - *nextPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: + /* At this point, ps_parsing cannot be XML_SUSPENDED. For that + * to happen, a parameter entity parsing handler must have + * attempted to suspend the parser, which fails and raises an + * error. The parser can be aborted, but can't be suspended. + */ + if (ps_parsing == XML_FINISHED) return XML_ERROR_ABORTED; - default: - *nextPtr = next; - } + *nextPtr = next; /* stop scanning for text declaration - we found one */ processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); @@ -4293,8 +4502,14 @@ doProlog(XML_Parser parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); - if (!entity) - return XML_ERROR_NO_MEMORY; + if (!entity) { + /* The external subset name "#" will have already been + * inserted into the hash table at the start of the + * external entity parsing, so no allocation will happen + * and lookup() cannot fail. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } if (useForeignDTD) entity->base = curBase; dtd->paramEntityRead = XML_FALSE; @@ -4773,8 +4988,10 @@ doProlog(XML_Parser parser, if (prologState.level >= groupSize) { if (groupSize) { char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); - if (temp == NULL) + if (temp == NULL) { + groupSize /= 2; return XML_ERROR_NO_MEMORY; + } groupConnector = temp; if (dtd->scaffIndex) { int *temp = (int *)REALLOC(dtd->scaffIndex, @@ -4786,8 +5003,10 @@ doProlog(XML_Parser parser, } else { groupConnector = (char *)MALLOC(groupSize = 32); - if (!groupConnector) + if (!groupConnector) { + groupSize = 0; return XML_ERROR_NO_MEMORY; + } } } groupConnector[prologState.level] = 0; @@ -4850,8 +5069,29 @@ doProlog(XML_Parser parser, : !dtd->hasParamEntityRefs)) { if (!entity) return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; + else if (!entity->is_internal) { + /* It's hard to exhaustively search the code to be sure, + * but there doesn't seem to be a way of executing the + * following line. There are two cases: + * + * If 'standalone' is false, the DTD must have no + * parameter entities or we wouldn't have passed the outer + * 'if' statement. That measn the only entity in the hash + * table is the external subset name "#" which cannot be + * given as a parameter entity name in XML syntax, so the + * lookup must have returned NULL and we don't even reach + * the test for an internal entity. + * + * If 'standalone' is true, it does not seem to be + * possible to create entities taking this code path that + * are not internal entities, so fail the test above. + * + * Because this analysis is very uncertain, the code is + * being left in place and merely removed from the + * coverage test statistics. + */ + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ + } } else if (!entity) { dtd->keepProcessing = dtd->standalone; @@ -5323,11 +5563,15 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ for (i = 0; i < n; i++) { if (!poolAppendChar(pool, buf[i])) return XML_ERROR_NO_MEMORY; @@ -5401,8 +5645,26 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, break; } if (entity->open) { - if (enc == encoding) - eventPtr = ptr; + if (enc == encoding) { + /* It does not appear that this line can be executed. + * + * The "if (entity->open)" check catches recursive entity + * definitions. In order to be called with an open + * entity, it must have gone through this code before and + * been through the recursive call to + * appendAttributeValue() some lines below. That call + * sets the local encoding ("enc") to the parser's + * internal encoding (internal_utf8 or internal_utf16), + * which can never be the same as the principle encoding. + * It doesn't appear there is another code path that gets + * here with entity->open being TRUE. + * + * Since it is not certain that this logic is watertight, + * we keep the line and merely exclude it from coverage + * tests. + */ + eventPtr = ptr; /* LCOV_EXCL_LINE */ + } return XML_ERROR_RECURSIVE_ENTITY_REF; } if (entity->notation) { @@ -5429,9 +5691,21 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, } break; default: + /* The only token returned by XmlAttributeValueTok() that does + * not have an explicit case here is XML_TOK_PARTIAL_CHAR. + * Getting that would require an entity name to contain an + * incomplete XML character (e.g. \xE2\x82); however previous + * tokenisers will have already recognised and rejected such + * names before XmlAttributeValueTok() gets a look-in. This + * default case should be retained as a safety net, but the code + * excluded from coverage tests. + * + * LCOV_EXCL_START + */ if (enc == encoding) eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } ptr = next; } @@ -5564,12 +5838,15 @@ storeEntityValue(XML_Parser parser, goto endEntityValue; } n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; - goto endEntityValue; - } + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ for (i = 0; i < n; i++) { if (pool->end == pool->ptr && !poolGrow(pool)) { result = XML_ERROR_NO_MEMORY; @@ -5590,10 +5867,18 @@ storeEntityValue(XML_Parser parser, result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; default: + /* This default case should be unnecessary -- all the tokens + * that XmlEntityValueTok() can return have their own explicit + * cases -- but should be retained for safety. We do however + * exclude it from the coverage statistics. + * + * LCOV_EXCL_START + */ if (enc == encoding) eventPtr = entityTextPtr; result = XML_ERROR_UNEXPECTED_STATE; goto endEntityValue; + /* LCOV_EXCL_STOP */ } entityTextPtr = next; } @@ -5691,8 +5976,25 @@ reportDefault(XML_Parser parser, const ENCODING *enc, eventEndPP = &eventEndPtr; } else { + /* To get here, two things must be true; the parser must be + * using a character encoding that is not the same as the + * encoding passed in, and the encoding passed in must need + * conversion to the internal format (UTF-8 unless XML_UNICODE + * is defined). The only occasions on which the encoding passed + * in is not the same as the parser's encoding are when it is + * the internal encoding (e.g. a previously defined parameter + * entity, already converted to internal format). This by + * definition doesn't need conversion, so the whole branch never + * gets executed. + * + * For safety's sake we don't delete these lines and merely + * exclude them from coverage statistics. + * + * LCOV_EXCL_START + */ eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } do { ICHAR *dataPtr = (ICHAR *)dataBuf; @@ -5861,9 +6163,30 @@ getContext(XML_Parser parser) len = dtd->defaultPrefix.binding->uriLen; if (namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) - return NULL; + for (i = 0; i < len; i++) { + if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. + * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } + } needSep = XML_TRUE; } @@ -5875,8 +6198,15 @@ getContext(XML_Parser parser) PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); if (!prefix) break; - if (!prefix->binding) - continue; + if (!prefix->binding) { + /* This test appears to be (justifiable) paranoia. There does + * not seem to be a way of injecting a prefix without a binding + * that doesn't get errored long before this function is called. + * The test should remain for safety's sake, so we instead + * exclude the following line from the coverage statistics. + */ + continue; /* LCOV_EXCL_LINE */ + } if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) return NULL; for (s = prefix->name; *s; s++) @@ -6547,8 +6877,20 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s) static const XML_Char * poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { - if (!pool->ptr && !poolGrow(pool)) - return NULL; + if (!pool->ptr && !poolGrow(pool)) { + /* The following line is unreachable given the current usage of + * poolCopyStringN(). Currently it is called from exactly one + * place to copy the text of a simple general entity. By that + * point, the name of the entity is already stored in the pool, so + * pool->ptr cannot be NULL. + * + * If poolCopyStringN() is used elsewhere as it well might be, + * this line may well become executable again. Regardless, this + * sort of check shouldn't be removed lightly, so we just exclude + * it from the coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } for (; n > 0; --n, s++) { if (!poolAppendChar(pool, *s)) return NULL; @@ -6641,8 +6983,19 @@ poolGrow(STRING_POOL *pool) int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); size_t bytesToAllocate; - if (blockSize < 0) - return XML_FALSE; + // NOTE: Needs to be calculated prior to calling `realloc` + // to avoid dangling pointers: + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX/2 bytes have already been allocated. This isn't + * readily testable, since it is unlikely that an average + * machine will have that much memory, so we exclude it from the + * coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } bytesToAllocate = poolBytesToAllocateFor(blockSize); if (bytesToAllocate == 0) @@ -6654,7 +7007,7 @@ poolGrow(STRING_POOL *pool) return XML_FALSE; pool->blocks = temp; pool->blocks->size = blockSize; - pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; } @@ -6663,8 +7016,18 @@ poolGrow(STRING_POOL *pool) int blockSize = (int)(pool->end - pool->start); size_t bytesToAllocate; - if (blockSize < 0) - return XML_FALSE; + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX bytes have already been allocated (which is prevented + * by various pieces of program logic, not least this one, never + * mind the unlikelihood of actually having that much memory) or + * the pool control fields have been corrupted (which could + * conceivably happen in an extremely buggy user handler + * function). Either way it isn't readily testable, so we + * exclude it from the coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; @@ -6827,3 +7190,26 @@ getElementType(XML_Parser parser, } return ret; } + +static XML_Char * +copyString(const XML_Char *s, + const XML_Memory_Handling_Suite *memsuite) +{ + int charsRequired = 0; + XML_Char *result; + + /* First determine how long the string is */ + while (s[charsRequired] != 0) { + charsRequired++; + } + /* Include the terminator */ + charsRequired++; + + /* Now allocate space for the copy */ + result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); + if (result == NULL) + return NULL; + /* Copy the original into place */ + memcpy(result, s, charsRequired * sizeof(XML_Char)); + return result; +} diff --git a/lib/xmlrole.c b/lib/xmlrole.c index a7c5630..c809ee5 100644 --- a/lib/xmlrole.c +++ b/lib/xmlrole.c @@ -170,7 +170,14 @@ prolog1(PROLOG_STATE *state, case XML_TOK_COMMENT: return XML_ROLE_COMMENT; case XML_TOK_BOM: - return XML_ROLE_NONE; + /* This case can never arise. To reach this role function, the + * parse must have passed through prolog0 and therefore have had + * some form of input, even if only a space. At that point, a + * byte order mark is no longer a valid character (though + * technically it should be interpreted as a non-breaking space), + * so will be rejected by the tokenizing stages. + */ + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), @@ -1285,6 +1292,26 @@ declClose(PROLOG_STATE *state, return common(state, tok); } +/* This function will only be invoked if the internal logic of the + * parser has broken down. It is used in two cases: + * + * 1: When the XML prolog has been finished. At this point the + * processor (the parser level above these role handlers) should + * switch from prologProcessor to contentProcessor and reinitialise + * the handler function. + * + * 2: When an error has been detected (via common() below). At this + * point again the processor should be switched to errorProcessor, + * which will never call a handler. + * + * The result of this is that error() can only be called if the + * processor switch failed to happen, which is an internal error and + * therefore we shouldn't be able to provoke it simply by using the + * library. It is a necessary backstop, however, so we merely exclude + * it from the coverage statistics. + * + * LCOV_EXCL_START + */ static int PTRCALL error(PROLOG_STATE *UNUSED_P(state), int UNUSED_P(tok), @@ -1294,6 +1321,7 @@ error(PROLOG_STATE *UNUSED_P(state), { return XML_ROLE_NONE; } +/* LCOV_EXCL_STOP */ static int FASTCALL common(PROLOG_STATE *state, int tok) diff --git a/lib/xmltok.c b/lib/xmltok.c index cdf0720..db4a5c8 100644 --- a/lib/xmltok.c +++ b/lib/xmltok.c @@ -1019,7 +1019,11 @@ streqci(const char *s1, const char *s2) if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) - c2 += ASCII_A - ASCII_a; + /* The following line will never get executed. streqci() is + * only called from two places, both of which guarantee to put + * upper-case strings into s2. + */ + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ if (c1 != c2) return 0; if (!c1) @@ -1291,7 +1295,7 @@ XmlUtf8Encode(int c, char *buf) }; if (c < 0) - return 0; + return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ if (c < min2) { buf[0] = (char)(c | UTF8_cval1); return 1; @@ -1314,7 +1318,7 @@ XmlUtf8Encode(int c, char *buf) buf[3] = (char)((c & 0x3f) | 0x80); return 4; } - return 0; + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ } int FASTCALL @@ -1465,6 +1469,9 @@ XmlInitUnknownEncoding(void *mem, else if (c < 0) { if (c < -4) return 0; + /* Multi-byte sequences need a converter function */ + if (!convert) + return 0; e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c index 5f779c0..4fa1ff6 100644 --- a/lib/xmltok_impl.c +++ b/lib/xmltok_impl.c @@ -1198,8 +1198,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *start; if (ptr >= end) return XML_TOK_NONE; - else if (! HAS_CHAR(enc, ptr, end)) - return XML_TOK_PARTIAL; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { @@ -1258,8 +1264,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *start; if (ptr >= end) return XML_TOK_NONE; - else if (! HAS_CHAR(enc, ptr, end)) - return XML_TOK_PARTIAL; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { @@ -1614,6 +1626,14 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, return 0; } +/* This function does not appear to be called from anywhere within the + * library code. It is used via the macro XmlSameName(), which is + * defined but never used. Since it appears in the encoding function + * table, removing it is not a thing to be undertaken lightly. For + * the moment, we simply exclude it from coverage tests. + * + * LCOV_EXCL_START + */ static int PTRCALL PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { @@ -1677,14 +1697,21 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) } /* not reached */ } +/* LCOV_EXCL_STOP */ static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (end1 - ptr1 < MINBPC(enc)) - return 0; + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. THe incoming data has already + * been tokenized once, so imcomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however. + */ + return 0; /* LCOV_EXCL_LINE */ + } if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } -- cgit v0.12