diff options
54 files changed, 1216 insertions, 273 deletions
diff --git a/common/mythread.h b/common/mythread.h new file mode 100644 index 0000000..be22654 --- /dev/null +++ b/common/mythread.h @@ -0,0 +1,521 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mythread.h +/// \brief Some threading related helper macros and functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MYTHREAD_H +#define MYTHREAD_H + +#include "sysdefs.h" + +// If any type of threading is enabled, #define MYTHREAD_ENABLED. +#if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \ + || defined(MYTHREAD_VISTA) +# define MYTHREAD_ENABLED 1 +#endif + + +#ifdef MYTHREAD_ENABLED + +//////////////////////////////////////// +// Shared between all threading types // +//////////////////////////////////////// + +// Locks a mutex for a duration of a block. +// +// Perform mythread_mutex_lock(&mutex) in the beginning of a block +// and mythread_mutex_unlock(&mutex) at the end of the block. "break" +// may be used to unlock the mutex and jump out of the block. +// mythread_sync blocks may be nested. +// +// Example: +// +// mythread_sync(mutex) { +// foo(); +// if (some_error) +// break; // Skips bar() +// bar(); +// } +// +// At least GCC optimizes the loops completely away so it doesn't slow +// things down at all compared to plain mythread_mutex_lock(&mutex) +// and mythread_mutex_unlock(&mutex) calls. +// +#define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__) +#define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line) +#define mythread_sync_helper2(mutex, line) \ + for (unsigned int mythread_i_ ## line = 0; \ + mythread_i_ ## line \ + ? (mythread_mutex_unlock(&(mutex)), 0) \ + : (mythread_mutex_lock(&(mutex)), 1); \ + mythread_i_ ## line = 1) \ + for (unsigned int mythread_j_ ## line = 0; \ + !mythread_j_ ## line; \ + mythread_j_ ## line = 1) +#endif + + +#if !defined(MYTHREAD_ENABLED) + +////////////////// +// No threading // +////////////////// + +// Calls the given function once. This isn't thread safe. +#define mythread_once(func) \ +do { \ + static bool once_ = false; \ + if (!once_) { \ + func(); \ + once_ = true; \ + } \ +} while (0) + + +#if !(defined(_WIN32) && !defined(__CYGWIN__)) +// Use sigprocmask() to set the signal mask in single-threaded programs. +#include <signal.h> + +static inline void +mythread_sigmask(int how, const sigset_t *restrict set, + sigset_t *restrict oset) +{ + int ret = sigprocmask(how, set, oset); + assert(ret == 0); + (void)ret; +} +#endif + + +#elif defined(MYTHREAD_POSIX) + +//////////////////// +// Using pthreads // +//////////////////// + +#include <sys/time.h> +#include <pthread.h> +#include <signal.h> +#include <time.h> +#include <errno.h> + +#define MYTHREAD_RET_TYPE void * +#define MYTHREAD_RET_VALUE NULL + +typedef pthread_t mythread; +typedef pthread_mutex_t mythread_mutex; + +typedef struct { + pthread_cond_t cond; +#ifdef HAVE_CLOCK_GETTIME + // Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with + // the condition variable. + clockid_t clk_id; +#endif +} mythread_cond; + +typedef struct timespec mythread_condtime; + + +// Calls the given function once in a thread-safe way. +#define mythread_once(func) \ + do { \ + static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ + pthread_once(&once_, &func); \ + } while (0) + + +// Use pthread_sigmask() to set the signal mask in multi-threaded programs. +// Do nothing on OpenVMS since it lacks pthread_sigmask(). +static inline void +mythread_sigmask(int how, const sigset_t *restrict set, + sigset_t *restrict oset) +{ +#ifdef __VMS + (void)how; + (void)set; + (void)oset; +#else + int ret = pthread_sigmask(how, set, oset); + assert(ret == 0); + (void)ret; +#endif +} + + +// Creates a new thread with all signals blocked. Returns zero on success +// and non-zero on error. +static inline int +mythread_create(mythread *thread, void *(*func)(void *arg), void *arg) +{ + sigset_t old; + sigset_t all; + sigfillset(&all); + + mythread_sigmask(SIG_SETMASK, &all, &old); + const int ret = pthread_create(thread, NULL, func, arg); + mythread_sigmask(SIG_SETMASK, &old, NULL); + + return ret; +} + +// Joins a thread. Returns zero on success and non-zero on error. +static inline int +mythread_join(mythread thread) +{ + return pthread_join(thread, NULL); +} + + +// Initiatlizes a mutex. Returns zero on success and non-zero on error. +static inline int +mythread_mutex_init(mythread_mutex *mutex) +{ + return pthread_mutex_init(mutex, NULL); +} + +static inline void +mythread_mutex_destroy(mythread_mutex *mutex) +{ + int ret = pthread_mutex_destroy(mutex); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_mutex_lock(mythread_mutex *mutex) +{ + int ret = pthread_mutex_lock(mutex); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_mutex_unlock(mythread_mutex *mutex) +{ + int ret = pthread_mutex_unlock(mutex); + assert(ret == 0); + (void)ret; +} + + +// Initializes a condition variable. +// +// Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the +// timeout in pthread_cond_timedwait() work correctly also if system time +// is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available +// everywhere while the default CLOCK_REALTIME is, so the default is +// used if CLOCK_MONOTONIC isn't available. +// +// If clock_gettime() isn't available at all, gettimeofday() will be used. +static inline int +mythread_cond_init(mythread_cond *mycond) +{ +#ifdef HAVE_CLOCK_GETTIME + // NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1. +# if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && HAVE_DECL_CLOCK_MONOTONIC + struct timespec ts; + pthread_condattr_t condattr; + + // POSIX doesn't seem to *require* that pthread_condattr_setclock() + // will fail if given an unsupported clock ID. Test that + // CLOCK_MONOTONIC really is supported using clock_gettime(). + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0 + && pthread_condattr_init(&condattr) == 0) { + int ret = pthread_condattr_setclock( + &condattr, CLOCK_MONOTONIC); + if (ret == 0) + ret = pthread_cond_init(&mycond->cond, &condattr); + + pthread_condattr_destroy(&condattr); + + if (ret == 0) { + mycond->clk_id = CLOCK_MONOTONIC; + return 0; + } + } + + // If anything above fails, fall back to the default CLOCK_REALTIME. + // POSIX requires that all implementations of clock_gettime() must + // support at least CLOCK_REALTIME. +# endif + + mycond->clk_id = CLOCK_REALTIME; +#endif + + return pthread_cond_init(&mycond->cond, NULL); +} + +static inline void +mythread_cond_destroy(mythread_cond *cond) +{ + int ret = pthread_cond_destroy(&cond->cond); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_cond_signal(mythread_cond *cond) +{ + int ret = pthread_cond_signal(&cond->cond); + assert(ret == 0); + (void)ret; +} + +static inline void +mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) +{ + int ret = pthread_cond_wait(&cond->cond, mutex); + assert(ret == 0); + (void)ret; +} + +// Waits on a condition or until a timeout expires. If the timeout expires, +// non-zero is returned, otherwise zero is returned. +static inline int +mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, + const mythread_condtime *condtime) +{ + int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime); + assert(ret == 0 || ret == ETIMEDOUT); + return ret; +} + +// Sets condtime to the absolute time that is timeout_ms milliseconds +// in the future. The type of the clock to use is taken from cond. +static inline void +mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, + uint32_t timeout_ms) +{ + condtime->tv_sec = timeout_ms / 1000; + condtime->tv_nsec = (timeout_ms % 1000) * 1000000; + +#ifdef HAVE_CLOCK_GETTIME + struct timespec now; + int ret = clock_gettime(cond->clk_id, &now); + assert(ret == 0); + (void)ret; + + condtime->tv_sec += now.tv_sec; + condtime->tv_nsec += now.tv_nsec; +#else + (void)cond; + + struct timeval now; + gettimeofday(&now, NULL); + + condtime->tv_sec += now.tv_sec; + condtime->tv_nsec += now.tv_usec * 1000L; +#endif + + // tv_nsec must stay in the range [0, 999_999_999]. + if (condtime->tv_nsec >= 1000000000L) { + condtime->tv_nsec -= 1000000000L; + ++condtime->tv_sec; + } +} + + +#elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA) + +///////////////////// +// Windows threads // +///////////////////// + +#define WIN32_LEAN_AND_MEAN +#ifdef MYTHREAD_VISTA +# undef _WIN32_WINNT +# define _WIN32_WINNT 0x0600 +#endif +#include <windows.h> +#include <process.h> + +#define MYTHREAD_RET_TYPE unsigned int __stdcall +#define MYTHREAD_RET_VALUE 0 + +typedef HANDLE mythread; +typedef CRITICAL_SECTION mythread_mutex; + +#ifdef MYTHREAD_WIN95 +typedef HANDLE mythread_cond; +#else +typedef CONDITION_VARIABLE mythread_cond; +#endif + +typedef struct { + // Tick count (milliseconds) in the beginning of the timeout. + // NOTE: This is 32 bits so it wraps around after 49.7 days. + // Multi-day timeouts may not work as expected. + DWORD start; + + // Length of the timeout in milliseconds. The timeout expires + // when the current tick count minus "start" is equal or greater + // than "timeout". + DWORD timeout; +} mythread_condtime; + + +// mythread_once() is only available with Vista threads. +#ifdef MYTHREAD_VISTA +#define mythread_once(func) \ + do { \ + static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \ + BOOL pending_; \ + if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \ + abort(); \ + if (pending_) \ + func(); \ + if (!InitOnceComplete(&once, 0, NULL)) \ + abort(); \ + } while (0) +#endif + + +// mythread_sigmask() isn't available on Windows. Even a dummy version would +// make no sense because the other POSIX signal functions are missing anyway. + + +static inline int +mythread_create(mythread *thread, + unsigned int (__stdcall *func)(void *arg), void *arg) +{ + uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL); + if (ret == 0) + return -1; + + *thread = (HANDLE)ret; + return 0; +} + +static inline int +mythread_join(mythread thread) +{ + int ret = 0; + + if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) + ret = -1; + + if (!CloseHandle(thread)) + ret = -1; + + return ret; +} + + +static inline int +mythread_mutex_init(mythread_mutex *mutex) +{ + InitializeCriticalSection(mutex); + return 0; +} + +static inline void +mythread_mutex_destroy(mythread_mutex *mutex) +{ + DeleteCriticalSection(mutex); +} + +static inline void +mythread_mutex_lock(mythread_mutex *mutex) +{ + EnterCriticalSection(mutex); +} + +static inline void +mythread_mutex_unlock(mythread_mutex *mutex) +{ + LeaveCriticalSection(mutex); +} + + +static inline int +mythread_cond_init(mythread_cond *cond) +{ +#ifdef MYTHREAD_WIN95 + *cond = CreateEvent(NULL, FALSE, FALSE, NULL); + return *cond == NULL ? -1 : 0; +#else + InitializeConditionVariable(cond); + return 0; +#endif +} + +static inline void +mythread_cond_destroy(mythread_cond *cond) +{ +#ifdef MYTHREAD_WIN95 + CloseHandle(*cond); +#else + (void)cond; +#endif +} + +static inline void +mythread_cond_signal(mythread_cond *cond) +{ +#ifdef MYTHREAD_WIN95 + SetEvent(*cond); +#else + WakeConditionVariable(cond); +#endif +} + +static inline void +mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) +{ +#ifdef MYTHREAD_WIN95 + LeaveCriticalSection(mutex); + WaitForSingleObject(*cond, INFINITE); + EnterCriticalSection(mutex); +#else + BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE); + assert(ret); + (void)ret; +#endif +} + +static inline int +mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, + const mythread_condtime *condtime) +{ +#ifdef MYTHREAD_WIN95 + LeaveCriticalSection(mutex); +#endif + + DWORD elapsed = GetTickCount() - condtime->start; + DWORD timeout = elapsed >= condtime->timeout + ? 0 : condtime->timeout - elapsed; + +#ifdef MYTHREAD_WIN95 + DWORD ret = WaitForSingleObject(*cond, timeout); + assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT); + + EnterCriticalSection(mutex); + + return ret == WAIT_TIMEOUT; +#else + BOOL ret = SleepConditionVariableCS(cond, mutex, timeout); + assert(ret || GetLastError() == ERROR_TIMEOUT); + return !ret; +#endif +} + +static inline void +mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, + uint32_t timeout) +{ + (void)cond; + condtime->start = GetTickCount(); + condtime->timeout = timeout; +} + +#endif + +#endif diff --git a/common/sysdefs.h b/common/sysdefs.h index e056ca4..df7ecf4 100644 --- a/common/sysdefs.h +++ b/common/sysdefs.h @@ -44,9 +44,7 @@ // Some pre-C99 systems have SIZE_MAX in limits.h instead of stdint.h. The // limits are also used to figure out some macros missing from pre-C99 systems. -#ifdef HAVE_LIMITS_H -# include <limits.h> -#endif +#include <limits.h> // Be more compatible with systems that have non-conforming inttypes.h. // We assume that int is 32-bit and that long is either 32-bit or 64-bit. @@ -153,9 +151,7 @@ typedef unsigned char _Bool; // string.h should be enough but let's include strings.h and memory.h too if // they exists, since that shouldn't do any harm, but may improve portability. -#ifdef HAVE_STRING_H -# include <string.h> -#endif +#include <string.h> #ifdef HAVE_STRINGS_H # include <strings.h> @@ -193,7 +189,8 @@ typedef unsigned char _Bool; # define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) #endif -#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 +#if defined(__GNUC__) \ + && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4) # define lzma_attr_alloc_size(x) __attribute__((__alloc_size__(x))) #else # define lzma_attr_alloc_size(x) diff --git a/common/tuklib_common.h b/common/tuklib_common.h new file mode 100644 index 0000000..31fbab5 --- /dev/null +++ b/common/tuklib_common.h @@ -0,0 +1,71 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_common.h +/// \brief Common definitions for tuklib modules +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_COMMON_H +#define TUKLIB_COMMON_H + +// The config file may be replaced by a package-specific file. +// It should include at least stddef.h, inttypes.h, and limits.h. +#include "tuklib_config.h" + +// TUKLIB_SYMBOL_PREFIX is prefixed to all symbols exported by +// the tuklib modules. If you use a tuklib module in a library, +// you should use TUKLIB_SYMBOL_PREFIX to make sure that there +// are no symbol conflicts in case someone links your library +// into application that also uses the same tuklib module. +#ifndef TUKLIB_SYMBOL_PREFIX +# define TUKLIB_SYMBOL_PREFIX +#endif + +#define TUKLIB_CAT_X(a, b) a ## b +#define TUKLIB_CAT(a, b) TUKLIB_CAT_X(a, b) + +#ifndef TUKLIB_SYMBOL +# define TUKLIB_SYMBOL(sym) TUKLIB_CAT(TUKLIB_SYMBOL_PREFIX, sym) +#endif + +#ifndef TUKLIB_DECLS_BEGIN +# ifdef __cplusplus +# define TUKLIB_DECLS_BEGIN extern "C" { +# else +# define TUKLIB_DECLS_BEGIN +# endif +#endif + +#ifndef TUKLIB_DECLS_END +# ifdef __cplusplus +# define TUKLIB_DECLS_END } +# else +# define TUKLIB_DECLS_END +# endif +#endif + +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define TUKLIB_GNUC_REQ(major, minor) \ + ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \ + || __GNUC__ > (major)) +#else +# define TUKLIB_GNUC_REQ(major, minor) 0 +#endif + +#if TUKLIB_GNUC_REQ(2, 5) +# define tuklib_attr_noreturn __attribute__((__noreturn__)) +#else +# define tuklib_attr_noreturn +#endif + +#if (defined(_WIN32) && !defined(__CYGWIN__)) \ + || defined(__OS2__) || defined(__MSDOS__) +# define TUKLIB_DOSLIKE 1 +#endif + +#endif diff --git a/common/tuklib_config.h b/common/tuklib_config.h new file mode 100644 index 0000000..549cb24 --- /dev/null +++ b/common/tuklib_config.h @@ -0,0 +1,7 @@ +#ifdef HAVE_CONFIG_H +# include "sysdefs.h" +#else +# include <stddef.h> +# include <inttypes.h> +# include <limits.h> +#endif diff --git a/common/tuklib_cpucores.c b/common/tuklib_cpucores.c new file mode 100644 index 0000000..cc968dd --- /dev/null +++ b/common/tuklib_cpucores.c @@ -0,0 +1,100 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.c +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "tuklib_cpucores.h" + +#if defined(_WIN32) || defined(__CYGWIN__) +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0500 +# endif +# include <windows.h> + +// glibc >= 2.9 +#elif defined(TUKLIB_CPUCORES_SCHED_GETAFFINITY) +# include <sched.h> + +// FreeBSD +#elif defined(TUKLIB_CPUCORES_CPUSET) +# include <sys/param.h> +# include <sys/cpuset.h> + +#elif defined(TUKLIB_CPUCORES_SYSCTL) +# ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +# endif +# include <sys/sysctl.h> + +#elif defined(TUKLIB_CPUCORES_SYSCONF) +# include <unistd.h> + +// HP-UX +#elif defined(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) +# include <sys/param.h> +# include <sys/pstat.h> +#endif + + +extern uint32_t +tuklib_cpucores(void) +{ + uint32_t ret = 0; + +#if defined(_WIN32) || defined(__CYGWIN__) + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + ret = sysinfo.dwNumberOfProcessors; + +#elif defined(TUKLIB_CPUCORES_SCHED_GETAFFINITY) + cpu_set_t cpu_mask; + if (sched_getaffinity(0, sizeof(cpu_mask), &cpu_mask) == 0) + ret = (uint32_t)CPU_COUNT(&cpu_mask); + +#elif defined(TUKLIB_CPUCORES_CPUSET) + cpuset_t set; + if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(set), &set) == 0) { +# ifdef CPU_COUNT + ret = (uint32_t)CPU_COUNT(&set); +# else + for (unsigned i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, &set)) + ++ret; +# endif + } + +#elif defined(TUKLIB_CPUCORES_SYSCTL) + int name[2] = { CTL_HW, HW_NCPU }; + int cpus; + size_t cpus_size = sizeof(cpus); + if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1 + && cpus_size == sizeof(cpus) && cpus > 0) + ret = (uint32_t)cpus; + +#elif defined(TUKLIB_CPUCORES_SYSCONF) +# ifdef _SC_NPROCESSORS_ONLN + // Most systems + const long cpus = sysconf(_SC_NPROCESSORS_ONLN); +# else + // IRIX + const long cpus = sysconf(_SC_NPROC_ONLN); +# endif + if (cpus > 0) + ret = (uint32_t)cpus; + +#elif defined(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) + struct pst_dynamic pst; + if (pstat_getdynamic(&pst, sizeof(pst), 1, 0) != -1) + ret = (uint32_t)pst.psd_proc_cnt; +#endif + + return ret; +} diff --git a/common/tuklib_cpucores.h b/common/tuklib_cpucores.h new file mode 100644 index 0000000..be1ce1c --- /dev/null +++ b/common/tuklib_cpucores.h @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.h +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_CPUCORES_H +#define TUKLIB_CPUCORES_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_cpucores TUKLIB_SYMBOL(tuklib_cpucores) +extern uint32_t tuklib_cpucores(void); + +TUKLIB_DECLS_END +#endif diff --git a/common/tuklib_integer.h b/common/tuklib_integer.h index b1e84d5..6f44a7a 100644 --- a/common/tuklib_integer.h +++ b/common/tuklib_integer.h @@ -6,22 +6,26 @@ /// This file provides macros or functions to do some basic integer and bit /// operations. /// -/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l): -/// - Byte swapping: bswapXX(num) -/// - Byte order conversions to/from native: convXXYe(num) -/// - Aligned reads: readXXYe(ptr) -/// - Aligned writes: writeXXYe(ptr, num) -/// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr) -/// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num) +/// Native endian inline functions (XX = 16, 32, or 64): +/// - Unaligned native endian reads: readXXne(ptr) +/// - Unaligned native endian writes: writeXXne(ptr, num) +/// - Aligned native endian reads: aligned_readXXne(ptr) +/// - Aligned native endian writes: aligned_writeXXne(ptr, num) /// -/// Since they can macros, the arguments should have no side effects since -/// they may be evaluated more than once. +/// Endianness-converting integer operations (these can be macros!) +/// (XX = 16, 32, or 64; Y = b or l): +/// - Byte swapping: bswapXX(num) +/// - Byte order conversions to/from native (byteswaps if Y isn't +/// the native endianness): convXXYe(num) +/// - Unaligned reads (16/32-bit only): readXXYe(ptr) +/// - Unaligned writes (16/32-bit only): writeXXYe(ptr, num) +/// - Aligned reads: aligned_readXXYe(ptr) +/// - Aligned writes: aligned_writeXXYe(ptr, num) /// -/// \todo PowerPC and possibly some other architectures support -/// byte swapping load and store instructions. This file -/// doesn't take advantage of those instructions. +/// Since the above can macros, the arguments should have no side effects +/// because they may be evaluated more than once. /// -/// Bit scan operations for non-zero 32-bit integers: +/// Bit scan operations for non-zero 32-bit integers (inline functions): /// - Bit scan reverse (find highest non-zero bit): bsr32(num) /// - Count leading zeros: clz32(num) /// - Count trailing zeros: ctz32(num) @@ -42,13 +46,26 @@ #define TUKLIB_INTEGER_H #include "tuklib_common.h" +#include <string.h> + +// Newer Intel C compilers require immintrin.h for _bit_scan_reverse() +// and such functions. +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500) +# include <immintrin.h> +#endif -//////////////////////////////////////// -// Operating system specific features // -//////////////////////////////////////// +/////////////////// +// Byte swapping // +/////////////////// -#if defined(HAVE_BYTESWAP_H) +#if defined(HAVE___BUILTIN_BSWAPXX) + // GCC >= 4.8 and Clang +# define bswap16(n) __builtin_bswap16(n) +# define bswap32(n) __builtin_bswap32(n) +# define bswap64(n) __builtin_bswap64(n) + +#elif defined(HAVE_BYTESWAP_H) // glibc, uClibc, dietlibc # include <byteswap.h> # ifdef HAVE_BSWAP_16 @@ -97,45 +114,33 @@ # endif #endif - -//////////////////////////////// -// Compiler-specific features // -//////////////////////////////// - -// Newer Intel C compilers require immintrin.h for _bit_scan_reverse() -// and such functions. -#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500) -# include <immintrin.h> -#endif - - -/////////////////// -// Byte swapping // -/////////////////// - #ifndef bswap16 -# define bswap16(num) \ - (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)) +# define bswap16(n) (uint16_t)( \ + (((n) & 0x00FFU) << 8) \ + | (((n) & 0xFF00U) >> 8) \ + ) #endif #ifndef bswap32 -# define bswap32(num) \ - ( (((uint32_t)(num) << 24) ) \ - | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \ - | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \ - | (((uint32_t)(num) >> 24) ) ) +# define bswap32(n) (uint32_t)( \ + (((n) & UINT32_C(0x000000FF)) << 24) \ + | (((n) & UINT32_C(0x0000FF00)) << 8) \ + | (((n) & UINT32_C(0x00FF0000)) >> 8) \ + | (((n) & UINT32_C(0xFF000000)) >> 24) \ + ) #endif #ifndef bswap64 -# define bswap64(num) \ - ( (((uint64_t)(num) << 56) ) \ - | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \ - | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \ - | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \ - | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \ - | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \ - | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \ - | (((uint64_t)(num) >> 56) ) ) +# define bswap64(n) (uint64_t)( \ + (((n) & UINT64_C(0x00000000000000FF)) << 56) \ + | (((n) & UINT64_C(0x000000000000FF00)) << 40) \ + | (((n) & UINT64_C(0x0000000000FF0000)) << 24) \ + | (((n) & UINT64_C(0x00000000FF000000)) << 8) \ + | (((n) & UINT64_C(0x000000FF00000000)) >> 8) \ + | (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \ + | (((n) & UINT64_C(0x00FF000000000000)) >> 40) \ + | (((n) & UINT64_C(0xFF00000000000000)) >> 56) \ + ) #endif // Define conversion macros using the basic byte swapping macros. @@ -180,76 +185,76 @@ #endif -////////////////////////////// -// Aligned reads and writes // -////////////////////////////// - -static inline uint16_t -read16be(const uint8_t *buf) -{ - uint16_t num = *(const uint16_t *)buf; - return conv16be(num); -} +//////////////////////////////// +// Unaligned reads and writes // +//////////////////////////////// +// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer +// is bad even if the uint8_pointer is properly aligned because this kind +// of casts break strict aliasing rules and result in undefined behavior. +// With unaligned pointers it's even worse: compilers may emit vector +// instructions that require aligned pointers even if non-vector +// instructions work with unaligned pointers. +// +// Using memcpy() is the standard compliant way to do unaligned access. +// Many modern compilers inline it so there is no function call overhead. +// For those compilers that don't handle the memcpy() method well, the +// old casting method (that violates strict aliasing) can be requested at +// build time. A third method, casting to a packed struct, would also be +// an option but isn't provided to keep things simpler (it's already a mess). +// Hopefully this is flexible enough in practice. static inline uint16_t -read16le(const uint8_t *buf) +read16ne(const uint8_t *buf) { - uint16_t num = *(const uint16_t *)buf; - return conv16le(num); -} - - -static inline uint32_t -read32be(const uint8_t *buf) -{ - uint32_t num = *(const uint32_t *)buf; - return conv32be(num); +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) + return *(const uint16_t *)buf; +#else + uint16_t num; + memcpy(&num, buf, sizeof(num)); + return num; +#endif } static inline uint32_t -read32le(const uint8_t *buf) -{ - uint32_t num = *(const uint32_t *)buf; - return conv32le(num); -} - - -static inline uint64_t -read64be(const uint8_t *buf) +read32ne(const uint8_t *buf) { - uint64_t num = *(const uint64_t *)buf; - return conv64be(num); +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) + return *(const uint32_t *)buf; +#else + uint32_t num; + memcpy(&num, buf, sizeof(num)); + return num; +#endif } static inline uint64_t -read64le(const uint8_t *buf) +read64ne(const uint8_t *buf) { - uint64_t num = *(const uint64_t *)buf; - return conv64le(num); +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) + return *(const uint64_t *)buf; +#else + uint64_t num; + memcpy(&num, buf, sizeof(num)); + return num; +#endif } -// NOTE: Possible byte swapping must be done in a macro to allow GCC -// to optimize byte swapping of constants when using glibc's or *BSD's -// byte swapping macros. The actual write is done in an inline function -// to make type checking of the buf pointer possible similarly to readXXYe() -// functions. - -#define write16be(buf, num) write16ne((buf), conv16be(num)) -#define write16le(buf, num) write16ne((buf), conv16le(num)) -#define write32be(buf, num) write32ne((buf), conv32be(num)) -#define write32le(buf, num) write32ne((buf), conv32le(num)) -#define write64be(buf, num) write64ne((buf), conv64be(num)) -#define write64le(buf, num) write64ne((buf), conv64le(num)) - - static inline void write16ne(uint8_t *buf, uint16_t num) { +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) *(uint16_t *)buf = num; +#else + memcpy(buf, &num, sizeof(num)); +#endif return; } @@ -257,7 +262,12 @@ write16ne(uint8_t *buf, uint16_t num) static inline void write32ne(uint8_t *buf, uint32_t num) { +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) *(uint32_t *)buf = num; +#else + memcpy(buf, &num, sizeof(num)); +#endif return; } @@ -265,90 +275,114 @@ write32ne(uint8_t *buf, uint32_t num) static inline void write64ne(uint8_t *buf, uint64_t num) { +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) *(uint64_t *)buf = num; +#else + memcpy(buf, &num, sizeof(num)); +#endif return; } -//////////////////////////////// -// Unaligned reads and writes // -//////////////////////////////// - -// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and -// 32-bit unaligned integer loads and stores. It's possible that 64-bit -// unaligned access doesn't work or is slower than byte-by-byte access. -// Since unaligned 64-bit is probably not needed as often as 16-bit or -// 32-bit, we simply don't support 64-bit unaligned access for now. -#ifdef TUKLIB_FAST_UNALIGNED_ACCESS -# define unaligned_read16be read16be -# define unaligned_read16le read16le -# define unaligned_read32be read32be -# define unaligned_read32le read32le -# define unaligned_write16be write16be -# define unaligned_write16le write16le -# define unaligned_write32be write32be -# define unaligned_write32le write32le - -#else - static inline uint16_t -unaligned_read16be(const uint8_t *buf) +read16be(const uint8_t *buf) { +#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS) + uint16_t num = read16ne(buf); + return conv16be(num); +#else uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1]; return num; +#endif } static inline uint16_t -unaligned_read16le(const uint8_t *buf) +read16le(const uint8_t *buf) { +#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS) + uint16_t num = read16ne(buf); + return conv16le(num); +#else uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8); return num; +#endif } static inline uint32_t -unaligned_read32be(const uint8_t *buf) +read32be(const uint8_t *buf) { +#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS) + uint32_t num = read32ne(buf); + return conv32be(num); +#else uint32_t num = (uint32_t)buf[0] << 24; num |= (uint32_t)buf[1] << 16; num |= (uint32_t)buf[2] << 8; num |= (uint32_t)buf[3]; return num; +#endif } static inline uint32_t -unaligned_read32le(const uint8_t *buf) +read32le(const uint8_t *buf) { +#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS) + uint32_t num = read32ne(buf); + return conv32le(num); +#else uint32_t num = (uint32_t)buf[0]; num |= (uint32_t)buf[1] << 8; num |= (uint32_t)buf[2] << 16; num |= (uint32_t)buf[3] << 24; return num; +#endif } +// NOTE: Possible byte swapping must be done in a macro to allow the compiler +// to optimize byte swapping of constants when using glibc's or *BSD's +// byte swapping macros. The actual write is done in an inline function +// to make type checking of the buf pointer possible. +#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS) +# define write16be(buf, num) write16ne(buf, conv16be(num)) +# define write32be(buf, num) write32ne(buf, conv32be(num)) +#endif + +#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS) +# define write16le(buf, num) write16ne(buf, conv16le(num)) +# define write32le(buf, num) write32ne(buf, conv32le(num)) +#endif + + +#ifndef write16be static inline void -unaligned_write16be(uint8_t *buf, uint16_t num) +write16be(uint8_t *buf, uint16_t num) { buf[0] = (uint8_t)(num >> 8); buf[1] = (uint8_t)num; return; } +#endif +#ifndef write16le static inline void -unaligned_write16le(uint8_t *buf, uint16_t num) +write16le(uint8_t *buf, uint16_t num) { buf[0] = (uint8_t)num; buf[1] = (uint8_t)(num >> 8); return; } +#endif +#ifndef write32be static inline void -unaligned_write32be(uint8_t *buf, uint32_t num) +write32be(uint8_t *buf, uint32_t num) { buf[0] = (uint8_t)(num >> 24); buf[1] = (uint8_t)(num >> 16); @@ -356,10 +390,12 @@ unaligned_write32be(uint8_t *buf, uint32_t num) buf[3] = (uint8_t)num; return; } +#endif +#ifndef write32le static inline void -unaligned_write32le(uint8_t *buf, uint32_t num) +write32le(uint8_t *buf, uint32_t num) { buf[0] = (uint8_t)num; buf[1] = (uint8_t)(num >> 8); @@ -367,10 +403,184 @@ unaligned_write32le(uint8_t *buf, uint32_t num) buf[3] = (uint8_t)(num >> 24); return; } +#endif + + +////////////////////////////// +// Aligned reads and writes // +////////////////////////////// +// Separate functions for aligned reads and writes are provided since on +// strict-align archs aligned access is much faster than unaligned access. +// +// Just like in the unaligned case, memcpy() is needed to avoid +// strict aliasing violations. However, on archs that don't support +// unaligned access the compiler cannot know that the pointers given +// to memcpy() are aligned which results in slow code. As of C11 there is +// no standard way to tell the compiler that we know that the address is +// aligned but some compilers have language extensions to do that. With +// such language extensions the memcpy() method gives excellent results. +// +// What to do on a strict-align system when no known language extentensions +// are available? Falling back to byte-by-byte access would be safe but ruin +// optimizations that have been made specifically with aligned access in mind. +// As a compromise, aligned reads will fall back to non-compliant type punning +// but aligned writes will be byte-by-byte, that is, fast reads are preferred +// over fast writes. This obviously isn't great but hopefully it's a working +// compromise for now. +// +// __builtin_assume_aligned is support by GCC >= 4.7 and clang >= 3.6. +#ifdef HAVE___BUILTIN_ASSUME_ALIGNED +# define tuklib_memcpy_aligned(dest, src, size) \ + memcpy(dest, __builtin_assume_aligned(src, size), size) +#else +# define tuklib_memcpy_aligned(dest, src, size) \ + memcpy(dest, src, size) +# ifndef TUKLIB_FAST_UNALIGNED_ACCESS +# define TUKLIB_USE_UNSAFE_ALIGNED_READS 1 +# endif #endif +static inline uint16_t +aligned_read16ne(const uint8_t *buf) +{ +#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ + || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) + return *(const uint16_t *)buf; +#else + uint16_t num; + tuklib_memcpy_aligned(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline uint32_t +aligned_read32ne(const uint8_t *buf) +{ +#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ + || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) + return *(const uint32_t *)buf; +#else + uint32_t num; + tuklib_memcpy_aligned(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline uint64_t +aligned_read64ne(const uint8_t *buf) +{ +#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ + || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) + return *(const uint64_t *)buf; +#else + uint64_t num; + tuklib_memcpy_aligned(&num, buf, sizeof(num)); + return num; +#endif +} + + +static inline void +aligned_write16ne(uint8_t *buf, uint16_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint16_t *)buf = num; +#else + tuklib_memcpy_aligned(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline void +aligned_write32ne(uint8_t *buf, uint32_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint32_t *)buf = num; +#else + tuklib_memcpy_aligned(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline void +aligned_write64ne(uint8_t *buf, uint64_t num) +{ +#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING + *(uint64_t *)buf = num; +#else + tuklib_memcpy_aligned(buf, &num, sizeof(num)); +#endif + return; +} + + +static inline uint16_t +aligned_read16be(const uint8_t *buf) +{ + uint16_t num = aligned_read16ne(buf); + return conv16be(num); +} + + +static inline uint16_t +aligned_read16le(const uint8_t *buf) +{ + uint16_t num = aligned_read16ne(buf); + return conv16le(num); +} + + +static inline uint32_t +aligned_read32be(const uint8_t *buf) +{ + uint32_t num = aligned_read32ne(buf); + return conv32be(num); +} + + +static inline uint32_t +aligned_read32le(const uint8_t *buf) +{ + uint32_t num = aligned_read32ne(buf); + return conv32le(num); +} + + +static inline uint64_t +aligned_read64be(const uint8_t *buf) +{ + uint64_t num = aligned_read64ne(buf); + return conv64be(num); +} + + +static inline uint64_t +aligned_read64le(const uint8_t *buf) +{ + uint64_t num = aligned_read64ne(buf); + return conv64le(num); +} + + +// These need to be macros like in the unaligned case. +#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num)) +#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num)) +#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num)) +#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num)) +#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num)) +#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num)) + + +//////////////////// +// Bit operations // +//////////////////// + static inline uint32_t bsr32(uint32_t n) { @@ -383,44 +593,42 @@ bsr32(uint32_t n) // multiple architectures. On x86, __builtin_clz() ^ 31U becomes // either plain BSR (so the XOR gets optimized away) or LZCNT and // XOR (if -march indicates that SSE4a instructions are supported). - return __builtin_clz(n) ^ 31U; + return (uint32_t)__builtin_clz(n) ^ 31U; #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) uint32_t i; __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); return i; -#elif defined(_MSC_VER) && _MSC_VER >= 1400 - // MSVC isn't supported by tuklib, but since this code exists, - // it doesn't hurt to have it here anyway. - uint32_t i; - _BitScanReverse((DWORD *)&i, n); +#elif defined(_MSC_VER) + unsigned long i; + _BitScanReverse(&i, n); return i; #else uint32_t i = 31; - if ((n & UINT32_C(0xFFFF0000)) == 0) { + if ((n & 0xFFFF0000) == 0) { n <<= 16; i = 15; } - if ((n & UINT32_C(0xFF000000)) == 0) { + if ((n & 0xFF000000) == 0) { n <<= 8; i -= 8; } - if ((n & UINT32_C(0xF0000000)) == 0) { + if ((n & 0xF0000000) == 0) { n <<= 4; i -= 4; } - if ((n & UINT32_C(0xC0000000)) == 0) { + if ((n & 0xC0000000) == 0) { n <<= 2; i -= 2; } - if ((n & UINT32_C(0x80000000)) == 0) + if ((n & 0x80000000) == 0) --i; return i; @@ -435,7 +643,7 @@ clz32(uint32_t n) return _bit_scan_reverse(n) ^ 31U; #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX - return __builtin_clz(n); + return (uint32_t)__builtin_clz(n); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) uint32_t i; @@ -444,35 +652,35 @@ clz32(uint32_t n) : "=r" (i) : "rm" (n)); return i; -#elif defined(_MSC_VER) && _MSC_VER >= 1400 - uint32_t i; - _BitScanReverse((DWORD *)&i, n); +#elif defined(_MSC_VER) + unsigned long i; + _BitScanReverse(&i, n); return i ^ 31U; #else uint32_t i = 0; - if ((n & UINT32_C(0xFFFF0000)) == 0) { + if ((n & 0xFFFF0000) == 0) { n <<= 16; i = 16; } - if ((n & UINT32_C(0xFF000000)) == 0) { + if ((n & 0xFF000000) == 0) { n <<= 8; i += 8; } - if ((n & UINT32_C(0xF0000000)) == 0) { + if ((n & 0xF0000000) == 0) { n <<= 4; i += 4; } - if ((n & UINT32_C(0xC0000000)) == 0) { + if ((n & 0xC0000000) == 0) { n <<= 2; i += 2; } - if ((n & UINT32_C(0x80000000)) == 0) + if ((n & 0x80000000) == 0) ++i; return i; @@ -487,42 +695,42 @@ ctz32(uint32_t n) return _bit_scan_forward(n); #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX - return __builtin_ctz(n); + return (uint32_t)__builtin_ctz(n); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) uint32_t i; __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); return i; -#elif defined(_MSC_VER) && _MSC_VER >= 1400 - uint32_t i; - _BitScanForward((DWORD *)&i, n); +#elif defined(_MSC_VER) + unsigned long i; + _BitScanForward(&i, n); return i; #else uint32_t i = 0; - if ((n & UINT32_C(0x0000FFFF)) == 0) { + if ((n & 0x0000FFFF) == 0) { n >>= 16; i = 16; } - if ((n & UINT32_C(0x000000FF)) == 0) { + if ((n & 0x000000FF) == 0) { n >>= 8; i += 8; } - if ((n & UINT32_C(0x0000000F)) == 0) { + if ((n & 0x0000000F) == 0) { n >>= 4; i += 4; } - if ((n & UINT32_C(0x00000003)) == 0) { + if ((n & 0x00000003) == 0) { n >>= 2; i += 2; } - if ((n & UINT32_C(0x00000001)) == 0) + if ((n & 0x00000001) == 0) ++i; return i; diff --git a/liblzma/api/lzma.h b/liblzma/api/lzma.h index aa88e42..122dab8 100644 --- a/liblzma/api/lzma.h +++ b/liblzma/api/lzma.h @@ -224,7 +224,8 @@ # else # define lzma_nothrow throw() # endif -# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +# elif defined(__GNUC__) && (__GNUC__ > 3 \ + || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) # define lzma_nothrow __attribute__((__nothrow__)) # else # define lzma_nothrow @@ -241,7 +242,7 @@ * break anything if these are sometimes enabled and sometimes not, only * affects warnings and optimizations. */ -#if __GNUC__ >= 3 +#if defined(__GNUC__) && __GNUC__ >= 3 # ifndef lzma_attribute # define lzma_attribute(attr) __attribute__(attr) # endif diff --git a/liblzma/api/lzma/block.h b/liblzma/api/lzma/block.h index 7bdcfd7..962f387 100644 --- a/liblzma/api/lzma/block.h +++ b/liblzma/api/lzma/block.h @@ -448,7 +448,7 @@ extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID - * that is not supported by this buid of liblzma. Initializing + * that is not supported by this build of liblzma. Initializing * the encoder failed. * - LZMA_PROG_ERROR */ diff --git a/liblzma/api/lzma/filter.h b/liblzma/api/lzma/filter.h index 4e78752..8c85931 100644 --- a/liblzma/api/lzma/filter.h +++ b/liblzma/api/lzma/filter.h @@ -341,9 +341,10 @@ extern LZMA_API(lzma_ret) lzma_properties_encode( * \param filter filter->id must have been set to the correct * Filter ID. filter->options doesn't need to be * initialized (it's not freed by this function). The - * decoded options will be stored to filter->options. - * filter->options is set to NULL if there are no - * properties or if an error occurs. + * decoded options will be stored in filter->options; + * it's application's responsibility to free it when + * appropriate. filter->options is set to NULL if + * there are no properties or if an error occurs. * \param allocator Custom memory allocator used to allocate the * options. Set to NULL to use the default malloc(), * and in case of an error, also free(). diff --git a/liblzma/api/lzma/hardware.h b/liblzma/api/lzma/hardware.h index 5321d9a..47481f2 100644 --- a/liblzma/api/lzma/hardware.h +++ b/liblzma/api/lzma/hardware.h @@ -6,7 +6,7 @@ * ways to limit the resource usage. Applications linking against liblzma * need to do the actual decisions how much resources to let liblzma to use. * To ease making these decisions, liblzma provides functions to find out - * the relevant capabilities of the underlaying hardware. Currently there + * the relevant capabilities of the underlying hardware. Currently there * is only a function to find out the amount of RAM, but in the future there * will be also a function to detect how many concurrent threads the system * can run. diff --git a/liblzma/api/lzma/lzma12.h b/liblzma/api/lzma/lzma12.h index 4e32fa3..df5f23b 100644 --- a/liblzma/api/lzma/lzma12.h +++ b/liblzma/api/lzma/lzma12.h @@ -301,7 +301,7 @@ typedef struct { * (2^ pb =2^2=4), which is often a good choice when there's * no better guess. * - * When the aligment is known, setting pb accordingly may reduce + * When the alignment is known, setting pb accordingly may reduce * the file size a little. E.g. with text files having one-byte * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can * improve compression slightly. For UTF-16 text, pb=1 is a good diff --git a/liblzma/api/lzma/version.h b/liblzma/api/lzma/version.h index 143c7de..2bf3eae 100644 --- a/liblzma/api/lzma/version.h +++ b/liblzma/api/lzma/version.h @@ -22,7 +22,7 @@ */ #define LZMA_VERSION_MAJOR 5 #define LZMA_VERSION_MINOR 2 -#define LZMA_VERSION_PATCH 4 +#define LZMA_VERSION_PATCH 5 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE #ifndef LZMA_VERSION_COMMIT diff --git a/liblzma/api/lzma/vli.h b/liblzma/api/lzma/vli.h index 9ad13f2..1b7a952 100644 --- a/liblzma/api/lzma/vli.h +++ b/liblzma/api/lzma/vli.h @@ -54,7 +54,7 @@ * * Valid VLI values are in the range [0, LZMA_VLI_MAX]. Unknown value is * indicated with LZMA_VLI_UNKNOWN, which is the maximum value of the - * underlaying integer type. + * underlying integer type. * * lzma_vli will be uint64_t for the foreseeable future. If a bigger size * is needed in the future, it is guaranteed that 2 * LZMA_VLI_MAX will diff --git a/liblzma/check/crc32_fast.c b/liblzma/check/crc32_fast.c index 3de0263..eed7350 100644 --- a/liblzma/check/crc32_fast.c +++ b/liblzma/check/crc32_fast.c @@ -49,7 +49,7 @@ lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) // Calculate the CRC32 using the slice-by-eight algorithm. while (buf < limit) { - crc ^= *(const uint32_t *)(buf); + crc ^= aligned_read32ne(buf); buf += 4; crc = lzma_crc32_table[7][A(crc)] @@ -57,7 +57,7 @@ lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) ^ lzma_crc32_table[5][C(crc)] ^ lzma_crc32_table[4][D(crc)]; - const uint32_t tmp = *(const uint32_t *)(buf); + const uint32_t tmp = aligned_read32ne(buf); buf += 4; // At least with some compilers, it is critical for diff --git a/liblzma/check/crc32_table.c b/liblzma/check/crc32_table.c index 368874e..b11762a 100644 --- a/liblzma/check/crc32_table.c +++ b/liblzma/check/crc32_table.c @@ -12,6 +12,9 @@ #include "common.h" +// Having the declaration here silences clang -Wmissing-variable-declarations. +extern const uint32_t lzma_crc32_table[8][256]; + #ifdef WORDS_BIGENDIAN # include "crc32_table_be.h" #else diff --git a/liblzma/check/crc64_fast.c b/liblzma/check/crc64_fast.c index 52af29e..8af54cd 100644 --- a/liblzma/check/crc64_fast.c +++ b/liblzma/check/crc64_fast.c @@ -47,9 +47,9 @@ lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) while (buf < limit) { #ifdef WORDS_BIGENDIAN const uint32_t tmp = (crc >> 32) - ^ *(const uint32_t *)(buf); + ^ aligned_read32ne(buf); #else - const uint32_t tmp = crc ^ *(const uint32_t *)(buf); + const uint32_t tmp = crc ^ aligned_read32ne(buf); #endif buf += 4; diff --git a/liblzma/check/crc64_table.c b/liblzma/check/crc64_table.c index 1fbcd94..7560eb0 100644 --- a/liblzma/check/crc64_table.c +++ b/liblzma/check/crc64_table.c @@ -12,6 +12,9 @@ #include "common.h" +// Having the declaration here silences clang -Wmissing-variable-declarations. +extern const uint64_t lzma_crc64_table[4][256]; + #ifdef WORDS_BIGENDIAN # include "crc64_table_be.h" #else diff --git a/liblzma/common/alone_decoder.c b/liblzma/common/alone_decoder.c index 77d0a9b..239b230 100644 --- a/liblzma/common/alone_decoder.c +++ b/liblzma/common/alone_decoder.c @@ -50,8 +50,7 @@ typedef struct { static lzma_ret -alone_decode(void *coder_ptr, - const lzma_allocator *allocator lzma_attribute((__unused__)), +alone_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, diff --git a/liblzma/common/alone_encoder.c b/liblzma/common/alone_encoder.c index 4853cfd..96c1db7 100644 --- a/liblzma/common/alone_encoder.c +++ b/liblzma/common/alone_encoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file alone_decoder.c -/// \brief Decoder for LZMA_Alone files +/// \file alone_encoder.c +/// \brief Encoder for LZMA_Alone files // // Author: Lasse Collin // @@ -31,8 +31,7 @@ typedef struct { static lzma_ret -alone_encode(void *coder_ptr, - const lzma_allocator *allocator lzma_attribute((__unused__)), +alone_encode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -122,7 +121,7 @@ alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, if (d != UINT32_MAX) ++d; - unaligned_write32le(coder->header + 1, d); + write32le(coder->header + 1, d); // - Uncompressed size (always unknown and using EOPM) memset(coder->header + 1 + 4, 0xFF, 8); diff --git a/liblzma/common/block_header_decoder.c b/liblzma/common/block_header_decoder.c index 1dd982f..2e1135d 100644 --- a/liblzma/common/block_header_decoder.c +++ b/liblzma/common/block_header_decoder.c @@ -67,7 +67,7 @@ lzma_block_header_decode(lzma_block *block, const size_t in_size = block->header_size - 4; // Verify CRC32 - if (lzma_crc32(in, in_size, 0) != unaligned_read32le(in + in_size)) + if (lzma_crc32(in, in_size, 0) != read32le(in + in_size)) return LZMA_DATA_ERROR; // Check for unsupported flags. @@ -98,7 +98,7 @@ lzma_block_header_decode(lzma_block *block, block->uncompressed_size = LZMA_VLI_UNKNOWN; // Filter Flags - const size_t filter_count = (in[1] & 3) + 1; + const size_t filter_count = (in[1] & 3U) + 1; for (size_t i = 0; i < filter_count; ++i) { const lzma_ret ret = lzma_filter_flags_decode( &block->filters[i], allocator, diff --git a/liblzma/common/block_header_encoder.c b/liblzma/common/block_header_encoder.c index 5c5f542..160425d 100644 --- a/liblzma/common/block_header_encoder.c +++ b/liblzma/common/block_header_encoder.c @@ -126,7 +126,7 @@ lzma_block_header_encode(const lzma_block *block, uint8_t *out) memzero(out + out_pos, out_size - out_pos); // CRC32 - unaligned_write32le(out + out_size, lzma_crc32(out, out_size, 0)); + write32le(out + out_size, lzma_crc32(out, out_size, 0)); return LZMA_OK; } diff --git a/liblzma/common/block_util.c b/liblzma/common/block_util.c index 00c7fe8..acb3111 100644 --- a/liblzma/common/block_util.c +++ b/liblzma/common/block_util.c @@ -1,6 +1,6 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file block_header.c +/// \file block_util.c /// \brief Utility functions to handle lzma_block // // Author: Lasse Collin diff --git a/liblzma/common/common.c b/liblzma/common/common.c index 57e3f8e..cf714e5 100644 --- a/liblzma/common/common.c +++ b/liblzma/common/common.c @@ -1,6 +1,6 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file common.h +/// \file common.c /// \brief Common functions needed in many places in liblzma // // Author: Lasse Collin @@ -99,7 +99,11 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, const size_t out_avail = out_size - *out_pos; const size_t copy_size = my_min(in_avail, out_avail); - memcpy(out + *out_pos, in + *in_pos, copy_size); + // Call memcpy() only if there is something to copy. If there is + // nothing to copy, in or out might be NULL and then the memcpy() + // call would trigger undefined behavior. + if (copy_size > 0) + memcpy(out + *out_pos, in + *in_pos, copy_size); *in_pos += copy_size; *out_pos += copy_size; diff --git a/liblzma/common/filter_common.h b/liblzma/common/filter_common.h index 42a26a2..9390305 100644 --- a/liblzma/common/filter_common.h +++ b/liblzma/common/filter_common.h @@ -1,6 +1,6 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file filter_common.c +/// \file filter_common.h /// \brief Filter-specific stuff common for both encoder and decoder // // Author: Lasse Collin diff --git a/liblzma/common/filter_decoder.h b/liblzma/common/filter_decoder.h index a2e255f..2dac602 100644 --- a/liblzma/common/filter_decoder.h +++ b/liblzma/common/filter_decoder.h @@ -1,6 +1,6 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file filter_decoder.c +/// \file filter_decoder.h /// \brief Filter ID mapping to filter-specific functions // // Author: Lasse Collin diff --git a/liblzma/common/filter_flags_encoder.c b/liblzma/common/filter_flags_encoder.c index d110566..b57b9fd 100644 --- a/liblzma/common/filter_flags_encoder.c +++ b/liblzma/common/filter_flags_encoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file filter_flags_encoder.c -/// \brief Decodes a Filter Flags field +/// \brief Encodes a Filter Flags field // // Author: Lasse Collin // diff --git a/liblzma/common/hardware_physmem.c b/liblzma/common/hardware_physmem.c index 7405b65..a2bbbe2 100644 --- a/liblzma/common/hardware_physmem.c +++ b/liblzma/common/hardware_physmem.c @@ -19,7 +19,7 @@ extern LZMA_API(uint64_t) lzma_physmem(void) { // It is simpler to make lzma_physmem() a wrapper for - // tuklib_physmem() than to hack appropriate symbol visiblity + // tuklib_physmem() than to hack appropriate symbol visibility // support for the tuklib modules. return tuklib_physmem(); } diff --git a/liblzma/common/index.c b/liblzma/common/index.c index 26e4e51..a41e8f3 100644 --- a/liblzma/common/index.c +++ b/liblzma/common/index.c @@ -105,7 +105,7 @@ typedef struct { typedef struct { - /// Every index_stream is a node in the tree of Sreams. + /// Every index_stream is a node in the tree of Streams. index_tree_node node; /// Number of this Stream (first one is 1) @@ -166,7 +166,7 @@ struct lzma_index_s { lzma_vli index_list_size; /// How many Records to allocate at once in lzma_index_append(). - /// This defaults to INDEX_GROUP_SIZE but can be overriden with + /// This defaults to INDEX_GROUP_SIZE but can be overridden with /// lzma_index_prealloc(). size_t prealloc; @@ -825,8 +825,8 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, s->groups.root = &newg->node; } - if (s->groups.rightmost == &g->node) - s->groups.rightmost = &newg->node; + assert(s->groups.rightmost == &g->node); + s->groups.rightmost = &newg->node; lzma_free(g, allocator); diff --git a/liblzma/common/memcmplen.h b/liblzma/common/memcmplen.h index c1efc9e..dcfd8d6 100644 --- a/liblzma/common/memcmplen.h +++ b/liblzma/common/memcmplen.h @@ -61,8 +61,7 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, // to __builtin_clzll(). #define LZMA_MEMCMPLEN_EXTRA 8 while (len < limit) { - const uint64_t x = *(const uint64_t *)(buf1 + len) - - *(const uint64_t *)(buf2 + len); + const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len); if (x != 0) { # if defined(_M_X64) // MSVC or Intel C compiler on Windows unsigned long tmp; @@ -99,15 +98,7 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, _mm_loadu_si128((const __m128i *)(buf2 + len)))); if (x != 0) { -# if defined(__INTEL_COMPILER) - len += _bit_scan_forward(x); -# elif defined(_MSC_VER) - unsigned long tmp; - _BitScanForward(&tmp, x); - len += tmp; -# else - len += __builtin_ctz(x); -# endif + len += ctz32(x); return my_min(len, limit); } @@ -120,8 +111,7 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, // Generic 32-bit little endian method # define LZMA_MEMCMPLEN_EXTRA 4 while (len < limit) { - uint32_t x = *(const uint32_t *)(buf1 + len) - - *(const uint32_t *)(buf2 + len); + uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len); if (x != 0) { if ((x & 0xFFFF) == 0) { len += 2; @@ -143,8 +133,7 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, // Generic 32-bit big endian method # define LZMA_MEMCMPLEN_EXTRA 4 while (len < limit) { - uint32_t x = *(const uint32_t *)(buf1 + len) - ^ *(const uint32_t *)(buf2 + len); + uint32_t x = read32ne(buf1 + len) ^ read32ne(buf2 + len); if (x != 0) { if ((x & 0xFFFF0000) == 0) { len += 2; diff --git a/liblzma/common/stream_encoder_mt.c b/liblzma/common/stream_encoder_mt.c index 2efe44c..01e4033 100644 --- a/liblzma/common/stream_encoder_mt.c +++ b/liblzma/common/stream_encoder_mt.c @@ -700,7 +700,7 @@ stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator, ret = coder->thread_error; if (ret != LZMA_OK) { assert(ret != LZMA_STREAM_END); - break; + break; // Break out of mythread_sync. } // Try to read compressed data to out[]. @@ -958,7 +958,7 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, // Validate the filter chain so that we can give an error in this // function instead of delaying it to the first call to lzma_code(). // The memory usage calculation verifies the filter chain as - // a side effect so we take advatange of that. + // a side effect so we take advantage of that. if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) return LZMA_OPTIONS_ERROR; diff --git a/liblzma/common/stream_flags_decoder.c b/liblzma/common/stream_flags_decoder.c index 1bc2f97..4e43e35 100644 --- a/liblzma/common/stream_flags_decoder.c +++ b/liblzma/common/stream_flags_decoder.c @@ -38,7 +38,7 @@ lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) // and unsupported files. const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), LZMA_STREAM_FLAGS_SIZE, 0); - if (crc != unaligned_read32le(in + sizeof(lzma_header_magic) + if (crc != read32le(in + sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE)) return LZMA_DATA_ERROR; @@ -67,7 +67,7 @@ lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) // CRC32 const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); - if (crc != unaligned_read32le(in)) + if (crc != read32le(in)) return LZMA_DATA_ERROR; // Stream Flags @@ -75,7 +75,7 @@ lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) return LZMA_OPTIONS_ERROR; // Backward Size - options->backward_size = unaligned_read32le(in + sizeof(uint32_t)); + options->backward_size = read32le(in + sizeof(uint32_t)); options->backward_size = (options->backward_size + 1) * 4; return LZMA_OK; diff --git a/liblzma/common/stream_flags_encoder.c b/liblzma/common/stream_flags_encoder.c index 4e71715..b98ab17 100644 --- a/liblzma/common/stream_flags_encoder.c +++ b/liblzma/common/stream_flags_encoder.c @@ -46,8 +46,8 @@ lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), LZMA_STREAM_FLAGS_SIZE, 0); - unaligned_write32le(out + sizeof(lzma_header_magic) - + LZMA_STREAM_FLAGS_SIZE, crc); + write32le(out + sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE, + crc); return LZMA_OK; } @@ -66,7 +66,7 @@ lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) if (!is_backward_size_valid(options)) return LZMA_PROG_ERROR; - unaligned_write32le(out + 4, options->backward_size / 4 - 1); + write32le(out + 4, options->backward_size / 4 - 1); // Stream Flags if (stream_flags_encode(options, out + 2 * 4)) @@ -76,7 +76,7 @@ lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) const uint32_t crc = lzma_crc32( out + 4, 4 + LZMA_STREAM_FLAGS_SIZE, 0); - unaligned_write32le(out, crc); + write32le(out, crc); // Magic memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, diff --git a/liblzma/common/vli_decoder.c b/liblzma/common/vli_decoder.c index c181828..af2799d 100644 --- a/liblzma/common/vli_decoder.c +++ b/liblzma/common/vli_decoder.c @@ -72,7 +72,7 @@ lzma_vli_decode(lzma_vli *restrict vli, size_t *vli_pos, // corrupt. // // If we need bigger integers in future, old versions liblzma - // will confusingly indicate the file being corrupt istead of + // will confusingly indicate the file being corrupt instead of // unsupported. I suppose it's still better this way, because // in the foreseeable future (writing this in 2008) the only // reason why files would appear having over 63-bit integers diff --git a/liblzma/delta/delta_decoder.c b/liblzma/delta/delta_decoder.c index 6859afa..13d8a28 100644 --- a/liblzma/delta/delta_decoder.c +++ b/liblzma/delta/delta_decoder.c @@ -70,7 +70,7 @@ lzma_delta_props_decode(void **options, const lzma_allocator *allocator, return LZMA_MEM_ERROR; opt->type = LZMA_DELTA_TYPE_BYTE; - opt->dist = props[0] + 1; + opt->dist = props[0] + 1U; *options = opt; diff --git a/liblzma/lz/lz_decoder.c b/liblzma/lz/lz_decoder.c index c708644..09b5743 100644 --- a/liblzma/lz/lz_decoder.c +++ b/liblzma/lz/lz_decoder.c @@ -91,11 +91,17 @@ decode_buffer(lzma_coder *coder, in, in_pos, in_size); // Copy the decoded data from the dictionary to the out[] - // buffer. + // buffer. Do it conditionally because out can be NULL + // (in which case copy_size is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. const size_t copy_size = coder->dict.pos - dict_start; assert(copy_size <= out_size - *out_pos); - memcpy(out + *out_pos, coder->dict.buf + dict_start, - copy_size); + + if (copy_size > 0) + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + *out_pos += copy_size; // Reset the dictionary if so requested by coder->lz.code(). @@ -125,8 +131,7 @@ decode_buffer(lzma_coder *coder, static lzma_ret -lz_decode(void *coder_ptr, - const lzma_allocator *allocator lzma_attribute((__unused__)), +lz_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -241,7 +246,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, if (lz_options.dict_size < 4096) lz_options.dict_size = 4096; - // Make dictionary size a multipe of 16. Some LZ-based decoders like + // Make dictionary size a multiple of 16. Some LZ-based decoders like // LZMA use the lowest bits lzma_dict.pos to know the alignment of the // data. Aligned buffer is also good when memcpying from the // dictionary to the output buffer, since applications are diff --git a/liblzma/lz/lz_encoder_hash.h b/liblzma/lz/lz_encoder_hash.h index 342a333..fb15c58 100644 --- a/liblzma/lz/lz_encoder_hash.h +++ b/liblzma/lz/lz_encoder_hash.h @@ -39,7 +39,7 @@ // Endianness doesn't matter in hash_2_calc() (no effect on the output). #ifdef TUKLIB_FAST_UNALIGNED_ACCESS # define hash_2_calc() \ - const uint32_t hash_value = *(const uint16_t *)(cur) + const uint32_t hash_value = read16ne(cur) #else # define hash_2_calc() \ const uint32_t hash_value \ diff --git a/liblzma/lz/lz_encoder_mf.c b/liblzma/lz/lz_encoder_mf.c index 7852077..d03657a 100644 --- a/liblzma/lz/lz_encoder_mf.c +++ b/liblzma/lz/lz_encoder_mf.c @@ -113,7 +113,7 @@ normalize(lzma_mf *mf) // may be match finders that use larger resolution than one byte. const uint32_t subvalue = (MUST_NORMALIZE_POS - mf->cyclic_size); - // & (~(UINT32_C(1) << 10) - 1); + // & ~((UINT32_C(1) << 10) - 1); for (uint32_t i = 0; i < mf->hash_count; ++i) { // If the distance is greater than the dictionary size, diff --git a/liblzma/lzma/fastpos.h b/liblzma/lzma/fastpos.h index a3feea5..cba442c 100644 --- a/liblzma/lzma/fastpos.h +++ b/liblzma/lzma/fastpos.h @@ -101,7 +101,7 @@ extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS]; (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n))) #define fastpos_result(dist, extra, n) \ - lzma_fastpos[(dist) >> fastpos_shift(extra, n)] \ + (uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \ + 2 * fastpos_shift(extra, n) diff --git a/liblzma/lzma/fastpos_tablegen.c b/liblzma/lzma/fastpos_tablegen.c index c97e6f4..d4484c8 100644 --- a/liblzma/lzma/fastpos_tablegen.c +++ b/liblzma/lzma/fastpos_tablegen.c @@ -11,7 +11,6 @@ // /////////////////////////////////////////////////////////////////////////////// -#include <sys/types.h> #include <inttypes.h> #include <stdio.h> #include "fastpos.h" diff --git a/liblzma/lzma/lzma2_decoder.c b/liblzma/lzma/lzma2_decoder.c index 878c870..cf1b511 100644 --- a/liblzma/lzma/lzma2_decoder.c +++ b/liblzma/lzma/lzma2_decoder.c @@ -136,7 +136,7 @@ lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, break; case SEQ_UNCOMPRESSED_2: - coder->uncompressed_size += in[(*in_pos)++] + 1; + coder->uncompressed_size += in[(*in_pos)++] + 1U; coder->sequence = SEQ_COMPRESSED_0; coder->lzma.set_uncompressed(coder->lzma.coder, coder->uncompressed_size); @@ -148,7 +148,7 @@ lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, break; case SEQ_COMPRESSED_1: - coder->compressed_size += in[(*in_pos)++] + 1; + coder->compressed_size += in[(*in_pos)++] + 1U; coder->sequence = coder->next_sequence; break; @@ -297,8 +297,8 @@ lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, if (props[0] == 40) { opt->dict_size = UINT32_MAX; } else { - opt->dict_size = 2 | (props[0] & 1); - opt->dict_size <<= props[0] / 2 + 11; + opt->dict_size = 2 | (props[0] & 1U); + opt->dict_size <<= props[0] / 2U + 11; } opt->preset_dict = NULL; diff --git a/liblzma/lzma/lzma_common.h b/liblzma/lzma/lzma_common.h index 09efd38..9d040d9 100644 --- a/liblzma/lzma/lzma_common.h +++ b/liblzma/lzma/lzma_common.h @@ -122,7 +122,8 @@ typedef enum { /// byte; and /// - the highest literal_context_bits bits of the previous byte. #define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ - ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))]) + ((probs)[(((pos) & (lp_mask)) << (lc)) \ + + ((uint32_t)(prev_byte) >> (8U - (lc)))]) static inline void diff --git a/liblzma/lzma/lzma_decoder.c b/liblzma/lzma/lzma_decoder.c index d0f29b7..e605a0a 100644 --- a/liblzma/lzma/lzma_decoder.c +++ b/liblzma/lzma/lzma_decoder.c @@ -398,7 +398,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, // ("match byte") to "len" to minimize the // number of variables we need to store // between decoder calls. - len = dict_get(&dict, rep0) << 1; + len = (uint32_t)(dict_get(&dict, rep0)) << 1; // The usage of "offset" allows omitting some // branches, which should give tiny speed @@ -569,7 +569,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, #ifdef HAVE_SMALL do { rc_bit(probs[symbol], , - rep0 += 1 << offset, + rep0 += 1U << offset, SEQ_DIST_MODEL); } while (++offset < limit); #else @@ -577,25 +577,25 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, case 5: assert(offset == 0); rc_bit(probs[symbol], , - rep0 += 1, + rep0 += 1U, SEQ_DIST_MODEL); ++offset; --limit; case 4: rc_bit(probs[symbol], , - rep0 += 1 << offset, + rep0 += 1U << offset, SEQ_DIST_MODEL); ++offset; --limit; case 3: rc_bit(probs[symbol], , - rep0 += 1 << offset, + rep0 += 1U << offset, SEQ_DIST_MODEL); ++offset; --limit; case 2: rc_bit(probs[symbol], , - rep0 += 1 << offset, + rep0 += 1U << offset, SEQ_DIST_MODEL); ++offset; --limit; @@ -607,7 +607,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, // the unneeded updating of // "symbol". rc_bit_last(probs[symbol], , - rep0 += 1 << offset, + rep0 += 1U << offset, SEQ_DIST_MODEL); } #endif @@ -635,7 +635,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, do { rc_bit(coder->pos_align[ symbol], , - rep0 += 1 << offset, + rep0 += 1U << offset, SEQ_ALIGN); } while (++offset < ALIGN_BITS); #else @@ -1049,7 +1049,7 @@ lzma_lzma_props_decode(void **options, const lzma_allocator *allocator, // All dictionary sizes are accepted, including zero. LZ decoder // will automatically use a dictionary at least a few KiB even if // a smaller dictionary is requested. - opt->dict_size = unaligned_read32le(props + 1); + opt->dict_size = read32le(props + 1); opt->preset_dict = NULL; opt->preset_dict_size = 0; diff --git a/liblzma/lzma/lzma_encoder.c b/liblzma/lzma/lzma_encoder.c index ba9ce69..07d2b87 100644 --- a/liblzma/lzma/lzma_encoder.c +++ b/liblzma/lzma/lzma_encoder.c @@ -663,7 +663,7 @@ lzma_lzma_props_encode(const void *options, uint8_t *out) if (lzma_lzma_lclppb_encode(opt, out)) return LZMA_PROG_ERROR; - unaligned_write32le(out + 1, opt->dict_size); + write32le(out + 1, opt->dict_size); return LZMA_OK; } diff --git a/liblzma/lzma/lzma_encoder_optimum_normal.c b/liblzma/lzma/lzma_encoder_optimum_normal.c index 59f7734..101c8d4 100644 --- a/liblzma/lzma/lzma_encoder_optimum_normal.c +++ b/liblzma/lzma/lzma_encoder_optimum_normal.c @@ -636,9 +636,10 @@ helper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf, uint32_t len_test_2 = len_test + 1; const uint32_t limit = my_min(buf_avail_full, len_test_2 + nice_len); - for (; len_test_2 < limit - && buf[len_test_2] == buf_back[len_test_2]; - ++len_test_2) ; + // NOTE: len_test_2 may be greater than limit so the call to + // lzma_memcmplen() must be done conditionally. + if (len_test_2 < limit) + len_test_2 = lzma_memcmplen(buf, buf_back, len_test_2, limit); len_test_2 -= len_test + 1; @@ -732,9 +733,12 @@ helper2(lzma_lzma1_encoder *coder, uint32_t *reps, const uint8_t *buf, const uint32_t limit = my_min(buf_avail_full, len_test_2 + nice_len); - for (; len_test_2 < limit && - buf[len_test_2] == buf_back[len_test_2]; - ++len_test_2) ; + // NOTE: len_test_2 may be greater than limit + // so the call to lzma_memcmplen() must be + // done conditionally. + if (len_test_2 < limit) + len_test_2 = lzma_memcmplen(buf, buf_back, + len_test_2, limit); len_test_2 -= len_test + 1; diff --git a/liblzma/lzma/lzma_encoder_private.h b/liblzma/lzma/lzma_encoder_private.h index a2da969..2e34aac 100644 --- a/liblzma/lzma/lzma_encoder_private.h +++ b/liblzma/lzma/lzma_encoder_private.h @@ -25,8 +25,7 @@ // MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no // reason to not use it when it is supported. #ifdef TUKLIB_FAST_UNALIGNED_ACCESS -# define not_equal_16(a, b) \ - (*(const uint16_t *)(a) != *(const uint16_t *)(b)) +# define not_equal_16(a, b) (read16ne(a) != read16ne(b)) #else # define not_equal_16(a, b) \ ((a)[0] != (b)[0] || (a)[1] != (b)[1]) diff --git a/liblzma/simple/arm.c b/liblzma/simple/arm.c index 181d0e3..ff5073a 100644 --- a/liblzma/simple/arm.c +++ b/liblzma/simple/arm.c @@ -22,9 +22,9 @@ arm_code(void *simple lzma_attribute((__unused__)), size_t i; for (i = 0; i + 4 <= size; i += 4) { if (buffer[i + 3] == 0xEB) { - uint32_t src = (buffer[i + 2] << 16) - | (buffer[i + 1] << 8) - | (buffer[i + 0]); + uint32_t src = ((uint32_t)(buffer[i + 2]) << 16) + | ((uint32_t)(buffer[i + 1]) << 8) + | (uint32_t)(buffer[i + 0]); src <<= 2; uint32_t dest; diff --git a/liblzma/simple/armthumb.c b/liblzma/simple/armthumb.c index eab4862..a8da334 100644 --- a/liblzma/simple/armthumb.c +++ b/liblzma/simple/armthumb.c @@ -23,10 +23,10 @@ armthumb_code(void *simple lzma_attribute((__unused__)), for (i = 0; i + 4 <= size; i += 2) { if ((buffer[i + 1] & 0xF8) == 0xF0 && (buffer[i + 3] & 0xF8) == 0xF8) { - uint32_t src = ((buffer[i + 1] & 0x7) << 19) - | (buffer[i + 0] << 11) - | ((buffer[i + 3] & 0x7) << 8) - | (buffer[i + 2]); + uint32_t src = (((uint32_t)(buffer[i + 1]) & 7) << 19) + | ((uint32_t)(buffer[i + 0]) << 11) + | (((uint32_t)(buffer[i + 3]) & 7) << 8) + | (uint32_t)(buffer[i + 2]); src <<= 1; diff --git a/liblzma/simple/ia64.c b/liblzma/simple/ia64.c index 580529e..6492d0a 100644 --- a/liblzma/simple/ia64.c +++ b/liblzma/simple/ia64.c @@ -70,7 +70,7 @@ ia64_code(void *simple lzma_attribute((__unused__)), inst_norm |= (uint64_t)(dest & 0x100000) << (36 - 20); - instruction &= (1 << bit_res) - 1; + instruction &= (1U << bit_res) - 1; instruction |= (inst_norm << bit_res); for (size_t j = 0; j < 6; j++) diff --git a/liblzma/simple/powerpc.c b/liblzma/simple/powerpc.c index 54dfbf1..0b60e9b 100644 --- a/liblzma/simple/powerpc.c +++ b/liblzma/simple/powerpc.c @@ -25,10 +25,11 @@ powerpc_code(void *simple lzma_attribute((__unused__)), if ((buffer[i] >> 2) == 0x12 && ((buffer[i + 3] & 3) == 1)) { - const uint32_t src = ((buffer[i + 0] & 3) << 24) - | (buffer[i + 1] << 16) - | (buffer[i + 2] << 8) - | (buffer[i + 3] & (~3)); + const uint32_t src + = (((uint32_t)(buffer[i + 0]) & 3) << 24) + | ((uint32_t)(buffer[i + 1]) << 16) + | ((uint32_t)(buffer[i + 2]) << 8) + | ((uint32_t)(buffer[i + 3]) & ~UINT32_C(3)); uint32_t dest; if (is_encoder) diff --git a/liblzma/simple/simple_coder.c b/liblzma/simple/simple_coder.c index 13ebabc..4f499be 100644 --- a/liblzma/simple/simple_coder.c +++ b/liblzma/simple/simple_coder.c @@ -118,7 +118,15 @@ simple_code(void *coder_ptr, const lzma_allocator *allocator, // coder->pos and coder->size yet. This way the coder can be // restarted if the next filter in the chain returns e.g. // LZMA_MEM_ERROR. - memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail); + // + // Do the memcpy() conditionally because out can be NULL + // (in which case buf_avail is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. + if (buf_avail > 0) + memcpy(out + *out_pos, coder->buffer + coder->pos, + buf_avail); + *out_pos += buf_avail; // Copy/Encode/Decode more data to out[]. diff --git a/liblzma/simple/simple_decoder.c b/liblzma/simple/simple_decoder.c index 1d864f2..dc4d241 100644 --- a/liblzma/simple/simple_decoder.c +++ b/liblzma/simple/simple_decoder.c @@ -28,7 +28,7 @@ lzma_simple_props_decode(void **options, const lzma_allocator *allocator, if (opt == NULL) return LZMA_MEM_ERROR; - opt->start_offset = unaligned_read32le(props); + opt->start_offset = read32le(props); // Don't leave an options structure allocated if start_offset is zero. if (opt->start_offset == 0) diff --git a/liblzma/simple/simple_encoder.c b/liblzma/simple/simple_encoder.c index 8aa463b..d2cc03e 100644 --- a/liblzma/simple/simple_encoder.c +++ b/liblzma/simple/simple_encoder.c @@ -32,7 +32,7 @@ lzma_simple_props_encode(const void *options, uint8_t *out) if (opt == NULL || opt->start_offset == 0) return LZMA_OK; - unaligned_write32le(out, opt->start_offset); + write32le(out, opt->start_offset); return LZMA_OK; } diff --git a/liblzma/simple/x86.c b/liblzma/simple/x86.c index 0b14807..0e78909 100644 --- a/liblzma/simple/x86.c +++ b/liblzma/simple/x86.c @@ -97,7 +97,7 @@ x86_code(void *simple_ptr, uint32_t now_pos, bool is_encoder, if (!Test86MSByte(b)) break; - src = dest ^ ((1 << (32 - i * 8)) - 1); + src = dest ^ ((1U << (32 - i * 8)) - 1); } buffer[buffer_pos + 4] |