Merge pull request #670 from lz4/dev

v1.9.0
author: Yann Collet <Cyan4973@users.noreply.github.com> 2019-04-16 17:07:41 (GMT)
committer: GitHub <noreply@github.com> 2019-04-16 17:07:41 (GMT)
commit: f1226ac53dee1b41a801e8003bb3708a2e671d12 (patch)
tree: dd17db72ce5d38e4d1cc65e4a298b11d2d96c817
parent: 01d2a721d393646384291af2f6f2f940493cd78f (diff)
parent: fc3176f6aa6b8034e0a27598c23bdda559b5cf9c (diff)
download: lz4-f1226ac53dee1b41a801e8003bb3708a2e671d12.zip
lz4-f1226ac53dee1b41a801e8003bb3708a2e671d12.tar.gz
lz4-f1226ac53dee1b41a801e8003bb3708a2e671d12.tar.bz2
57 files changed, 4416 insertions, 2094 deletions
diff --git a/.gitignore b/.gitignore
index 829270b..2a59a7d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,6 @@ bin/
 # Windows / Msys
 nul
 ld.exe*
+
+# test files
+*.lz4
diff --git a/.travis.yml b/.travis.yml
index de6875b..301d294 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,116 +1,123 @@
 language: c
+
 matrix:
   fast_finish: true
   include:
     # OS X Mavericks
-    - os: osx
-      install:
-        - export CC=clang
-      env: Ubu=OS_X_Mavericks Cmd='make -C tests test-lz4 MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" && CFLAGS=-m32 make -C tests clean test-lz4-contentSize' COMPILER=clang
+    - name: (macOS) General Test
+      os: osx
+      compiler: clang
+      script:
+        - make -C tests test-lz4 MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion'
+        - CFLAGS=-m32 make -C tests clean test-lz4-contentSize
 
     # Container-based 12.04 LTS Server Edition 64 bit (doesn't support 32-bit includes)
-    - os: linux
-      sudo: false
-      env: Ubu=12.04cont Cmd='make -C tests test-lz4 test-lz4c test-fullbench' COMPILER=cc
+    - name: (Precise) benchmark test
+      dist: precise
+      script:
+        - make -C tests test-lz4 test-lz4c test-fullbench
 
-    - os: linux
-      sudo: required
-      env: Ubu=12.04cont Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest test-fuzzer' COMPILER=cc
+    - name: (Precise) frame and fuzzer test
+      dist: precise
+      install:
+        - sudo sysctl -w vm.mmap_min_addr=4096
+      script:
+        - make -C tests test-frametest test-fuzzer
 
-    - os: linux
-      sudo: false
-      env: Ubu=12.04cont Cmd="make gpptest && make clean && make examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
+    - name: (Precise) g++ and clang CMake test
+      dist: precise
+      script:
+        - make gpptest
+        - make clean
+        - make examples
+        - make clean cmake
+        - make clean travis-install
+        - make clean clangtest
 
 
     # 14.04 LTS Server Edition 64 bit
-    - env: Ubu=14.04 Cmd='make -C tests test MOREFLAGS=-mx32' COMPILER=cc
+    - name: (Trusty) i386 gcc test
       dist: trusty
-      sudo: required
       addons:
         apt:
           packages:
             - libc6-dev-i386
             - gcc-multilib
+      script:
+        - make -C tests test MOREFLAGS=-mx32
 
     # presume clang >= v3.9.0
-    - env: Ubu=14.04 Cmd='make usan MOREFLAGS=-Wcomma -Werror' COMPILER=clang
+    - name: (Trusty) USan test
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - clang
+      compiler: clang
+      script:
+        - make usan MOREFLAGS='-Wcomma -Werror'  # quoted, otherwise make itself consumes -Werror as its -W (what-if) option
 
-    - env: Ubu=14.04 Cmd='make c_standards && make -C tests test-lz4 test-mem' COMPILER=cc
+    - name: (Trusty) valgrind test
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - valgrind
+      install:
+        - sudo apt-get install -qq valgrind
+      script:
+        - make c_standards
+        - make -C tests test-lz4 test-mem
 
-    - env: Ubu=14.04 Cmd='make ctocpptest' COMPILER=cc
+    - name: (Trusty) c-to-c++ test
       dist: trusty
-      sudo: false
+      script:
+        - make ctocpptest
 
-    - env: Ubu=14.04 Cmd='make -C tests test-lz4c32 test-fullbench32 versionsTest' COMPILER=cc
+    - name: (Trusty) i386 benchmark + version test
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - python3
-            - libc6-dev-i386
-            - gcc-multilib
+      install:
+        - sudo apt-get install -qq python3 libc6-dev-i386 gcc-multilib
+      script:
+        - make -C tests test-lz4c32 test-fullbench32 versionsTest
 
-    - env: Ubu=14.04 Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
+    - name: (Trusty) i386 frame + fuzzer test
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - libc6-dev-i386
-            - gcc-multilib
+      install:
+        - sudo apt-get install -qq libc6-dev-i386 gcc-multilib
+        - sudo sysctl -w vm.mmap_min_addr=4096
+      script:
+        - make -C tests test-frametest32 test-fuzzer32
 
-    - env: Ubu=14.04 Cmd='make c_standards CC=gcc-6 && make -C tests test-lz4 CC=gcc-6 MOREFLAGS=-Werror' COMPILER=gcc-6
+    - name: (Trusty) gcc-6 standard C compilation
       dist: trusty
-      sudo: required
       addons:
         apt:
           sources:
             - ubuntu-toolchain-r-test
           packages:
             - gcc-6
+      env:
+        - CC=gcc-6
+      script:
+        - make c_standards
+        - make -C tests test-lz4 MOREFLAGS=-Werror
 
-    - env: Ubu=14.04 Cmd='make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static && make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static' COMPILER=arm-linux-gnueabi-gcc
-      dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - qemu-system-arm
-            - qemu-user-static
-            - gcc-arm-linux-gnueabi
-            - libc6-dev-armel-cross
-            - gcc-aarch64-linux-gnu
-            - libc6-dev-arm64-cross
-
-    - env: Ubu=14.04 Cmd='make -C tests test-lz4 clean test-lz4c32 CC=gcc-5 MOREFLAGS=-Werror' COMPILER=gcc-5
+    - name: (Trusty) arm + aarch64 compilation
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - libc6-dev-i386
-            - gcc-multilib
-            - gcc-5
-            - gcc-5-multilib
+      install:
+        - sudo apt-get install -qq
+            qemu-system-arm
+            qemu-user-static
+            gcc-arm-linux-gnueabi
+            libc6-dev-armel-cross
+            gcc-aarch64-linux-gnu
+            libc6-dev-arm64-cross
+      script:
+        - make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static
+        - make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static
+
+    - name: (Xenial) gcc-5 compilation
+      dist: xenial
+      install:
+        - sudo apt-get install -qq libc6-dev-i386 gcc-multilib
+      script:
+        - make -C tests test-lz4 clean test-lz4c32 MOREFLAGS=-Werror
 
-    - env: Ubu=14.04 Cmd='make -C tests test-lz4 CC=clang-3.8' COMPILER=clang-3.8
+    - name: (Trusty) clang-3.8 compilation
       dist: trusty
-      sudo: required
       addons:
         apt:
           sources:
@@ -118,28 +125,28 @@ matrix:
             - llvm-toolchain-precise-3.8
           packages:
             - clang-3.8
+      script:
+        - make -C tests test-lz4 CC=clang-3.8
 
-    - env: Ubu=14.04 Cmd='make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static && make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64' COMPILER=powerpc-linux-gnu-gcc
+    - name: (Trusty) PowerPC + PPC64 compilation
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - qemu-system-ppc
-            - qemu-user-static
-            - gcc-powerpc-linux-gnu
+      install:
+        - sudo apt-get install -qq qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu
+      script:
+        - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static
+        - make platformTest CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS=-m64
 
-    - env: Ubu=14.04 Cmd='make staticAnalyze' COMPILER=clang
+    - name: (Trusty) scan-build + cppcheck
       dist: trusty
-      sudo: required
-      addons:
-        apt:
-          packages:
-            - clang
+      compiler: clang
+      install:
+        - sudo apt-get install -qq cppcheck
+      script:
+        - make staticAnalyze
+        - make cppcheck
 
-    - env: Ubu=14.04 Cmd='make clean all CC=gcc-4.4 MOREFLAGS=-Werror && make clean && CFLAGS=-fPIC LDFLAGS="-pie -fPIE -D_FORTIFY_SOURCE=2" make -C programs' COMPILER=gcc-4.4
+    - name: (Trusty) gcc-4.4 compilation
       dist: trusty
-      sudo: required
       addons:
         apt:
           sources:
@@ -148,16 +155,41 @@ matrix:
             - libc6-dev-i386
             - gcc-multilib
             - gcc-4.4
+      script:
+        - make clean all CC=gcc-4.4 MOREFLAGS=-Werror
+        - make clean
+        - CFLAGS=-fPIC LDFLAGS='-pie -fPIE -D_FORTIFY_SOURCE=2' make -C programs
 
     # tag-specific test
-    - if: tag =~ ^v[0-9]\.[0-9]
+    - name: tag build
+      if: tag =~ ^v[0-9]\.[0-9]
       os: linux
-      sudo: false
-      env: Cmd="make -C tests checkTag && tests/checkTag $TRAVIS_BRANCH " COMPILER=cc
-
-
-script:
-  - uname -a
-  - echo Cmd=$Cmd
-  - $COMPILER -v
-  - sh -c "$Cmd"
+      script:
+        - make -C tests checkTag
+        - tests/checkTag "$TRAVIS_BRANCH"
+
+    - name: (Xenial) Meson + clang build
+      env: ALLOW_FAILURES=true
+      dist: xenial
+      language: cpp
+      compiler: clang
+      install:
+        - sudo apt-get install -qq python3 tree
+        - curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip'
+            && unzip ~/ninja.zip -d ~/.local/bin
+        - curl -o ~/get-pip.py 'https://bootstrap.pypa.io/get-pip.py'
+            && python3 ~/get-pip.py --user
+            && pip3 install --user meson
+      script:
+        - meson setup
+            --buildtype=debug
+            -Db_lundef=false
+            -Dauto_features=enabled
+            -Ddefault_library=both
+            -Dbuild_{programs,contrib,tests,examples}=true
+            contrib/meson build
+        - cd build
+        - DESTDIR=./staging ninja install
+        - tree ./staging
+  allow_failures:
+    - env: ALLOW_FAILURES=true
diff --git a/Makefile b/Makefile
index 69a34b7..f3c6ce2 100644
--- a/Makefile
+++ b/Makefile
@@ -50,10 +50,10 @@ endif
 default: lib-release lz4-release
 
 .PHONY: all
-all: allmost manuals
+all: allmost examples manuals build_tests
 
 .PHONY: allmost
-allmost: lib lz4 examples
+allmost: lib lz4
 
 .PHONY: lib lib-release liblz4.a
 lib: liblz4.a
@@ -75,6 +75,10 @@ examples: liblz4.a
 manuals:
 	@$(MAKE) -C contrib/gen_manual $@
 
+.PHONY: build_tests
+build_tests:
+	@$(MAKE) -C $(TESTDIR) all
+
 .PHONY: clean
 clean:
 	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
@@ -89,7 +93,7 @@ clean:
 #-----------------------------------------------------------------------------
 # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD MINGW32_NT-6.1 MINGW64_NT-6.1 MINGW32_NT-10.0 MINGW64_NT-10.0))
 HOST_OS = POSIX
 
 .PHONY: install uninstall
@@ -148,9 +152,14 @@ usan: clean
 usan32: clean
 	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
+.PHONY: staticAnalyze
 staticAnalyze: clean
 	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
 
+.PHONY: cppcheck
+cppcheck:
+	cppcheck . --force --enable=warning,portability,performance,style --error-exitcode=1 > /dev/null
+
 platformTest: clean
 	@echo "\n ---- test lz4 with $(CC) compiler ----"
 	@$(CC) -v
@@ -181,10 +190,10 @@ ctocpptest: clean
 	CC=$(TESTCC) $(MAKE) -C $(TESTDIR) CFLAGS="$(CFLAGS)" all
 
 c_standards: clean
-	CFLAGS="-std=c90   -Werror" $(MAKE) clean allmost
-	CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost
-	CFLAGS="-std=c99   -Werror" $(MAKE) clean allmost
-	CFLAGS="-std=gnu99 -Werror" $(MAKE) clean allmost
-	CFLAGS="-std=c11   -Werror" $(MAKE) clean allmost
+	$(MAKE) clean; CFLAGS="-std=c90   -Werror -pedantic -Wno-long-long -Wno-variadic-macros" $(MAKE) allmost
+	$(MAKE) clean; CFLAGS="-std=gnu90 -Werror -pedantic -Wno-long-long -Wno-variadic-macros" $(MAKE) allmost
+	$(MAKE) clean; CFLAGS="-std=c99   -Werror -pedantic" $(MAKE) all
+	$(MAKE) clean; CFLAGS="-std=gnu99 -Werror -pedantic" $(MAKE) all
+	$(MAKE) clean; CFLAGS="-std=c11   -Werror" $(MAKE) all
 
 endif
diff --git a/NEWS b/NEWS
index 13a9a1c..6313142 100644
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,20 @@
+v1.9.0
+perf: large decompression speed improvement on x86/x64 (~+20%) by @djwatson
+api : changed : _destSize() compression variants are promoted to stable API
+api : new : LZ4_initStream(HC), replacing LZ4_resetStream(HC)
+api : changed : LZ4_resetStream_fast() / LZ4_resetStreamHC_fast() as recommended reset function, for better performance on small data
+cli : support custom block sizes, by @blezsan
+build: source code can be amalgamated, by Bing Xu
+build: added meson build, by @lzutao
+build: new build macros : LZ4_DISTANCE_MAX, LZ4_FAST_DEC_LOOP
+install: MidnightBSD, by @laffer1
+install: Windows 10 msys2, by @vtorri
+
 v1.8.3
 perf: minor decompression speed improvement (~+2%) with gcc
 fix : corruption in v1.8.2 at level 9 for files > 64KB under rare conditions (#560)
 cli : new command --fast, by @jennifermliu
+cli : fixed elapsed time, and added cpu load indicator (on -vv) (#555)
 api : LZ4_decompress_safe_partial() now decodes exactly the nb of bytes requested (feature request #566)
 build : added Haiku target, by @fbrosson, and MidnightBSD, by @laffer1
 doc : updated documentation regarding dictionary compression
diff --git a/README.md b/README.md
index e64020d..fd58573 100644
--- a/README.md
+++ b/README.md
@@ -15,9 +15,11 @@ trading CPU time for improved compression ratio.
 All versions feature the same decompression speed.
 
 LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression),
-and can ingest any input file as dictionary,
-including those created by [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder).
-(note: only the final 64KB are used).
+both at [API](https://github.com/lz4/lz4/blob/v1.8.3/lib/lz4frame.h#L481) and [CLI](https://github.com/lz4/lz4/blob/v1.8.3/programs/lz4.1.md#operation-modifiers) levels.
+It can ingest any input file as dictionary, though only the final 64KB are used.
+This capability can be combined with the [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder),
+in order to drastically improve compression performance on small files.
+
 
 LZ4 library is provided as open-source software using BSD 2-Clause license.
 
@@ -48,8 +50,8 @@ Benchmarks
 -------------------------
 
 The benchmark uses [lzbench], from @inikep
-compiled with GCC v7.3.0 on Linux 64-bits (Debian 4.15.17-1).
-The reference system uses a Core i7-6700K CPU @ 4.0GHz.
+compiled with GCC v8.2.0 on Linux 64-bits (Ubuntu 4.18.0-17).
+The reference system uses a Core i7-9700K CPU @ 4.9GHz.
 Benchmark evaluates the compression of reference [Silesia Corpus]
 in single-thread mode.
 
@@ -58,16 +60,16 @@ in single-thread mode.
 
 |  Compressor             | Ratio   | Compression | Decompression |
 |  ----------             | -----   | ----------- | ------------- |
-|  memcpy                 |  1.000  |13100 MB/s   |  13100 MB/s   |
-|**LZ4 default (v1.8.2)** |**2.101**|**730 MB/s** | **3900 MB/s** |
-|  LZO 2.09               |  2.108  |  630 MB/s   |    800 MB/s   |
-|  QuickLZ 1.5.0          |  2.238  |  530 MB/s   |    720 MB/s   |
-|  Snappy 1.1.4           |  2.091  |  525 MB/s   |   1750 MB/s   |
-|  [Zstandard] 1.3.4 -1   |  2.877  |  470 MB/s   |   1380 MB/s   |
-|  LZF v3.6               |  2.073  |  380 MB/s   |    840 MB/s   |
-| [zlib] deflate 1.2.11 -1|  2.730  |  100 MB/s   |    380 MB/s   |
-|**LZ4 HC -9 (v1.8.2)**   |**2.721**|   40 MB/s   | **3920 MB/s** |
-| [zlib] deflate 1.2.11 -6|  3.099  |   34 MB/s   |    410 MB/s   |
+|  memcpy                 |  1.000  | 13700 MB/s  |  13700 MB/s   |
+|**LZ4 default (v1.9.0)** |**2.101**| **780 MB/s**| **4900 MB/s** |
+|  LZO 2.09               |  2.108  |   670 MB/s  |    860 MB/s   |
+|  QuickLZ 1.5.0          |  2.238  |   575 MB/s  |    780 MB/s   |
+|  Snappy 1.1.4           |  2.091  |   565 MB/s  |   1950 MB/s   |
+| [Zstandard] 1.4.0 -1    |  2.883  |   515 MB/s  |   1380 MB/s   |
+|  LZF v3.6               |  2.073  |   415 MB/s  |    910 MB/s   |
+| [zlib] deflate 1.2.11 -1|  2.730  |   100 MB/s  |    415 MB/s   |
+|**LZ4 HC -9 (v1.9.0)**   |**2.721**|    39 MB/s  | **4850 MB/s** |
+| [zlib] deflate 1.2.11 -6|  3.099  |    36 MB/s  |    445 MB/s   |
 
 [zlib]: http://www.zlib.net/
 [Zstandard]: http://www.zstd.net/
diff --git a/contrib/gen_manual/gen_manual.cpp b/contrib/gen_manual/gen_manual.cpp
index 65abd3a..bedef94 100644
--- a/contrib/gen_manual/gen_manual.cpp
+++ b/contrib/gen_manual/gen_manual.cpp
@@ -44,7 +44,7 @@ void trim(string& s, string characters)
 {
     size_t p = s.find_first_not_of(characters);
     s.erase(0, p);
- 
+
     p = s.find_last_not_of(characters);
     if (string::npos != p)
        s.erase(p+1);
@@ -67,14 +67,13 @@ vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
 {
     vector<string> out;
     string line;
-    size_t epos;
 
     while ((size_t)linenum < input.size()) {
         line = input[linenum];
 
         if (terminator.empty() && line.empty()) { linenum--; break; }
-        
-        epos = line.find(terminator);
+
+        size_t const epos = line.find(terminator);
         if (!terminator.empty() && epos!=string::npos) {
             out.push_back(line);
             break;
@@ -152,8 +151,9 @@ int main(int argc, char *argv[]) {
             continue;
         }
 
-        /* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
-        if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
+        /* comments of type  / * * < and  / * ! <  are detected, and only function declaration is highlighted (bold) */
+        if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos)
+          && line.find("*/")!=string::npos) {
             sout << "<pre><b>";
             print_line(sout, line);
             sout << "</b></pre><BR>" << endl;
@@ -177,16 +177,19 @@ int main(int argc, char *argv[]) {
 
         comments = get_lines(input, linenum, "*/");
         if (!comments.empty()) comments[0] = line.substr(spos+3);
-        if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
+        if (!comments.empty())
+            comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
         for (l=0; l<comments.size(); l++) {
-            if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
-            else if (comments[l].find("  *")==0) comments[l] = comments[l].substr(3);
+            if (comments[l].compare(0, 2, " *") == 0)
+                comments[l] = comments[l].substr(2);
+            else if (comments[l].compare(0, 3, "  *") == 0)
+                comments[l] = comments[l].substr(3);
             trim(comments[l], "*-=");
         }
         while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
         while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
 
-        /* comments of type /*! mean: this is a function declaration; switch comments with declarations */
+        /* comments of type  / * !  mean: this is a function declaration; switch comments with declarations */
         if (exclam == '!') {
             if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "LZ4_XXX() :" */
             linenum++;
@@ -194,7 +197,6 @@ int main(int argc, char *argv[]) {
 
             sout << "<pre><b>";
             for (l=0; l<lines.size(); l++) {
-              //  fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str());
                 print_line(sout, lines[l]);
             }
             sout << "</b><p>";
@@ -202,7 +204,7 @@ int main(int argc, char *argv[]) {
                 print_line(sout, comments[l]);
             }
             sout << "</p></pre><BR>" << endl << endl;
-        } else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
+        } else if (exclam == '=') { /* comments of type  / * =  and  / * * =  mean: use a <H3> header and show also all functions until first empty line */
             trim(comments[0], " ");
             sout << "<h3>" << comments[0] << "</h3><pre>";
             for (l=1; l<comments.size(); l++) {
@@ -214,7 +216,7 @@ int main(int argc, char *argv[]) {
                 print_line(sout, lines[l]);
             }
             sout << "</pre></b><BR>" << endl;
-        } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
+        } else { /* comments of type  / * *  and  / * -  mean: this is a comment; use a <H2> header for the first line */
             if (comments.empty()) continue;
 
             trim(comments[0], " ");
@@ -244,4 +246,4 @@ int main(int argc, char *argv[]) {
     ostream << "</html>" << endl << "</body>" << endl;
 
     return 0;
-}
-\ No newline at end of file
+}
diff --git a/contrib/meson/GetLz4LibraryVersion.py b/contrib/meson/GetLz4LibraryVersion.py
new file mode 100644
index 0000000..d8abfcb
--- /dev/null
+++ b/contrib/meson/GetLz4LibraryVersion.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+import re
+
+
+def find_version_tuple(filepath):
+  version_file_data = None
+  with open(filepath) as fd:
+    version_file_data = fd.read()
+
+  patterns = r"""#\s*define\s+LZ4_VERSION_MAJOR\s+([0-9]+).*$
+#\s*define\s+LZ4_VERSION_MINOR\s+([0-9]+).*$
+#\s*define\s+LZ4_VERSION_RELEASE\s+([0-9]+).*$
+"""
+  regex = re.compile(patterns, re.MULTILINE)
+  version_match = regex.search(version_file_data)
+  if version_match:
+    return version_match.groups()
+  raise Exception("Unable to find version string.")
+
+
+def main():
+  import argparse
+  parser = argparse.ArgumentParser(description='Print lz4 version from lib/lz4.h')
+  parser.add_argument('file', help='path to lib/lz4.h')
+  args = parser.parse_args()
+  version_tuple = find_version_tuple(args.file)
+  print('.'.join(version_tuple))
+
+
+if __name__ == '__main__':
+  main()
diff --git a/contrib/meson/InstallSymlink.py b/contrib/meson/InstallSymlink.py
new file mode 100644
index 0000000..3f2998c
--- /dev/null
+++ b/contrib/meson/InstallSymlink.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# #############################################################################
+# Copyright (c) 2018-present  lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+# This file should be synced with https://github.com/lzutao/meson-symlink
+
+import os
+import pathlib  # since Python 3.4
+
+
+def install_symlink(src, dst, install_dir, dst_is_dir=False, dir_mode=0o777):
+  if not install_dir.exists():
+    install_dir.mkdir(mode=dir_mode, parents=True, exist_ok=True)
+  if not install_dir.is_dir():
+    raise NotADirectoryError(install_dir)
+  # os.readlink() only accepts path-like objects from Python 3.6, hence str()
+  new_dst = install_dir.joinpath(dst)
+  if new_dst.is_symlink() and os.readlink(str(new_dst)) == src:
+    print('File exists: {!r} -> {!r}'.format(new_dst, src))
+    return
+  print('Installing symlink {!r} -> {!r}'.format(new_dst, src))
+  new_dst.symlink_to(src, target_is_directory=dst_is_dir)
+
+
+def main():
+  import argparse
+  parser = argparse.ArgumentParser(description='Install a symlink',
+      usage='{0} [-h] [-d] [-m MODE] source dest install_dir\n\n'
+            'example:\n'
+            '        {0} dash sh /bin'.format(pathlib.Path(__file__).name))
+  parser.add_argument('source', help='target to link')
+  parser.add_argument('dest', help='link name')
+  parser.add_argument('install_dir', help='installation directory')
+  parser.add_argument('-d', '--isdir',
+      action='store_true',
+      help='dest is a directory')
+  parser.add_argument('-m', '--mode',
+      help='directory mode on creating if not exist',
+      default='0o755')
+  args = parser.parse_args()
+
+  dir_mode = int(args.mode, 8)
+
+  meson_destdir = os.environ.get('MESON_INSTALL_DESTDIR_PREFIX', default='')
+  install_dir = pathlib.Path(meson_destdir, args.install_dir)
+  install_symlink(args.source, args.dest, install_dir, args.isdir, dir_mode)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/contrib/meson/README.md b/contrib/meson/README.md
new file mode 100644
index 0000000..fa18493
--- /dev/null
+++ b/contrib/meson/README.md
@@ -0,0 +1,34 @@
+Meson build system for lz4
+==========================
+
+Meson is a build system designed to optimize programmer productivity.
+It aims to do this by providing simple, out-of-the-box support for
+modern software development tools and practices, such as unit tests,
+coverage reports, Valgrind, CCache and the like.
+
+This Meson build system is provided with no guarantee.
+
+## How to build
+
+`cd` to this meson directory (`contrib/meson`)
+
+```sh
+meson setup --buildtype=release -Ddefault_library=shared -Dbuild_programs=true builddir
+cd builddir
+ninja             # to build
+ninja install     # to install
+```
+
+You might want to install it into a staging directory:
+
+```sh
+DESTDIR=./staging ninja install
+```
+
+To configure build options, use:
+
+```sh
+meson configure
+```
+
+See [man meson(1)](https://manpages.debian.org/testing/meson/meson.1.en.html).
diff --git a/contrib/meson/contrib/gen_manual/meson.build b/contrib/meson/contrib/gen_manual/meson.build
new file mode 100644
index 0000000..6233cdc
--- /dev/null
+++ b/contrib/meson/contrib/gen_manual/meson.build
@@ -0,0 +1,42 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+lz4_root_dir = '../../../..'
+
+add_languages('cpp')
+cxx = meson.get_compiler('cpp')
+
+gen_manual_includes = include_directories(join_paths(lz4_root_dir, 'contrib/gen_manual'))
+
+gen_manual_cppflags = cxx.get_supported_arguments(['-Wextra', '-Wcast-qual',
+  '-Wcast-align', '-Wshadow', '-Wstrict-aliasing=1', '-Wswitch-enum',
+  '-Wno-comment'])
+
+gen_manual = executable('gen_manual',
+  join_paths(lz4_root_dir, 'contrib/gen_manual/gen_manual.cpp'),
+  cpp_args: gen_manual_cppflags,
+  include_directories: gen_manual_includes,
+  install: false)
+
+# Update lz4 manual
+lz4_manual_html = custom_target('lz4_manual.html',
+  output : 'lz4_manual.html',
+  command : [gen_manual,
+    lz4_version,
+    join_paths(meson.current_source_dir(), lz4_root_dir, 'lib/lz4.h'),
+    '@OUTPUT@'],
+  install : false)
+# Update lz4frame manual (own variable, so the lz4_manual.html target handle is not overwritten)
+lz4frame_manual_html = custom_target('lz4frame_manual.html',
+  output : 'lz4frame_manual.html',
+  command : [gen_manual,
+    lz4_version,
+    join_paths(meson.current_source_dir(), lz4_root_dir, 'lib/lz4frame.h'),
+    '@OUTPUT@'],
+  install : false)
diff --git a/contrib/meson/contrib/meson.build b/contrib/meson/contrib/meson.build
new file mode 100644
index 0000000..5249a4c
--- /dev/null
+++ b/contrib/meson/contrib/meson.build
@@ -0,0 +1,10 @@
+# #############################################################################
+# Copyright (c) 2018-present        lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+subdir('gen_manual')
diff --git a/contrib/meson/examples/meson.build b/contrib/meson/examples/meson.build
new file mode 100644
index 0000000..3c13214
--- /dev/null
+++ b/contrib/meson/examples/meson.build
@@ -0,0 +1,49 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+lz4_root_dir = '../../..'
+
+#examples_c_args = ['-Wextra', '-Wundef', '-Wshadow', '-Wcast-align', '-Wstrict-prototypes']
+
+printVersion = executable('printVersion',
+  join_paths(lz4_root_dir, 'examples/printVersion.c'),
+  dependencies: liblz4_dep,
+  install: false)
+doubleBuffer = executable('doubleBuffer',
+  join_paths(lz4_root_dir, 'examples/blockStreaming_doubleBuffer.c'),
+  dependencies: liblz4_dep,
+  install: false)
+dictionaryRandomAccess = executable('dictionaryRandomAccess',
+  join_paths(lz4_root_dir, 'examples/dictionaryRandomAccess.c'),
+  dependencies: liblz4_dep,
+  install: false)
+ringBuffer = executable('ringBuffer',
+  join_paths(lz4_root_dir, 'examples/blockStreaming_ringBuffer.c'),
+  dependencies: liblz4_dep,
+  install: false)
+ringBufferHC = executable('ringBufferHC',
+  join_paths(lz4_root_dir, 'examples/HCStreaming_ringBuffer.c'),
+  dependencies: liblz4_dep,
+  install: false)
+lineCompress = executable('lineCompress',
+  join_paths(lz4_root_dir, 'examples/blockStreaming_lineByLine.c'),
+  dependencies: liblz4_dep,
+  install: false)
+frameCompress = executable('frameCompress',
+  join_paths(lz4_root_dir, 'examples/frameCompress.c'),
+  dependencies: liblz4_dep,
+  install: false)
+compressFunctions = executable('compressFunctions',
+  join_paths(lz4_root_dir, 'examples/compress_functions.c'),
+  dependencies: liblz4_dep,
+  install: false)
+simpleBuffer = executable('simpleBuffer',
+  join_paths(lz4_root_dir, 'examples/simple_buffer.c'),
+  dependencies: liblz4_dep,
+  install: false)
diff --git a/contrib/meson/lib/meson.build b/contrib/meson/lib/meson.build
new file mode 100644
index 0000000..e782334
--- /dev/null
+++ b/contrib/meson/lib/meson.build
@@ -0,0 +1,57 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+lz4_root_dir = '../../..'
+
+liblz4_includes = [include_directories(join_paths(lz4_root_dir, 'lib'))]
+liblz4_sources = [join_paths(lz4_root_dir, 'lib/lz4.c'),
+  join_paths(lz4_root_dir, 'lib/lz4frame.c'),
+  join_paths(lz4_root_dir, 'lib/lz4hc.c'),
+  join_paths(lz4_root_dir, 'lib/xxhash.c')]
+liblz4_c_args = []
+
+liblz4_debug_cflags = []
+if use_debug
+  liblz4_c_args += '-DLZ4_DEBUG=@0@'.format(debug_level)
+  if [compiler_gcc, compiler_clang].contains(cc_id)
+    liblz4_debug_cflags = ['-Wextra', '-Wcast-qual', '-Wcast-align', '-Wshadow',
+      '-Wswitch-enum', '-Wdeclaration-after-statement', '-Wstrict-prototypes',
+      '-Wundef', '-Wpointer-arith', '-Wstrict-aliasing=1']
+  endif
+endif
+liblz4_c_args += cc.get_supported_arguments(liblz4_debug_cflags)
+
+if host_machine_os == os_windows and default_library != 'static'
+  liblz4_c_args += '-DLZ4_DLL_EXPORT=1'
+endif
+
+liblz4 = library('lz4',
+  liblz4_sources,
+  include_directories: liblz4_includes,
+  c_args: liblz4_c_args,
+  install: true,
+  version: lz4_libversion)
+
+liblz4_dep = declare_dependency(link_with: liblz4,
+  include_directories: liblz4_includes)
+
+pkgconfig.generate(liblz4,
+  name: 'lz4',
+  filebase: 'liblz4',
+  description: 'extremely fast lossless compression algorithm library',
+  version: lz4_libversion,
+  url: 'http://www.lz4.org/')
+
+install_headers(join_paths(lz4_root_dir, 'lib/lz4.h'),
+  join_paths(lz4_root_dir, 'lib/lz4hc.h'),
+  join_paths(lz4_root_dir, 'lib/lz4frame.h'))
+
+if default_library != 'shared'
+  install_headers(join_paths(lz4_root_dir, 'lib/lz4frame_static.h'))
+endif
diff --git a/contrib/meson/meson.build b/contrib/meson/meson.build
new file mode 100644
index 0000000..bf30eae
--- /dev/null
+++ b/contrib/meson/meson.build
@@ -0,0 +1,128 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+project('lz4', ['c'],
+  license: ['BSD', 'GPLv2'],
+  default_options : ['c_std=c99',
+    'buildtype=release'],
+  version: '1.8.3',
+  meson_version: '>=0.47.0')
+
+cc = meson.get_compiler('c')
+pkgconfig = import('pkgconfig')
+python3 = import('python').find_installation()
+c_std = get_option('c_std')
+default_library = get_option('default_library')
+
+host_machine_os = host_machine.system()
+os_windows = 'windows'
+os_linux = 'linux'
+os_darwin = 'darwin'
+os_freebsd = 'freebsd'
+os_sun = 'sunos'
+
+cc_id = cc.get_id()
+compiler_gcc = 'gcc'
+compiler_clang = 'clang'
+compiler_msvc = 'msvc'
+
+lz4_version = meson.project_version()
+
+lz4_h_file = join_paths(meson.current_source_dir(), '../../lib/lz4.h')
+GetLz4LibraryVersion_py = files('GetLz4LibraryVersion.py')
+r = run_command(python3, GetLz4LibraryVersion_py, lz4_h_file)
+if r.returncode() == 0
+  output = r.stdout().strip()
+  if output.version_compare('>@0@'.format(lz4_version))
+    lz4_version = output
+    message('Project version is now: @0@'.format(lz4_version))
+  endif
+else
+  warning('Cannot find project version in @0@'.format(lz4_h_file))
+endif
+
+lz4_libversion = lz4_version
+
+# =============================================================================
+# Installation directories
+# =============================================================================
+
+lz4_prefix = get_option('prefix')
+lz4_bindir = get_option('bindir')
+lz4_datadir = get_option('datadir')
+lz4_mandir = get_option('mandir')
+lz4_docdir = join_paths(lz4_datadir, 'doc', meson.project_name())
+
+# =============================================================================
+# Project options
+# =============================================================================
+
+buildtype = get_option('buildtype')
+
+# Built-in options
+use_debug = get_option('debug')
+
+# Custom options
+debug_level = get_option('debug_level')
+use_backtrace = get_option('backtrace')
+
+build_programs = get_option('build_programs')
+build_contrib = get_option('build_contrib')
+build_tests = get_option('build_tests')
+build_examples = get_option('build_examples')
+#feature_multi_thread = get_option('multi_thread')
+
+# =============================================================================
+# Dependencies
+# =============================================================================
+
+#libm_dep = cc.find_library('m', required: build_tests)
+#thread_dep = dependency('threads', required: feature_multi_thread)
+#use_multi_thread = thread_dep.found()
+
+# =============================================================================
+# Compiler flags
+# =============================================================================
+
+add_project_arguments(['-DXXH_NAMESPACE=LZ4_'], language: 'c')
+
+if [compiler_gcc, compiler_clang].contains(cc_id)
+  common_warning_flags = []
+  # Should use Meson's own --werror build option
+  #common_warning_flags += ['-Werror']
+  if c_std == 'c89' or c_std == 'gnu89'
+    common_warning_flags += ['-pedantic', '-Wno-long-long', '-Wno-variadic-macros']
+  elif c_std == 'c99' or c_std == 'gnu99'
+    common_warning_flags += ['-pedantic']
+  endif
+  cc_compile_flags = cc.get_supported_arguments(common_warning_flags)
+  add_project_arguments(cc_compile_flags, language: 'c')
+endif
+
+# =============================================================================
+# Subdirs
+# =============================================================================
+
+subdir('lib')
+
+if build_programs
+  subdir('programs')
+endif
+
+if build_tests
+  subdir('tests')
+endif
+
+if build_contrib
+  subdir('contrib')
+endif
+
+if build_examples
+  subdir('examples')
+endif
diff --git a/contrib/meson/meson_options.txt b/contrib/meson/meson_options.txt
new file mode 100644
index 0000000..f6a4ae7
--- /dev/null
+++ b/contrib/meson/meson_options.txt
@@ -0,0 +1,24 @@
+# #############################################################################
+# Copyright (c) 2018-present        lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+# Read guidelines from https://wiki.gnome.org/Initiatives/GnomeGoals/MesonPorting
+
+option('debug_level', type: 'integer', min: 0, max: 7, value: 1,
+  description: 'Enable run-time debug. See lib/lz4hc.c')
+option('backtrace', type: 'boolean', value: false,
+  description: 'Display a stack backtrace when execution generates a runtime exception')
+
+option('build_programs', type: 'boolean', value: false,
+  description: 'Enable programs build')
+option('build_tests', type: 'boolean', value: false,
+  description: 'Enable tests build')
+option('build_contrib', type: 'boolean', value: false,
+  description: 'Enable contrib build')
+option('build_examples', type: 'boolean', value: false,
+  description: 'Enable examples build')
diff --git a/contrib/meson/programs/meson.build b/contrib/meson/programs/meson.build
new file mode 100644
index 0000000..df64eb0
--- /dev/null
+++ b/contrib/meson/programs/meson.build
@@ -0,0 +1,52 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+lz4_root_dir = '../../..'
+
+lz4_includes = include_directories(join_paths(lz4_root_dir, 'programs'))
+lz4_sources = [join_paths(lz4_root_dir, 'programs/bench.c'),
+  join_paths(lz4_root_dir, 'programs/datagen.c'),
+  join_paths(lz4_root_dir, 'programs/lz4cli.c'),
+  join_paths(lz4_root_dir, 'programs/lz4io.c')]
+lz4_c_args = []
+
+export_dynamic_on_windows = false
+# explicit backtrace enable/disable for Linux & Darwin
+if not use_backtrace
+  lz4_c_args += '-DBACKTRACE_ENABLE=0'
+elif use_debug and host_machine_os == os_windows  # MinGW target
+  lz4_c_args += '-DBACKTRACE_ENABLE=1'
+  export_dynamic_on_windows = true
+endif
+
+lz4_deps = [ liblz4_dep ]
+
+lz4 = executable('lz4',
+  lz4_sources,
+  include_directories: lz4_includes,
+  c_args: lz4_c_args,
+  dependencies: lz4_deps,
+  export_dynamic: export_dynamic_on_windows, # Since Meson 0.45.0
+  install: true)
+
+# =============================================================================
+# Programs and manpages installing
+# =============================================================================
+
+install_man(join_paths(lz4_root_dir, 'programs/lz4.1'))
+
+InstallSymlink_py = '../InstallSymlink.py'
+lz4_man1_dir = join_paths(lz4_mandir, 'man1')
+bin_EXT = host_machine_os == os_windows ? '.exe' : ''
+man1_EXT = meson.version().version_compare('>=0.49.0') ? '.1' : '.1.gz'
+
+foreach f : ['lz4c', 'lz4cat', 'unlz4']
+  meson.add_install_script(InstallSymlink_py, 'lz4' + bin_EXT, f + bin_EXT, lz4_bindir)
+  meson.add_install_script(InstallSymlink_py, 'lz4' + man1_EXT, f + man1_EXT, lz4_man1_dir)
+endforeach
diff --git a/contrib/meson/tests/meson.build b/contrib/meson/tests/meson.build
new file mode 100644
index 0000000..392bcf2
--- /dev/null
+++ b/contrib/meson/tests/meson.build
@@ -0,0 +1,93 @@
+# #############################################################################
+# Copyright (c) 2018-present    lzutao <taolzu(at)gmail.com>
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# #############################################################################
+
+lz4_root_dir = '../../..'
+programs_dir_inc = include_directories(join_paths(lz4_root_dir, 'programs'))
+lib_dir_inc = include_directories(join_paths(lz4_root_dir, 'lib'))
+
+# =============================================================================
+# Test flags
+# =============================================================================
+
+TEST_FILES   = join_paths(meson.current_source_dir(), lz4_root_dir, 'tests/COPYING')
+FUZZER_TIME  = '-T90s'
+NB_LOOPS     = '-i1'
+
+# =============================================================================
+# Executables
+# =============================================================================
+
+fullbench_sources = [join_paths(lz4_root_dir, 'tests/fullbench.c')]
+fullbench = executable('fullbench',
+  fullbench_sources,
+  include_directories: programs_dir_inc,
+  dependencies: liblz4_dep,
+  install: false)
+
+fuzzer_sources = [join_paths(lz4_root_dir, 'tests/fuzzer.c')]
+fuzzer = executable('fuzzer',
+  fuzzer_sources,
+  c_args: ['-D_DEFAULT_SOURCE', '-D_BSD_SOURCE'], # since glibc 2.19
+  include_directories: programs_dir_inc,
+  dependencies: liblz4_dep,
+  install: false)
+
+frametest_sources = [join_paths(lz4_root_dir, 'tests/frametest.c')]
+frametest = executable('frametest',
+  frametest_sources,
+  include_directories: programs_dir_inc,
+  dependencies: liblz4_dep,
+  install: false)
+
+roundTripTest_sources = [join_paths(lz4_root_dir, 'tests/roundTripTest.c')]
+roundTripTest = executable('roundTripTest',
+  roundTripTest_sources,
+  dependencies: [ liblz4_dep ],
+  install: false)
+
+datagen_sources = [join_paths(lz4_root_dir, 'tests/datagencli.c')]
+datagen = executable('datagen',
+  datagen_sources,
+  objects: lz4.extract_objects(join_paths(lz4_root_dir, 'programs/datagen.c')),
+  include_directories: lz4_includes,
+  dependencies: [ liblz4_dep ],
+  install: false)
+
+checkFrame_sources = [join_paths(lz4_root_dir, 'tests/checkFrame.c')]
+checkFrame = executable('checkFrame',
+  checkFrame_sources,
+  include_directories: programs_dir_inc,
+  dependencies: [ liblz4_dep ],
+  install: false)
+
+checkTag_sources = [join_paths(lz4_root_dir, 'tests/checkTag.c')]
+checkTag = executable('checkTag',
+  checkTag_sources,
+  include_directories: lib_dir_inc,
+  install: false)
+
+# =============================================================================
+# Tests (Use "meson test --list" to list all tests)
+# =============================================================================
+
+# XXX: (Need TEST) These timeouts (in seconds) when running on a HDD should be
+# at least six times bigger than on a SSD
+
+test('test-fullbench',
+  fullbench,
+  args: ['--no-prompt', NB_LOOPS, TEST_FILES],
+  timeout: 420) # Should be enough when running on HDD
+test('test-fuzzer',
+  fuzzer,
+  args: [FUZZER_TIME],
+  timeout: 100)
+test('test-frametest',
+  frametest,
+  args: [FUZZER_TIME],
+  timeout: 100)
diff --git a/contrib/snap/README.md b/contrib/snap/README.md
new file mode 100644
index 0000000..612d6d7
--- /dev/null
+++ b/contrib/snap/README.md
@@ -0,0 +1,29 @@
+Snap Packaging
+--------------
+
+This directory contains the config required to generate a snap package
+of lz4. Snaps are universal Linux packages that allow you to easily
+build your application from any source and ship it to any Linux
+distribution by publishing it to https://snapcraft.io/. A key attribute
+of a snap package is that it is (ideally) confined such that it
+executes within a controlled environment with all its dependencies
+bundled with it and does not share dependencies with or from any other
+package on the system (with a couple of minor exceptions).
+
+The basic anatomy and workflow is:
+
+  * ensure snap/snapcraft.yaml is up-to-date e.g. with version info
+
+  * build the snap by installing the snapcraft package and running it
+
+  * push snap/* changes to the repo (excluding any crud generated by a build of course)
+
+  * register yourself as owner of lz4 name in snapstore
+
+  * publish new snap to the snap store
+
+  * install snap by doing 'snap install lz4' on any Linux distro
+
+  * all installed copies of lz4 will be automatically updated to your new version
+
+For more information on Snaps see https://docs.snapcraft.io and https://forum.snapcraft.io/
diff --git a/contrib/snap/snapcraft.yaml b/contrib/snap/snapcraft.yaml
new file mode 100644
index 0000000..2793c0e
--- /dev/null
+++ b/contrib/snap/snapcraft.yaml
@@ -0,0 +1,31 @@
+name: lz4
+version: 1.8.4
+summary: Extremely Fast Compression algorithm 
+description: >
+    LZ4 is lossless compression algorithm, providing compression
+    speed > 500 MB/s per core, scalable with multi-cores CPU. It features an
+    extremely fast decoder, with speed in multiple GB/s per core, typically
+    reaching RAM speed limits on multi-core systems.
+    .
+    Speed can be tuned dynamically, selecting an "acceleration" factor which
+    trades compression ratio for faster speed. On the other end, a high
+    compression derivative, LZ4_HC, is also provided, trading CPU time for
+    improved compression ratio. All versions feature the same decompression
+    speed.
+    .
+    LZ4 is also compatible with dictionary compression, and can ingest any
+    input file as dictionary, including those created by Zstandard Dictionary
+    Builder. (note: only the final 64KB are used).
+    .
+    LZ4 library is provided as open-source software using BSD 2-Clause license.
+confinement: strict
+grade: stable
+
+apps:
+  lz4:
+    command: usr/local/bin/lz4
+    plugs: [home]
+parts:
+  lz4:
+    source: ../
+    plugin: make
diff --git a/doc/images/usingCDict_1_8_2.png b/doc/images/usingCDict_1_8_2.png
deleted file mode 100644
index 9434198..0000000
--- a/doc/images/usingCDict_1_8_2.png
+++ /dev/null
diff --git a/doc/lz4_Block_format.md b/doc/lz4_Block_format.md
index 5438730..4344e9b 100644
--- a/doc/lz4_Block_format.md
+++ b/doc/lz4_Block_format.md
@@ -1,6 +1,6 @@
 LZ4 Block Format Description
 ============================
-Last revised: 2018-04-25.
+Last revised: 2019-03-30.
 Author : Yann Collet
 
 
@@ -10,7 +10,8 @@ using any programming language.
 
 LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
 There is no entropy encoder back-end nor framing layer.
-The latter is assumed to be handled by other parts of the system (see [LZ4 Frame format]).
+The latter is assumed to be handled by other parts of the system
+(see [LZ4 Frame format]).
 This design is assumed to favor simplicity and speed.
 It helps later on for optimizations, compactness, and features.
 
@@ -104,45 +105,52 @@ A common case is an offset of 1,
 meaning the last byte is repeated `matchlength` times.
 
 
-Parsing restrictions
+End of block restrictions
 -----------------------
-There are specific parsing rules to respect in order to remain compatible
-with assumptions made by the decoder :
-
-1. The last 5 bytes are always literals.  In other words, the last five bytes
-   from the uncompressed input (or all bytes, if the input has less than five
-   bytes) must be encoded as literals on behalf of the last sequence.
-   The last sequence is incomplete, and stops right after the literals.
-2. The last match must start at least 12 bytes before end of block.
-   The last match is part of the penultimate sequence,
-   since the last sequence stops right after literals.
-   Note that, as a consequence, blocks < 13 bytes cannot be compressed.
-
-These rules are in place to ensure that the decoder
-can speculatively execute copy instructions
-without ever reading nor writing beyond provided I/O buffers.
-
-1. To copy literals from a non-last sequence, an 8-byte copy instruction
-   can always be safely issued (without reading past the input),
-   because literals are followed by a 2-byte offset,
-   and last sequence is at least 1+5 bytes long.
-2. Similarly, a match operation can speculatively copy up to 12 bytes
-   while remaining within output buffer boundaries.
-
-Empty inputs can be represented with a zero byte,
-interpreted as a token without literals and without a match.
+There are specific rules required to terminate a block.
+
+1. The last sequence contains only literals.
+   The block ends right after them.
+2. The last 5 bytes of input are always literals.
+   Therefore, the last sequence contains at least 5 bytes.
+   - Special : if input is smaller than 5 bytes,
+     there is only one sequence, it contains the whole input as literals.
+     Empty input can be represented with a zero byte,
+     interpreted as a final token without literal and without a match.
+3. The last match must start at least 12 bytes before the end of block.
+   The last match is part of the penultimate sequence.
+   It is followed by the last sequence, which contains only literals.
+   - Note that, as a consequence,
+     an independent block < 13 bytes cannot be compressed,
+     because the match must copy "something",
+     so it needs at least one prior byte.
+   - When a block can reference data from another block,
+     it can start immediately with a match and no literal,
+     so a block of 12 bytes can be compressed.
+
+When a block does not respect these end conditions,
+a conformant decoder is allowed to reject the block as incorrect.
+
+These rules are in place to ensure that a conformant decoder
+can be designed for speed, speculatively issuing instructions,
+while never reading nor writing beyond provided I/O buffers.
 
 
 Additional notes
 -----------------------
-There is no assumption nor limits to the way the compressor
+If the decoder will decompress data from an external source,
+it is recommended to ensure that the decoder will not be vulnerable to
+buffer overflow manipulations.
+Always ensure that read and write operations
+remain within the limits of provided buffers.
+Test the decoder with fuzzers
+to ensure it's resilient to improbable combinations.
+
+The format makes no assumption nor limits to the way the compressor
 searches and selects matches within the source data block.
-It could be a fast scan, a multi-probe, a full search using BST,
-standard hash chains or MMC, well whatever.
-
-Advanced parsing strategies can also be implemented, such as lazy match,
-or full optimal parsing.
-
-All these trade-off offer distinctive speed/memory/compression advantages.
-Whatever the method used by the compressor, its result will be decodable
-by any LZ4 decoder if it follows the format specification described above.
+Multiple techniques can be considered,
+featuring distinct time / performance trade-offs.
+As long as the format is respected,
+the result will be compatible and decodable by any compliant decoder.
+An upper compression limit can be reached,
+using a technique called "full optimal parsing", at high cpu cost.
diff --git a/doc/lz4_Frame_format.md b/doc/lz4_Frame_format.md
index a8541f5..a0514e0 100644
--- a/doc/lz4_Frame_format.md
+++ b/doc/lz4_Frame_format.md
@@ -265,20 +265,23 @@ The highest bit is “1” if data in the block is uncompressed.
 
 The highest bit is “0” if data in the block is compressed by LZ4.
 
-All other bits give the size, in bytes, of the following data block
-(the size does not include the block checksum if present).
+All other bits give the size, in bytes, of the following data block.
+The size does not include the block checksum if present.
 
 Block Size shall never be larger than Block Maximum Size.
-Such a thing could happen for incompressible source data.
-In such case, such a data block shall be passed in uncompressed format.
+Such a thing could potentially happen for non-compressible sources.
+In such a case, such data block shall be passed using uncompressed format.
 
 __Data__
 
 Where the actual data to decode stands.
 It might be compressed or not, depending on previous field indications.
-Uncompressed size of Data can be any size, up to “block maximum size”.
-Note that data block is not necessarily full :
-an arbitrary “flush” may happen anytime. Any block can be “partially filled”.
+
+When compressed, the data must respect the [LZ4 block format specification](https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md).
+
+Note that the block is not necessarily full.
+Uncompressed size of data can be any size, up to “Block Maximum Size”,
+so it may contain less data than the maximum block size.
 
 __Block checksum__
 
diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index 6ebf8d2..356a60d 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>1.8.3 Manual</title>
+<title>1.9.0 Manual</title>
 </head>
 <body>
-<h1>1.8.3 Manual</h1>
+<h1>1.9.0 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -15,37 +15,41 @@
 <li><a href="#Chapter5">Advanced Functions</a></li>
 <li><a href="#Chapter6">Streaming Compression Functions</a></li>
 <li><a href="#Chapter7">Streaming Decompression Functions</a></li>
-<li><a href="#Chapter8">Unstable declarations</a></li>
-<li><a href="#Chapter9">Private definitions</a></li>
+<li><a href="#Chapter8">Experimental section</a></li>
+<li><a href="#Chapter9">PRIVATE DEFINITIONS</a></li>
 <li><a href="#Chapter10">Obsolete Functions</a></li>
 </ol>
 <hr>
 <a name="Chapter1"></a><h2>Introduction</h2><pre>
-  LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core,
+  LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core,
   scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
   multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
 
   The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to user.
   Compression can be done in:
     - a single step (described as Simple Functions)
     - a single step, reusing a context (described in Advanced Functions)
     - unbounded multiple steps (described as Streaming compression)
 
-  lz4.h provides block compression functions. It gives full buffer control to user.
-  Decompressing an lz4-compressed block also requires metadata (such as compressed size).
-  Each application is free to encode such metadata in whichever way it wants.
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing a block requires additional metadata, such as its compressed size.
+  Each application is free to encode and pass such metadata in whichever way it wants.
 
-  An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
-  take care of encoding standard metadata alongside LZ4-compressed blocks.
-  If your application requires interoperability, it's recommended to use it.
-  A library is provided to take care of it, see lz4frame.h.
+  lz4.h only handles blocks; it cannot generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  This is required for compressed data to be self-contained and portable.
+  Frame format is delivered through a companion API, declared in lz4frame.h.
+  Note that the `lz4` CLI can only manage frames.
 <BR></pre>
 
 <a name="Chapter2"></a><h2>Version</h2><pre></pre>
 
 <pre><b>int LZ4_versionNumber (void);  </b>/**< library version number; useful to check dll version */<b>
 </b></pre><BR>
-<pre><b>const char* LZ4_versionString (void);   </b>/**< library version string; unseful to check dll version */<b>
+<pre><b>const char* LZ4_versionString (void);   </b>/**< library version string; useful to check dll version */<b>
 </b></pre><BR>
 <a name="Chapter3"></a><h2>Tuning parameter</h2><pre></pre>
 
@@ -53,8 +57,8 @@
 # define LZ4_MEMORY_USAGE 14
 #endif
 </b><p> Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
- Increasing memory usage improves compression ratio
- Reduced memory usage may improve speed, thanks to cache effect
+ Increasing memory usage improves compression ratio.
+ Reduced memory usage may improve speed, thanks to better cache locality.
  Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  
 </p></pre><BR>
@@ -68,21 +72,21 @@
     It also runs faster, so it's a recommended setting.
     If the function cannot compress 'src' into a more limited 'dst' budget,
     compression stops *immediately*, and the function result is zero.
-    Note : as a consequence, 'dst' content is not valid.
-    Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+    In which case, 'dst' content is undefined (invalid).
         srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
         dstCapacity : size of buffer 'dst' (which must be already allocated)
-        return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
-                  or 0 if compression fails 
+       @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+                  or 0 if compression fails
+    Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor reads outside 'source' buffer).
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
 </b><p>    compressedSize : is the exact complete size of the compressed block.
     dstCapacity : is the size of destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+   @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
              If destination buffer is not large enough, decoding will stop and output an error code (negative value).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against malicious data packets.
+    Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor reads outside 'source' buffer).
 </p></pre><BR>
 
 <a name="Chapter5"></a><h2>Advanced Functions</h2><pre></pre>
@@ -107,10 +111,11 @@
 
 <pre><b>int LZ4_sizeofState(void);
 int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-</b><p>    Same compression function, just using an externally allocated memory space to store compression state.
-    Use LZ4_sizeofState() to know how much memory must be allocated,
-    and allocate it on 8-bytes boundaries (using malloc() typically).
-    Then, provide this buffer as 'void* state' to compression function.
+</b><p>  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+  Use LZ4_sizeofState() to know how much memory must be allocated,
+  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+  Then, provide this buffer as `void* state` to compression function.
+ 
 </p></pre><BR>
 
 <pre><b>int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
@@ -126,27 +131,6 @@ int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int src
            or 0 if compression fails.
 </p></pre><BR>
 
-<pre><b>int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
-</b><p>  This function used to be a bit faster than LZ4_decompress_safe(),
-  though situation has changed in recent versions,
-  and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`.
-  Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data.
-  As a consequence, this function is no longer recommended, and may be deprecated in future versions.
-  It's only remaining specificity is that it can decompress data without knowing its compressed size.
-
-  originalSize : is the uncompressed size to regenerate.
-                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
- @return : number of bytes read from source buffer (== compressed size).
-           If the source stream is detected malformed, the function stops decoding and returns a negative result.
-  note : This function requires uncompressed originalSize to be known in advance.
-         The function never writes past the output buffer.
-         However, since it doesn't know its 'src' size, it may read past the intended input.
-         Also, because match offsets are not validated during decoding,
-         reads from 'src' may underflow.
-         Use this function in trusted environment **only**.
- 
-</p></pre><BR>
-
 <pre><b>int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
 </b><p>  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
   into destination buffer 'dst' of size 'dstCapacity'.
@@ -175,30 +159,46 @@ int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int src
 
 <a name="Chapter6"></a><h2>Streaming Compression Functions</h2><pre></pre>
 
-<pre><b>LZ4_stream_t* LZ4_createStream(void);
-int           LZ4_freeStream (LZ4_stream_t* streamPtr);
-</b><p>  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
-  LZ4_freeStream() releases its memory.
- 
-</p></pre><BR>
-
-<pre><b>void LZ4_resetStream (LZ4_stream_t* streamPtr);
-</b><p>  An LZ4_stream_t structure can be allocated once and re-used multiple times.
-  Use this function to start compressing a new stream.
+<pre><b>void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+</b><p>  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+  (e.g., LZ4_compress_fast_continue()).
+
+  An LZ4_stream_t must be initialized once before usage.
+  This is automatically done when created by LZ4_createStream().
+  However, should the LZ4_stream_t be simply declared on stack (for example),
+  it's necessary to initialize it first, using LZ4_initStream().
+
+  After init, start any new stream with LZ4_resetStream_fast().
+  The same LZ4_stream_t can be re-used multiple times consecutively
+  and compress multiple streams,
+  provided that it starts each new stream with LZ4_resetStream_fast().
+
+  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+  but is not compatible with memory regions containing garbage data.
+
+  Note: it's only useful to call LZ4_resetStream_fast()
+        in the context of streaming compression.
+        The *extState* functions perform their own resets.
+        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
  
 </p></pre><BR>
 
 <pre><b>int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
-</b><p>  Use this function to load a static dictionary into LZ4_stream_t.
-  Any previous data will be forgotten, only 'dictionary' will remain in memory.
+</b><p>  Use this function to reference a static dictionary into LZ4_stream_t.
+  The dictionary must remain available during compression.
+  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+  The same dictionary will have to be loaded on decompression side for successful decoding.
+  Dictionaries are useful for better compression of small data (KB range).
+  While LZ4 accepts any input as a dictionary,
+  results are generally better when using Zstandard's Dictionary Builder.
   Loading a size of 0 is allowed, and is the same as reset.
- @return : dictionary size, in bytes (necessarily <= 64 KB)
+ @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
  
 </p></pre><BR>
 
 <pre><b>int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 </b><p>  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
-  'dst' buffer must be already allocated.
+ 'dst' buffer must be already allocated.
   If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
 
  @return : size of compressed block
@@ -206,10 +206,10 @@ int           LZ4_freeStream (LZ4_stream_t* streamPtr);
 
   Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
            Each block has precise boundaries.
+           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
            It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
-           Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata.
 
-  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory!
+  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
 
   Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
            Make sure that buffers are separated, by at least one byte.
@@ -217,7 +217,7 @@ int           LZ4_freeStream (LZ4_stream_t* streamPtr);
 
   Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
 
-  Note 5 : After an error, the stream status is invalid, it can only be reset or freed.
+  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
  
 </p></pre><BR>
 
@@ -250,7 +250,7 @@ int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 </p></pre><BR>
 
 <pre><b>int LZ4_decoderRingBufferSize(int maxBlockSize);
-#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs))  </b>/* for static allocation; mbs presumed valid */<b>
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  </b>/* for static allocation; maxBlockSize presumed valid */<b>
 </b><p>  Note : in a ring buffer scenario (optional),
   blocks are presumed decompressed next to each other
   up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
@@ -264,7 +264,6 @@ int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
-int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
 </b><p>  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
   A block is an unsplittable entity, it must be presented entirely to a decompression function.
   Decompression functions only accepts one block at a time.
@@ -291,70 +290,48 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
-int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
 </b><p>  These decoding functions work the same as
   a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
   They are stand-alone, and don't need an LZ4_streamDecode_t structure.
-  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+  Performance tip : Decompression speed can be substantially increased
+                    when dst == dictStart + dictSize.
  
 </p></pre><BR>
 
-<a name="Chapter8"></a><h2>Unstable declarations</h2><pre>
- Declarations in this section should be considered unstable.
- Use at your own peril, etc., etc.
- They may be removed in the future.
- Their signatures may change.
-<BR></pre>
+<a name="Chapter8"></a><h2>Experimental section</h2><pre>
+ Symbols declared in this section must be considered unstable. Their
+ signatures or semantics may change, or they may be removed altogether in the
+ future. They are therefore only safe to depend on when the caller is
+ statically linked against the library.
 
-<pre><b>void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
-</b><p>  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
-  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
-
-  Note:
-  Using this in advance of a non- streaming-compression function is redundant,
-  and potentially bad for performance, since they all perform their own custom
-  reset internally.
-
-  Differences from LZ4_resetStream():
-  When an LZ4_stream_t is known to be in a internally coherent state,
-  it can often be prepared for a new compression with almost no work, only
-  sometimes falling back to the full, expensive reset that is always required
-  when the stream is in an indeterminate state (i.e., the reset performed by
-  LZ4_resetStream()).
-
-  LZ4_streams are guaranteed to be in a valid state when:
-  - returned from LZ4_createStream()
-  - reset by LZ4_resetStream()
-  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
-  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
-  - the stream was in a valid state and was then used in any compression call
-    that returned success
-  - the stream was in an indeterminate state and was used in a compression
-    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
-    that returned success
-
-  When a stream isn't known to be in a valid state, it is not safe to pass to
-  any fastReset or streaming function. It must first be cleansed by the full
-  LZ4_resetStream().
- 
-</p></pre><BR>
+ To protect against unsafe usage, not only are the declarations guarded,
+ the definitions are hidden by default
+ when building LZ4 as a shared/dynamic library.
+
+ In order to access these declarations,
+ define LZ4_STATIC_LINKING_ONLY in your application
+ before including LZ4's headers.
+
+ In order to make their implementations accessible dynamically, you must
+ define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+<BR></pre>
 
-<pre><b>int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+<pre><b>LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 </b><p>  A variant of LZ4_compress_fast_extState().
 
-  Using this variant avoids an expensive initialization step. It is only safe
-  to call if the state buffer is known to be correctly initialized already
-  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
-  initialized"). From a high level, the difference is that this function
-  initializes the provided state with a call to something like
-  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
-  call to LZ4_resetStream().
+  Using this variant avoids an expensive initialization step.
+  It is only safe to call if the state buffer is known to be correctly initialized already
+  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+  From a high level, the difference is that
+  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
  
 </p></pre><BR>
 
-<pre><b>void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
-</b><p>  This is an experimental API that allows for the efficient use of a
-  static dictionary many times.
+<pre><b>LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
+</b><p>  This is an experimental API that allows
+  efficient use of a static dictionary many times.
 
   Rather than re-loading the dictionary buffer into a working context before
   each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
@@ -365,8 +342,8 @@ int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize,
   Currently, only streams which have been prepared by LZ4_loadDict() should
   be expected to work.
 
-  Alternatively, the provided dictionary stream pointer may be NULL, in which
-  case any existing dictionary stream is unset.
+  Alternatively, the provided dictionaryStream may be NULL,
+  in which case any existing dictionary stream is unset.
 
   If a dictionary is provided, it replaces any pre-existing stream history.
   The dictionary contents are the only history that can be referenced and
@@ -380,10 +357,10 @@ int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize,
  
 </p></pre><BR>
 
-<a name="Chapter9"></a><h2>Private definitions</h2><pre>
- Do not use these definitions.
- They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- Using these definitions will expose code to API and/or ABI break in future versions of the library.
+<a name="Chapter9"></a><h2>PRIVATE DEFINITIONS</h2><pre>
+ Do not use these definitions directly.
+ They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ Accessing members will expose code to API and/or ABI break in future versions of the library.
 <BR></pre>
 
 <pre><b>typedef struct {
@@ -395,36 +372,54 @@ int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize,
 </b></pre><BR>
 <pre><b>typedef struct {
     const unsigned char* externalDict;
-    size_t extDictSize;
     const unsigned char* prefixEnd;
+    size_t extDictSize;
     size_t prefixSize;
 } LZ4_streamDecode_t_internal;
 </b></pre><BR>
-<pre><b>#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+<pre><b>#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) </b>/*AS-400*/ )<b>
 #define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
 union LZ4_stream_u {
     unsigned long long table[LZ4_STREAMSIZE_U64];
     LZ4_stream_t_internal internal_donotuse;
 } ;  </b>/* previously typedef'd to LZ4_stream_t */<b>
-</b><p> information structure to track an LZ4 stream.
- init this structure before first use.
- note : only use in association with static linking !
-        this definition is not API/ABI safe,
-        it may change in a future version !
+</b><p>  information structure to track an LZ4 stream.
+  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+  The structure definition can be convenient for static allocation
+  (on stack, or as part of larger structure).
+  Init this structure with LZ4_initStream() before first use.
+  note : only use this definition in association with static linking !
+    this definition is not API/ABI safe, and may change in a future version.
+ 
+</p></pre><BR>
+
+<pre><b>LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+</b><p>  An LZ4_stream_t structure must be initialized at least once.
+  This is automatically done when invoking LZ4_createStream(),
+  but it's not when the structure is simply declared on stack (for example).
+
+  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+  It can also initialize any arbitrary buffer of sufficient size,
+  and will @return a pointer of proper type upon initialization.
+
+  Note : initialization fails if size and alignment conditions are not respected.
+         In which case, the function will @return NULL.
+  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+  Note3: Before v1.9.0, use LZ4_resetStream() instead
  
 </p></pre><BR>
 
-<pre><b>#define LZ4_STREAMDECODESIZE_U64  4
+<pre><b>#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) </b>/*AS-400*/ )<b>
 #define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
 union LZ4_streamDecode_u {
     unsigned long long table[LZ4_STREAMDECODESIZE_U64];
     LZ4_streamDecode_t_internal internal_donotuse;
 } ;   </b>/* previously typedef'd to LZ4_streamDecode_t */<b>
-</b><p> information structure to track an LZ4 stream during decompression.
- init this structure  using LZ4_setStreamDecode (or memset()) before first use
- note : only use in association with static linking !
-        this definition is not API/ABI safe,
-        and may change in a future version !
+</b><p>  information structure to track an LZ4 stream during decompression.
+  init this structure  using LZ4_setStreamDecode() before first use.
+  note : only use in association with static linking !
+         this definition is not API/ABI safe,
+         and may change in a future version !
  
 </p></pre><BR>
 
@@ -447,11 +442,55 @@ union LZ4_streamDecode_u {
 #    define LZ4_DEPRECATED(message)
 #  endif
 #endif </b>/* LZ4_DISABLE_DEPRECATE_WARNINGS */<b>
-</b><p>   Should deprecation warnings be a problem,
-   it is generally possible to disable them,
-   typically with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual.
-   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS 
+</b><p>
+  Deprecated functions make the compiler generate a warning when invoked.
+  This is meant to invite users to update their source code.
+  Should deprecation warnings be a problem, it is generally possible to disable them,
+  typically with -Wno-deprecated-declarations for gcc
+  or _CRT_SECURE_NO_WARNINGS in Visual.
+
+  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+  before including the header file.
+ 
+</p></pre><BR>
+
+<pre><b>LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") LZ4LIB_API
+int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") LZ4LIB_API
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") LZ4LIB_API
+int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+</b><p>  These functions used to be a bit faster than LZ4_decompress_safe(),
+  but the situation has changed in recent versions.
+  Now, `LZ4_decompress_safe()` is as fast and sometimes even faster than `LZ4_decompress_fast()`.
+  Moreover, LZ4_decompress_safe() is protected vs malformed input, while `LZ4_decompress_fast()` is not, making it a security liability.
+  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+
+  The last remaining specificity of LZ4_decompress_fast() is that it can decompress a block without knowing its compressed size.
+  Note that even that functionality could be achieved in a more secure manner if need be,
+  though it would require new prototypes, and adaptation of the implementation to this new use case.
+
+  Parameters:
+  originalSize : is the uncompressed size to regenerate.
+                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ @return : number of bytes read from source buffer (== compressed size).
+           The function expects to finish at block's end exactly.
+           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+         However, since it doesn't know its 'src' size, it may read an unknown amount of input, and overflow input buffer.
+         Also, since match offsets are not validated, match reads from 'src' may underflow.
+         These issues never happen if input data is correct.
+         But they may happen if input data is invalid (error or intentional tampering).
+         As a consequence, use these functions in trusted environments with trusted data **only**.
+ 
+</p></pre><BR>
+
+<pre><b>void LZ4_resetStream (LZ4_stream_t* streamPtr);
+</b><p>  An LZ4_stream_t structure must be initialized at least once.
+  This is done with LZ4_initStream(), or LZ4_resetStream().
+  Consider switching to LZ4_initStream():
+  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ 
 </p></pre><BR>
 
 </html>
diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html
index fb8e0ce..914405f 100644
--- a/doc/lz4frame_manual.html
+++ b/doc/lz4frame_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>1.8.3 Manual</title>
+<title>1.9.0 Manual</title>
 </head>
 <body>
-<h1>1.8.3 Manual</h1>
+<h1>1.9.0 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -84,19 +84,21 @@
   LZ4F_blockChecksum_t   blockChecksumFlag;   </b>/* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */<b>
 } LZ4F_frameInfo_t;
 </b><p>  makes it possible to set or read frame parameters.
-  It's not required to set all fields, as long as the structure was initially memset() to zero.
-  For all fields, 0 sets it to default value 
+  Structure must first be initialized to 0, using memset() or LZ4F_INIT_FRAMEINFO,
+  setting all parameters to default.
+  It's then possible to update selectively some parameters 
 </p></pre><BR>
 
 <pre><b>typedef struct {
   LZ4F_frameInfo_t frameInfo;
   int      compressionLevel;    </b>/* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */<b>
-  unsigned autoFlush;           </b>/* 1: always flush, to reduce usage of internal buffers */<b>
-  unsigned favorDecSpeed;       </b>/* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4LZ4HC_CLEVEL_OPT_MIN) */  /* >= v1.8.2 */<b>
+  unsigned autoFlush;           </b>/* 1: always flush; reduces usage of internal buffers */<b>
+  unsigned favorDecSpeed;       </b>/* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */  /* v1.8.2+ */<b>
   unsigned reserved[3];         </b>/* must be zero for forward compatibility */<b>
 } LZ4F_preferences_t;
-</b><p>  makes it possible to supply detailed compression parameters to the stream interface.
-  Structure is presumed initially memset() to zero, representing default settings.
+</b><p>  makes it possible to supply advanced compression instructions to streaming interface.
+  Structure must first be initialized to 0, using memset() or LZ4F_INIT_PREFERENCES,
+  setting all parameters to default.
   All reserved fields must be set to zero. 
 </p></pre><BR>
 
@@ -155,15 +157,19 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
 </p></pre><BR>
 
 <pre><b>size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
-</b><p>  Provides minimum dstCapacity required to guarantee compression success
-  given a srcSize and preferences, covering worst case scenario.
+</b><p>  Provides minimum dstCapacity required to guarantee success of
+  LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario.
+  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead.
+  Note that the result is only valid for a single invocation of LZ4F_compressUpdate().
+  When invoking LZ4F_compressUpdate() multiple times,
+  if the output buffer is gradually filled up instead of emptied and re-used from its start,
+  one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound().
+ @return is always the same for a srcSize and prefsPtr.
   prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
-  Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd(),
-  Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
-  It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd().
-  Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin().
-  Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
-  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+  tech details :
+ @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+  It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
+ @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
  
 </p></pre><BR>
 
@@ -192,6 +198,7 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
  `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
  @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
            or an error code if it fails (which can be tested using LZ4F_isError())
+  Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
  
 </p></pre><BR>
 
@@ -204,6 +211,7 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
  `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
  @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
            or an error code if it fails (which can be tested using LZ4F_isError())
+  Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
   A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
  
 </p></pre><BR>
@@ -229,25 +237,58 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
 
 <a name="Chapter10"></a><h2>Streaming decompression functions</h2><pre></pre>
 
+<pre><b>size_t LZ4F_headerSize(const void* src, size_t srcSize);
+</b><p>  Provide the header size of a frame starting at `src`.
+ `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH,
+  which is enough to decode the header length.
+ @return : size of frame header
+           or an error code, which can be tested using LZ4F_isError()
+  note : Frame header size is variable, but is guaranteed to be
+         >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
+ 
+</p></pre><BR>
+
 <pre><b>size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
                                      LZ4F_frameInfo_t* frameInfoPtr,
                                      const void* srcBuffer, size_t* srcSizePtr);
 </b><p>  This function extracts frame parameters (max blockSize, dictID, etc.).
-  Its usage is optional.
-  Extracted information is typically useful for allocation and dictionary.
-  This function works in 2 situations :
-   - At the beginning of a new frame, in which case
-     it will decode information from `srcBuffer`, starting the decoding process.
-     Input size must be large enough to successfully decode the entire frame header.
-     Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
-     It's allowed to provide more input data than this minimum.
-   - After decoding has been started.
-     In which case, no input is read, frame parameters are extracted from dctx.
-   - If decoding has barely started, but not yet extracted information from header,
+  Its usage is optional: user can call LZ4F_decompress() directly.
+
+  Extracted information will fill an existing LZ4F_frameInfo_t structure.
+  This can be useful for allocation and dictionary identification purposes.
+
+  LZ4F_getFrameInfo() can work in the following situations :
+
+  1) At the beginning of a new frame, before any invocation of LZ4F_decompress().
+     It will decode header from `srcBuffer`,
+     consuming the header and starting the decoding process.
+
+     Input size must be large enough to contain the full frame header.
+     Frame header size can be known beforehand by LZ4F_headerSize().
+     Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes,
+     and <= LZ4F_HEADER_SIZE_MAX bytes.
+     Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work.
+     It's allowed to provide more input data than the header size,
+     LZ4F_getFrameInfo() will only consume the header.
+
+     If input size is not large enough,
+     aka if it's smaller than header size,
+     function will fail and return an error code.
+
+  2) After decoding has been started,
+     it's possible to invoke LZ4F_getFrameInfo() anytime
+     to extract already decoded frame parameters stored within dctx.
+
+     Note that, if decoding has barely started,
+     and not yet read enough information to decode the header,
      LZ4F_getFrameInfo() will fail.
-  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
-  Decompression must resume from (srcBuffer + *srcSizePtr).
- @return : an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+
+  The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value).
+  LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started,
+  and when decoding the header has been successful.
+  Decompression must then resume from (srcBuffer + *srcSizePtr).
+
+ @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
            or an error code which can be tested using LZ4F_isError().
   note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
   note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
@@ -295,13 +336,14 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
   and start a new one using same context resources. 
 </p></pre><BR>
 
-<pre><b>typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+<pre><b>typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM)
+              _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes;
 </b></pre><BR>
 <a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre>
 
 <pre><b>LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
 LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
-</b><p>  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+</b><p>  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once.
   LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
   LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
  `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict 
diff --git a/examples/blockStreaming_doubleBuffer.c b/examples/blockStreaming_doubleBuffer.c
index acb3455..3f719d3 100644
--- a/examples/blockStreaming_doubleBuffer.c
+++ b/examples/blockStreaming_doubleBuffer.c
@@ -44,7 +44,7 @@ void test_compress(FILE* outFp, FILE* inpFp)
     char inpBuf[2][BLOCK_BYTES];
     int  inpBufIndex = 0;
 
-    LZ4_resetStream(lz4Stream);
+    LZ4_initStream(lz4Stream, sizeof (*lz4Stream));
 
     for(;;) {
         char* const inpPtr = inpBuf[inpBufIndex];
diff --git a/examples/blockStreaming_lineByLine.c b/examples/blockStreaming_lineByLine.c
index 677c426..19c3345 100644
--- a/examples/blockStreaming_lineByLine.c
+++ b/examples/blockStreaming_lineByLine.c
@@ -99,7 +99,7 @@ static void test_decompress(
         uint16_t cmpBytes = 0;
 
         if (read_uint16(inpFp, &cmpBytes) != 1) break;
-        if (cmpBytes <= 0) break;
+        if (cmpBytes == 0) break;
         if (read_bin(inpFp, cmpBuf, cmpBytes) != cmpBytes) break;
 
         {
diff --git a/examples/compress_functions.c b/examples/compress_functions.c
index d0dca13..7fd6775 100644
--- a/examples/compress_functions.c
+++ b/examples/compress_functions.c
@@ -60,6 +60,7 @@
 #define _POSIX_C_SOURCE 199309L
 
 /* Includes, for Power! */
+#define LZ4_DISABLE_DEPRECATE_WARNINGS   /* LZ4_decompress_fast */
 #include "lz4.h"
 #include <stdio.h>    /* for printf() */
 #include <stdlib.h>   /* for exit() */
@@ -88,7 +89,6 @@
 void run_screaming(const char *message, const int code) {
   printf("%s\n", message);
   exit(code);
-  return;
 }
 
 
@@ -343,19 +343,19 @@ int main(int argc, char **argv) {
   printf("%s", separator);
   printf(header_format, "Source", "Function Benchmarked", "Total Seconds", "Iterations/sec", "ns/Iteration", "% of default");
   printf("%s", separator);
-  printf(format, "Normal Text", "LZ4_compress_default()",       (double)time_taken__default       / BILLION, (int)(iterations / ((double)time_taken__default       /BILLION)), time_taken__default       / iterations, (double)time_taken__default       * 100 / time_taken__default);
-  printf(format, "Normal Text", "LZ4_compress_fast()",          (double)time_taken__fast          / BILLION, (int)(iterations / ((double)time_taken__fast          /BILLION)), time_taken__fast          / iterations, (double)time_taken__fast          * 100 / time_taken__default);
-  printf(format, "Normal Text", "LZ4_compress_fast_extState()", (double)time_taken__fast_extstate / BILLION, (int)(iterations / ((double)time_taken__fast_extstate /BILLION)), time_taken__fast_extstate / iterations, (double)time_taken__fast_extstate * 100 / time_taken__default);
-  //printf(format, "Normal Text", "LZ4_compress_generic()",       (double)time_taken__generic       / BILLION, (int)(iterations / ((double)time_taken__generic       /BILLION)), time_taken__generic       / iterations, (double)time_taken__generic       * 100 / time_taken__default);
-  printf(format, "Normal Text", "LZ4_decompress_safe()",        (double)time_taken__decomp_safe   / BILLION, (int)(iterations / ((double)time_taken__decomp_safe   /BILLION)), time_taken__decomp_safe   / iterations, (double)time_taken__decomp_safe   * 100 / time_taken__default);
-  printf(format, "Normal Text", "LZ4_decompress_fast()",        (double)time_taken__decomp_fast   / BILLION, (int)(iterations / ((double)time_taken__decomp_fast   /BILLION)), time_taken__decomp_fast   / iterations, (double)time_taken__decomp_fast   * 100 / time_taken__default);
+  printf(format, "Normal Text", "LZ4_compress_default()",       (double)time_taken__default       / BILLION, (int)(iterations / ((double)time_taken__default       /BILLION)), (int)time_taken__default       / iterations, (double)time_taken__default       * 100 / time_taken__default);
+  printf(format, "Normal Text", "LZ4_compress_fast()",          (double)time_taken__fast          / BILLION, (int)(iterations / ((double)time_taken__fast          /BILLION)), (int)time_taken__fast          / iterations, (double)time_taken__fast          * 100 / time_taken__default);
+  printf(format, "Normal Text", "LZ4_compress_fast_extState()", (double)time_taken__fast_extstate / BILLION, (int)(iterations / ((double)time_taken__fast_extstate /BILLION)), (int)time_taken__fast_extstate / iterations, (double)time_taken__fast_extstate * 100 / time_taken__default);
+  //printf(format, "Normal Text", "LZ4_compress_generic()",       (double)time_taken__generic       / BILLION, (int)(iterations / ((double)time_taken__generic       /BILLION)), (int)time_taken__generic       / iterations, (double)time_taken__generic       * 100 / time_taken__default);
+  printf(format, "Normal Text", "LZ4_decompress_safe()",        (double)time_taken__decomp_safe   / BILLION, (int)(iterations / ((double)time_taken__decomp_safe   /BILLION)), (int)time_taken__decomp_safe   / iterations, (double)time_taken__decomp_safe   * 100 / time_taken__default);
+  printf(format, "Normal Text", "LZ4_decompress_fast()",        (double)time_taken__decomp_fast   / BILLION, (int)(iterations / ((double)time_taken__decomp_fast   /BILLION)), (int)time_taken__decomp_fast   / iterations, (double)time_taken__decomp_fast   * 100 / time_taken__default);
   printf(header_format, "", "", "", "", "", "");
-  printf(format, "Compressible", "LZ4_compress_default()",       (double)time_taken_hc__default       / BILLION, (int)(iterations / ((double)time_taken_hc__default       /BILLION)), time_taken_hc__default       / iterations, (double)time_taken_hc__default       * 100 / time_taken_hc__default);
-  printf(format, "Compressible", "LZ4_compress_fast()",          (double)time_taken_hc__fast          / BILLION, (int)(iterations / ((double)time_taken_hc__fast          /BILLION)), time_taken_hc__fast          / iterations, (double)time_taken_hc__fast          * 100 / time_taken_hc__default);
-  printf(format, "Compressible", "LZ4_compress_fast_extState()", (double)time_taken_hc__fast_extstate / BILLION, (int)(iterations / ((double)time_taken_hc__fast_extstate /BILLION)), time_taken_hc__fast_extstate / iterations, (double)time_taken_hc__fast_extstate * 100 / time_taken_hc__default);
-  //printf(format, "Compressible", "LZ4_compress_generic()",       (double)time_taken_hc__generic       / BILLION, (int)(iterations / ((double)time_taken_hc__generic       /BILLION)), time_taken_hc__generic       / iterations, (double)time_taken_hc__generic       * 100 / time_taken_hc__default);
-  printf(format, "Compressible", "LZ4_decompress_safe()",        (double)time_taken_hc__decomp_safe   / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_safe   /BILLION)), time_taken_hc__decomp_safe   / iterations, (double)time_taken_hc__decomp_safe   * 100 / time_taken_hc__default);
-  printf(format, "Compressible", "LZ4_decompress_fast()",        (double)time_taken_hc__decomp_fast   / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_fast   /BILLION)), time_taken_hc__decomp_fast   / iterations, (double)time_taken_hc__decomp_fast   * 100 / time_taken_hc__default);
+  printf(format, "Compressible", "LZ4_compress_default()",       (double)time_taken_hc__default       / BILLION, (int)(iterations / ((double)time_taken_hc__default       /BILLION)), (int)time_taken_hc__default       / iterations, (double)time_taken_hc__default       * 100 / time_taken_hc__default);
+  printf(format, "Compressible", "LZ4_compress_fast()",          (double)time_taken_hc__fast          / BILLION, (int)(iterations / ((double)time_taken_hc__fast          /BILLION)), (int)time_taken_hc__fast          / iterations, (double)time_taken_hc__fast          * 100 / time_taken_hc__default);
+  printf(format, "Compressible", "LZ4_compress_fast_extState()", (double)time_taken_hc__fast_extstate / BILLION, (int)(iterations / ((double)time_taken_hc__fast_extstate /BILLION)), (int)time_taken_hc__fast_extstate / iterations, (double)time_taken_hc__fast_extstate * 100 / time_taken_hc__default);
+  //printf(format, "Compressible", "LZ4_compress_generic()",       (double)time_taken_hc__generic       / BILLION, (int)(iterations / ((double)time_taken_hc__generic       /BILLION)), (int)time_taken_hc__generic       / iterations, (double)time_taken_hc__generic       * 100 / time_taken_hc__default);
+  printf(format, "Compressible", "LZ4_decompress_safe()",        (double)time_taken_hc__decomp_safe   / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_safe   /BILLION)), (int)time_taken_hc__decomp_safe   / iterations, (double)time_taken_hc__decomp_safe   * 100 / time_taken_hc__default);
+  printf(format, "Compressible", "LZ4_decompress_fast()",        (double)time_taken_hc__decomp_fast   / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_fast   /BILLION)), (int)time_taken_hc__decomp_fast   / iterations, (double)time_taken_hc__decomp_fast   * 100 / time_taken_hc__default);
   printf("%s", separator);
   printf("\n");
   printf("All done, ran %d iterations per test.\n", iterations);
diff --git a/examples/dictionaryRandomAccess.c b/examples/dictionaryRandomAccess.c
index 291fd08..ecb3b2d 100644
--- a/examples/dictionaryRandomAccess.c
+++ b/examples/dictionaryRandomAccess.c
@@ -11,7 +11,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-#define MIN(x, y) (x) < (y) ? (x) : (y)
+#define MIN(x, y)  ((x) < (y) ? (x) : (y))
 
 enum {
     BLOCK_BYTES = 1024,  /* 1 KiB of uncompressed data in a block */
@@ -63,7 +63,7 @@ void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize)
     int *offsetsEnd = offsets;
 
 
-    LZ4_resetStream(lz4Stream);
+    LZ4_initStream(lz4Stream, sizeof(*lz4Stream));
 
     /* Write header magic */
     write_bin(outFp, kTestMagic, sizeof(kTestMagic));
diff --git a/examples/frameCompress.c b/examples/frameCompress.c
index a0c5d3d..a189329 100644
--- a/examples/frameCompress.c
+++ b/examples/frameCompress.c
@@ -70,11 +70,12 @@ compress_file_internal(FILE* f_in, FILE* f_out,
     /* write frame header */
     {   size_t const headerSize = LZ4F_compressBegin(ctx, outBuff, outCapacity, &kPrefs);
         if (LZ4F_isError(headerSize)) {
-            printf("Failed to start compression: error %zu\n", headerSize);
+            printf("Failed to start compression: error %u \n", (unsigned)headerSize);
             return result;
         }
         count_out = headerSize;
-        printf("Buffer size is %zu bytes, header size %zu bytes\n", outCapacity, headerSize);
+        printf("Buffer size is %u bytes, header size %u bytes \n",
+                (unsigned)outCapacity, (unsigned)headerSize);
         safe_fwrite(outBuff, 1, headerSize, f_out);
     }
 
@@ -89,11 +90,11 @@ compress_file_internal(FILE* f_in, FILE* f_out,
                                                 inBuff, readSize,
                                                 NULL);
         if (LZ4F_isError(compressedSize)) {
-            printf("Compression failed: error %zu\n", compressedSize);
+            printf("Compression failed: error %u \n", (unsigned)compressedSize);
             return result;
         }
 
-        printf("Writing %zu bytes\n", compressedSize);
+        printf("Writing %u bytes\n", (unsigned)compressedSize);
         safe_fwrite(outBuff, 1, compressedSize, f_out);
         count_out += compressedSize;
     }
@@ -103,11 +104,11 @@ compress_file_internal(FILE* f_in, FILE* f_out,
                                                 outBuff, outCapacity,
                                                 NULL);
         if (LZ4F_isError(compressedSize)) {
-            printf("Failed to end compression: error %zu\n", compressedSize);
+            printf("Failed to end compression: error %u \n", (unsigned)compressedSize);
             return result;
         }
 
-        printf("Writing %zu bytes\n", compressedSize);
+        printf("Writing %u bytes \n", (unsigned)compressedSize);
         safe_fwrite(outBuff, 1, compressedSize, f_out);
         count_out += compressedSize;
     }
@@ -184,8 +185,8 @@ decompress_file_internal(FILE* f_in, FILE* f_out,
     while (ret != 0) {
         /* Load more input */
         size_t readSize = firstChunk ? filled : fread(src, 1, srcCapacity, f_in); firstChunk=0;
-        const void* srcPtr = src + alreadyConsumed; alreadyConsumed=0;
-        const void* const srcEnd = srcPtr + readSize;
+        const void* srcPtr = (const char*)src + alreadyConsumed; alreadyConsumed=0;
+        const void* const srcEnd = (const char*)srcPtr + readSize;
         if (readSize == 0 || ferror(f_in)) {
             printf("Decompress: not enough input or error reading file\n");
             return 1;
@@ -198,7 +199,7 @@ decompress_file_internal(FILE* f_in, FILE* f_out,
         while (srcPtr < srcEnd && ret != 0) {
             /* Any data within dst has been flushed at this stage */
             size_t dstSize = dstCapacity;
-            size_t srcSize = srcEnd - srcPtr;
+            size_t srcSize = (const char*)srcEnd - (const char*)srcPtr;
             ret = LZ4F_decompress(dctx, dst, &dstSize, srcPtr, &srcSize, /* LZ4F_decompressOptions_t */ NULL);
             if (LZ4F_isError(ret)) {
                 printf("Decompression error: %s\n", LZ4F_getErrorName(ret));
@@ -207,7 +208,7 @@ decompress_file_internal(FILE* f_in, FILE* f_out,
             /* Flush output */
             if (dstSize != 0) safe_fwrite(dst, 1, dstSize, f_out);
             /* Update input */
-            srcPtr += srcSize;
+            srcPtr = (const char*)srcPtr + srcSize;
         }
 
         assert(srcPtr <= srcEnd);
diff --git a/examples/simple_buffer.c b/examples/simple_buffer.c
index 403d9e8..54e542a 100644
--- a/examples/simple_buffer.c
+++ b/examples/simple_buffer.c
@@ -16,10 +16,9 @@
 /*
  * Easy show-error-and-bail function.
  */
-void run_screaming(const char *message, const int code) {
-  printf("%s\n", message);
+void run_screaming(const char* message, const int code) {
+  printf("%s \n", message);
   exit(code);
-  return;
 }
 
 
diff --git a/lib/Makefile b/lib/Makefile
index 88d9b4f..cb1571c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -71,7 +71,11 @@ else
 	SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
 endif
 
+ifneq (,$(filter Windows%,$(OS)))
+LIBLZ4 = liblz4-$(LIBVER_MAJOR)
+else
 LIBLZ4 = liblz4.$(SHARED_EXT_VER)
+endif
 
 .PHONY: default
 default: lib-release
@@ -103,8 +107,7 @@ $(LIBLZ4): $(SRCFILES)
 ifeq ($(BUILD_SHARED),yes)  # can be disabled on command line
 	@echo compiling dynamic library $(LIBVER)
 ifneq (,$(filter Windows%,$(OS)))
-	$(Q)$(CC) $(FLAGS) -DLZ4_DLL_EXPORT=1 -shared $^ -o dll\$@.dll
-	dlltool -D dll\liblz4.dll -d dll\liblz4.def -l dll\liblz4.lib
+	$(Q)$(CC) $(FLAGS) -DLZ4_DLL_EXPORT=1 -shared $^ -o dll/$@.dll -Wl,--out-implib,dll/liblz4.lib
 else
 	$(Q)$(CC) $(FLAGS) -shared $^ -fPIC -fvisibility=hidden $(SONAME_FLAGS) -o $@
 	@echo creating versioned links
@@ -124,7 +127,7 @@ clean:
 #-----------------------------------------------------------------------------
 # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD MINGW32_NT-6.1 MINGW64_NT-6.1 MINGW32_NT-10.0 MINGW64_NT-10.0))
 
 .PHONY: listL120
 listL120:  # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
@@ -138,12 +141,14 @@ PREFIX      ?= /usr/local
 prefix      ?= $(PREFIX)
 EXEC_PREFIX ?= $(prefix)
 exec_prefix ?= $(EXEC_PREFIX)
+BINDIR      ?= $(exec_prefix)/bin
+bindir      ?= $(BINDIR)
 LIBDIR      ?= $(exec_prefix)/lib
 libdir      ?= $(LIBDIR)
 INCLUDEDIR  ?= $(prefix)/include
 includedir  ?= $(INCLUDEDIR)
 
-ifneq (,$(filter $(OS),OpenBSD FreeBSD NetBSD DragonFly))
+ifneq (,$(filter $(OS),OpenBSD FreeBSD NetBSD DragonFly MidnightBSD))
 PKGCONFIGDIR ?= $(prefix)/libdata/pkgconfig
 else
 PKGCONFIGDIR ?= $(libdir)/pkgconfig
@@ -168,7 +173,7 @@ liblz4.pc: liblz4.pc.in Makefile
           $< >$@
 
 install: lib liblz4.pc
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(pkgconfigdir)/ $(DESTDIR)$(includedir)/ $(DESTDIR)$(libdir)/
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(pkgconfigdir)/ $(DESTDIR)$(includedir)/ $(DESTDIR)$(libdir)/ $(DESTDIR)$(bindir)/
 	$(Q)$(INSTALL_DATA) liblz4.pc $(DESTDIR)$(pkgconfigdir)/
 	@echo Installing libraries
 ifeq ($(BUILD_STATIC),yes)
@@ -176,10 +181,18 @@ ifeq ($(BUILD_STATIC),yes)
 	$(Q)$(INSTALL_DATA) lz4frame_static.h $(DESTDIR)$(includedir)/lz4frame_static.h
 endif
 ifeq ($(BUILD_SHARED),yes)
+# Traditionally, DLLs are installed in the bin directory, since programs
+# search for them in their own directory first. This avoids polluting system
+# directories (like c:/windows/system32) and avoids modifying the PATH variable.
+ifneq (,$(filter Windows%,$(OS)))
+	$(Q)$(INSTALL_PROGRAM) dll/$(LIBLZ4).dll $(DESTDIR)$(bindir)
+	$(Q)$(INSTALL_PROGRAM) dll/liblz4.lib $(DESTDIR)$(libdir)
+else
 	$(Q)$(INSTALL_PROGRAM) liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)
 	$(Q)ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_MAJOR)
 	$(Q)ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT)
 endif
+endif
 	@echo Installing headers in $(includedir)
 	$(Q)$(INSTALL_DATA) lz4.h $(DESTDIR)$(includedir)/lz4.h
 	$(Q)$(INSTALL_DATA) lz4hc.h $(DESTDIR)$(includedir)/lz4hc.h
@@ -188,9 +201,14 @@ endif
 
 uninstall:
 	$(Q)$(RM) $(DESTDIR)$(pkgconfigdir)/liblz4.pc
+ifneq (,$(filter Windows%,$(OS)))
+	$(Q)$(RM) $(DESTDIR)$(bindir)/$(LIBLZ4).dll
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.lib
+else
 	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT)
 	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_MAJOR)
 	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_VER)
+endif
 	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.a
 	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4.h
 	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4hc.h
diff --git a/lib/README.md b/lib/README.md
index 7082fe3..b753195 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -7,8 +7,8 @@ not all of them are necessary.
 #### Minimal LZ4 build
 
 The minimum required is **`lz4.c`** and **`lz4.h`**,
-which provides the fast compression and decompression algorithm.
-They generate and decode data using [LZ4 block format].
+which provides the fast compression and decompression algorithms.
+They generate and decode data using the [LZ4 block format].
 
 
 #### High Compression variant
@@ -16,13 +16,14 @@ They generate and decode data using [LZ4 block format].
 For more compression ratio at the cost of compression speed,
 the High Compression variant called **lz4hc** is available.
 Add files **`lz4hc.c`** and **`lz4hc.h`**.
-The variant still depends on regular `lib/lz4.*` source files.
+This variant also compresses data using the [LZ4 block format],
+and depends on regular `lib/lz4.*` source files.
 
 
-#### Frame variant, for interoperability
+#### Frame support, for interoperability
 
 In order to produce compressed data compatible with `lz4` command line utility,
-it's necessary to encode lz4-compressed blocks using the [official interoperable frame format].
+it's necessary to use the [official interoperable frame format].
 This format is generated and decoded automatically by the **lz4frame** library.
 Its public API is described in `lib/lz4frame.h`.
 In order to work properly, lz4frame needs all other modules present in `/lib`,
@@ -32,15 +33,63 @@ So it's necessary to include all `*.c` and `*.h` files present in `/lib`.
 
 #### Advanced / Experimental API
 
-A complex API defined in `lz4frame_static.h` contains definitions
-which are not guaranteed to remain stable in future versions.
-As a consequence, it must be used with static linking ***only***.
+Definitions which are not guaranteed to remain stable in future versions,
+are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`.
+As the name implies, these definitions must be invoked
+in the context of static linking ***only***.
+Otherwise, dependent applications may fail on API or ABI break in the future.
+The associated symbols are also not present in the dynamic library by default.
+Should they be nonetheless needed, it's possible to force their publication
+by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
+
+
+#### Build macros
+
+The following build macros can be selected at compilation time :
+
+- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
+  This loop works great on x86/x64 CPUs, and is automatically enabled on these platforms.
+  It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
+  For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
+  and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
+
+- `LZ4_DISTANCE_MAX` : controls the maximum offset that the compressor will allow.
+  Set to 65535 by default, which is the maximum value supported by lz4 format.
+  Reducing maximum distance will reduce opportunities for LZ4 to find matches,
+  hence will produce a worse compression ratio.
+  However, a smaller max distance may allow compatibility with specific decoders using limited memory budget.
+  This build macro only influences the compressed output of the compressor.
+
+- `LZ4_DISABLE_DEPRECATE_WARNINGS` : invoking a deprecated function will make the compiler generate a warning.
+  This is meant to invite users to update their source code.
+  Should this be a problem, it's generally possible to make the compiler ignore these warnings,
+  for example with `-Wno-deprecated-declarations` on `gcc`,
+  or `_CRT_SECURE_NO_WARNINGS` for Visual Studio.
+  Another method is to define `LZ4_DISABLE_DEPRECATE_WARNINGS`
+  before including the LZ4 header files.
+
+
+#### Amalgamation
+
+lz4 source code can be amalgamated into a single file.
+One can combine all source code into `lz4_all.c` by using the following commands:
+```
+cat lz4.c > lz4_all.c
+cat lz4hc.c >> lz4_all.c
+cat lz4frame.c >> lz4_all.c
+```
+(`cat` file order is important) then compile `lz4_all.c`.
+All `*.h` files present in `/lib` remain necessary to compile `lz4_all.c`.
 
 
 #### Windows : using MinGW+MSYS to create DLL
 
 DLL can be created using MinGW+MSYS with the `make liblz4` command.
 This command creates `dll\liblz4.dll` and the import library `dll\liblz4.lib`.
+To override the `dlltool` command when cross-compiling on Linux, just set the `DLLTOOL` variable. Example of cross-compilation on Linux with 64-bit mingw-w64:
+```
+make BUILD_STATIC=no CC=x86_64-w64-mingw32-gcc DLLTOOL=x86_64-w64-mingw32-dlltool OS=Windows_NT
+```
 The import library is only required with Visual C++.
 The header files `lz4.h`, `lz4hc.h`, `lz4frame.h` and the dynamic library
 `dll\liblz4.dll` are required to compile a project using gcc/MinGW.
@@ -48,7 +97,7 @@ The dynamic library has to be added to linking options.
 It means that if a project that uses LZ4 consists of a single `test-dll.c`
 file it should be linked with `dll\liblz4.dll`. For example:
 ```
-    gcc $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\liblz4.dll
+    $(CC) $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\liblz4.dll
 ```
 The compiled executable will require LZ4 DLL which is available at `dll\liblz4.dll`.
 
diff --git a/lib/dll/liblz4.def b/lib/dll/liblz4.def
deleted file mode 100644
index 0ace223..0000000
--- a/lib/dll/liblz4.def
+++ /dev/null
@@ -1,62 +0,0 @@
-LIBRARY liblz4.dll
-EXPORTS
-    LZ4F_compressBegin
-    LZ4F_compressBound
-    LZ4F_compressEnd
-    LZ4F_compressFrame
-    LZ4F_compressFrameBound
-    LZ4F_compressUpdate
-    LZ4F_createCompressionContext
-    LZ4F_createDecompressionContext
-    LZ4F_decompress
-    LZ4F_flush
-    LZ4F_freeCompressionContext
-    LZ4F_freeDecompressionContext
-    LZ4F_getErrorName
-    LZ4F_getFrameInfo
-    LZ4F_getVersion
-    LZ4F_isError
-    LZ4_compress
-    LZ4_compressBound
-    LZ4_compressHC
-    LZ4_compressHC_continue
-    LZ4_compressHC_limitedOutput
-    LZ4_compressHC_limitedOutput_continue
-    LZ4_compressHC_limitedOutput_withStateHC
-    LZ4_compressHC_withStateHC
-    LZ4_compress_HC
-    LZ4_compress_HC_continue
-    LZ4_compress_HC_extStateHC
-    LZ4_compress_continue
-    LZ4_compress_default
-    LZ4_compress_destSize
-    LZ4_compress_fast
-    LZ4_compress_fast_continue
-    LZ4_compress_fast_extState
-    LZ4_compress_limitedOutput
-    LZ4_compress_limitedOutput_continue
-    LZ4_compress_limitedOutput_withState
-    LZ4_compress_withState
-    LZ4_createStream
-    LZ4_createStreamDecode
-    LZ4_createStreamHC
-    LZ4_decompress_fast
-    LZ4_decompress_fast_continue
-    LZ4_decompress_fast_usingDict
-    LZ4_decompress_safe
-    LZ4_decompress_safe_continue
-    LZ4_decompress_safe_partial
-    LZ4_decompress_safe_usingDict
-    LZ4_freeStream
-    LZ4_freeStreamDecode
-    LZ4_freeStreamHC
-    LZ4_loadDict
-    LZ4_loadDictHC
-    LZ4_resetStream
-    LZ4_resetStreamHC
-    LZ4_saveDict
-    LZ4_saveDictHC
-    LZ4_setStreamDecode
-    LZ4_sizeofState
-    LZ4_sizeofStateHC
-    LZ4_versionNumber
diff --git a/lib/lz4.c b/lib/lz4.c
index 4046102..693bdaf 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c
@@ -32,7 +32,6 @@
     - LZ4 source repository : https://github.com/lz4/lz4
 */
 
-
 /*-************************************
 *  Tuning parameters
 **************************************/
@@ -91,6 +90,14 @@
 /*-************************************
 *  Dependency
 **************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+#  define LZ4_SRC_INCLUDED 1
+#endif
+
 #define LZ4_STATIC_LINKING_ONLY
 #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
 #include "lz4.h"
@@ -136,7 +143,7 @@
  * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
  * of LZ4_wildCopy does not affect the compression speed.
  */
-#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
 #  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
 #  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
 #else
@@ -170,7 +177,7 @@
 
 
 /*-************************************
-*  Basic Types
+*  Types
 **************************************/
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
@@ -195,6 +202,13 @@
   typedef size_t reg_t;   /* 32-bits in x32 mode */
 #endif
 
+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
+
 /*-************************************
 *  Reading and writing into memory
 **************************************/
@@ -228,7 +242,7 @@ static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArc
 static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
 static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
 
-#else  /* safe and portable access through memcpy() */
+#else  /* safe and portable access using memcpy() */
 
 static U16 LZ4_read16(const void* memPtr)
 {
@@ -290,6 +304,86 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
     do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
 
+static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
+
+
+#ifndef LZ4_FAST_DEC_LOOP
+#  if defined(__i386__) || defined(__x86_64__)
+#    define LZ4_FAST_DEC_LOOP 1
+#  else
+#    define LZ4_FAST_DEC_LOOP 0
+#  endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
+/* LZ4_memcpy_using_offset_base() : copies the first 8 bytes with special handling when offset < 8, then delegates the remainder to LZ4_wildCopy(). */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    if (offset < 8) {   /* NOTE(review): offset is presumably the match distance (dstPtr - srcPtr) - confirm at call sites */
+        dstPtr[0] = srcPtr[0];   /* first 4 bytes are copied one at a time, in increasing address order */
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+        srcPtr += inc32table[offset];   /* per-offset source adjustment before copying bytes 4..7 */
+        memcpy(dstPtr+4, srcPtr, 4);
+        srcPtr -= dec64table[offset];   /* per-offset correction; some table entries are negative, which moves srcPtr forward */
+        dstPtr += 8;
+    } else {   /* offset >= 8 : one plain 8-byte memcpy, both pointers advance by 8 */
+        memcpy(dstPtr, srcPtr, 8);
+        dstPtr += 8;
+        srcPtr += 8;
+    }
+
+    LZ4_wildCopy(dstPtr, srcPtr, dstEnd);   /* LZ4_wildCopy() is a do/while in 8-byte steps : at least 8 more bytes are always written */
+}
+
+/* LZ4_wildCopy32() : customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd.
+ * Each iteration copies 2 x 16 bytes (instead of 1 x 32 bytes),
+ * because it must remain compatible with offsets >= 16. */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);   /* do/while : one 32-byte round always runs, even if d >= e on entry */
+}
+/* LZ4_memcpy_using_offset() : for offset 1, 2 or 4, builds an 8-byte repeating pattern and stores it in 8-byte steps; any other offset is forwarded to LZ4_memcpy_using_offset_base(). */
+LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    BYTE v[8];   /* 8-byte pattern buffer, filled only by cases 1, 2 and 4 */
+    switch(offset) {
+    case 1:   /* replicate one source byte 8 times */
+        memset(v, *srcPtr, 8);
+        goto copy_loop;
+    case 2:   /* replicate a 2-byte source unit 4 times */
+        memcpy(v, srcPtr, 2);
+        memcpy(&v[2], srcPtr, 2);
+        memcpy(&v[4], &v[0], 4);
+        goto copy_loop;
+    case 4:   /* replicate a 4-byte source unit twice */
+        memcpy(v, srcPtr, 4);
+        memcpy(&v[4], srcPtr, 4);
+        goto copy_loop;
+    default:   /* no pattern shortcut for this offset : use the generic routine and return */
+        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+        return;
+    }
+
+ copy_loop:
+    memcpy(dstPtr, v, 8);   /* the first 8 bytes are stored unconditionally */
+    dstPtr += 8;
+    while (dstPtr < dstEnd) {   /* stores are always 8 bytes wide, so the last one can extend past dstEnd */
+        memcpy(dstPtr, v, 8);
+        dstPtr += 8;
+    }
+}
+#endif
+
 
 /*-************************************
 *  Common Constants
@@ -300,14 +394,20 @@ void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 #define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
 #define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
 #define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
 static const int LZ4_minLength = (MFLIMIT+1);
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
-#define MAXD_LOG 16
-#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#ifndef LZ4_DISTANCE_MAX   /* can be user - defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535
+#endif
+
+#if (LZ4_DISTANCE_MAX > 65535)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
 
 #define ML_BITS  4
 #define ML_MASK  ((1U<<ML_BITS)-1)
@@ -455,7 +555,6 @@ static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression ru
 /*-************************************
 *  Local Structures and types
 **************************************/
-typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive;
 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
 
 /**
@@ -522,13 +621,14 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 
 static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 {
-    static const U64 prime5bytes = 889523592379ULL;
-    static const U64 prime8bytes = 11400714785074694791ULL;
     const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
-    if (LZ4_isLittleEndian())
+    if (LZ4_isLittleEndian()) {
+        const U64 prime5bytes = 889523592379ULL;
         return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
-    else
+    } else {
+        const U64 prime8bytes = 11400714785074694791ULL;
         return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
+    }
 }
 
 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
@@ -609,6 +709,15 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
         LZ4_stream_t_internal* const cctx,
         const int inputSize,
         const tableType_t tableType) {
+    /* If compression failed during the previous step, then the context
+     * is marked as dirty, therefore, it has to be fully reset.
+     */
+    if (cctx->dirty) {
+        DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
+        MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
+        return;
+    }
+
     /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
      * therefore safe to use no matter what mode we're in. Otherwise, we figure
      * out if it's safe to leave as is or whether it needs to be reset.
@@ -629,7 +738,7 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
         }
     }
 
-    /* Adding a gap, so all previous entries are > MAX_DISTANCE back, is faster
+    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
      * than compressing without a gap. However, compressing with
      * currentOffset == 0 is faster still, so we preserve that case.
      */
@@ -651,14 +760,15 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                  const char* const source,
                  char* const dest,
                  const int inputSize,
-                 int *inputConsumed, /* only written when outputLimited == fillOutput */
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
                  const int maxOutputSize,
-                 const limitedOutput_directive outputLimited,
+                 const limitedOutput_directive outputDirective,
                  const tableType_t tableType,
                  const dict_directive dictDirective,
                  const dictIssue_directive dictIssue,
-                 const U32 acceleration)
+                 const int acceleration)
 {
+    int result;
     const BYTE* ip = (const BYTE*) source;
 
     U32 const startIndex = cctx->currentOffset;
@@ -693,9 +803,10 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     U32 forwardH;
 
     DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
-    /* Init conditions */
-    if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
+    /* If init conditions are not met, we don't have to mark stream
+     * as having dirty context, since no action was taken yet */
+    if (outputDirective == fillOutput && maxOutputSize < 1) return 0;   /* Impossible to store anything */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;           /* Unsupported inputSize, too large (or negative) */
     if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
     if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
     assert(acceleration >= 1);
@@ -728,8 +839,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         /* Find a match */
         if (tableType == byPtr) {
             const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
                 U32 const h = forwardH;
                 ip = forwardIp;
@@ -743,14 +854,14 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
 
-            } while ( (match+MAX_DISTANCE < ip)
+            } while ( (match+LZ4_DISTANCE_MAX < ip)
                    || (LZ4_read32(match) != LZ4_read32(ip)) );
 
         } else {   /* byU32, byU16 */
 
             const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
                 U32 const h = forwardH;
                 U32 const current = (U32)(forwardIp - base);
@@ -794,8 +905,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
 
                 if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
                 assert(matchIndex < current);
-                if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue;  /* too far */
-                if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE);     /* too_far presumed impossible with byU16 */
+                if ((tableType != byU16) && (matchIndex+LZ4_DISTANCE_MAX < current)) continue;  /* too far */
+                if (tableType == byU16) assert((current - matchIndex) <= LZ4_DISTANCE_MAX);     /* too_far presumed impossible with byU16 */
 
                 if (LZ4_read32(match) == LZ4_read32(ip)) {
                     if (maybe_extMem) offset = current - matchIndex;
@@ -811,16 +922,17 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         /* Encode Literals */
         {   unsigned const litLength = (unsigned)(ip - anchor);
             token = op++;
-            if ((outputLimited == limitedOutput) &&  /* Check output buffer overflow */
-                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
-                return 0;
-            if ((outputLimited == fillOutput) &&
+            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) )
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+
+            if ((outputDirective == fillOutput) &&
                 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                 op--;
                 goto _last_literals;
             }
             if (litLength >= RUN_MASK) {
-                int len = (int)litLength-RUN_MASK;
+                int len = (int)(litLength - RUN_MASK);
                 *token = (RUN_MASK<<ML_BITS);
                 for(; len >= 255 ; len-=255) *op++ = 255;
                 *op++ = (BYTE)len;
@@ -843,7 +955,7 @@ _next_match:
          * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
          */
 
-        if ((outputLimited == fillOutput) &&
+        if ((outputDirective == fillOutput) &&
             (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
             /* the match was too close to the end, rewind and go to last literals */
             op = token;
@@ -853,11 +965,11 @@ _next_match:
         /* Encode Offset */
         if (maybe_extMem) {   /* static test */
             DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
-            assert(offset <= MAX_DISTANCE && offset > 0);
+            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
             LZ4_writeLE16(op, (U16)offset); op+=2;
         } else  {
             DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
-            assert(ip-match <= MAX_DISTANCE);
+            assert(ip-match <= LZ4_DISTANCE_MAX);
             LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
         }
 
@@ -870,7 +982,7 @@ _next_match:
                 assert(dictEnd > match);
                 if (limit > matchlimit) limit = matchlimit;
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                 if (ip==limit) {
                     unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                     matchCode += more;
@@ -879,19 +991,20 @@ _next_match:
                 DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
             } else {
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                 DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
             }
 
-            if ((outputLimited) &&    /* Check output buffer overflow */
+            if ((outputDirective) &&    /* Check output buffer overflow */
                 (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
-                if (outputLimited == limitedOutput)
-                  return 0;
-                if (outputLimited == fillOutput) {
+                if (outputDirective == fillOutput) {
                     /* Match description too long : reduce it */
                     U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
                     ip -= matchCode - newMatchCode;
                     matchCode = newMatchCode;
+                } else {
+                    assert(outputDirective == limitedOutput);
+                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
                 }
             }
             if (matchCode >= ML_MASK) {
@@ -922,7 +1035,7 @@ _next_match:
 
             match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
             LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-            if ( (match+MAX_DISTANCE >= ip)
+            if ( (match+LZ4_DISTANCE_MAX >= ip)
               && (LZ4_read32(match) == LZ4_read32(ip)) )
             { token=op++; *token=0; goto _next_match; }
 
@@ -957,7 +1070,7 @@ _next_match:
             LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
             assert(matchIndex < current);
             if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
-              && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
+              && ((tableType==byU16) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
               && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                 token=op++;
                 *token=0;
@@ -976,15 +1089,17 @@ _next_match:
 _last_literals:
     /* Encode Last Literals */
     {   size_t lastRun = (size_t)(iend - anchor);
-        if ( (outputLimited) &&  /* Check output buffer overflow */
+        if ( (outputDirective) &&  /* Check output buffer overflow */
             (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
-            if (outputLimited == fillOutput) {
+            if (outputDirective == fillOutput) {
                 /* adapt lastRun to fill 'dst' */
-                lastRun  = (olimit-op) - 1;
+                assert(olimit >= op);
+                lastRun  = (size_t)(olimit-op) - 1;
                 lastRun -= (lastRun+240)/255;
+            } else {
+                assert(outputDirective == limitedOutput);
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
             }
-            if (outputLimited == limitedOutput)
-                return 0;
         }
         if (lastRun >= RUN_MASK) {
             size_t accumulator = lastRun - RUN_MASK;
@@ -999,31 +1114,33 @@ _last_literals:
         op += lastRun;
     }
 
-    if (outputLimited == fillOutput) {
+    if (outputDirective == fillOutput) {
         *inputConsumed = (int) (((const char*)ip)-source);
     }
     DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
-    return (int)(((char*)op) - dest);
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    return result;
 }
 
 
 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
-    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+    assert(ctx != NULL);
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
-    LZ4_resetStream((LZ4_stream_t*)state);
     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
         if (inputSize < LZ4_64Klimit) {
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
         }
     } else {
         if (inputSize < LZ4_64Klimit) {;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
         }
     }
@@ -1053,7 +1170,7 @@ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst
                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
             }
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             LZ4_prepareTable(ctx, srcSize, tableType);
             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
         }
@@ -1067,7 +1184,7 @@ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst
                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
             }
         } else {
-            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
             LZ4_prepareTable(ctx, srcSize, tableType);
             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
         }
@@ -1094,23 +1211,25 @@ int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutp
 }
 
 
-int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
 {
-    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);   /* thin wrapper : acceleration fixed to 1 */
 }
 
 
 /* hidden debug function */
 /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
-int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
 {
     LZ4_stream_t ctx;
-    LZ4_resetStream(&ctx);
+    LZ4_initStream(&ctx, sizeof(ctx));   /* zero-initialize the on-stack state */
 
-    if (inputSize < LZ4_64Klimit)
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
-    else
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    if (srcSize < LZ4_64Klimit) {
+        return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16,    noDict, noDictIssue, acceleration);
+    } else {
+        tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr;   /* byPtr only when pointers are at most 4 bytes wide */
+        return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration);
+    }
 }
 
 
@@ -1119,7 +1238,8 @@ int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int m
  * _continue() call without resetting it. */
 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-    LZ4_resetStream(state);
+    void* const s = LZ4_initStream(state, sizeof (*state));
+    assert(s != NULL); (void)s;
 
     if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
@@ -1127,8 +1247,8 @@ static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src,
         if (*srcSizePtr < LZ4_64Klimit) {
             return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
         } else {
-            tableType_t const tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
-            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, tableType, noDict, noDictIssue, 1);
+            tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
     }   }
 }
 
@@ -1159,14 +1279,40 @@ int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targe
 
 LZ4_stream_t* LZ4_createStream(void)
 {
-    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
     LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
     DEBUGLOG(4, "LZ4_createStream %p", lz4s);
     if (lz4s == NULL) return NULL;
-    LZ4_resetStream(lz4s);
+    LZ4_initStream(lz4s, sizeof(*lz4s));   /* zeroes the new state. NOTE(review): result ignored - presumably ALLOC always returns suitably aligned memory; confirm */
     return lz4s;
 }
 
+#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
+                     it reports an alignment of 8-bytes,
+                     while actually aligning LZ4_stream_t on 4 bytes. */
+static size_t LZ4_stream_t_alignment(void)
+{
+    struct { char c; LZ4_stream_t t; } t_a;
+    return sizeof(t_a) - sizeof(t_a.t);   /* size of `c` plus the padding around `t` : expected to equal the alignment of LZ4_stream_t */
+}
+#endif
+
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{   /* @return : buffer, zeroed and typed as LZ4_stream_t*, or NULL if buffer is NULL, too small or misaligned */
+    DEBUGLOG(5, "LZ4_initStream");
+    if (buffer == NULL) return NULL;
+    if (size < sizeof(LZ4_stream_t)) return NULL;   /* buffer cannot hold a full state */
+#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
+                     it reports an alignment of 8-bytes,
+                     while actually aligning LZ4_stream_t on 4 bytes. */
+    if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) return NULL;  /* alignment check */
+#endif
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));   /* only the first sizeof(LZ4_stream_t) bytes are zeroed, even if size is larger */
+    return (LZ4_stream_t*)buffer;
+}
+
+/* resetStream is now deprecated,
+ * prefer initStream() which is more general */
 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
     DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
@@ -1226,10 +1372,16 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
         p+=3;
     }
 
-    return dict->dictSize;
+    return (int)dict->dictSize;
 }
 
 void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
+    /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
+     * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
+     * marked as having dirty context, e.g. requiring full reset.
+     */
+    LZ4_resetStream_fast(working_stream);
+
     if (dictionary_stream != NULL) {
         /* If the current offset is zero, we will never look in the
          * external dictionary context, since there is no value a table
@@ -1248,7 +1400,8 @@ void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dic
 
 static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
 {
-    if (LZ4_dict->currentOffset + nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
+    assert(nextSize >= 0);
+    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
         /* rescale hash table */
         U32 const delta = LZ4_dict->currentOffset - 64 KB;
         const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
@@ -1265,7 +1418,10 @@ static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
 }
 
 
-int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+                                const char* source, char* dest,
+                                int inputSize, int maxOutputSize,
+                                int acceleration)
 {
     const tableType_t tableType = byU32;
     LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
@@ -1273,12 +1429,12 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch
 
     DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
 
-    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if (streamPtr->dirty) return 0;   /* Uninitialized or dirty context (e.g. a previous compression failed) : a full reset is required first */
     LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
 
     /* invalidate tiny dictionaries */
-    if ( (streamPtr->dictSize-1 < 4)   /* intentional underflow */
+    if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow */
       && (dictEnd != (const BYTE*)source) ) {
         DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
         streamPtr->dictSize = 0;
@@ -1371,7 +1527,7 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
     const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
 
     if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
-    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+    if ((U32)dictSize > dict->dictSize) dictSize = (int)dict->dictSize;
 
     memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
 
@@ -1393,6 +1549,37 @@ typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
 #undef MIN
 #define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
 
+/* Read a variable-length literal or match length : adds up input bytes until one is != 255.
+ * @return : the length accumulated so far, including when an error is reported.
+ * ip - in/out read position; advanced past every byte consumed.
+ * lencheck - input bound, only enforced by the two optional checks below.
+ * loop_check - if non-zero, test *ip >= lencheck after each byte read; sets *error to loop_error if so.
+ * initial_check - if non-zero, test *ip >= lencheck before the first read; sets *error to initial_error if so.
+ * error (output) - only written on failure, so it must be set to `ok` (0) before the call.
+ */
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
+LZ4_FORCE_INLINE unsigned
+read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error)
+{
+  unsigned length = 0;
+  unsigned s;   /* last byte read */
+  if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+    *error = initial_error;
+    return length;   /* nothing consumed : length is still 0 */
+  }
+  do {
+    s = **ip;
+    (*ip)++;
+    length += s;
+    if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection; tested before s==255, so it also fires after the final byte */
+      *error = loop_error;
+      return length;
+    }
+  } while (s==255);   /* a byte of 255 means the length continues in the next byte */
+
+  return length;
+}
+
 /*! LZ4_decompress_generic() :
  *  This generic decompression function covers all use cases.
  *  It shall be instantiated several times, using different sets of directives.
@@ -1414,234 +1601,360 @@ LZ4_decompress_generic(
                  const size_t dictSize         /* note : = 0 if noDict */
                  )
 {
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* const iend = ip + srcSize;
+    if (src == NULL) return -1;
 
-    BYTE* op = (BYTE*) dst;
-    BYTE* const oend = op + outputSize;
-    BYTE* cpy;
+    {   const BYTE* ip = (const BYTE*) src;
+        const BYTE* const iend = ip + srcSize;
 
-    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
-    const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
-    const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
+        BYTE* op = (BYTE*) dst;
+        BYTE* const oend = op + outputSize;
+        BYTE* cpy;
 
-    const int safeDecode = (endOnInput==endOnInputSize);
-    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
 
-    /* Set up the "end" pointers for the shortcut. */
-    const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
-    const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+        const int safeDecode = (endOnInput==endOnInputSize);
+        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
 
-    DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
 
-    /* Special cases */
-    assert(lowPrefix <= op);
-    assert(src != NULL);
-    if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
-    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
-    if ((endOnInput) && unlikely(srcSize==0)) return -1;
+        /* Set up the "end" pointers for the shortcut. */
+        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
 
-    /* Main Loop : decode sequences */
-    while (1) {
         const BYTE* match;
         size_t offset;
+        unsigned token;
+        size_t length;
+
+
+        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+        /* Special cases */
+        assert(lowPrefix <= op);
+        if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+        if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
+        if ((endOnInput) && unlikely(srcSize==0)) return -1;
+
+	/* Currently the fast loop shows a regression on qualcomm arm chips. */
+#if LZ4_FAST_DEC_LOOP
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE)
+            goto safe_decode;
+
+        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
+                if (error == initial_error) goto _output_error;
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
+
+                /* copy literals */
+                cpy = op+length;
+                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+                if ( ((endOnInput) && ((cpy>oend-FASTLOOP_SAFE_DISTANCE) || (ip+length>iend-(2+1+LASTLITERALS))) )
+                     || ((!endOnInput) && (cpy>oend-FASTLOOP_SAFE_DISTANCE)) )
+                    {
+                        goto safe_literal_copy;
+                    }
+                LZ4_wildCopy32(op, ip, cpy);
+                ip += length; op = cpy;
+            } else {
+                cpy = op+length;
+                /* We don't need to check oend, since we check it once for each loop below */
+                if ( ((endOnInput) && (ip+16>iend-(2+1+LASTLITERALS))))
+                    {
+                        goto safe_literal_copy;
+                    }
+                /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
+                memcpy(op, ip, 16);
+                ip += length; op = cpy;
+            }
 
-        unsigned const token = *ip++;
-        size_t length = token >> ML_BITS;  /* literal length */
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
 
-        assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+            /* get matchlength */
+            length = token & ML_MASK;
 
-        /* A two-stage shortcut for the most common case:
-         * 1) If the literal length is 0..14, and there is enough space,
-         * enter the shortcut and copy 16 bytes on behalf of the literals
-         * (in the fast mode, only 8 bytes can be safely copied this way).
-         * 2) Further if the match length is 4..18, copy 18 bytes in a similar
-         * manner; but we ensure that there's enough space in the output for
-         * those 18 bytes earlier, upon entering the shortcut (in other words,
-         * there is a combined check for both stages).
-         */
-        if ( (endOnInput ? length != RUN_MASK : length <= 8)
-            /* strictly "less than" on input, to re-enter the loop with at least one byte */
-          && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
-            /* Copy the literals */
-            memcpy(op, ip, endOnInput ? 16 : 8);
-            op += length; ip += length;
-
-            /* The second stage: prepare for match copying, decode full info.
-             * If it doesn't work out, the info won't be wasted. */
-            length = token & ML_MASK; /* match length */
-            offset = LZ4_readLE16(ip); ip += 2;
-            match = op - offset;
-            assert(match <= op); /* check overflow */
-
-            /* Do not deal with overlapping matches. */
-            if ( (length != ML_MASK)
-              && (offset >= 8)
-              && (dict==withPrefix64k || match >= lowPrefix) ) {
-                /* Copy the match. */
-                memcpy(op + 0, match + 0, 8);
-                memcpy(op + 8, match + 8, 8);
-                memcpy(op +16, match +16, 2);
-                op += length + MINMATCH;
-                /* Both stages worked, load the next token. */
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+
+            if (length == ML_MASK) {
+              variable_length_error error = ok;
+              length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
+              if (error != ok) goto _output_error;
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+            } else {
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+
+                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
+                if (!(dict == usingExtDict) || (match >= lowPrefix)) {
+                    if (offset >= 8) {
+                        memcpy(op, match, 8);
+                        memcpy(op+8, match+8, 8);
+                        memcpy(op+16, match+16, 2);
+                        op += length;
+                        continue;
+            }   }   }
+
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+                    else goto _output_error;   /* doesn't respect parsing restriction */
+                }
+
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) *op++ = *copyFrom++;
+                    } else {
+                        memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
                 continue;
             }
 
-            /* The second stage didn't work out, but the info is ready.
-             * Propel it right to the point of match copying. */
-            goto _copy_match;
-        }
+            /* copy match within block */
+            cpy = op + length;
 
-        /* decode literal length */
-        if (length == RUN_MASK) {
-            unsigned s;
-            if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error;   /* overflow detection */
-            do {
-                s = *ip++;
-                length += s;
-            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
-            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
-            if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
+            assert((op <= oend) && (oend-op >= 32));
+            if (unlikely(offset<16)) {
+                LZ4_memcpy_using_offset(op, match, cpy, offset);
+            } else {
+                LZ4_wildCopy32(op, match, cpy);
+            }
+
+            op = cpy;   /* wildcopy correction */
         }
+    safe_decode:
+#endif
 
-        /* copy literals */
-        cpy = op+length;
-        LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
-        if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
-          || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
-        {
-            if (partialDecoding) {
-                if (cpy > oend) { cpy = oend; length = oend-op; }             /* Partial decoding : stop in the middle of literal segment */
-                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
-            } else {
-                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
-                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* A two-stage shortcut for the most common case:
+             * 1) If the literal length is 0..14, and there is enough space,
+             * enter the shortcut and copy 16 bytes on behalf of the literals
+             * (in the fast mode, only 8 bytes can be safely copied this way).
+             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+             * manner; but we ensure that there's enough space in the output for
+             * those 18 bytes earlier, upon entering the shortcut (in other words,
+             * there is a combined check for both stages).
+             */
+            if ( (endOnInput ? length != RUN_MASK : length <= 8)
+                /* strictly "less than" on input, to re-enter the loop with at least one byte */
+              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+                /* Copy the literals */
+                memcpy(op, ip, endOnInput ? 16 : 8);
+                op += length; ip += length;
+
+                /* The second stage: prepare for match copying, decode full info.
+                 * If it doesn't work out, the info won't be wasted. */
+                length = token & ML_MASK; /* match length */
+                offset = LZ4_readLE16(ip); ip += 2;
+                match = op - offset;
+                assert(match <= op); /* check overflow */
+
+                /* Do not deal with overlapping matches. */
+                if ( (length != ML_MASK)
+                  && (offset >= 8)
+                  && (dict==withPrefix64k || match >= lowPrefix) ) {
+                    /* Copy the match. */
+                    memcpy(op + 0, match + 0, 8);
+                    memcpy(op + 8, match + 8, 8);
+                    memcpy(op +16, match +16, 2);
+                    op += length + MINMATCH;
+                    /* Both stages worked, load the next token. */
+                    continue;
+                }
+
+                /* The second stage didn't work out, but the info is ready.
+                 * Propel it right to the point of match copying. */
+                goto _copy_match;
             }
-            memcpy(op, ip, length);
-            ip += length;
-            op += length;
-            if (!partialDecoding || (cpy == oend)) {
-                /* Necessarily EOF, due to parsing restrictions */
-                break;
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+              variable_length_error error = ok;
+              length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
+              if (error == initial_error) goto _output_error;
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
             }
 
-        } else {
-            LZ4_wildCopy(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
-            ip += length; op = cpy;
-        }
+            /* copy literals */
+            cpy = op+length;
+#if LZ4_FAST_DEC_LOOP
+        safe_literal_copy:
+#endif
+            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
+              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+            {
+                if (partialDecoding) {
+                    if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); }  /* Partial decoding : stop in the middle of literal segment */
+                    if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+                } else {
+                    if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+                }
+                memcpy(op, ip, length);
+                ip += length;
+                op += length;
+                if (!partialDecoding || (cpy == oend)) {
+                    /* Necessarily EOF, due to parsing restrictions */
+                    break;
+                }
 
-        /* get offset */
-        offset = LZ4_readLE16(ip); ip+=2;
-        match = op - offset;
+            } else {
+                LZ4_wildCopy(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+                ip += length; op = cpy;
+            }
 
-        /* get matchlength */
-        length = token & ML_MASK;
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
 
-_copy_match:
-        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
-        if (!partialDecoding) {
-            assert(oend > op);
-            assert(oend - op >= 4);
-            LZ4_write32(op, 0);   /* silence an msan warning when offset==0; costs <1%; */
-        }   /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
+            /* get matchlength */
+            length = token & ML_MASK;
+
+    _copy_match:
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+            if (!partialDecoding) {
+                assert(oend > op);
+                assert(oend - op >= 4);
+                LZ4_write32(op, 0);   /* silence an msan warning when offset==0; costs <1%; */
+            }   /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
+
+            if (length == ML_MASK) {
+              variable_length_error error = ok;
+              length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
+              if (error != ok) goto _output_error;
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
+            }
+            length += MINMATCH;
 
-        if (length == ML_MASK) {
-            unsigned s;
-            do {
-                s = *ip++;
-                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
-                length += s;
-            } while (s==255);
-            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
-        }
-        length += MINMATCH;
+#if LZ4_FAST_DEC_LOOP
+        safe_match_copy:
+#endif
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+                    else goto _output_error;   /* doesn't respect parsing restriction */
+                }
 
-        /* match starting within external dictionary */
-        if ((dict==usingExtDict) && (match < lowPrefix)) {
-            if (unlikely(op+length > oend-LASTLITERALS)) {
-                if (partialDecoding) length = MIN(length, (size_t)(oend-op));
-                else goto _output_error;   /* doesn't respect parsing restriction */
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) *op++ = *copyFrom++;
+                    } else {
+                        memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
+                continue;
             }
 
-            if (length <= (size_t)(lowPrefix-match)) {
-                /* match fits entirely within external dictionary : just copy */
-                memmove(op, dictEnd - (lowPrefix-match), length);
-                op += length;
-            } else {
-                /* match stretches into both external dictionary and current block */
-                size_t const copySize = (size_t)(lowPrefix - match);
-                size_t const restSize = length - copySize;
-                memcpy(op, dictEnd - copySize, copySize);
-                op += copySize;
-                if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
-                    BYTE* const endOfMatch = op + restSize;
-                    const BYTE* copyFrom = lowPrefix;
-                    while (op < endOfMatch) *op++ = *copyFrom++;
+            /* copy match within block */
+            cpy = op + length;
+
+            /* partialDecoding : may end anywhere within the block */
+            assert(op<=oend);
+            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                size_t const mlen = MIN(length, (size_t)(oend-op));
+                const BYTE* const matchEnd = match + mlen;
+                BYTE* const copyEnd = op + mlen;
+                if (matchEnd > op) {   /* overlap copy */
+                    while (op < copyEnd) *op++ = *match++;
                 } else {
-                    memcpy(op, lowPrefix, restSize);
-                    op += restSize;
-            }   }
-            continue;
-        }
+                    memcpy(op, match, mlen);
+                }
+                op = copyEnd;
+                if (op==oend) break;
+                continue;
+            }
 
-        /* copy match within block */
-        cpy = op + length;
-
-        /* partialDecoding : may not respect endBlock parsing restrictions */
-        assert(op<=oend);
-        if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
-            size_t const mlen = MIN(length, (size_t)(oend-op));
-            const BYTE* const matchEnd = match + mlen;
-            BYTE* const copyEnd = op + mlen;
-            if (matchEnd > op) {   /* overlap copy */
-                while (op < copyEnd) *op++ = *match++;
+            if (unlikely(offset<8)) {
+                op[0] = match[0];
+                op[1] = match[1];
+                op[2] = match[2];
+                op[3] = match[3];
+                match += inc32table[offset];
+                memcpy(op+4, match, 4);
+                match -= dec64table[offset];
             } else {
-                memcpy(op, match, mlen);
+                memcpy(op, match, 8);
+                match += 8;
             }
-            op = copyEnd;
-            if (op==oend) break;
-            continue;
-        }
-
-        if (unlikely(offset<8)) {
-            op[0] = match[0];
-            op[1] = match[1];
-            op[2] = match[2];
-            op[3] = match[3];
-            match += inc32table[offset];
-            memcpy(op+4, match, 4);
-            match -= dec64table[offset];
-        } else {
-            memcpy(op, match, 8);
-            match += 8;
-        }
-        op += 8;
-
-        if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
-            BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
-            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
-            if (op < oCopyLimit) {
-                LZ4_wildCopy(op, match, oCopyLimit);
-                match += oCopyLimit - op;
-                op = oCopyLimit;
+            op += 8;
+
+            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+                if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+                if (op < oCopyLimit) {
+                    LZ4_wildCopy(op, match, oCopyLimit);
+                    match += oCopyLimit - op;
+                    op = oCopyLimit;
+                }
+                while (op < cpy) *op++ = *match++;
+            } else {
+                memcpy(op, match, 8);
+                if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
             }
-            while (op < cpy) *op++ = *match++;
-        } else {
-            memcpy(op, match, 8);
-            if (length > 16) LZ4_wildCopy(op+8, match+8, cpy);
+            op = cpy;   /* wildcopy correction */
         }
-        op = cpy;   /* wildcopy correction */
-    }
 
-    /* end of decoding */
-    if (endOnInput)
-       return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
-    else
-       return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
+        /* end of decoding */
+        if (endOnInput)
+           return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
+        else
+           return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
 
-    /* Overflow error detected */
-_output_error:
-    return (int) (-(((const char*)ip)-src))-1;
+        /* Overflow error detected */
+    _output_error:
+        return (int) (-(((const char*)ip)-src))-1;
+    }
 }
 
 
@@ -1745,12 +2058,13 @@ int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalS
 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 {
     LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
+    LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));    /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */
     return lz4s;
 }
 
 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
 {
-    if (!LZ4_stream) return 0;   /* support free on NULL */
+    if (LZ4_stream == NULL) return 0;   /* support free on NULL */
     FREEMEM(LZ4_stream);
     return 0;
 }
@@ -1808,7 +2122,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
         assert(lz4sd->extDictSize == 0);
         result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
         if (result <= 0) return result;
-        lz4sd->prefixSize = result;
+        lz4sd->prefixSize = (size_t)result;
         lz4sd->prefixEnd = (BYTE*)dest + result;
     } else if (lz4sd->prefixEnd == (BYTE*)dest) {
         /* They're rolling the current segment. */
@@ -1821,7 +2135,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
             result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
                                                     lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
-        lz4sd->prefixSize += result;
+        lz4sd->prefixSize += (size_t)result;
         lz4sd->prefixEnd  += result;
     } else {
         /* The buffer wraps around, or they're switching to another buffer. */
@@ -1830,7 +2144,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
         result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
                                                   lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
-        lz4sd->prefixSize = result;
+        lz4sd->prefixSize = (size_t)result;
         lz4sd->prefixEnd  = (BYTE*)dest + result;
     }
 
@@ -1842,12 +2156,13 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
 {
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
     int result;
+    assert(originalSize >= 0);
 
     if (lz4sd->prefixSize == 0) {
         assert(lz4sd->extDictSize == 0);
         result = LZ4_decompress_fast(source, dest, originalSize);
         if (result <= 0) return result;
-        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixSize = (size_t)originalSize;
         lz4sd->prefixEnd = (BYTE*)dest + originalSize;
     } else if (lz4sd->prefixEnd == (BYTE*)dest) {
         if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
@@ -1856,7 +2171,7 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
             result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
                                                     lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
-        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixSize += (size_t)originalSize;
         lz4sd->prefixEnd  += originalSize;
     } else {
         lz4sd->extDictSize = lz4sd->prefixSize;
@@ -1864,7 +2179,7 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
         result = LZ4_decompress_fast_extDict(source, dest, originalSize,
                                              lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
-        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixSize = (size_t)originalSize;
         lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
     }
 
diff --git a/lib/lz4.h b/lib/lz4.h
index 059ef7c..1589be9 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h
@@ -51,19 +51,23 @@ extern "C" {
   multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
 
   The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to the user.
   Compression can be done in:
     - a single step (described as Simple Functions)
     - a single step, reusing a context (described in Advanced Functions)
     - unbounded multiple steps (described as Streaming compression)
 
-  lz4.h provides block compression functions. It gives full buffer control to user.
-  Decompressing an lz4-compressed block also requires metadata (such as compressed size).
-  Each application is free to encode such metadata in whichever way it wants.
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing a block requires additional metadata, such as its compressed size.
+  Each application is free to encode and pass such metadata in whichever way it wants.
 
-  An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
-  take care of encoding standard metadata alongside LZ4-compressed blocks.
-  Frame format is required for interoperability.
-  It is delivered through a companion API, declared in lz4frame.h.
+  lz4.h only handles blocks; it cannot generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  This is required for compressed data to be self-contained and portable.
+  Frame format is delivered through a companion API, declared in lz4frame.h.
+  Note that the `lz4` CLI can only manage frames.
 */
 
 /*^***************************************************************
@@ -92,8 +96,8 @@ extern "C" {
 
 /*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    8    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
 
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 
@@ -103,7 +107,7 @@ extern "C" {
 #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
 
 LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
-LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; unseful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version */
 
 
 /*-************************************
@@ -112,14 +116,15 @@ LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string;
 /*!
  * LZ4_MEMORY_USAGE :
  * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
- * Increasing memory usage improves compression ratio
- * Reduced memory usage may improve speed, thanks to cache effect
+ * Increasing memory usage improves compression ratio.
+ * Reduced memory usage may improve speed, thanks to better cache locality.
  * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
 #ifndef LZ4_MEMORY_USAGE
 # define LZ4_MEMORY_USAGE 14
 #endif
 
+
 /*-************************************
 *  Simple Functions
 **************************************/
@@ -130,21 +135,22 @@ LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string;
     It also runs faster, so it's a recommended setting.
     If the function cannot compress 'src' into a more limited 'dst' budget,
     compression stops *immediately*, and the function result is zero.
-    Note : as a consequence, 'dst' content is not valid.
-    Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+    In which case, 'dst' content is undefined (invalid).
         srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
         dstCapacity : size of buffer 'dst' (which must be already allocated)
-        return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
-                  or 0 if compression fails */
+       @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+                  or 0 if compression fails
+    Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor reads outside 'src' buffer).
+*/
 LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
 
 /*! LZ4_decompress_safe() :
     compressedSize : is the exact complete size of the compressed block.
     dstCapacity : is the size of destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+   @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
              If destination buffer is not large enough, decoding will stop and output an error code (negative value).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against malicious data packets.
+    Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor reads outside 'src' buffer).
 */
 LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
 
@@ -155,8 +161,7 @@ LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSi
 #define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
 #define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
 
-/*!
-LZ4_compressBound() :
+/*! LZ4_compressBound() :
     Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
     This function is primarily useful for memory allocation purposes (destination buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
@@ -167,8 +172,7 @@ LZ4_compressBound() :
 */
 LZ4LIB_API int LZ4_compressBound(int inputSize);
 
-/*!
-LZ4_compress_fast() :
+/*! LZ4_compress_fast() :
     Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
@@ -178,13 +182,12 @@ LZ4_compress_fast() :
 LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
 
-/*!
-LZ4_compress_fast_extState() :
-    Same compression function, just using an externally allocated memory space to store compression state.
-    Use LZ4_sizeofState() to know how much memory must be allocated,
-    and allocate it on 8-bytes boundaries (using malloc() typically).
-    Then, provide this buffer as 'void* state' to compression function.
-*/
+/*! LZ4_compress_fast_extState() :
+ *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ *  Use LZ4_sizeofState() to know how much memory must be allocated,
+ *  and allocate it on 8-byte boundaries (using `malloc()` typically).
+ *  Then, provide this buffer as `void* state` to compression function.
+ */
 LZ4LIB_API int LZ4_sizeofState(void);
 LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
@@ -204,27 +207,6 @@ LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* d
 LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
 
 
-/*! LZ4_decompress_fast() : **unsafe!**
- *  This function used to be a bit faster than LZ4_decompress_safe(),
- *  though situation has changed in recent versions,
- *  and now `LZ4_decompress_safe()` can be as fast and sometimes faster than `LZ4_decompress_fast()`.
- *  Moreover, LZ4_decompress_fast() is not protected vs malformed input, as it doesn't perform full validation of compressed data.
- *  As a consequence, this function is no longer recommended, and may be deprecated in future versions.
- *  It's only remaining specificity is that it can decompress data without knowing its compressed size.
- *
- *  originalSize : is the uncompressed size to regenerate.
- *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
- * @return : number of bytes read from source buffer (== compressed size).
- *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
- *  note : This function requires uncompressed originalSize to be known in advance.
- *         The function never writes past the output buffer.
- *         However, since it doesn't know its 'src' size, it may read past the intended input.
- *         Also, because match offsets are not validated during decoding,
- *         reads from 'src' may underflow.
- *         Use this function in trusted environment **only**.
- */
-LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
-
 /*! LZ4_decompress_safe_partial() :
  *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
  *  into destination buffer 'dst' of size 'dstCapacity'.
@@ -257,30 +239,49 @@ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcS
 ***********************************************/
 typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
 
-/*! LZ4_createStream() and LZ4_freeStream() :
- *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
- *  LZ4_freeStream() releases its memory.
- */
 LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
 LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
 
-/*! LZ4_resetStream() :
- *  An LZ4_stream_t structure can be allocated once and re-used multiple times.
- *  Use this function to start compressing a new stream.
+/*! LZ4_resetStream_fast() : v1.9.0+
+ *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  An LZ4_stream_t must be initialized once before usage.
+ *  This is automatically done when created by LZ4_createStream().
+ *  However, should the LZ4_stream_t be simply declared on stack (for example),
+ *  it's necessary to initialize it first, using LZ4_initStream().
+ *
+ *  After init, start any new stream with LZ4_resetStream_fast().
+ *  The same LZ4_stream_t can be re-used multiple times consecutively
+ *  and compress multiple streams,
+ *  provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ *  but is not compatible with memory regions containing garbage data.
+ *
+ *  Note: it's only useful to call LZ4_resetStream_fast()
+ *        in the context of streaming compression.
+ *        The *extState* functions perform their own resets.
+ *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
  */
-LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
 
 /*! LZ4_loadDict() :
- *  Use this function to load a static dictionary into LZ4_stream_t.
- *  Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ *  Use this function to reference a static dictionary into LZ4_stream_t.
+ *  The dictionary must remain available during compression.
+ *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ *  The same dictionary will have to be loaded on decompression side for successful decoding.
+ *  Dictionaries are useful for better compression of small data (KB range).
+ *  While LZ4 accepts any input as dictionary,
+ *  results are generally better when using Zstandard's Dictionary Builder.
  *  Loading a size of 0 is allowed, and is the same as reset.
- * @return : dictionary size, in bytes (necessarily <= 64 KB)
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
  */
 LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
 
 /*! LZ4_compress_fast_continue() :
  *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
- *  'dst' buffer must be already allocated.
+ * 'dst' buffer must be already allocated.
  *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
  *
  * @return : size of compressed block
@@ -288,10 +289,10 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in
  *
  *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
  *           Each block has precise boundaries.
+ *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
  *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
- *           Each block must be decompressed separately, calling LZ4_decompress_*() with associated metadata.
  *
- *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory!
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
  *
  *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
  *           Make sure that buffers are separated, by at least one byte.
@@ -299,7 +300,7 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in
  *
  *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
  *
- *  Note 5 : After an error, the stream status is invalid, it can only be reset or freed.
+ *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
  */
 LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
@@ -335,7 +336,7 @@ LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_str
  */
 LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 
-/*! LZ4_decoderRingBufferSize() : v1.8.2
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
  *  Note : in a ring buffer scenario (optional),
  *  blocks are presumed decompressed next to each other
  *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
@@ -347,7 +348,7 @@ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const
  *           or 0 if there is an error (invalid maxBlockSize).
  */
 LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
-#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs))  /* for static allocation; mbs presumed valid */
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
 
 /*! LZ4_decompress_*_continue() :
  *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
@@ -375,83 +376,67 @@ LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
  *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 */
 LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
-LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
 
 
 /*! LZ4_decompress_*_usingDict() :
  *  These decoding functions work the same as
  *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
  *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
- *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ *  Performance tip : Decompression speed can be substantially increased
+ *                    when dst == dictStart + dictSize.
  */
 LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
-LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
 
 
-/*^**********************************************
+/*^*************************************
  * !!!!!!   STATIC LINKING ONLY   !!!!!!
- ***********************************************/
+ ***************************************/
 
-/*-************************************
- *  Unstable declarations
- **************************************
- * Declarations in this section should be considered unstable.
- * Use at your own peril, etc., etc.
- * They may be removed in the future.
- * Their signatures may change.
- **************************************/
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else
+#define LZ4LIB_STATIC_API
+#endif
 
 #ifdef LZ4_STATIC_LINKING_ONLY
 
-/*! LZ4_resetStream_fast() :
- *  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
- *  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
- *
- *  Note:
- *  Using this in advance of a non- streaming-compression function is redundant,
- *  and potentially bad for performance, since they all perform their own custom
- *  reset internally.
- *
- *  Differences from LZ4_resetStream():
- *  When an LZ4_stream_t is known to be in a internally coherent state,
- *  it can often be prepared for a new compression with almost no work, only
- *  sometimes falling back to the full, expensive reset that is always required
- *  when the stream is in an indeterminate state (i.e., the reset performed by
- *  LZ4_resetStream()).
- *
- *  LZ4_streams are guaranteed to be in a valid state when:
- *  - returned from LZ4_createStream()
- *  - reset by LZ4_resetStream()
- *  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
- *  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
- *  - the stream was in a valid state and was then used in any compression call
- *    that returned success
- *  - the stream was in an indeterminate state and was used in a compression
- *    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
- *    that returned success
- *
- *  When a stream isn't known to be in a valid state, it is not safe to pass to
- *  any fastReset or streaming function. It must first be cleansed by the full
- *  LZ4_resetStream().
- */
-LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
 
 /*! LZ4_compress_fast_extState_fastReset() :
  *  A variant of LZ4_compress_fast_extState().
  *
- *  Using this variant avoids an expensive initialization step. It is only safe
- *  to call if the state buffer is known to be correctly initialized already
- *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
- *  initialized"). From a high level, the difference is that this function
- *  initializes the provided state with a call to something like
- *  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
- *  call to LZ4_resetStream().
+ *  Using this variant avoids an expensive initialization step.
+ *  It is only safe to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ *  From a high level, the difference is that
+ *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
  */
-LZ4LIB_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
 /*! LZ4_attach_dictionary() :
- *  This is an experimental API that allows for the efficient use of a
- *  static dictionary many times.
+ *  This is an experimental API that allows
+ *  efficient use of a static dictionary many times.
  *
  *  Rather than re-loading the dictionary buffer into a working context before
  *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
@@ -462,8 +447,8 @@ LZ4LIB_API int LZ4_compress_fast_extState_fastReset (void* state, const char* sr
  *  Currently, only streams which have been prepared by LZ4_loadDict() should
  *  be expected to work.
  *
- *  Alternatively, the provided dictionary stream pointer may be NULL, in which
- *  case any existing dictionary stream is unset.
+ *  Alternatively, the provided dictionaryStream may be NULL,
+ *  in which case any existing dictionary stream is unset.
  *
  *  If a dictionary is provided, it replaces any pre-existing stream history.
  *  The dictionary contents are the only history that can be referenced and
@@ -475,17 +460,18 @@ LZ4LIB_API int LZ4_compress_fast_extState_fastReset (void* state, const char* sr
  *  stream (and source buffer) must remain in-place / accessible / unchanged
  *  through the completion of the first compression call on the stream.
  */
-LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
+LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
 
 #endif
 
-/*-************************************
- *  Private definitions
- **************************************
- * Do not use these definitions.
- * They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
- * Using these definitions will expose code to API and/or ABI break in future versions of the library.
- **************************************/
+
+/*-************************************************************
+ *  PRIVATE DEFINITIONS
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose code to API and/or ABI break in future versions of the library.
+ **************************************************************/
 #define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
 #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
 #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
@@ -497,7 +483,7 @@ typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
 struct LZ4_stream_t_internal {
     uint32_t hashTable[LZ4_HASH_SIZE_U32];
     uint32_t currentOffset;
-    uint16_t initCheck;
+    uint16_t dirty;
     uint16_t tableType;
     const uint8_t* dictionary;
     const LZ4_stream_t_internal* dictCtx;
@@ -517,7 +503,7 @@ typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
 struct LZ4_stream_t_internal {
     unsigned int hashTable[LZ4_HASH_SIZE_U32];
     unsigned int currentOffset;
-    unsigned short initCheck;
+    unsigned short dirty;
     unsigned short tableType;
     const unsigned char* dictionary;
     const LZ4_stream_t_internal* dictCtx;
@@ -526,38 +512,54 @@ struct LZ4_stream_t_internal {
 
 typedef struct {
     const unsigned char* externalDict;
-    size_t extDictSize;
     const unsigned char* prefixEnd;
+    size_t extDictSize;
     size_t prefixSize;
 } LZ4_streamDecode_t_internal;
 
 #endif
 
-/*!
- * LZ4_stream_t :
- * information structure to track an LZ4 stream.
- * init this structure before first use.
- * note : only use in association with static linking !
- *        this definition is not API/ABI safe,
- *        it may change in a future version !
+/*! LZ4_stream_t :
+ *  information structure to track an LZ4 stream.
+ *  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ *  The structure definition can be convenient for static allocation
+ *  (on stack, or as part of larger structure).
+ *  Init this structure with LZ4_initStream() before first use.
+ *  note : only use this definition in association with static linking !
+ *    this definition is not API/ABI safe, and may change in a future version.
  */
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ )
 #define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
 union LZ4_stream_u {
     unsigned long long table[LZ4_STREAMSIZE_U64];
     LZ4_stream_t_internal internal_donotuse;
 } ;  /* previously typedef'd to LZ4_stream_t */
 
+/*! LZ4_initStream() : v1.9.0+
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is automatically done when invoking LZ4_createStream(),
+ *  but it's not when the structure is simply declared on stack (for example).
+ *
+ *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ *  It can also initialize any arbitrary buffer of sufficient size,
+ *  and will @return a pointer of proper type upon initialization.
+ *
+ *  Note : initialization fails if size and alignment conditions are not respected.
+ *         In which case, the function will @return NULL.
+ *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ *  Note3: Before v1.9.0, use LZ4_resetStream() instead
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
 
-/*!
- * LZ4_streamDecode_t :
- * information structure to track an LZ4 stream during decompression.
- * init this structure  using LZ4_setStreamDecode (or memset()) before first use
- * note : only use in association with static linking !
- *        this definition is not API/ABI safe,
- *        and may change in a future version !
+
+/*! LZ4_streamDecode_t :
+ *  information structure to track an LZ4 stream during decompression.
+ *  init this structure  using LZ4_setStreamDecode() before first use.
+ *  note : only use in association with static linking !
+ *         this definition is not API/ABI safe,
+ *         and may change in a future version !
  */
-#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )
 #define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
 union LZ4_streamDecode_u {
     unsigned long long table[LZ4_STREAMDECODESIZE_U64];
@@ -570,11 +572,16 @@ union LZ4_streamDecode_u {
 **************************************/
 
 /*! Deprecation warnings
-   Should deprecation warnings be a problem,
-   it is generally possible to disable them,
-   typically with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual.
-   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
+ *
+ *  Deprecated functions make the compiler generate a warning when invoked.
+ *  This is meant to invite users to update their source code.
+ *  Should deprecation warnings be a problem, it is generally possible to disable them,
+ *  typically with -Wno-deprecated-declarations for gcc
+ *  or _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ *  before including the header file.
+ */
 #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
 #  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
 #else
@@ -594,8 +601,8 @@ union LZ4_streamDecode_u {
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
 
 /* Obsolete compression functions */
-LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress               (const char* source, char* dest, int sourceSize);
-LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* source, char* dest, int sourceSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
@@ -616,13 +623,54 @@ LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompres
  */
 LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
 LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
-LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API  int   LZ4_resetStreamState(void* state, char* inputBuffer);
-LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API     char* LZ4_slideInputBuffer (void* state);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
 
 /* Obsolete streaming decoding functions */
 LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
 LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
 
+/*! LZ4_decompress_fast() : **unsafe!**
+ *  These functions are generally slightly faster than LZ4_decompress_safe(),
+ *  though the difference is small (generally ~5%).
+ *  However, the real cost is a risk :  LZ4_decompress_safe() is protected vs malformed input, while `LZ4_decompress_fast()` is not, making it a security liability.
+ *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *  These functions will generate a deprecation warning in the future.
+ *
+ *  The last remaining specificity of LZ4_decompress_fast() is that it can decompress a block without knowing its compressed size.
+ *  Note that even that functionality could be achieved in a more secure manner if need be,
+ *  though it would require new prototypes, and adaptation of the implementation to this new use case.
+ *
+ *  Parameters:
+ *  originalSize : is the uncompressed size to regenerate.
+ *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ *           The function expects to finish at block's end exactly.
+ *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ *  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ *         However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ *         Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ *         These issues never happen if input (compressed) data is correct.
+ *         But they may happen if input data is invalid (error or intentional tampering).
+ *         As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+
+/* LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")  */
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+/* LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") */
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+/* LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") */
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is done with LZ4_initStream(), or LZ4_resetStream().
+ *  Consider switching to LZ4_initStream(),
+ *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+
 #endif /* LZ4_H_2983827168210 */
 
 
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index 08bf0fa..19efd0b 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -1,41 +1,44 @@
 /*
-LZ4 auto-framing library
-Copyright (C) 2011-2016, Yann Collet.
-
-BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-You can contact the author at :
-- LZ4 homepage : http://www.lz4.org
-- LZ4 source repository : https://github.com/lz4/lz4
-*/
+ * LZ4 auto-framing library
+ * Copyright (C) 2011-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following disclaimer
+ *   in the documentation and/or other materials provided with the
+ *   distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://www.lz4.org
+ * - LZ4 source repository : https://github.com/lz4/lz4
+ */
 
 /* LZ4F is a stand-alone API to create LZ4-compressed Frames
-*  in full conformance with specification v1.5.0
-*  All related operations, including memory management, are handled by the library.
-* */
+ * in full conformance with specification v1.6.1 .
+ * This library relies upon memory management capabilities (malloc, free)
+ * provided either by <stdlib.h>,
+ * or redirected towards another library of user's choice
+ * (see Memory Routines below).
+ */
 
 
 /*-************************************
@@ -62,16 +65,27 @@ You can contact the author at :
 /*-************************************
 *  Memory routines
 **************************************/
+/*
+ * User may redirect invocations of
+ * malloc(), calloc() and free()
+ * towards another library or solution of their choice
+ * by modifying below section.
+ */
 #include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOC(s)   malloc(s)
-#define ALLOC_AND_ZERO(s)   calloc(1,s)
-#define FREEMEM        free
+#define ALLOC(s)       malloc(s)
+#ifndef LZ4_SRC_INCLUDED   /* avoid redefinition when sources are coalesced */
+#  define ALLOC_AND_ZERO(s)  calloc(1,(s))
+#endif
+#define FREEMEM(p)     free(p)
+
 #include <string.h>   /* memset, memcpy, memmove */
-#define MEM_INIT       memset
+#ifndef LZ4_SRC_INCLUDED  /* avoid redefinition when sources are coalesced */
+#  define MEM_INIT       memset
+#endif
 
 
 /*-************************************
-*  Includes
+*  Library declarations
 **************************************/
 #define LZ4F_STATIC_LINKING_ONLY
 #include "lz4frame.h"
@@ -134,8 +148,8 @@ static U32 LZ4F_readLE32 (const void* src)
 {
     const BYTE* const srcPtr = (const BYTE*)src;
     U32 value32 = srcPtr[0];
-    value32 += (srcPtr[1]<<8);
-    value32 += (srcPtr[2]<<16);
+    value32 += ((U32)srcPtr[1])<< 8;
+    value32 += ((U32)srcPtr[2])<<16;
     value32 += ((U32)srcPtr[3])<<24;
     return value32;
 }
@@ -180,9 +194,11 @@ static void LZ4F_writeLE64 (void* dst, U64 value64)
 /*-************************************
 *  Constants
 **************************************/
-#define KB *(1<<10)
-#define MB *(1<<20)
-#define GB *(1<<30)
+#ifndef LZ4_SRC_INCLUDED   /* avoid double definition */
+#  define KB *(1<<10)
+#  define MB *(1<<20)
+#  define GB *(1<<30)
+#endif
 
 #define _1BIT  0x01
 #define _2BITS 0x03
@@ -195,9 +211,10 @@ static void LZ4F_writeLE64 (void* dst, U64 value64)
 #define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U
 #define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB
 
-static const size_t minFHSize = 7;
+static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN;   /*  7 */
 static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX;   /* 19 */
-static const size_t BHSize = 4;
+static const size_t BHSize = 4;  /* block header : size, and compress flag */
+static const size_t BFSize = 4;  /* block footer : checksum (optional) */
 
 
 /*-************************************
@@ -258,22 +275,22 @@ unsigned LZ4F_getVersion(void) { return LZ4F_VERSION; }
 
 int LZ4F_compressionLevel_max(void) { return LZ4HC_CLEVEL_MAX; }
 
-
-/*-************************************
-*  Private functions
-**************************************/
-#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
-
-static size_t LZ4F_getBlockSize(unsigned blockSizeID)
+size_t LZ4F_getBlockSize(unsigned blockSizeID)
 {
     static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
 
     if (blockSizeID == 0) blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
-    blockSizeID -= 4;
-    if (blockSizeID > 3) return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+    if (blockSizeID < LZ4F_max64KB || blockSizeID > LZ4F_max4MB)
+        return err0r(LZ4F_ERROR_maxBlockSize_invalid);
+    blockSizeID -= LZ4F_max64KB;
     return blockSizes[blockSizeID];
 }
 
+/*-************************************
+*  Private functions
+**************************************/
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
+
 static BYTE LZ4F_headerChecksum (const void* header, size_t length)
 {
     U32 const xxh = XXH32(header, length, 0);
@@ -323,11 +340,10 @@ static size_t LZ4F_compressBound_internal(size_t srcSize,
         size_t const lastBlockSize = flush ? partialBlockSize : 0;
         unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0);
 
-        size_t const blockHeaderSize = 4;
-        size_t const blockCRCSize = 4 * prefsPtr->frameInfo.blockChecksumFlag;
-        size_t const frameEnd = 4 + (prefsPtr->frameInfo.contentChecksumFlag*4);
+        size_t const blockCRCSize = BFSize * prefsPtr->frameInfo.blockChecksumFlag;
+        size_t const frameEnd = BHSize + (prefsPtr->frameInfo.contentChecksumFlag*BFSize);
 
-        return ((blockHeaderSize + blockCRCSize) * nbBlocks) +
+        return ((BHSize + blockCRCSize) * nbBlocks) +
                (blockSize * nbFullBlocks) + lastBlockSize + frameEnd;
     }
 }
@@ -388,15 +404,18 @@ size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
       if (LZ4F_isError(headerSize)) return headerSize;
       dstPtr += headerSize;   /* header size */ }
 
-    { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
+    assert(dstEnd >= dstPtr);
+    { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, (size_t)(dstEnd-dstPtr), srcBuffer, srcSize, &options);
       if (LZ4F_isError(cSize)) return cSize;
       dstPtr += cSize; }
 
-    { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
+    assert(dstEnd >= dstPtr);
+    { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, (size_t)(dstEnd-dstPtr), &options);   /* flush last block, and generate suffix */
       if (LZ4F_isError(tailSize)) return tailSize;
       dstPtr += tailSize; }
 
-    return (dstPtr - dstStart);
+    assert(dstEnd >= dstStart);
+    return (size_t)(dstPtr - dstStart);
 }
 
 
@@ -428,7 +447,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity,
     if (preferencesPtr == NULL ||
         preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN)
     {
-        LZ4_resetStream(&lz4ctx);
+        LZ4_initStream(&lz4ctx, sizeof(lz4ctx));
         cctxPtr->lz4CtxPtr = &lz4ctx;
         cctxPtr->lz4CtxAlloc = 1;
         cctxPtr->lz4CtxState = 1;
@@ -598,20 +617,22 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
         if (cctxPtr->lz4CtxAlloc < ctxTypeID) {
             FREEMEM(cctxPtr->lz4CtxPtr);
             if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
-                cctxPtr->lz4CtxPtr = (void*)LZ4_createStream();
+                cctxPtr->lz4CtxPtr = LZ4_createStream();
             } else {
-                cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC();
+                cctxPtr->lz4CtxPtr = LZ4_createStreamHC();
             }
-            if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed);
+            if (cctxPtr->lz4CtxPtr == NULL)
+                return err0r(LZ4F_ERROR_allocation_failed);
             cctxPtr->lz4CtxAlloc = ctxTypeID;
             cctxPtr->lz4CtxState = ctxTypeID;
         } else if (cctxPtr->lz4CtxState != ctxTypeID) {
             /* otherwise, a sufficient buffer is allocated, but we need to
              * reset it to the correct context type */
             if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
-                LZ4_resetStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr);
+                LZ4_initStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr, sizeof (LZ4_stream_t));
             } else {
-                LZ4_resetStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+                LZ4_initStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, sizeof(LZ4_streamHC_t));
+                LZ4_setCompressionLevel((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
             }
             cctxPtr->lz4CtxState = ctxTypeID;
         }
@@ -623,8 +644,8 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
     cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
 
     {   size_t const requiredBuffSize = preferencesPtr->autoFlush ?
-                (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB :  /* only needs windows size */
-                cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 128 KB);
+                ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 64 KB : 0) :  /* only needs past data up to window size */
+                cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 128 KB : 0);
 
         if (cctxPtr->maxBufferSize < requiredBuffSize) {
             cctxPtr->maxBufferSize = 0;
@@ -635,7 +656,7 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
     }   }
     cctxPtr->tmpIn = cctxPtr->tmpBuff;
     cctxPtr->tmpInSize = 0;
-    XXH32_reset(&(cctxPtr->xxh), 0);
+    (void)XXH32_reset(&(cctxPtr->xxh), 0);
 
     /* context init */
     cctxPtr->cdict = cdict;
@@ -644,7 +665,7 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
         LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked);
     }
     if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) {
-          LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
+        LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
     }
 
     /* Magic Number */
@@ -656,7 +677,7 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
     *dstPtr++ = (BYTE)(((1 & _2BITS) << 6)    /* Version('01') */
         + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
         + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
-        + ((cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
+        + ((unsigned)(cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
         + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
         +  (cctxPtr->prefs.frameInfo.dictID > 0) );
     /* BD Byte */
@@ -673,11 +694,11 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
         dstPtr += 4;
     }
     /* Header CRC Byte */
-    *dstPtr = LZ4F_headerChecksum(headerStart, dstPtr - headerStart);
+    *dstPtr = LZ4F_headerChecksum(headerStart, (size_t)(dstPtr - headerStart));
     dstPtr++;
 
     cctxPtr->cStage = 1;   /* header written, now request input data block */
-    return (dstPtr - dstStart);
+    return (size_t)(dstPtr - dstStart);
 }
 
 
@@ -686,7 +707,7 @@ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr,
  *  dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes.
  *  preferencesPtr can be NULL, in which case default parameters are selected.
  * @return : number of bytes written into dstBuffer for the header
- *           or an error code (can be tested using LZ4F_isError())
+ *        or an error code (can be tested using LZ4F_isError())
  */
 size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr,
                           void* dstBuffer, size_t dstCapacity,
@@ -712,27 +733,31 @@ typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize
 
 
 /*! LZ4F_makeBlock():
- *  compress a single block, add header and checksum
- *  assumption : dst buffer capacity is >= srcSize */
-static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize,
+ *  compress a single block, add header and optional checksum.
+ *  assumption : dst buffer capacity is >= BHSize + srcSize + crcSize
+ */
+static size_t LZ4F_makeBlock(void* dst,
+                       const void* src, size_t srcSize,
                              compressFunc_t compress, void* lz4ctx, int level,
-                             const LZ4F_CDict* cdict, LZ4F_blockChecksum_t crcFlag)
+                       const LZ4F_CDict* cdict,
+                             LZ4F_blockChecksum_t crcFlag)
 {
     BYTE* const cSizePtr = (BYTE*)dst;
-    U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4),
+    U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+BHSize),
                                       (int)(srcSize), (int)(srcSize-1),
                                       level, cdict);
-    LZ4F_writeLE32(cSizePtr, cSize);
     if (cSize == 0) {  /* compression failed */
         cSize = (U32)srcSize;
         LZ4F_writeLE32(cSizePtr, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG);
-        memcpy(cSizePtr+4, src, srcSize);
+        memcpy(cSizePtr+BHSize, src, srcSize);
+    } else {
+        LZ4F_writeLE32(cSizePtr, cSize);
     }
     if (crcFlag) {
-        U32 const crc32 = XXH32(cSizePtr+4, cSize, 0);  /* checksum of compressed data */
-        LZ4F_writeLE32(cSizePtr+4+cSize, crc32);
+        U32 const crc32 = XXH32(cSizePtr+BHSize, cSize, 0);  /* checksum of compressed data */
+        LZ4F_writeLE32(cSizePtr+BHSize+cSize, crc32);
     }
-    return 4 + cSize + ((U32)crcFlag)*4;
+    return BHSize + cSize + ((U32)crcFlag)*BFSize;
 }
 
 
@@ -832,9 +857,11 @@ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
             memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
             srcPtr += sizeToCopy;
 
-            dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, blockSize,
+            dstPtr += LZ4F_makeBlock(dstPtr,
+                                     cctxPtr->tmpIn, blockSize,
                                      compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
-                                     cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+                                     cctxPtr->cdict,
+                                     cctxPtr->prefs.frameInfo.blockChecksumFlag);
 
             if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize;
             cctxPtr->tmpInSize = 0;
@@ -844,18 +871,22 @@ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
     while ((size_t)(srcEnd - srcPtr) >= blockSize) {
         /* compress full blocks */
         lastBlockCompressed = fromSrcBuffer;
-        dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, blockSize,
+        dstPtr += LZ4F_makeBlock(dstPtr,
+                                 srcPtr, blockSize,
                                  compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
-                                 cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+                                 cctxPtr->cdict,
+                                 cctxPtr->prefs.frameInfo.blockChecksumFlag);
         srcPtr += blockSize;
     }
 
     if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) {
         /* compress remaining input < blockSize */
         lastBlockCompressed = fromSrcBuffer;
-        dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, srcEnd - srcPtr,
+        dstPtr += LZ4F_makeBlock(dstPtr,
+                                 srcPtr, (size_t)(srcEnd - srcPtr),
                                  compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
-                                 cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
+                                 cctxPtr->cdict,
+                                 cctxPtr->prefs.frameInfo.blockChecksumFlag);
         srcPtr  = srcEnd;
     }
 
@@ -881,28 +912,30 @@ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr,
     /* some input data left, necessarily < blockSize */
     if (srcPtr < srcEnd) {
         /* fill tmp buffer */
-        size_t const sizeToCopy = srcEnd - srcPtr;
+        size_t const sizeToCopy = (size_t)(srcEnd - srcPtr);
         memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
         cctxPtr->tmpInSize = sizeToCopy;
     }
 
     if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)
-        XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize);
+        (void)XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize);
 
     cctxPtr->totalInSize += srcSize;
-    return dstPtr - dstStart;
+    return (size_t)(dstPtr - dstStart);
 }
 
 
 /*! LZ4F_flush() :
- *  Should you need to create compressed data immediately, without waiting for a block to be filled,
- *  you can call LZ4_flush(), which will immediately compress any remaining data stored within compressionContext.
- *  The result of the function is the number of bytes written into dstBuffer
- *  (it can be zero, this means there was no data left within compressionContext)
+ *  When compressed data must be sent immediately, without waiting for a block to be filled,
+ *  invoke LZ4F_flush(), which will immediately compress any remaining data stored within LZ4F_cctx.
+ *  The result of the function is the number of bytes written into dstBuffer.
+ *  It can be zero, this means there was no data left within LZ4F_cctx.
  *  The function outputs an error code if it fails (can be tested using LZ4F_isError())
- *  The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ *  LZ4F_compressOptions_t* is optional. NULL is a valid argument.
  */
-size_t LZ4F_flush(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* compressOptionsPtr)
+size_t LZ4F_flush(LZ4F_cctx* cctxPtr,
+                  void* dstBuffer, size_t dstCapacity,
+            const LZ4F_compressOptions_t* compressOptionsPtr)
 {
     BYTE* const dstStart = (BYTE*)dstBuffer;
     BYTE* dstPtr = dstStart;
@@ -910,52 +943,65 @@ size_t LZ4F_flush(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const
 
     if (cctxPtr->tmpInSize == 0) return 0;   /* nothing to flush */
     if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);
-    if (dstCapacity < (cctxPtr->tmpInSize + 4)) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);   /* +4 : block header(4)  */
+    if (dstCapacity < (cctxPtr->tmpInSize + BHSize + BFSize))
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
     (void)compressOptionsPtr;   /* not yet useful */
 
     /* select compression function */
     compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
 
     /* compress tmp buffer */
-    dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize,
+    dstPtr += LZ4F_makeBlock(dstPtr,
+                             cctxPtr->tmpIn, cctxPtr->tmpInSize,
                              compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel,
-                             cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag);
-    if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize;
+                             cctxPtr->cdict,
+                             cctxPtr->prefs.frameInfo.blockChecksumFlag);
+    assert(((void)"flush overflows dstBuffer!", (size_t)(dstPtr - dstStart) <= dstCapacity));
+
+    if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked)
+        cctxPtr->tmpIn += cctxPtr->tmpInSize;
     cctxPtr->tmpInSize = 0;
 
     /* keep tmpIn within limits */
     if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) {  /* necessarily LZ4F_blockLinked */
-        int realDictSize = LZ4F_localSaveDict(cctxPtr);
+        int const realDictSize = LZ4F_localSaveDict(cctxPtr);
         cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
     }
 
-    return dstPtr - dstStart;
+    return (size_t)(dstPtr - dstStart);
 }
 
 
 /*! LZ4F_compressEnd() :
- * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
- * It will flush whatever data remained within compressionContext (like LZ4_flush())
- * but also properly finalize the frame, with an endMark and a checksum.
- * The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
- * The function outputs an error code if it fails (can be tested using LZ4F_isError())
- * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
- * compressionContext can then be used again, starting with LZ4F_compressBegin(). The preferences will remain the same.
+ *  When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ *  It will flush whatever data remained within compressionContext (like LZ4F_flush())
+ *  but also properly finalize the frame, with an endMark and an (optional) checksum.
+ *  LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * @return: the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ *       or an error code if it fails (can be tested using LZ4F_isError())
+ *  The context can then be used again to compress a new frame, starting with LZ4F_compressBegin().
  */
-size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr,
+                        void* dstBuffer, size_t dstCapacity,
+                  const LZ4F_compressOptions_t* compressOptionsPtr)
 {
     BYTE* const dstStart = (BYTE*)dstBuffer;
     BYTE* dstPtr = dstStart;
 
-    size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstMaxSize, compressOptionsPtr);
+    size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr);
     if (LZ4F_isError(flushSize)) return flushSize;
     dstPtr += flushSize;
 
+    assert(flushSize <= dstCapacity);
+    dstCapacity -= flushSize;
+
+    if (dstCapacity < 4) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
     LZ4F_writeLE32(dstPtr, 0);
-    dstPtr+=4;   /* endMark */
+    dstPtr += 4;   /* endMark */
 
     if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) {
         U32 const xxh = XXH32_digest(&(cctxPtr->xxh));
+        if (dstCapacity < 8) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
         LZ4F_writeLE32(dstPtr, xxh);
         dstPtr+=4;   /* content Checksum */
     }
@@ -968,7 +1014,7 @@ size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstMaxSize,
             return err0r(LZ4F_ERROR_frameSize_wrong);
     }
 
-    return dstPtr - dstStart;
+    return (size_t)(dstPtr - dstStart);
 }
 
 
@@ -1049,31 +1095,6 @@ void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx)
 }
 
 
-/*! LZ4F_headerSize() :
- *   @return : size of frame header
- *             or an error code, which can be tested using LZ4F_isError()
- */
-static size_t LZ4F_headerSize(const void* src, size_t srcSize)
-{
-    /* minimal srcSize to determine header size */
-    if (srcSize < 5) return err0r(LZ4F_ERROR_frameHeader_incomplete);
-
-    /* special case : skippable frames */
-    if ((LZ4F_readLE32(src) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) return 8;
-
-    /* control magic number */
-    if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER)
-        return err0r(LZ4F_ERROR_frameType_unknown);
-
-    /* Frame Header Size */
-    {   BYTE const FLG = ((const BYTE*)src)[4];
-        U32 const contentSizeFlag = (FLG>>3) & _1BIT;
-        U32 const dictIDFlag = FLG & _1BIT;
-        return minFHSize + (contentSizeFlag*8) + (dictIDFlag*4);
-    }
-}
-
-
 /*! LZ4F_decodeHeader() :
  *  input   : `src` points at the **beginning of the frame**
  *  output  : set internal values of dctx, such as
@@ -1125,7 +1146,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
     }
 
     /* Frame Header Size */
-    frameHeaderSize = minFHSize + (contentSizeFlag*8) + (dictIDFlag*4);
+    frameHeaderSize = minFHSize + (contentSizeFlag?8:0) + (dictIDFlag?4:0);
 
     if (srcSize < frameHeaderSize) {
         /* not enough input to fully decode frame header */
@@ -1146,6 +1167,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
     }
 
     /* check header */
+    assert(frameHeaderSize > 5);
     {   BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
         if (HC != srcPtr[frameHeaderSize-1])
             return err0r(LZ4F_ERROR_headerChecksum_invalid);
@@ -1169,6 +1191,34 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
 }
 
 
+/*! LZ4F_headerSize() :
+ * @return : size of frame header
+ *           or an error code, which can be tested using LZ4F_isError()
+ */
+size_t LZ4F_headerSize(const void* src, size_t srcSize)
+{
+    if (src == NULL) return err0r(LZ4F_ERROR_srcPtr_wrong);
+
+    /* minimal srcSize to determine header size */
+    if (srcSize < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH)
+        return err0r(LZ4F_ERROR_frameHeader_incomplete);
+
+    /* special case : skippable frames */
+    if ((LZ4F_readLE32(src) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START)
+        return 8;
+
+    /* control magic number */
+    if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER)
+        return err0r(LZ4F_ERROR_frameType_unknown);
+
+    /* Frame Header Size */
+    {   BYTE const FLG = ((const BYTE*)src)[4];
+        U32 const contentSizeFlag = (FLG>>3) & _1BIT;
+        U32 const dictIDFlag = FLG & _1BIT;
+        return minFHSize + (contentSizeFlag?8:0) + (dictIDFlag?4:0);
+    }
+}
+
 /*! LZ4F_getFrameInfo() :
  *  This function extracts frame parameters (max blockSize, frame checksum, etc.).
  *  Usage is optional. Objective is to provide relevant information for allocation purposes.
@@ -1184,10 +1234,12 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
  *  note 1 : in case of error, dctx is not modified. Decoding operations can resume from where they stopped.
  *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
  */
-LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoPtr,
-                                   const void* srcBuffer, size_t* srcSizePtr)
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx,
+                                   LZ4F_frameInfo_t* frameInfoPtr,
+                             const void* srcBuffer, size_t* srcSizePtr)
 {
-    if (dctx->dStage > dstage_storeFrameHeader) {  /* assumption :  dstage_* header enum at beginning of range */
+    LZ4F_STATIC_ASSERT(dstage_getFrameHeader < dstage_storeFrameHeader);
+    if (dctx->dStage > dstage_storeFrameHeader) {
         /* frameInfo already decoded */
         size_t o=0, i=0;
         *srcSizePtr = 0;
@@ -1200,7 +1252,6 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoP
             *srcSizePtr = 0;
             return err0r(LZ4F_ERROR_frameDecoding_alreadyStarted);
         } else {
-            size_t decodeResult;
             size_t const hSize = LZ4F_headerSize(srcBuffer, *srcSizePtr);
             if (LZ4F_isError(hSize)) { *srcSizePtr=0; return hSize; }
             if (*srcSizePtr < hSize) {
@@ -1208,16 +1259,16 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoP
                 return err0r(LZ4F_ERROR_frameHeader_incomplete);
             }
 
-            decodeResult = LZ4F_decodeHeader(dctx, srcBuffer, hSize);
-            if (LZ4F_isError(decodeResult)) {
-                *srcSizePtr = 0;
-            } else {
-                *srcSizePtr = decodeResult;
-                decodeResult = BHSize;   /* block header size */
-            }
-            *frameInfoPtr = dctx->frameInfo;
-            return decodeResult;
-    }   }
+            {   size_t decodeResult = LZ4F_decodeHeader(dctx, srcBuffer, hSize);
+                if (LZ4F_isError(decodeResult)) {
+                    *srcSizePtr = 0;
+                } else {
+                    *srcSizePtr = decodeResult;
+                    decodeResult = BHSize;   /* block header size */
+                }
+                *frameInfoPtr = dctx->frameInfo;
+                return decodeResult;
+    }   }   }
 }
 
 
@@ -1235,9 +1286,10 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx,
         return;
     }
 
-    if (dstPtr - dstBufferStart + dstSize >= 64 KB) {  /* history in dstBuffer becomes large enough to become dictionary */
+    assert(dstPtr >= dstBufferStart);
+    if ((size_t)(dstPtr - dstBufferStart) + dstSize >= 64 KB) {  /* history in dstBuffer becomes large enough to become dictionary */
         dctx->dict = (const BYTE*)dstBufferStart;
-        dctx->dictSize = dstPtr - dstBufferStart + dstSize;
+        dctx->dictSize = (size_t)(dstPtr - dstBufferStart) + dstSize;
         return;
     }
 
@@ -1253,7 +1305,7 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx,
     }
 
     if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
-        size_t const preserveSize = dctx->tmpOut - dctx->tmpOutBuffer;
+        size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer);
         size_t copySize = 64 KB - dctx->tmpOutSize;
         const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
         if (dctx->tmpOutSize > 64 KB) copySize = 0;
@@ -1338,7 +1390,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
 
         case dstage_getFrameHeader:
             if ((size_t)(srcEnd-srcPtr) >= maxFHSize) {  /* enough to decode - shortcut */
-                size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, srcEnd-srcPtr);  /* will update dStage appropriately */
+                size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr));  /* will update dStage appropriately */
                 if (LZ4F_isError(hSize)) return hSize;
                 srcPtr += hSize;
                 break;
@@ -1366,14 +1418,14 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
             break;
 
         case dstage_init:
-            if (dctx->frameInfo.contentChecksumFlag) XXH32_reset(&(dctx->xxh), 0);
+            if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0);
             /* internal buffers allocation */
             {   size_t const bufferNeeded = dctx->maxBlockSize
-                    + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) * 128 KB);
+                    + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) ? 128 KB : 0);
                 if (bufferNeeded > dctx->maxBufferSize) {   /* tmp buffers too small */
                     dctx->maxBufferSize = 0;   /* ensure allocation will be re-attempted on next entry*/
                     FREEMEM(dctx->tmpIn);
-                    dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + 4 /* block checksum */);
+                    dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + BFSize /* block checksum */);
                     if (dctx->tmpIn == NULL)
                         return err0r(LZ4F_ERROR_allocation_failed);
                     FREEMEM(dctx->tmpOutBuffer);
@@ -1420,7 +1472,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
 
         /* decode block header */
             {   size_t const nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU;
-                size_t const crcSize = dctx->frameInfo.blockChecksumFlag * 4;
+                size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize;
                 if (nextCBlockSize==0) {  /* frameEnd signal, no more block */
                     dctx->dStage = dstage_getSuffix;
                     break;
@@ -1431,7 +1483,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                     /* next block is uncompressed */
                     dctx->tmpInTarget = nextCBlockSize;
                     if (dctx->frameInfo.blockChecksumFlag) {
-                        XXH32_reset(&dctx->blockChecksum, 0);
+                        (void)XXH32_reset(&dctx->blockChecksum, 0);
                     }
                     dctx->dStage = dstage_copyDirect;
                     break;
@@ -1440,7 +1492,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                 dctx->tmpInTarget = nextCBlockSize + crcSize;
                 dctx->dStage = dstage_getCBlock;
                 if (dstPtr==dstEnd) {
-                    nextSrcSizeHint = nextCBlockSize + crcSize + BHSize;
+                    nextSrcSizeHint = BHSize + nextCBlockSize + crcSize;
                     doAnotherStage = 0;
                 }
                 break;
@@ -1451,10 +1503,10 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                 size_t const sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize);
                 memcpy(dstPtr, srcPtr, sizeToCopy);
                 if (dctx->frameInfo.blockChecksumFlag) {
-                    XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
+                    (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy);
                 }
                 if (dctx->frameInfo.contentChecksumFlag)
-                    XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
+                    (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy);
                 if (dctx->frameInfo.contentSize)
                     dctx->frameRemainingSize -= sizeToCopy;
 
@@ -1474,7 +1526,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                 }
                 dctx->tmpInTarget -= sizeToCopy;  /* need to copy more */
                 nextSrcSizeHint = dctx->tmpInTarget +
-                                + dctx->frameInfo.contentChecksumFlag * 4  /* block checksum */
+                                +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
                                 + BHSize /* next header size */;
                 doAnotherStage = 0;
                 break;
@@ -1525,8 +1577,10 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                 dctx->tmpInSize += sizeToCopy;
                 srcPtr += sizeToCopy;
                 if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */
-                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize;
-                    doAnotherStage=0;
+                    nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize)
+                                    + (dctx->frameInfo.blockChecksumFlag ? BFSize : 0)
+                                    + BHSize /* next header size */;
+                    doAnotherStage = 0;
                     break;
                 }
                 selectedIn = dctx->tmpIn;
@@ -1558,13 +1612,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                         dict, (int)dictSize);
                 if (decodedSize < 0) return err0r(LZ4F_ERROR_GENERIC);   /* decompression failed */
                 if (dctx->frameInfo.contentChecksumFlag)
-                    XXH32_update(&(dctx->xxh), dstPtr, decodedSize);
+                    XXH32_update(&(dctx->xxh), dstPtr, (size_t)decodedSize);
                 if (dctx->frameInfo.contentSize)
-                    dctx->frameRemainingSize -= decodedSize;
+                    dctx->frameRemainingSize -= (size_t)decodedSize;
 
                 /* dictionary management */
                 if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
-                    LZ4F_updateDict(dctx, dstPtr, decodedSize, dstStart, 0);
+                    LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0);
 
                 dstPtr += decodedSize;
                 dctx->dStage = dstage_getBlockHeader;
@@ -1601,10 +1655,10 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
                 if (decodedSize < 0)  /* decompression failed */
                     return err0r(LZ4F_ERROR_decompressionFailed);
                 if (dctx->frameInfo.contentChecksumFlag)
-                    XXH32_update(&(dctx->xxh), dctx->tmpOut, decodedSize);
+                    XXH32_update(&(dctx->xxh), dctx->tmpOut, (size_t)decodedSize);
                 if (dctx->frameInfo.contentSize)
-                    dctx->frameRemainingSize -= decodedSize;
-                dctx->tmpOutSize = decodedSize;
+                    dctx->frameRemainingSize -= (size_t)decodedSize;
+                dctx->tmpOutSize = (size_t)decodedSize;
                 dctx->tmpOutStart = 0;
                 dctx->dStage = dstage_flushOut;
             }
@@ -1732,7 +1786,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
       && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) )  /* valid stages : [init ... getSuffix[ */
     {
         if (dctx->dStage == dstage_flushOut) {
-            size_t const preserveSize = dctx->tmpOut - dctx->tmpOutBuffer;
+            size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer);
             size_t copySize = 64 KB - dctx->tmpOutSize;
             const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart;
             if (dctx->tmpOutSize > 64 KB) copySize = 0;
@@ -1756,8 +1810,8 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
         }
     }
 
-    *srcSizePtr = (srcPtr - srcStart);
-    *dstSizePtr = (dstPtr - dstStart);
+    *srcSizePtr = (size_t)(srcPtr - srcStart);
+    *dstSizePtr = (size_t)(dstPtr - dstStart);
     return nextSrcSizeHint;
 }
 
diff --git a/lib/lz4frame.h b/lib/lz4frame.h
index 75f1fd9..ca20dc9 100644
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h
@@ -32,11 +32,14 @@
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 
-/* LZ4F is a stand-alone API to create LZ4-compressed frames
- * conformant with specification v1.6.1.
- * It also offers streaming capabilities.
+/* LZ4F is a stand-alone API able to create and decode LZ4 frames
+ * conformant with specification v1.6.1 in doc/lz4_Frame_format.md .
+ * Generated frames are compatible with `lz4` CLI.
+ *
+ * LZ4F also offers streaming capabilities.
+ *
  * lz4.h is not required when using lz4frame.h,
- * except to get constant such as LZ4_VERSION_NUMBER.
+ * except to extract common constants such as LZ4_VERSION_NUMBER.
  * */
 
 #ifndef LZ4F_H_09782039843
@@ -195,7 +198,7 @@ typedef struct {
 *  Simple compression function
 ***********************************/
 
-LZ4FLIB_API int LZ4F_compressionLevel_max(void);
+LZ4FLIB_API int LZ4F_compressionLevel_max(void);   /* v1.8.0+ */
 
 /*! LZ4F_compressFrameBound() :
  *  Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
@@ -247,7 +250,9 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
 
 /*----    Compression    ----*/
 
-#define LZ4F_HEADER_SIZE_MAX 19   /* LZ4 Frame header size can vary from 7 to 19 bytes */
+#define LZ4F_HEADER_SIZE_MIN  7   /* LZ4 Frame header size can vary, depending on selected parameters */
+#define LZ4F_HEADER_SIZE_MAX 19
+
 /*! LZ4F_compressBegin() :
  *  will write the frame header into dstBuffer.
  *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
@@ -260,15 +265,19 @@ LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx,
                                       const LZ4F_preferences_t* prefsPtr);
 
 /*! LZ4F_compressBound() :
- *  Provides minimum dstCapacity required to guarantee compression success
- *  given a srcSize and preferences, covering worst case scenario.
+ *  Provides minimum dstCapacity required to guarantee success of
+ *  LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario.
+ *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead.
+ *  Note that the result is only valid for a single invocation of LZ4F_compressUpdate().
+ *  When invoking LZ4F_compressUpdate() multiple times,
+ *  if the output buffer is gradually filled up instead of emptied and re-used from its start,
+ *  one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound().
+ * @return is always the same for a srcSize and prefsPtr.
  *  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
- *  Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd(),
- *  Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
- *  It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd().
- *  Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin().
- *  Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
- *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+ *  tech details :
+ * @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ *  It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd().
+ * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin().
  */
 LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
 
@@ -295,6 +304,7 @@ LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
  * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
  * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
  *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
  */
 LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
                               void* dstBuffer, size_t dstCapacity,
@@ -307,6 +317,7 @@ LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
  * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
  * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
  *           or an error code if it fails (which can be tested using LZ4F_isError())
+ *  Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr).
  *  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
  */
 LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
@@ -345,23 +356,58 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
 *  Streaming decompression functions
 *************************************/
 
+#define LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH 5
+
+/*! LZ4F_headerSize() : v1.9.0+
+ *  Provide the header size of a frame starting at `src`.
+ * `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH,
+ *  which is enough to decode the header length.
+ * @return : size of frame header
+ *           or an error code, which can be tested using LZ4F_isError()
+ *  note : Frame header size is variable, but is guaranteed to be
+ *         >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes.
+ */
+size_t LZ4F_headerSize(const void* src, size_t srcSize);
+
 /*! LZ4F_getFrameInfo() :
  *  This function extracts frame parameters (max blockSize, dictID, etc.).
- *  Its usage is optional.
- *  Extracted information is typically useful for allocation and dictionary.
- *  This function works in 2 situations :
- *   - At the beginning of a new frame, in which case
- *     it will decode information from `srcBuffer`, starting the decoding process.
- *     Input size must be large enough to successfully decode the entire frame header.
- *     Frame header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes.
- *     It's allowed to provide more input data than this minimum.
- *   - After decoding has been started.
- *     In which case, no input is read, frame parameters are extracted from dctx.
- *   - If decoding has barely started, but not yet extracted information from header,
+ *  Its usage is optional: user can call LZ4F_decompress() directly.
+ *
+ *  Extracted information will fill an existing LZ4F_frameInfo_t structure.
+ *  This can be useful for allocation and dictionary identification purposes.
+ *
+ *  LZ4F_getFrameInfo() can work in the following situations :
+ *
+ *  1) At the beginning of a new frame, before any invocation of LZ4F_decompress().
+ *     It will decode header from `srcBuffer`,
+ *     consuming the header and starting the decoding process.
+ *
+ *     Input size must be large enough to contain the full frame header.
+ *     Frame header size can be known beforehand by LZ4F_headerSize().
+ *     Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes,
+ *     and <= LZ4F_HEADER_SIZE_MAX bytes.
+ *     Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work.
+ *     It's allowed to provide more input data than the header size,
+ *     LZ4F_getFrameInfo() will only consume the header.
+ *
+ *     If input size is not large enough,
+ *     aka if it's smaller than header size,
+ *     function will fail and return an error code.
+ *
+ *  2) After decoding has been started,
+ *     it's possible to invoke LZ4F_getFrameInfo() anytime
+ *     to extract already decoded frame parameters stored within dctx.
+ *
+ *     Note that, if decoding has barely started,
+ *     and not yet read enough information to decode the header,
  *     LZ4F_getFrameInfo() will fail.
- *  The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
- *  Decompression must resume from (srcBuffer + *srcSizePtr).
- * @return : an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *
+ *  The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value).
+ *  LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started,
+ *  and when decoding the header has been successful.
+ *  Decompression must then resume from (srcBuffer + *srcSizePtr).
+ *
+ * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call,
  *           or an error code which can be tested using LZ4F_isError().
  *  note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely.
  *  note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
@@ -427,15 +473,15 @@ LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx);   /* always su
 extern "C" {
 #endif
 
-/* These declarations are not stable and may change in the future. They are
- * therefore only safe to depend on when the caller is statically linked
- * against the library. To access their declarations, define
- * LZ4F_STATIC_LINKING_ONLY.
+/* These declarations are not stable and may change in the future.
+ * They are therefore only safe to depend on
+ * when the caller is statically linked against the library.
+ * To access their declarations, define LZ4F_STATIC_LINKING_ONLY.
  *
- * There is a further protection mechanism where these symbols aren't published
- * into shared/dynamic libraries. You can override this behavior and force
- * them to be published by defining LZ4F_PUBLISH_STATIC_FUNCTIONS. Use at
- * your own risk.
+ * By default, these symbols aren't published into shared/dynamic libraries.
+ * You can override this behavior and force them to be published
+ * by defining LZ4F_PUBLISH_STATIC_FUNCTIONS.
+ * Use at your own risk.
  */
 #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
 #define LZ4FLIB_STATIC_API LZ4FLIB_API
@@ -471,19 +517,38 @@ extern "C" {
 #define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
 
 /* enum list is exposed, to handle specific errors */
-typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM)
+              _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes;
 
 LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
 
-
+LZ4FLIB_STATIC_API size_t LZ4F_getBlockSize(unsigned);
 
 /**********************************
  *  Bulk processing dictionary API
  *********************************/
+
+/* A Dictionary is useful for the compression of small messages (KB range).
+ * It dramatically improves compression efficiency.
+ *
+ * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful.
+ * Best results are generally achieved by using Zstandard's Dictionary Builder
+ * to generate a high-quality dictionary from a set of samples.
+ *
+ * Loading a dictionary has a cost, since it involves construction of tables.
+ * The Bulk processing dictionary API makes it possible to share this cost
+ * over an arbitrary number of compression jobs, even concurrently,
+ * markedly improving compression latency for these cases.
+ *
+ * The same dictionary will have to be used on the decompression side
+ * for decoding to be successful.
+ * To help identify the correct dictionary at decoding stage,
+ * the frame header allows optional embedding of a dictID field.
+ */
 typedef struct LZ4F_CDict_s LZ4F_CDict;
 
 /*! LZ4_createCDict() :
- *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once.
  *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
  *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
  * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index e913ee7..f6ed779 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -61,9 +61,14 @@
 #  pragma clang diagnostic ignored "-Wunused-function"
 #endif
 
+/*===   Enums   ===*/
+typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
+
+
 #define LZ4_COMMONDEFS_ONLY
+#ifndef LZ4_SRC_INCLUDED
 #include "lz4.c"   /* LZ4_count, constants, mem */
-
+#endif
 
 /*===   Constants   ===*/
 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
@@ -76,12 +81,11 @@
 #define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
 #define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
 #define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster */
+/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
+#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
 
 static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
 
-/*===   Enums   ===*/
-typedef enum { noDictCtx, usingDictCtx } dictCtx_directive;
-
 
 /**************************************
 *  HC Compression
@@ -92,9 +96,9 @@ static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
 }
 
-static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
 {
-    uptrval startingOffset = hc4->end - hc4->base;
+    uptrval startingOffset = (uptrval)(hc4->end - hc4->base);
     if (startingOffset > 1 GB) {
         LZ4HC_clearTables(hc4);
         startingOffset = 0;
@@ -121,7 +125,7 @@ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
     while (idx < target) {
         U32 const h = LZ4HC_hashPtr(base+idx);
         size_t delta = idx - hashTable[h];
-        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
         DELTANEXTU16(chainTable, idx) = (U16)delta;
         hashTable[h] = idx;
         idx++;
@@ -224,14 +228,13 @@ LZ4HC_InsertAndGetWiderMatch (
     const U32 dictLimit = hc4->dictLimit;
     const BYTE* const lowPrefixPtr = base + dictLimit;
     const U32 ipIndex = (U32)(ip - base);
-    const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
+    const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
     const BYTE* const dictBase = hc4->dictBase;
     int const lookBackLength = (int)(ip-iLowLimit);
     int nbAttempts = maxNbAttempts;
-    int matchChainPos = 0;
+    U32 matchChainPos = 0;
     U32 const pattern = LZ4_read32(ip);
     U32 matchIndex;
-    U32 dictMatchIndex;
     repeat_state_e repeat = rep_untested;
     size_t srcPatternLength = 0;
 
@@ -256,7 +259,7 @@ LZ4HC_InsertAndGetWiderMatch (
             if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
                 if (LZ4_read32(matchPtr) == pattern) {
                     int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
-                    matchLength = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
                     matchLength -= back;
                     if (matchLength > longest) {
                         longest = matchLength;
@@ -270,7 +273,7 @@ LZ4HC_InsertAndGetWiderMatch (
                 int back = 0;
                 const BYTE* vLimit = ip + (dictLimit - matchIndex);
                 if (vLimit > iHighLimit) vLimit = iHighLimit;
-                matchLength = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
                 if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
                     matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit);
                 back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
@@ -283,14 +286,14 @@ LZ4HC_InsertAndGetWiderMatch (
 
         if (chainSwap && matchLength==longest) {    /* better match => select a better chain */
             assert(lookBackLength==0);   /* search forward only */
-            if (matchIndex + longest <= ipIndex) {
+            if (matchIndex + (U32)longest <= ipIndex) {
                 U32 distanceToNextMatch = 1;
                 int pos;
                 for (pos = 0; pos <= longest - MINMATCH; pos++) {
-                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos);
+                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
                     if (candidateDist > distanceToNextMatch) {
                         distanceToNextMatch = candidateDist;
-                        matchChainPos = pos;
+                        matchChainPos = (U32)pos;
                 }   }
                 if (distanceToNextMatch > 1) {
                     if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
@@ -315,7 +318,7 @@ LZ4HC_InsertAndGetWiderMatch (
                     const BYTE* const matchPtr = base + matchCandidateIdx;
                     if (LZ4_read32(matchPtr) == pattern) {  /* good candidate */
                         size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
-                        const BYTE* const lowestMatchPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
+                        const BYTE* const lowestMatchPtr = (lowPrefixPtr + LZ4_DISTANCE_MAX >= ip) ? lowPrefixPtr : ip - LZ4_DISTANCE_MAX;
                         size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
                         size_t const currentSegmentLength = backLength + forwardPatternLength;
 
@@ -328,7 +331,7 @@ LZ4HC_InsertAndGetWiderMatch (
                                 size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
                                 if ((size_t)longest < maxML) {
                                     assert(base + matchIndex < ip);
-                                    if (ip - (base+matchIndex) > MAX_DISTANCE) break;
+                                    if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
                                     assert(maxML < 2 GB);
                                     longest = (int)maxML;
                                     *matchpos = base + matchIndex;   /* virtual pos, relative to ip, to retrieve offset */
@@ -343,16 +346,18 @@ LZ4HC_InsertAndGetWiderMatch (
         }   }   /* PA optimization */
 
         /* follow current chain */
-        matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos);
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos);
 
     }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
 
-    if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
-        size_t const dictEndOffset = dictCtx->end - dictCtx->base;
+    if ( dict == usingDictCtxHc
+      && nbAttempts
+      && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
+        size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
+        U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
         assert(dictEndOffset <= 1 GB);
-        dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
         matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
-        while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) {
+        while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
             const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
 
             if (LZ4_read32(matchPtr) == pattern) {
@@ -360,22 +365,19 @@ LZ4HC_InsertAndGetWiderMatch (
                 int back = 0;
                 const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
                 if (vLimit > iHighLimit) vLimit = iHighLimit;
-                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
                 back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
                 mlt -= back;
                 if (mlt > longest) {
                     longest = mlt;
                     *matchpos = base + matchIndex + back;
                     *startpos = ip + back;
-                }
-            }
+            }   }
 
             {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
                 dictMatchIndex -= nextOffset;
                 matchIndex -= nextOffset;
-            }
-        }
-    }
+    }   }   }
 
     return longest;
 }
@@ -395,14 +397,6 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* Index tabl
     return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
 }
 
-
-
-typedef enum {
-    noLimit = 0,
-    limitedOutput = 1,
-    limitedDestSize = 2,
-} limitedOutput_directive;
-
 /* LZ4HC_encodeSequence() :
  * @return : 0 if ok,
  *           1 if buffer issue detected */
@@ -437,7 +431,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
 
     /* Encode Literal length */
     length = (size_t)(*ip - *anchor);
-    if ((limit) && ((*op + (length >> 8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
     if (length >= RUN_MASK) {
         size_t len = length - RUN_MASK;
         *token = (RUN_MASK << ML_BITS);
@@ -452,13 +446,13 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
     *op += length;
 
     /* Encode Offset */
-    assert( (*ip - match) <= MAX_DISTANCE );   /* note : consider providing offset as a value, rather than as a pointer difference */
+    assert( (*ip - match) <= LZ4_DISTANCE_MAX );   /* note : consider providing offset as a value, rather than as a pointer difference */
     LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
 
     /* Encode MatchLength */
     assert(matchLength >= MINMATCH);
-    length = (size_t)(matchLength - MINMATCH);
-    if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    length = (size_t)matchLength - MINMATCH;
+    if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
     if (length >= ML_MASK) {
         *token += ML_MASK;
         length -= ML_MASK;
@@ -511,12 +505,12 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
 
     /* init */
     *srcSizePtr = 0;
-    if (limit == limitedDestSize) oend -= LASTLITERALS;                  /* Hack for support LZ4 format restriction */
+    if (limit == fillOutput) oend -= LASTLITERALS;                  /* Hack for support LZ4 format restriction */
     if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
     /* Main Loop */
     while (ip <= mflimit) {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+        ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
         if (ml<MINMATCH) { ip++; continue; }
 
         /* saved, in case we would skip too much */
@@ -533,7 +527,7 @@ _Search2:
 
         if (ml2 == ml) { /* No better match => encode ML1 */
             optr = op;
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
             continue;
         }
 
@@ -581,10 +575,10 @@ _Search3:
             if (start2 < ip+ml)  ml = (int)(start2 - ip);
             /* Now, encode 2 sequences */
             optr = op;
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
             ip = start2;
             optr = op;
-            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) goto _dest_overflow;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow;
             continue;
         }
 
@@ -603,7 +597,7 @@ _Search3:
                 }
 
                 optr = op;
-                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
+                if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
                 ip  = start3;
                 ref = ref3;
                 ml  = ml3;
@@ -641,7 +635,7 @@ _Search3:
             }
         }
         optr = op;
-        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
+        if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
 
         /* ML2 becomes ML1 */
         ip = start2; ref = ref2; ml = ml2;
@@ -658,7 +652,7 @@ _last_literals:
     {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
         size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
         size_t const totalSize = 1 + litLength + lastRunSize;
-        if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+        if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
         if (limit && (op + totalSize > oend)) {
             if (limit == limitedOutput) return 0;  /* Check output limit */
             /* adapt lastRunSize to fill 'dest' */
@@ -685,7 +679,7 @@ _last_literals:
     return (int) (((char*)op)-dest);
 
 _dest_overflow:
-    if (limit == limitedDestSize) {
+    if (limit == fillOutput) {
         op = optr;  /* restore correct out pointer */
         goto _last_literals;
     }
@@ -735,56 +729,64 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
         { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
     };
 
-    DEBUGLOG(4, "LZ4HC_compress_generic(%p, %p, %d)", ctx, src, *srcSizePtr);
+    DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr);
 
-    if (limit == limitedDestSize && dstCapacity < 1) return 0;         /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size (too large or negative) */
+    if (limit == fillOutput && dstCapacity < 1) return 0;   /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;    /* Unsupported input size (too large or negative) */
 
     ctx->end += *srcSizePtr;
     if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
     cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
     {   cParams_t const cParam = clTable[cLevel];
         HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
-        if (cParam.strat == lz4hc)
-            return LZ4HC_compress_hashChain(ctx,
+        int result;
+
+        if (cParam.strat == lz4hc) {
+            result = LZ4HC_compress_hashChain(ctx,
                                 src, dst, srcSizePtr, dstCapacity,
                                 cParam.nbSearches, limit, dict);
-        assert(cParam.strat == lz4opt);
-        return LZ4HC_compress_optimal(ctx,
-                            src, dst, srcSizePtr, dstCapacity,
-                            cParam.nbSearches, cParam.targetLength, limit,
-                            cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
-                            dict, favor);
+        } else {
+            assert(cParam.strat == lz4opt);
+            result = LZ4HC_compress_optimal(ctx,
+                                src, dst, srcSizePtr, dstCapacity,
+                                (int)cParam.nbSearches, cParam.targetLength, limit,
+                                cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
+                                dict, favor);
+        }
+        if (result <= 0) ctx->dirty = 1;
+        return result;
     }
 }
 
 static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock);
 
-static int LZ4HC_compress_generic_noDictCtx (
-    LZ4HC_CCtx_internal* const ctx,
-    const char* const src,
-    char* const dst,
-    int* const srcSizePtr,
-    int const dstCapacity,
-    int cLevel,
-    limitedOutput_directive limit
-    )
+static int
+LZ4HC_compress_generic_noDictCtx (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
 {
     assert(ctx->dictCtx == NULL);
     return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
 }
 
-static int LZ4HC_compress_generic_dictCtx (
-    LZ4HC_CCtx_internal* const ctx,
-    const char* const src,
-    char* const dst,
-    int* const srcSizePtr,
-    int const dstCapacity,
-    int cLevel,
-    limitedOutput_directive limit
-    )
+static int
+LZ4HC_compress_generic_dictCtx (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
 {
-    const size_t position = ctx->end - ctx->base - ctx->lowLimit;
+    const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit;
     assert(ctx->dictCtx != NULL);
     if (position >= 64 KB) {
         ctx->dictCtx = NULL;
@@ -795,19 +797,20 @@ static int LZ4HC_compress_generic_dictCtx (
         ctx->compressionLevel = (short)cLevel;
         return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
     } else {
-        return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtx);
+        return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc);
     }
 }
 
-static int LZ4HC_compress_generic (
-    LZ4HC_CCtx_internal* const ctx,
-    const char* const src,
-    char* const dst,
-    int* const srcSizePtr,
-    int const dstCapacity,
-    int cLevel,
-    limitedOutput_directive limit
-    )
+static int
+LZ4HC_compress_generic (
+        LZ4HC_CCtx_internal* const ctx,
+        const char* const src,
+        char* const dst,
+        int* const srcSizePtr,
+        int const dstCapacity,
+        int cLevel,
+        limitedOutput_directive limit
+        )
 {
     if (ctx->dictCtx == NULL) {
         return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
@@ -817,24 +820,41 @@ static int LZ4HC_compress_generic (
 }
 
 
-int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
+int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }
 
+#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
+                   * it reports an alignment of 8-bytes,
+                   * while actually aligning LZ4_streamHC_t on 4 bytes. */
+static size_t LZ4_streamHC_t_alignment(void)
+{
+    struct { char c; LZ4_streamHC_t t; } t_a;
+    return sizeof(t_a) - sizeof(t_a.t);
+}
+#endif
+
+/* state is presumed correctly initialized,
+ * in which case its size and alignment have already been validated */
 int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
     LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
+                   * it reports an alignment of 8-bytes,
+                   * while actually aligning LZ4_streamHC_t on 4 bytes. */
+    assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0);  /* check alignment */
+#endif
     if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
     LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
-    LZ4HC_init (ctx, (const BYTE*)src);
+    LZ4HC_init_internal (ctx, (const BYTE*)src);
     if (dstCapacity < LZ4_compressBound(srcSize))
         return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
     else
-        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, noLimit);
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited);
 }
 
 int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
-    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
-    LZ4_resetStreamHC ((LZ4_streamHC_t*)state, compressionLevel);
+    LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+    if (ctx==NULL) return 0;   /* init failure */
     return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
 }
 
@@ -853,14 +873,14 @@ int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, in
     return cSize;
 }
 
-/* LZ4_compress_HC_destSize() :
- * only compatible with regular HC parser */
-int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */
+int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
 {
-    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
-    LZ4_resetStreamHC((LZ4_streamHC_t*)LZ4HC_Data, cLevel);
-    LZ4HC_init(ctx, (const BYTE*) source);
-    return LZ4HC_compress_generic(ctx, source, dest, sourceSizePtr, targetDestSize, cLevel, limitedDestSize);
+    LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+    if (ctx==NULL) return 0;   /* init failure */
+    LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source);
+    LZ4_setCompressionLevel(ctx, cLevel);
+    return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput);
 }
 
 
@@ -869,14 +889,16 @@ int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, i
 *  Streaming Functions
 **************************************/
 /* allocation */
-LZ4_streamHC_t* LZ4_createStreamHC(void) {
+LZ4_streamHC_t* LZ4_createStreamHC(void)
+{
     LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
     if (LZ4_streamHCPtr==NULL) return NULL;
-    LZ4_resetStreamHC(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
+    LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));  /* full initialization, malloc'ed buffer can be full of garbage */
     return LZ4_streamHCPtr;
 }
 
-int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
+{
     DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
     if (!LZ4_streamHCPtr) return 0;  /* support free on NULL */
     free(LZ4_streamHCPtr);
@@ -884,29 +906,53 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {
 }
 
 
-/* initialization */
-void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
 {
-    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
-    DEBUGLOG(4, "LZ4_resetStreamHC(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer;
+    if (buffer == NULL) return NULL;
+    if (size < sizeof(LZ4_streamHC_t)) return NULL;
+#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
+                   * it reports an alignment of 8-bytes,
+                   * while actually aligning LZ4_streamHC_t on 4 bytes. */
+    if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL;  /* alignment check */
+#endif
+    /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
+    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size);
+    /* end-base will trigger a clearTable on starting compression */
     LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1;
     LZ4_streamHCPtr->internal_donotuse.base = NULL;
     LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
     LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
+    LZ4_streamHCPtr->internal_donotuse.dirty = 0;
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
+    return LZ4_streamHCPtr;
+}
+
+/* just a stub */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
     LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
 }
 
 void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
 {
     DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
-    LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base;
-    LZ4_streamHCPtr->internal_donotuse.base = NULL;
-    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+    if (LZ4_streamHCPtr->internal_donotuse.dirty) {
+        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+    } else {
+        /* preserve end - base : can trigger clearTable's threshold */
+        LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base;
+        LZ4_streamHCPtr->internal_donotuse.base = NULL;
+        LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+    }
     LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
 }
 
 void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
 {
+    DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel);
     if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
     if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
     LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel;
@@ -917,16 +963,24 @@ void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor)
     LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0);
 }
 
-int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
+/* LZ4_loadDictHC() :
+ * LZ4_streamHCPtr is presumed properly initialized */
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
+              const char* dictionary, int dictSize)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
     DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize);
+    assert(LZ4_streamHCPtr != NULL);
     if (dictSize > 64 KB) {
-        dictionary += dictSize - 64 KB;
+        dictionary += (size_t)dictSize - 64 KB;
         dictSize = 64 KB;
     }
-    LZ4_resetStreamHC(LZ4_streamHCPtr, ctxPtr->compressionLevel);
-    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    /* need a full initialization, there are bad side-effects when using resetFast() */
+    {   int const cLevel = ctxPtr->compressionLevel;
+        LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+        LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+    }
+    LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
     ctxPtr->end = (const BYTE*)dictionary + dictSize;
     if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
     return dictSize;
@@ -959,9 +1013,11 @@ static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
                                             limitedOutput_directive limit)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
-    DEBUGLOG(4, "LZ4_compressHC_continue_generic(%p, %p, %d)", LZ4_streamHCPtr, src, *srcSizePtr);
+    DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)",
+                LZ4_streamHCPtr, src, *srcSizePtr);
+    assert(ctxPtr != NULL);
     /* auto-init if forgotten */
-    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) src);
+    if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
 
     /* Check overflow */
     if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
@@ -971,7 +1027,8 @@ static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
     }
 
     /* Check if blocks follow each other */
-    if ((const BYTE*)src != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
+    if ((const BYTE*)src != ctxPtr->end)
+        LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
 
     /* Check overlapping input/dictionary space */
     {   const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
@@ -992,12 +1049,12 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src,
     if (dstCapacity < LZ4_compressBound(srcSize))
         return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
     else
-        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, noLimit);
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited);
 }
 
 int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
 {
-    return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
+    return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput);
 }
 
 
@@ -1016,19 +1073,21 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
     {   U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
         streamPtr->end = (const BYTE*)safeBuffer + dictSize;
         streamPtr->base = streamPtr->end - endIndex;
-        streamPtr->dictLimit = endIndex - dictSize;
-        streamPtr->lowLimit = endIndex - dictSize;
+        streamPtr->dictLimit = endIndex - (U32)dictSize;
+        streamPtr->lowLimit = endIndex - (U32)dictSize;
         if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
     }
     return dictSize;
 }
 
 
-/***********************************
+/***************************************************
 *  Deprecated Functions
-***********************************/
+***************************************************/
+
 /* These functions currently generate deprecation warnings */
-/* Deprecated compression functions */
+
+/* Wrappers for deprecated compression functions */
 int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
 int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
 int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
@@ -1044,25 +1103,26 @@ int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src,
 /* Deprecated streaming functions */
 int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
 
+/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
+ * @return : 0 on success, !=0 if error */
 int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
 {
-    LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
-    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
-    LZ4_resetStreamHC((LZ4_streamHC_t*)state, ((LZ4_streamHC_t*)state)->internal_donotuse.compressionLevel);
-    LZ4HC_init(ctx, (const BYTE*)inputBuffer);
+    LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
+    if (hc4 == NULL) return 1;   /* init failed */
+    LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
     return 0;
 }
 
 void* LZ4_createHC (const char* inputBuffer)
 {
-    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+    LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
     if (hc4 == NULL) return NULL;   /* not enough memory */
-    LZ4_resetStreamHC(hc4, 0 /* compressionLevel */);
-    LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+    LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
     return hc4;
 }
 
-int LZ4_freeHC (void* LZ4HC_Data) {
+int LZ4_freeHC (void* LZ4HC_Data)
+{
     if (!LZ4HC_Data) return 0;  /* support free on NULL */
     FREEMEM(LZ4HC_Data);
     return 0;
@@ -1070,7 +1130,7 @@ int LZ4_freeHC (void* LZ4HC_Data) {
 
 int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
 {
-    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, noLimit);
+    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
 }
 
 int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
@@ -1089,7 +1149,7 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
 
 
 /* ================================================
- * LZ4 Optimal parser (levels 10-12)
+ *  LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
  * ===============================================*/
 typedef struct {
     int price;
@@ -1102,8 +1162,9 @@ typedef struct {
 LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
 {
     int price = litlen;
+    assert(litlen >= 0);
     if (litlen >= (int)RUN_MASK)
-        price += 1 + (litlen-RUN_MASK)/255;
+        price += 1 + ((litlen-(int)RUN_MASK) / 255);
     return price;
 }
 
@@ -1112,11 +1173,13 @@ LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
 LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
 {
     int price = 1 + 2 ; /* token + 16-bit offset */
+    assert(litlen >= 0);
+    assert(mlen >= MINMATCH);
 
     price += LZ4HC_literalsPrice(litlen);
 
     if (mlen >= (int)(ML_MASK+MINMATCH))
-        price += 1 + (mlen-(ML_MASK+MINMATCH))/255;
+        price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255);
 
     return price;
 }
@@ -1175,9 +1238,9 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
     BYTE* oend = op + dstCapacity;
 
     /* init */
-    DEBUGLOG(5, "LZ4HC_compress_optimal");
+    DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
     *srcSizePtr = 0;
-    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
+    if (limit == fillOutput) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
     if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
 
     /* Main Loop */
@@ -1195,7 +1258,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
              int const firstML = firstMatch.len;
              const BYTE* const matchPos = ip - firstMatch.off;
              opSaved = op;
-             if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
+             if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
                  goto _dest_overflow;
              continue;
          }
@@ -1365,9 +1428,9 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
                  if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
                  rPos += ml;
                  assert(ml >= MINMATCH);
-                 assert((offset >= 1) && (offset <= MAX_DISTANCE));
+                 assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
                  opSaved = op;
-                 if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
+                 if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
                      goto _dest_overflow;
          }   }
      }  /* while (ip <= mflimit) */
@@ -1377,7 +1440,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
      {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
          size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
          size_t const totalSize = 1 + litLength + lastRunSize;
-         if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+         if (limit == fillOutput) oend += LASTLITERALS;  /* restore correct value */
          if (limit && (op + totalSize > oend)) {
              if (limit == limitedOutput) return 0;  /* Check output limit */
              /* adapt lastRunSize to fill 'dst' */
@@ -1404,7 +1467,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
      return (int) ((char*)op-dst);
 
  _dest_overflow:
-     if (limit == limitedDestSize) {
+     if (limit == fillOutput) {
          op = opSaved;  /* restore correct out pointer */
          goto _last_literals;
      }
diff --git a/lib/lz4hc.h b/lib/lz4hc.h
index 970fa39..cdc6d89 100644
--- a/lib/lz4hc.h
+++ b/lib/lz4hc.h
@@ -54,7 +54,7 @@ extern "C" {
  *  Block Compression
  **************************************/
 /*! LZ4_compress_HC() :
- *  Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
+ *  Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
  * `dst` must be already allocated.
  *  Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
  *  Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
@@ -77,7 +77,21 @@ LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dst
  *  Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
  */
 LZ4LIB_API int LZ4_sizeofStateHC(void);
-LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+
+
+/*! LZ4_compress_HC_destSize() : v1.9.0+
+ *  Will compress as much data as possible from `src`
+ *  to fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how many bytes were read from `src`
+ */
+LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
+                                  const char* src, char* dst,
+                                        int* srcSizePtr, int targetDstSize,
+                                        int compressionLevel);
 
 
 /*-************************************
@@ -89,46 +103,92 @@ LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* ds
 /*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
  *  These functions create and release memory for LZ4 HC streaming state.
  *  Newly created states are automatically initialized.
- *  Existing states can be re-used several times, using LZ4_resetStreamHC().
- *  These methods are API and ABI stable, they can be used in combination with a DLL.
+ *  The same state can be used multiple times consecutively,
+ *  starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
  */
 LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
 LZ4LIB_API int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
 
-LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
-LZ4LIB_API int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
-
-LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
-
-LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
-
 /*
-  These functions compress data in successive blocks of any size, using previous blocks as dictionary.
+  These functions compress data in successive blocks of any size,
+  using previous blocks as dictionary, to improve compression ratio.
   One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
   There is an exception for ring buffers, which can be smaller than 64 KB.
-  Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
+  Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
+
+  Before starting compression, state must be allocated and properly initialized.
+  LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
+
+  Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
+  or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
+  LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
+  which is automatically the case when state is created using LZ4_createStreamHC().
+
+  After reset, a first "fictional block" can be designated as initial dictionary,
+  using LZ4_loadDictHC() (Optional).
+
+  Invoke LZ4_compress_HC_continue() to compress each successive block.
+  The number of blocks is unlimited.
+  Previous input blocks, including initial dictionary when present,
+  must remain accessible and unmodified during compression.
+
+  It's allowed to update compression level anytime between blocks,
+  using LZ4_setCompressionLevel() (experimental).
+
+  'dst' buffer should be sized to handle worst case scenarios
+  (see LZ4_compressBound(), it ensures compression success).
+  In case of failure, the API does not guarantee recovery,
+  so the state _must_ be reset.
+  To ensure compression success
+  whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
+  consider using LZ4_compress_HC_continue_destSize().
+
+  Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
+  it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
+
+  After completing a streaming compression,
+  it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
+  just by resetting it, using LZ4_resetStreamHC_fast().
+*/
 
-  Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
-  A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
+LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel);   /* v1.9.0+ */
+LZ4LIB_API int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
+                                   const char* src, char* dst,
+                                         int srcSize, int maxDstSize);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
+ *  Similar to LZ4_compress_HC_continue(),
+ *  but will read as much data as possible from `src`
+ *  to fit into `targetDstSize` budget.
+ *  Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how many bytes were read from `src`.
+ *           Note that this function may not consume the entire input.
+ */
+LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+                                           const char* src, char* dst,
+                                                 int* srcSizePtr, int targetDstSize);
+
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
 
-  Then, use LZ4_compress_HC_continue() to compress each successive block.
-  Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
-  'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success.
-  Because in case of failure, the API does not guarantee context recovery, and context will have to be reset.
-  If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead.
 
-  If, for any reason, previous data block can't be preserved unmodified in memory for next compression block,
-  you can save it to a more stable memory space, using LZ4_saveDictHC().
-  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
-*/
 
+/*^**********************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***********************************************/
 
-/*-**************************************************************
+/*-******************************************************************
  * PRIVATE DEFINITIONS :
- * Do not use these definitions.
- * They are exposed to allow static allocation of `LZ4_streamHC_t`.
- * Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
- ****************************************************************/
+ * Do not use these definitions directly.
+ * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Declare an `LZ4_streamHC_t` directly, rather than any type below.
+ * Even then, only do so in the context of static linking, as definitions may change between versions.
+ ********************************************************************/
+
 #define LZ4HC_DICTIONARY_LOGSIZE 16
 #define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
 #define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
@@ -153,7 +213,9 @@ struct LZ4HC_CCtx_internal
     uint32_t   lowLimit;        /* below that point, no more dict */
     uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
     short      compressionLevel;
-    short      favorDecSpeed;
+    int8_t     favorDecSpeed;   /* favor decompression speed if this flag set,
+                                   otherwise, favor compression ratio */
+    int8_t     dirty;           /* stream has to be fully reset if this flag is set */
     const LZ4HC_CCtx_internal* dictCtx;
 };
 
@@ -171,26 +233,43 @@ struct LZ4HC_CCtx_internal
     unsigned int   lowLimit;         /* below that point, no more dict */
     unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
     short          compressionLevel;
-    short          favorDecSpeed;
+    char           favorDecSpeed;    /* favor decompression speed if this flag set,
+                                        otherwise, favor compression ratio */
+    char           dirty;            /* stream has to be fully reset if this flag is set */
     const LZ4HC_CCtx_internal* dictCtx;
 };
 
 #endif
 
-#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 262200 */
+
+/* Do not use these definitions directly !
+ * Declare or allocate an LZ4_streamHC_t instead.
+ */
+#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 56 : 0) /* AS400*/ ) /* 262200 or 262256*/
 #define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
 union LZ4_streamHC_u {
     size_t table[LZ4_STREAMHCSIZE_SIZET];
     LZ4HC_CCtx_internal internal_donotuse;
-};   /* previously typedef'd to LZ4_streamHC_t */
-/*
-  LZ4_streamHC_t :
-  This structure allows static allocation of LZ4 HC streaming state.
-  State must be initialized using LZ4_resetStreamHC() before first use.
+}; /* previously typedef'd to LZ4_streamHC_t */
 
-  Static allocation shall only be used in combination with static linking.
-  When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
-*/
+/* LZ4_streamHC_t :
+ * This structure allows static allocation of LZ4 HC streaming state.
+ * This can be used to allocate statically, on stack, or as part of a larger structure.
+ *
+ * Such state **must** be initialized using LZ4_initStreamHC() before first use.
+ *
+ * Note that invoking LZ4_initStreamHC() is not required when
+ * the state was created using LZ4_createStreamHC() (which is recommended).
+ * Using the normal builder, a newly created state is automatically initialized.
+ *
+ * Static allocation shall only be used in combination with static linking.
+ */
+
+/* LZ4_initStreamHC() : v1.9.0+
+ * Required before first use of a statically allocated LZ4_streamHC_t.
+ * Before v1.9.0 : use LZ4_resetStreamHC() instead
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size);
 
 
 /*-************************************
@@ -201,11 +280,11 @@ union LZ4_streamHC_u {
 /* deprecated compression functions */
 LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC               (const char* source, char* dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2              (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
 LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
 LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
 LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
@@ -221,10 +300,21 @@ LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_comp
 LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
 LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
 LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API   int   LZ4_freeHC (void* LZ4HC_Data);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue               (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
 LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
 LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int   LZ4_sizeofStreamStateHC(void);
-LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
+ * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
+ * which is now the recommended function to start a new stream of blocks,
+ * but cannot be used to initialize a memory segment containing arbitrary garbage data.
+ *
+ * It is recommended to switch to LZ4_initStreamHC().
+ * LZ4_resetStreamHC() will generate deprecation warnings in a future version.
+ */
+LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
 
 
 #if defined (__cplusplus)
@@ -250,44 +340,22 @@ LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API  int   LZ4_resetStr
 extern "C" {
 #endif
 
-/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental)
- *  Will try to compress as much data from `src` as possible
- *  that can fit into `targetDstSize` budget.
- *  Result is provided in 2 parts :
- * @return : the number of bytes written into 'dst'
- *           or 0 if compression fails.
- * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`
- */
-int LZ4_compress_HC_destSize(void* LZ4HC_Data,
-                             const char* src, char* dst,
-                             int* srcSizePtr, int targetDstSize,
-                             int compressionLevel);
-
-/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
- *  Similar as LZ4_compress_HC_continue(),
- *  but will read a variable nb of bytes from `src`
- *  to fit into `targetDstSize` budget.
- *  Result is provided in 2 parts :
- * @return : the number of bytes written into 'dst'
- *           or 0 if compression fails.
- * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`.
- */
-int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
-                            const char* src, char* dst,
-                            int* srcSizePtr, int targetDstSize);
-
-/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
- *  It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*()
+/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
+ *  It's possible to change compression level
+ *  between successive invocations of LZ4_compress_HC_continue*()
+ *  for dynamic adaptation.
  */
-void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
 
-/*! LZ4_favorDecompressionSpeed() : v1.8.2 (experimental)
- *  Parser will select decisions favoring decompression over compression ratio.
- *  Only work at highest compression settings (level >= LZ4HC_CLEVEL_OPT_MIN)
+/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
+ *  Opt. Parser will favor decompression speed over compression ratio.
+ *  Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
  */
-void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
 
-/*! LZ4_resetStreamHC_fast() :
+/*! LZ4_resetStreamHC_fast() : v1.9.0+
  *  When an LZ4_streamHC_t is known to be in a internally coherent state,
  *  it can often be prepared for a new compression with almost no work, only
  *  sometimes falling back to the full, expensive reset that is always required
@@ -304,8 +372,14 @@ void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
  *  - the stream was in an indeterminate state and was used in a compression
  *    call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
  *    returned success
+ *
+ *  Note:
+ *  A stream that was last used in a compression call that returned an error
+ *  may be passed to this function. However, it will be fully reset, which will
+ *  clear any existing history and settings from the context.
  */
-void LZ4_resetStreamHC_fast(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
 
 /*! LZ4_compress_HC_extStateHC_fastReset() :
  *  A variant of LZ4_compress_HC_extStateHC().
@@ -318,7 +392,11 @@ void LZ4_resetStreamHC_fast(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLeve
  *  LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
  *  call to LZ4_resetStreamHC().
  */
-int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
+    void* state,
+    const char* src, char* dst,
+    int srcSize, int dstCapacity,
+    int compressionLevel);
 
 /*! LZ4_attach_HC_dictionary() :
  *  This is an experimental API that allows for the efficient use of a
@@ -345,7 +423,9 @@ int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* ds
  *  stream (and source buffer) must remain in-place / accessible / unchanged
  *  through the lifetime of the stream session.
  */
-LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream);
+LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
+          LZ4_streamHC_t *working_stream,
+    const LZ4_streamHC_t *dictionary_stream);
 
 #if defined (__cplusplus)
 }
diff --git a/lib/xxhash.c b/lib/xxhash.c
index 3fc97fd..ff28749 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -50,20 +50,26 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+                        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+                        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define XXH_FORCE_MEMORY_ACCESS 2
 #  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+                    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+                    || defined(__ARM_ARCH_7S__) ))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
 
 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
- * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
- * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
- * By default, this option is disabled. To enable it, uncomment below define :
+ * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks input for null pointer.
+ * If it is, the result for null input pointers is the same as for a null-length input.
  */
-/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
+#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
 
 /*!XXH_FORCE_NATIVE_FORMAT :
  * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
@@ -80,8 +86,9 @@
 /*!XXH_FORCE_ALIGN_CHECK :
  * This is a minor performance trick, only useful with lots of very small keys.
  * It means : check for aligned/unaligned input.
- * The check costs one initial branch per hash; set to 0 when the input data
- * is guaranteed to be aligned.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
  */
 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
 #  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
@@ -104,6 +111,8 @@ static void  XXH_free  (void* p)  { free(p); }
 #include <string.h>
 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
+#include <assert.h>   /* assert */
+
 #define XXH_STATIC_LINKING_ONLY
 #include "xxhash.h"
 
@@ -113,40 +122,35 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
 ***************************************/
 #ifdef _MSC_VER    /* Visual Studio */
 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
-#endif
-
-#ifndef XXH_FORCE_INLINE
-#  ifdef _MSC_VER    /* Visual Studio */
-#    define XXH_FORCE_INLINE static __forceinline
-#  else
-#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#      ifdef __GNUC__
-#        define XXH_FORCE_INLINE static inline __attribute__((always_inline))
-#      else
-#        define XXH_FORCE_INLINE static inline
-#      endif
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
 #    else
-#      define XXH_FORCE_INLINE static
-#    endif /* __STDC_VERSION__ */
-#  endif  /* _MSC_VER */
-#endif /* XXH_FORCE_INLINE */
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
 
 
 /* *************************************
 *  Basic Types
 ***************************************/
 #ifndef MEM_MODULE
-# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
 #   include <stdint.h>
     typedef uint8_t  BYTE;
     typedef uint16_t U16;
     typedef uint32_t U32;
-    typedef  int32_t S32;
 # else
     typedef unsigned char      BYTE;
     typedef unsigned short     U16;
     typedef unsigned int       U32;
-    typedef   signed int       S32;
 # endif
 #endif
 
@@ -213,8 +217,12 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
 
 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
 #ifndef XXH_CPU_LITTLE_ENDIAN
-    static const int g_one = 1;
-#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
+static int XXH_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
 #endif
 
 
@@ -223,7 +231,7 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
 *****************************/
 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
 
-XXH_FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
@@ -231,7 +239,7 @@ XXH_FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, X
         return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
 }
 
-XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE32_align(ptr, endian, XXH_unaligned);
 }
@@ -245,12 +253,12 @@ static U32 XXH_readBE32(const void* ptr)
 /* *************************************
 *  Macros
 ***************************************/
-#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */
+#define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }  /* use after variable declarations */
 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 
 
 /* *******************************************************************
-*  32-bits hash functions
+*  32-bit hash functions
 *********************************************************************/
 static const U32 PRIME32_1 = 2654435761U;
 static const U32 PRIME32_2 = 2246822519U;
@@ -266,14 +274,89 @@ static U32 XXH32_round(U32 seed, U32 input)
     return seed;
 }
 
-XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+/* mix all bits */
+static U32 XXH32_avalanche(U32 h32)
+{
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+    return(h32);
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+static U32
+XXH32_finalize(U32 h32, const void* ptr, size_t len,
+                XXH_endianess endian, XXH_alignment align)
+
+{
+    const BYTE* p = (const BYTE*)ptr;
+
+#define PROCESS1               \
+    h32 += (*p++) * PRIME32_5; \
+    h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+
+#define PROCESS4                         \
+    h32 += XXH_get32bits(p) * PRIME32_3; \
+    p+=4;                                \
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+
+    switch(len&15)  /* or switch(bEnd - p) */
+    {
+      case 12:      PROCESS4;
+                    /* fallthrough */
+      case 8:       PROCESS4;
+                    /* fallthrough */
+      case 4:       PROCESS4;
+                    return XXH32_avalanche(h32);
+
+      case 13:      PROCESS4;
+                    /* fallthrough */
+      case 9:       PROCESS4;
+                    /* fallthrough */
+      case 5:       PROCESS4;
+                    PROCESS1;
+                    return XXH32_avalanche(h32);
+
+      case 14:      PROCESS4;
+                    /* fallthrough */
+      case 10:      PROCESS4;
+                    /* fallthrough */
+      case 6:       PROCESS4;
+                    PROCESS1;
+                    PROCESS1;
+                    return XXH32_avalanche(h32);
+
+      case 15:      PROCESS4;
+                    /* fallthrough */
+      case 11:      PROCESS4;
+                    /* fallthrough */
+      case 7:       PROCESS4;
+                    /* fallthrough */
+      case 3:       PROCESS1;
+                    /* fallthrough */
+      case 2:       PROCESS1;
+                    /* fallthrough */
+      case 1:       PROCESS1;
+                    /* fallthrough */
+      case 0:       return XXH32_avalanche(h32);
+    }
+    assert(0);
+    return h32;   /* reaching this point is deemed impossible */
+}
+
+
+FORCE_INLINE U32
+XXH32_endian_align(const void* input, size_t len, U32 seed,
+                    XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
     U32 h32;
-#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
 
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
     if (p==NULL) {
         len=0;
         bEnd=p=(const BYTE*)(size_t)16;
@@ -281,7 +364,7 @@ XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed,
 #endif
 
     if (len>=16) {
-        const BYTE* const limit = bEnd - 16;
+        const BYTE* const limit = bEnd - 15;
         U32 v1 = seed + PRIME32_1 + PRIME32_2;
         U32 v2 = seed + PRIME32_2;
         U32 v3 = seed + 0;
@@ -292,34 +375,17 @@ XXH_FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed,
             v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
             v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
             v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
-        } while (p<=limit);
+        } while (p < limit);
 
-        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
+            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
     } else {
         h32  = seed + PRIME32_5;
     }
 
-    h32 += (U32) len;
-
-    while (p+4<=bEnd) {
-        h32 += XXH_get32bits(p) * PRIME32_3;
-        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h32 += (*p) * PRIME32_5;
-        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
-        p++;
-    }
-
-    h32 ^= h32 >> 15;
-    h32 *= PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= PRIME32_3;
-    h32 ^= h32 >> 16;
+    h32 += (U32)len;
 
-    return h32;
+    return XXH32_finalize(h32, p, len&15, endian, align);
 }
 
 
@@ -371,74 +437,81 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
 {
     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
-    memset(&state, 0, sizeof(state)-4);   /* do not write into reserved, for future removal */
+    memset(&state, 0, sizeof(state));
     state.v1 = seed + PRIME32_1 + PRIME32_2;
     state.v2 = seed + PRIME32_2;
     state.v3 = seed + 0;
     state.v4 = seed - PRIME32_1;
-    memcpy(statePtr, &state, sizeof(state));
+    /* do not write into reserved, planned to be removed in a future version */
+    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
     return XXH_OK;
 }
 
 
-XXH_FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+FORCE_INLINE XXH_errorcode
+XXH32_update_endian(XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
-    const BYTE* p = (const BYTE*)input;
-    const BYTE* const bEnd = p + len;
-
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
-    if (input==NULL) return XXH_ERROR;
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
 #endif
 
-    state->total_len_32 += (unsigned)len;
-    state->large_len |= (len>=16) | (state->total_len_32>=16);
+    {   const BYTE* p = (const BYTE*)input;
+        const BYTE* const bEnd = p + len;
 
-    if (state->memsize + len < 16)  {   /* fill in tmp buffer */
-        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
-        state->memsize += (unsigned)len;
-        return XXH_OK;
-    }
+        state->total_len_32 += (unsigned)len;
+        state->large_len |= (len>=16) | (state->total_len_32>=16);
 
-    if (state->memsize) {   /* some data left from previous update */
-        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
-        {   const U32* p32 = state->mem32;
-            state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
-            state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
-            state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
-            state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
+        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+            state->memsize += (unsigned)len;
+            return XXH_OK;
         }
-        p += 16-state->memsize;
-        state->memsize = 0;
-    }
-
-    if (p <= bEnd-16) {
-        const BYTE* const limit = bEnd - 16;
-        U32 v1 = state->v1;
-        U32 v2 = state->v2;
-        U32 v3 = state->v3;
-        U32 v4 = state->v4;
 
-        do {
-            v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
-            v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
-            v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
-            v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
-        } while (p<=limit);
+        if (state->memsize) {   /* some data left from previous update */
+            XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+            {   const U32* p32 = state->mem32;
+                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+            }
+            p += 16-state->memsize;
+            state->memsize = 0;
+        }
 
-        state->v1 = v1;
-        state->v2 = v2;
-        state->v3 = v3;
-        state->v4 = v4;
-    }
+        if (p <= bEnd-16) {
+            const BYTE* const limit = bEnd - 16;
+            U32 v1 = state->v1;
+            U32 v2 = state->v2;
+            U32 v3 = state->v3;
+            U32 v4 = state->v4;
+
+            do {
+                v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+                v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+                v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+                v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
 
-    if (p < bEnd) {
-        XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
-        state->memsize = (unsigned)(bEnd-p);
+        if (p < bEnd) {
+            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
     }
 
     return XXH_OK;
 }
 
+
 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
@@ -450,40 +523,23 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void*
 }
 
 
-
-XXH_FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+FORCE_INLINE U32
+XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
 {
-    const BYTE * p = (const BYTE*)state->mem32;
-    const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
     U32 h32;
 
     if (state->large_len) {
-        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+        h32 = XXH_rotl32(state->v1, 1)
+            + XXH_rotl32(state->v2, 7)
+            + XXH_rotl32(state->v3, 12)
+            + XXH_rotl32(state->v4, 18);
     } else {
         h32 = state->v3 /* == seed */ + PRIME32_5;
     }
 
     h32 += state->total_len_32;
 
-    while (p+4<=bEnd) {
-        h32 += XXH_readLE32(p, endian) * PRIME32_3;
-        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h32 += (*p) * PRIME32_5;
-        h32  = XXH_rotl32(h32, 11) * PRIME32_1;
-        p++;
-    }
-
-    h32 ^= h32 >> 15;
-    h32 *= PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= PRIME32_3;
-    h32 ^= h32 >> 16;
-
-    return h32;
+    return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned);
 }
 
 
@@ -503,7 +559,7 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
 /*! Default XXH result types are basic unsigned 32 and 64 bits.
 *   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
 *   These functions allow transformation of hash result into and from its canonical format.
-*   This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
+*   This way, hash values can be written into a file or buffer, remaining comparable across different systems.
 */
 
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
@@ -522,18 +578,21 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
 #ifndef XXH_NO_LONG_LONG
 
 /* *******************************************************************
-*  64-bits hash functions
+*  64-bit hash functions
 *********************************************************************/
 
 /*======   Memory access   ======*/
 
 #ifndef MEM_MODULE
 # define MEM_MODULE
-# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
 #   include <stdint.h>
     typedef uint64_t U64;
 # else
-    typedef unsigned long long U64;   /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
+    /* if compiler doesn't support unsigned long long, replace by another 64-bit type */
+    typedef unsigned long long U64;
 # endif
 #endif
 
@@ -583,7 +642,7 @@ static U64 XXH_swap64 (U64 x)
 }
 #endif
 
-XXH_FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
 {
     if (align==XXH_unaligned)
         return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
@@ -591,7 +650,7 @@ XXH_FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, X
         return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
 }
 
-XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
 {
     return XXH_readLE64_align(ptr, endian, XXH_unaligned);
 }
@@ -626,14 +685,137 @@ static U64 XXH64_mergeRound(U64 acc, U64 val)
     return acc;
 }
 
-XXH_FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+static U64 XXH64_avalanche(U64 h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+static U64
+XXH64_finalize(U64 h64, const void* ptr, size_t len,
+               XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)ptr;
+
+#define PROCESS1_64            \
+    h64 ^= (*p++) * PRIME64_5; \
+    h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64          \
+    h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
+    p+=4;                    \
+    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 {        \
+    U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
+    p+=8;                    \
+    h64 ^= k1;               \
+    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
+}
+
+    switch(len&31) {
+      case 24: PROCESS8_64;
+                    /* fallthrough */
+      case 16: PROCESS8_64;
+                    /* fallthrough */
+      case  8: PROCESS8_64;
+               return XXH64_avalanche(h64);
+
+      case 28: PROCESS8_64;
+                    /* fallthrough */
+      case 20: PROCESS8_64;
+                    /* fallthrough */
+      case 12: PROCESS8_64;
+                    /* fallthrough */
+      case  4: PROCESS4_64;
+               return XXH64_avalanche(h64);
+
+      case 25: PROCESS8_64;
+                    /* fallthrough */
+      case 17: PROCESS8_64;
+                    /* fallthrough */
+      case  9: PROCESS8_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 29: PROCESS8_64;
+                    /* fallthrough */
+      case 21: PROCESS8_64;
+                    /* fallthrough */
+      case 13: PROCESS8_64;
+                    /* fallthrough */
+      case  5: PROCESS4_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 26: PROCESS8_64;
+                    /* fallthrough */
+      case 18: PROCESS8_64;
+                    /* fallthrough */
+      case 10: PROCESS8_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 30: PROCESS8_64;
+                    /* fallthrough */
+      case 22: PROCESS8_64;
+                    /* fallthrough */
+      case 14: PROCESS8_64;
+                    /* fallthrough */
+      case  6: PROCESS4_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 27: PROCESS8_64;
+                    /* fallthrough */
+      case 19: PROCESS8_64;
+                    /* fallthrough */
+      case 11: PROCESS8_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 31: PROCESS8_64;
+                    /* fallthrough */
+      case 23: PROCESS8_64;
+                    /* fallthrough */
+      case 15: PROCESS8_64;
+                    /* fallthrough */
+      case  7: PROCESS4_64;
+                    /* fallthrough */
+      case  3: PROCESS1_64;
+                    /* fallthrough */
+      case  2: PROCESS1_64;
+                    /* fallthrough */
+      case  1: PROCESS1_64;
+                    /* fallthrough */
+      case  0: return XXH64_avalanche(h64);
+    }
+
+    /* impossible to reach */
+    assert(0);
+    return 0;  /* unreachable, but some compilers complain without it */
+}
+
+FORCE_INLINE U64
+XXH64_endian_align(const void* input, size_t len, U64 seed,
+                XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
-    const BYTE* const bEnd = p + len;
+    const BYTE* bEnd = p + len;
     U64 h64;
-#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
 
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
     if (p==NULL) {
         len=0;
         bEnd=p=(const BYTE*)(size_t)32;
@@ -666,32 +848,7 @@ XXH_FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed,
 
     h64 += (U64) len;
 
-    while (p+8<=bEnd) {
-        U64 const k1 = XXH64_round(0, XXH_get64bits(p));
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
-        p+=8;
-    }
-
-    if (p+4<=bEnd) {
-        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
-        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h64 ^= (*p) * PRIME64_5;
-        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
-        p++;
-    }
-
-    h64 ^= h64 >> 33;
-    h64 *= PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= PRIME64_3;
-    h64 ^= h64 >> 32;
-
-    return h64;
+    return XXH64_finalize(h64, p, len, endian, align);
 }
 
 
@@ -741,65 +898,71 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t
 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
 {
     XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
-    memset(&state, 0, sizeof(state)-8);   /* do not write into reserved, for future removal */
+    memset(&state, 0, sizeof(state));
     state.v1 = seed + PRIME64_1 + PRIME64_2;
     state.v2 = seed + PRIME64_2;
     state.v3 = seed + 0;
     state.v4 = seed - PRIME64_1;
-    memcpy(statePtr, &state, sizeof(state));
+     /* do not write into reserved, planned to be removed in a future version */
+    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
     return XXH_OK;
 }
 
-XXH_FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+FORCE_INLINE XXH_errorcode
+XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
 {
-    const BYTE* p = (const BYTE*)input;
-    const BYTE* const bEnd = p + len;
-
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
-    if (input==NULL) return XXH_ERROR;
+    if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+        return XXH_OK;
+#else
+        return XXH_ERROR;
 #endif
 
-    state->total_len += len;
-
-    if (state->memsize + len < 32) {  /* fill in tmp buffer */
-        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
-        state->memsize += (U32)len;
-        return XXH_OK;
-    }
+    {   const BYTE* p = (const BYTE*)input;
+        const BYTE* const bEnd = p + len;
 
-    if (state->memsize) {   /* tmp buffer is full */
-        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
-        state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
-        state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
-        state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
-        state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
-        p += 32-state->memsize;
-        state->memsize = 0;
-    }
+        state->total_len += len;
 
-    if (p+32 <= bEnd) {
-        const BYTE* const limit = bEnd - 32;
-        U64 v1 = state->v1;
-        U64 v2 = state->v2;
-        U64 v3 = state->v3;
-        U64 v4 = state->v4;
+        if (state->memsize + len < 32) {  /* fill in tmp buffer */
+            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+            state->memsize += (U32)len;
+            return XXH_OK;
+        }
 
-        do {
-            v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
-            v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
-            v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
-            v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
-        } while (p<=limit);
+        if (state->memsize) {   /* tmp buffer is full */
+            XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+            p += 32-state->memsize;
+            state->memsize = 0;
+        }
 
-        state->v1 = v1;
-        state->v2 = v2;
-        state->v3 = v3;
-        state->v4 = v4;
-    }
+        if (p+32 <= bEnd) {
+            const BYTE* const limit = bEnd - 32;
+            U64 v1 = state->v1;
+            U64 v2 = state->v2;
+            U64 v3 = state->v3;
+            U64 v4 = state->v4;
+
+            do {
+                v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+                v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+                v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+                v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+            } while (p<=limit);
+
+            state->v1 = v1;
+            state->v2 = v2;
+            state->v3 = v3;
+            state->v4 = v4;
+        }
 
-    if (p < bEnd) {
-        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
-        state->memsize = (unsigned)(bEnd-p);
+        if (p < bEnd) {
+            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
     }
 
     return XXH_OK;
@@ -815,10 +978,8 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void*
         return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
 }
 
-XXH_FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
 {
-    const BYTE * p = (const BYTE*)state->mem64;
-    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
     U64 h64;
 
     if (state->total_len >= 32) {
@@ -833,37 +994,12 @@ XXH_FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endian
         h64 = XXH64_mergeRound(h64, v3);
         h64 = XXH64_mergeRound(h64, v4);
     } else {
-        h64  = state->v3 + PRIME64_5;
+        h64  = state->v3 /*seed*/ + PRIME64_5;
     }
 
     h64 += (U64) state->total_len;
 
-    while (p+8<=bEnd) {
-        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
-        p+=8;
-    }
-
-    if (p+4<=bEnd) {
-        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
-        h64  = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h64 ^= (*p) * PRIME64_5;
-        h64  = XXH_rotl64(h64, 11) * PRIME64_1;
-        p++;
-    }
-
-    h64 ^= h64 >> 33;
-    h64 *= PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= PRIME64_3;
-    h64 ^= h64 >> 32;
-
-    return h64;
+    return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned);
 }
 
 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
diff --git a/lib/xxhash.h b/lib/xxhash.h
index 870a6d9..d6bad94 100644
--- a/lib/xxhash.h
+++ b/lib/xxhash.h
@@ -57,8 +57,8 @@ Q.Score is a measure of quality of the hash function.
 It depends on successfully passing SMHasher test set.
 10 is a perfect score.
 
-A 64-bits version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bits applications only.
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
 Name     Speed on 64 bits    Speed on 32 bits
 XXH64       13.8 GB/s            1.9 GB/s
 XXH32        6.8 GB/s            6.0 GB/s
@@ -80,18 +80,19 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 
 /* ****************************
-*  API modifier
-******************************/
-/** XXH_PRIVATE_API
-*   This is useful to include xxhash functions in `static` mode
-*   in order to inline them, and remove their symbol from the public list.
-*   Methodology :
-*     #define XXH_PRIVATE_API
-*     #include "xxhash.h"
-*   `xxhash.c` is automatically included.
-*   It's not useful to compile and link it as a separate module.
-*/
-#ifdef XXH_PRIVATE_API
+ *  API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ *  This is useful to include xxhash functions in `static` mode
+ *  in order to inline them, and remove their symbol from the public list.
+ *  Inlining can offer dramatic performance improvement on small keys.
+ *  Methodology :
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  ifndef XXH_STATIC_LINKING_ONLY
 #    define XXH_STATIC_LINKING_ONLY
 #  endif
@@ -102,23 +103,24 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 #  elif defined(_MSC_VER)
 #    define XXH_PUBLIC_API static __inline
 #  else
-#    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
+     /* this version may generate warnings for unused static functions */
+#    define XXH_PUBLIC_API static
 #  endif
 #else
 #  define XXH_PUBLIC_API   /* do nothing */
-#endif /* XXH_PRIVATE_API */
-
-/*!XXH_NAMESPACE, aka Namespace Emulation :
-
-If you want to include _and expose_ xxHash functions from within your own library,
-but also want to avoid symbol collisions with other libraries which may also include xxHash,
-
-you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
-with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
-
-Note that no change is required within the calling program as long as it includes `xxhash.h` :
-regular symbol name will be automatically translated by this header.
-*/
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
 #ifdef XXH_NAMESPACE
 #  define XXH_CAT(A,B) A##B
 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
@@ -149,18 +151,18 @@ regular symbol name will be automatically translated by this header.
 ***************************************/
 #define XXH_VERSION_MAJOR    0
 #define XXH_VERSION_MINOR    6
-#define XXH_VERSION_RELEASE  2
+#define XXH_VERSION_RELEASE  5
 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
 
 
 /*-**********************************************************************
-*  32-bits hash
+*  32-bit hash
 ************************************************************************/
-typedef unsigned int       XXH32_hash_t;
+typedef unsigned int XXH32_hash_t;
 
 /*! XXH32() :
-    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
     The memory between input & input+length must be valid (allocated and read-accessible).
     "seed" can be used to alter the result predictably.
     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
@@ -177,26 +179,25 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
 XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
 
 /*
-These functions generate the xxHash of an input provided in multiple segments.
-Note that, for small input, they are slower than single-call functions, due to state management.
-For small input, prefer `XXH32()` and `XXH64()` .
-
-XXH state must first be allocated, using XXH*_createState() .
-
-Start a new hash by initializing state with a seed, using XXH*_reset().
-
-Then, feed the hash state by calling XXH*_update() as many times as necessary.
-Obviously, input must be allocated and read accessible.
-The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
-
-Finally, a hash value can be produced anytime, by using XXH*_digest().
-This function returns the nn-bits hash as an int or long long.
-
-It's still possible to continue inserting input into the hash state after a digest,
-and generate some new hashes later on, by calling again XXH*_digest().
-
-When done, free XXH state space if it was allocated dynamically.
-*/
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bit hash as an unsigned int or unsigned long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
 
 /*======   Canonical representation   ======*/
 
@@ -205,22 +206,22 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
-*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
-*  These functions allow transformation of hash result into and from its canonical format.
-*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
-*/
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
 
 
 #ifndef XXH_NO_LONG_LONG
 /*-**********************************************************************
-*  64-bits hash
+*  64-bit hash
 ************************************************************************/
 typedef unsigned long long XXH64_hash_t;
 
 /*! XXH64() :
-    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+    Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
     "seed" can be used to alter the result predictably.
-    This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
+    This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
 */
 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
 
@@ -241,48 +242,82 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #endif  /* XXH_NO_LONG_LONG */
 
 
+
 #ifdef XXH_STATIC_LINKING_ONLY
 
 /* ================================================================================================
-   This section contains definitions which are not guaranteed to remain stable.
+   This section contains declarations which are not guaranteed to remain stable.
    They may change in future versions, becoming incompatible with a different version of the library.
-   They shall only be used with static linking.
-   Never use these definitions in association with dynamic linking !
+   These declarations should only be used with static linking.
+   Never use them in association with dynamic linking !
 =================================================================================================== */
 
-/* These definitions are only meant to allow allocation of XXH state
-   statically, on stack, or in a struct for example.
-   Do not use members directly. */
-
-   struct XXH32_state_s {
-       unsigned total_len_32;
-       unsigned large_len;
-       unsigned v1;
-       unsigned v2;
-       unsigned v3;
-       unsigned v4;
-       unsigned mem32[4];   /* buffer defined as U32 for alignment */
-       unsigned memsize;
-       unsigned reserved;   /* never read nor write, will be removed in a future version */
-   };   /* typedef'd to XXH32_state_t */
-
-#ifndef XXH_NO_LONG_LONG
-   struct XXH64_state_s {
-       unsigned long long total_len;
-       unsigned long long v1;
-       unsigned long long v2;
-       unsigned long long v3;
-       unsigned long long v4;
-       unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
-       unsigned memsize;
-       unsigned reserved[2];          /* never read nor write, will be removed in a future version */
-   };   /* typedef'd to XXH64_state_t */
+/* These definitions are only present to allow
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+
+struct XXH32_state_s {
+   uint32_t total_len_32;
+   uint32_t large_len;
+   uint32_t v1;
+   uint32_t v2;
+   uint32_t v3;
+   uint32_t v4;
+   uint32_t mem32[4];
+   uint32_t memsize;
+   uint32_t reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s {
+   uint64_t total_len;
+   uint64_t v1;
+   uint64_t v2;
+   uint64_t v3;
+   uint64_t v4;
+   uint64_t mem64[4];
+   uint32_t memsize;
+   uint32_t reserved[2];          /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+
+# else
+
+struct XXH32_state_s {
+   unsigned total_len_32;
+   unsigned large_len;
+   unsigned v1;
+   unsigned v2;
+   unsigned v3;
+   unsigned v4;
+   unsigned mem32[4];
+   unsigned memsize;
+   unsigned reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+#   ifndef XXH_NO_LONG_LONG  /* remove 64-bit support */
+struct XXH64_state_s {
+   unsigned long long total_len;
+   unsigned long long v1;
+   unsigned long long v2;
+   unsigned long long v3;
+   unsigned long long v4;
+   unsigned long long mem64[4];
+   unsigned memsize;
+   unsigned reserved[2];     /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+#    endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
 #endif
 
-#  ifdef XXH_PRIVATE_API
-#    include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
-#  endif
-
 #endif /* XXH_STATIC_LINKING_ONLY */
 
 
diff --git a/programs/Makefile b/programs/Makefile
index bd33d9b..92fd683 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -94,7 +94,7 @@ lz4.1: lz4.1.md $(LIBVER_SRC)
 man: lz4.1
 
 clean-man:
-	rm lz4.1
+	$(RM) lz4.1
 
 preview-man: clean-man man
 	man ./lz4.1
@@ -109,7 +109,7 @@ clean:
 #-----------------------------------------------------------------------------
 # make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD))
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku MidnightBSD MINGW32_NT-6.1 MINGW64_NT-6.1 MINGW32_NT-10.0 MINGW64_NT-10.0))
 
 unlz4: lz4
 	ln -s lz4$(EXT) unlz4$(EXT)
diff --git a/programs/README.md b/programs/README.md
index 2ad0449..c1995af 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -1,18 +1,26 @@
 Command Line Interface for LZ4 library
 ============================================
 
-Command Line Interface (CLI) can be created using the `make` command without any additional parameters.
-There are also multiple targets that create different variations of CLI:
+### Build
+The Command Line Interface (CLI) can be generated
+using the `make` command without any additional parameters.
+
+The `Makefile` script supports all [standard conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html),
+including standard targets (`all`, `install`, `clean`, etc.)
+and standard variables (`CC`, `CFLAGS`, `CPPFLAGS`, etc.).
+
+For advanced use cases, there are targets that build different variations of the CLI:
 - `lz4` : default CLI, with a command line syntax close to gzip
 - `lz4c` : Same as `lz4` with additional support legacy lz4 commands (incompatible with gzip)
 - `lz4c32` : Same as `lz4c`, but forced to compile in 32-bits mode
 
+The CLI generates and decodes [LZ4-compressed frames](../doc/lz4_Frame_format.md).
+
 
 #### Aggregation of parameters
 CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
 
 
-
 #### Benchmark in Command Line Interface
 CLI includes in-memory compression benchmark module for lz4.
 The benchmark is conducted using a given filename.
@@ -38,7 +46,9 @@ Arguments :
  -9     : High compression
  -d     : decompression (default for .lz4 extension)
  -z     : force compression
+ -D FILE: use FILE as dictionary
  -f     : overwrite output without prompting
+ -k     : preserve source file(s)  (default)
 --rm    : remove source file(s) after successful de/compression
  -h/-H  : display help/long help and exit
 
@@ -51,17 +61,20 @@ Advanced arguments :
  -m     : multiple input files (implies automatic output filenames)
  -r     : operate recursively on directories (sets also -m)
  -l     : compress using Legacy format (Linux kernel compression)
- -B#    : Block size [4-7] (default : 7)
+ -B#    : cut file into blocks of size # bytes [32+]
+                     or predefined block size [4-7] (default: 7)
  -BD    : Block dependency (improve compression ratio)
+ -BX    : enable block checksum (default:disabled)
 --no-frame-crc : disable stream checksum (default:enabled)
 --content-size : compressed frame includes original size (default:not present)
 --[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)
+--favor-decSpeed: compressed files decompress faster, but are less compressed
+--fast[=#]: switch to ultra fast compression level (default: 1)
+
 Benchmark arguments :
  -b#    : benchmark file(s), using # compression level (default : 1)
  -e#    : test all compression levels from -bX to # (default : 1)
- -i#    : minimum evaluation time in seconds (default : 3s)
- -B#    : cut file into independent blocks of size # bytes [32+]
-                      or predefined block size [4-7] (default: 7)
+ -i#    : minimum evaluation time in seconds (default : 3s)
 ```
 
 #### License
diff --git a/programs/lz4.1 b/programs/lz4.1
index f35e29d..1576e45 100644
--- a/programs/lz4.1
+++ b/programs/lz4.1
@@ -1,5 +1,5 @@
 .
-.TH "LZ4" "1" "September 2018" "lz4 1.8.3" "User Commands"
+.TH "LZ4" "1" "April 2019" "lz4 1.9.0" "User Commands"
 .
 .SH "NAME"
 \fBlz4\fR \- lz4, unlz4, lz4cat \- Compress or decompress \.lz4 files
@@ -118,6 +118,14 @@ Benchmark mode, using \fB#\fR compression level\.
 Compression level, with # being any value from 1 to 12\. Higher values trade compression speed for compression ratio\. Values above 12 are considered the same as 12\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\.
 .
 .TP
+\fB\-\-fast[=#]\fR
+Switch to ultra\-fast compression levels\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. If \fB=#\fR is not present, it defaults to \fB1\fR\. This setting overrides compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
+.
+.TP
+\fB\-\-favor\-decSpeed\fR
+Generate compressed data optimized for decompression speed\. Compressed data will be larger as a consequence (typically by ~0\.5%), while decompression speed will be improved by 5\-20%, depending on use cases\. This option only works in combination with very high compression levels (>=10)\.
+.
+.TP
 \fB\-D dictionaryName\fR
 Compress, decompress or benchmark using dictionary \fIdictionaryName\fR\. Compression and decompression must use the same dictionary to be compatible\. Using a different dictionary during decompression will either abort due to decompression error, or generate a checksum error\.
 .
@@ -151,12 +159,12 @@ Block size [4\-7](default : 7)
 \fB\-B4\fR= 64KB ; \fB\-B5\fR= 256KB ; \fB\-B6\fR= 1MB ; \fB\-B7\fR= 4MB
 .
 .TP
-\fB\-BD\fR
-Block Dependency (improves compression ratio on small blocks)
+\fB\-BI\fR
+Produce independent blocks (default)
 .
 .TP
-\fB\-\-fast[=#]\fR
-switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
+\fB\-BD\fR
+Blocks depend on predecessors (improves compression ratio, more noticeable on small blocks)
 .
 .TP
 \fB\-\-[no\-]frame\-crc\fR
diff --git a/programs/lz4.1.md b/programs/lz4.1.md
index 12b8e29..10449a0 100644
--- a/programs/lz4.1.md
+++ b/programs/lz4.1.md
@@ -126,12 +126,18 @@ only the latest one will be applied.
   Decompression speed remains fast at all settings.
 
 * `--fast[=#]`:
-  switch to ultra-fast compression levels.
+  Switch to ultra-fast compression levels.
   The higher the value, the faster the compression speed, at the cost of some compression ratio.
   If `=#` is not present, it defaults to `1`.
   This setting overrides compression level if one was set previously.
   Similarly, if a compression level is set after `--fast`, it overrides it.
 
+* `--favor-decSpeed`:
+  Generate compressed data optimized for decompression speed.
+  Compressed data will be larger as a consequence (typically by ~0.5%),
+  while decompression speed will be improved by 5-20%, depending on use cases.
+  This option only works in combination with very high compression levels (>=10).
+
 * `-D dictionaryName`:
   Compress, decompress or benchmark using dictionary _dictionaryName_.
   Compression and decompression must use the same dictionary to be compatible.
@@ -166,8 +172,11 @@ only the latest one will be applied.
   Block size \[4-7\](default : 7)<br/>
   `-B4`= 64KB ; `-B5`= 256KB ; `-B6`= 1MB ; `-B7`= 4MB
 
+* `-BI`:
+  Produce independent blocks (default)
+
 * `-BD`:
-  Block Dependency (improves compression ratio on small blocks)
+  Blocks depend on predecessors (improves compression ratio, more noticeable on small blocks)
 
 * `--[no-]frame-crc`:
   Select frame checksum (default:enabled)
diff --git a/programs/lz4cli.c b/programs/lz4cli.c
index 26a8089..8bd7042 100644
--- a/programs/lz4cli.c
+++ b/programs/lz4cli.c
@@ -92,7 +92,7 @@ static unsigned displayLevel = 2;   /* 0 : no display ; 1: errors only ; 2 : dow
 ***************************************/
 #define DEFAULT_COMPRESSOR   LZ4IO_compressFilename
 #define DEFAULT_DECOMPRESSOR LZ4IO_decompressFilename
-int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel);   /* hidden function */
+int LZ4IO_compressFilename_Legacy(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename, int compressionlevel);   /* hidden function */
 
 
 /*-***************************
@@ -134,20 +134,20 @@ static int usage_advanced(const char* exeName)
     DISPLAY( " -r     : operate recursively on directories (sets also -m) \n");
 #endif
     DISPLAY( " -l     : compress using Legacy format (Linux kernel compression)\n");
-    DISPLAY( " -B#    : Block size [4-7] (default : 7) \n");
-    DISPLAY( " -BD    : Block dependency (improve compression ratio) \n");
+    DISPLAY( " -B#    : cut file into blocks of size # bytes [32+] \n");
+    DISPLAY( "                     or predefined block size [4-7] (default: 7) \n");
+    DISPLAY( " -BI    : Block Independence (default) \n");
+    DISPLAY( " -BD    : Block dependency (improves compression ratio) \n");
     DISPLAY( " -BX    : enable block checksum (default:disabled) \n");
     DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n");
     DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n");
     DISPLAY( "--[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)\n");
     DISPLAY( "--favor-decSpeed: compressed files decompress faster, but are less compressed \n");
-    DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
+    DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %i)\n", 1);
     DISPLAY( "Benchmark arguments : \n");
     DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
     DISPLAY( " -e#    : test all compression levels from -bX to # (default : 1)\n");
     DISPLAY( " -i#    : minimum evaluation time in seconds (default : 3s) \n");
-    DISPLAY( " -B#    : cut file into independent blocks of size # bytes [32+] \n");
-    DISPLAY( "                     or predefined block size [4-7] (default: 7) \n");
     if (g_lz4c_legacy_commands) {
         DISPLAY( "Legacy arguments : \n");
         DISPLAY( " -c0    : fast compression \n");
@@ -251,16 +251,16 @@ static int exeNameMatch(const char* exeName, const char* test)
 }
 
 /*! readU32FromChar() :
-    @return : unsigned integer value read from input in `char` format
-    allows and interprets K, KB, KiB, M, MB and MiB suffix.
-    Will also modify `*stringPtr`, advancing it to position where it stopped reading.
-    Note : function result can overflow if digit string > MAX_UINT */
+ * @return : unsigned integer value read from input in `char` format
+ *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
+ *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
+ *  Note : function result can overflow if digit string > MAX_UINT */
 static unsigned readU32FromChar(const char** stringPtr)
 {
     unsigned result = 0;
     while ((**stringPtr >='0') && (**stringPtr <='9')) {
         result *= 10;
-        result += **stringPtr - '0';
+        result += (unsigned)(**stringPtr - '0');
         (*stringPtr)++ ;
     }
     if ((**stringPtr=='K') || (**stringPtr=='M')) {
@@ -278,7 +278,7 @@ static unsigned readU32FromChar(const char** stringPtr)
  *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
  * @return 0 and doesn't modify *stringPtr otherwise.
  */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+static int longCommandWArg(const char** stringPtr, const char* longCommand)
 {
     size_t const comSize = strlen(longCommand);
     int const result = !strncmp(*stringPtr, longCommand, comSize);
@@ -288,6 +288,19 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
 
 typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench } operationMode_e;
 
+/** determineOpMode() :
+ *  auto-determine operation mode, based on input filename extension
+ *  @return `om_decompress` if input filename has .lz4 extension and `om_compress` otherwise.
+ */
+static operationMode_e determineOpMode(const char* inputFilename)
+{
+    size_t const inSize  = strlen(inputFilename);
+    size_t const extSize = strlen(LZ4_EXTENSION);
+    size_t const extStart= (inSize > extSize) ? inSize-extSize : 0;
+    if (!strcmp(inputFilename+extStart, LZ4_EXTENSION)) return om_decompress;
+    else return om_compress;
+}
+
 int main(int argc, const char** argv)
 {
     int i,
@@ -304,11 +317,12 @@ int main(int argc, const char** argv)
     const char* output_filename= NULL;
     const char* dictionary_filename = NULL;
     char* dynNameSpace = NULL;
-    const char** inFileNames = (const char**) calloc(argc, sizeof(char*));
+    const char** inFileNames = (const char**)calloc((size_t)argc, sizeof(char*));
     unsigned ifnIdx=0;
+    LZ4IO_prefs_t* const prefs = LZ4IO_defaultPreferences();
     const char nullOutput[] = NULL_OUTPUT;
     const char extension[] = LZ4_EXTENSION;
-    size_t blockSize = LZ4IO_setBlockSizeID(LZ4_BLOCKSIZEID_DEFAULT);
+    size_t blockSize = LZ4IO_setBlockSizeID(prefs, LZ4_BLOCKSIZEID_DEFAULT);
     const char* const exeName = lastNameFromPath(argv[0]);
 #ifdef UTIL_HAS_CREATEFILELIST
     const char** extendedFileList = NULL;
@@ -322,13 +336,14 @@ int main(int argc, const char** argv)
         return 1;
     }
     inFileNames[0] = stdinmark;
-    LZ4IO_setOverwrite(0);
+    LZ4IO_setOverwrite(prefs, 0);
 
     /* predefined behaviors, based on binary/link name */
     if (exeNameMatch(exeName, LZ4CAT)) {
         mode = om_decompress;
-        LZ4IO_setOverwrite(1);
-        LZ4IO_setRemoveSrcFile(0);
+        LZ4IO_setOverwrite(prefs, 1);
+        LZ4IO_setPassThrough(prefs, 1);
+        LZ4IO_setRemoveSrcFile(prefs, 0);
         forceStdout=1;
         output_filename=stdoutmark;
         displayLevel=1;
@@ -360,23 +375,23 @@ int main(int argc, const char** argv)
                     || (!strcmp(argument, "--uncompress"))) { mode = om_decompress; continue; }
                 if (!strcmp(argument,  "--multiple")) { multiple_inputs = 1; continue; }
                 if (!strcmp(argument,  "--test")) { mode = om_test; continue; }
-                if (!strcmp(argument,  "--force")) { LZ4IO_setOverwrite(1); continue; }
-                if (!strcmp(argument,  "--no-force")) { LZ4IO_setOverwrite(0); continue; }
+                if (!strcmp(argument,  "--force")) { LZ4IO_setOverwrite(prefs, 1); continue; }
+                if (!strcmp(argument,  "--no-force")) { LZ4IO_setOverwrite(prefs, 0); continue; }
                 if ((!strcmp(argument, "--stdout"))
                     || (!strcmp(argument, "--to-stdout"))) { forceStdout=1; output_filename=stdoutmark; continue; }
-                if (!strcmp(argument,  "--frame-crc")) { LZ4IO_setStreamChecksumMode(1); continue; }
-                if (!strcmp(argument,  "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(0); continue; }
-                if (!strcmp(argument,  "--content-size")) { LZ4IO_setContentSize(1); continue; }
-                if (!strcmp(argument,  "--no-content-size")) { LZ4IO_setContentSize(0); continue; }
-                if (!strcmp(argument,  "--sparse")) { LZ4IO_setSparseFile(2); continue; }
-                if (!strcmp(argument,  "--no-sparse")) { LZ4IO_setSparseFile(0); continue; }
-                if (!strcmp(argument,  "--favor-decSpeed")) { LZ4IO_favorDecSpeed(1); continue; }
+                if (!strcmp(argument,  "--frame-crc")) { LZ4IO_setStreamChecksumMode(prefs, 1); continue; }
+                if (!strcmp(argument,  "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(prefs, 0); continue; }
+                if (!strcmp(argument,  "--content-size")) { LZ4IO_setContentSize(prefs, 1); continue; }
+                if (!strcmp(argument,  "--no-content-size")) { LZ4IO_setContentSize(prefs, 0); continue; }
+                if (!strcmp(argument,  "--sparse")) { LZ4IO_setSparseFile(prefs, 2); continue; }
+                if (!strcmp(argument,  "--no-sparse")) { LZ4IO_setSparseFile(prefs, 0); continue; }
+                if (!strcmp(argument,  "--favor-decSpeed")) { LZ4IO_favorDecSpeed(prefs, 1); continue; }
                 if (!strcmp(argument,  "--verbose")) { displayLevel++; continue; }
                 if (!strcmp(argument,  "--quiet")) { if (displayLevel) displayLevel--; continue; }
                 if (!strcmp(argument,  "--version")) { DISPLAY(WELCOME_MESSAGE); return 0; }
                 if (!strcmp(argument,  "--help")) { usage_advanced(exeName); goto _cleanup; }
-                if (!strcmp(argument,  "--keep")) { LZ4IO_setRemoveSrcFile(0); continue; }   /* keep source file (default) */
-                if (!strcmp(argument,  "--rm")) { LZ4IO_setRemoveSrcFile(1); continue; }
+                if (!strcmp(argument,  "--keep")) { LZ4IO_setRemoveSrcFile(prefs, 0); continue; }   /* keep source file (default) */
+                if (!strcmp(argument,  "--rm")) { LZ4IO_setRemoveSrcFile(prefs, 1); continue; }
                 if (longCommandWArg(&argument, "--fast")) {
                         /* Parse optional acceleration factor */
                         if (*argument == '=') {
@@ -407,11 +422,11 @@ int main(int argc, const char** argv)
                     if (!strcmp(argument,  "c1")) { cLevel=9; argument++; continue; }  /* -c1 (high compression) */
                     if (!strcmp(argument,  "c2")) { cLevel=12; argument++; continue; } /* -c2 (very high compression) */
                     if (!strcmp(argument,  "hc")) { cLevel=12; argument++; continue; } /* -hc (very high compression) */
-                    if (!strcmp(argument,  "y"))  { LZ4IO_setOverwrite(1); continue; } /* -y (answer 'yes' to overwrite permission) */
+                    if (!strcmp(argument,  "y"))  { LZ4IO_setOverwrite(prefs, 1); continue; } /* -y (answer 'yes' to overwrite permission) */
                 }
 
                 if ((*argument>='0') && (*argument<='9')) {
-                    cLevel = readU32FromChar(&argument);
+                    cLevel = (int)readU32FromChar(&argument);
                     argument--;
                     continue;
                 }
@@ -426,7 +441,7 @@ int main(int argc, const char** argv)
 
                 case 'e':
                     argument++;
-                    cLevelLast = readU32FromChar(&argument);
+                    cLevelLast = (int)readU32FromChar(&argument);
                     argument--;
                     break;
 
@@ -456,13 +471,17 @@ int main(int argc, const char** argv)
                 case 'd': mode = om_decompress; break;
 
                     /* Force stdout, even if stdout==console */
-                case 'c': forceStdout=1; output_filename=stdoutmark; break;
+                case 'c':
+                  forceStdout=1;
+                  output_filename=stdoutmark;
+                  LZ4IO_setPassThrough(prefs, 1);
+                  break;
 
                     /* Test integrity */
                 case 't': mode = om_test; break;
 
                     /* Overwrite */
-                case 'f': LZ4IO_setOverwrite(1); break;
+                case 'f': LZ4IO_setOverwrite(prefs, 1); break;
 
                     /* Verbose mode */
                 case 'v': displayLevel++; break;
@@ -471,7 +490,7 @@ int main(int argc, const char** argv)
                 case 'q': if (displayLevel) displayLevel--; break;
 
                     /* keep source file (default anyway, so useless) (for xz/lzma compatibility) */
-                case 'k': LZ4IO_setRemoveSrcFile(0); break;
+                case 'k': LZ4IO_setRemoveSrcFile(prefs, 0); break;
 
                     /* Modify Block Properties */
                 case 'B':
@@ -479,8 +498,9 @@ int main(int argc, const char** argv)
                         int exitBlockProperties=0;
                         switch(argument[1])
                         {
-                        case 'D': LZ4IO_setBlockMode(LZ4IO_blockLinked); argument++; break;
-                        case 'X': LZ4IO_setBlockChecksumMode(1); argument ++; break;   /* disabled by default */
+                        case 'D': LZ4IO_setBlockMode(prefs, LZ4IO_blockLinked); argument++; break;
+                        case 'I': LZ4IO_setBlockMode(prefs, LZ4IO_blockIndependent); argument++; break;
+                        case 'X': LZ4IO_setBlockChecksumMode(prefs, 1); argument ++; break;   /* disabled by default */
                         default :
                             if (argument[1] < '0' || argument[1] > '9') {
                                 exitBlockProperties=1;
@@ -492,16 +512,17 @@ int main(int argc, const char** argv)
                                 argument--;
                                 if (B < 4) badusage(exeName);
                                 if (B <= 7) {
-                                    blockSize = LZ4IO_setBlockSizeID(B);
+                                    blockSize = LZ4IO_setBlockSizeID(prefs, B);
                                     BMK_setBlockSize(blockSize);
                                     DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
                                 } else {
                                     if (B < 32) badusage(exeName);
-                                    BMK_setBlockSize(B);
-                                    if (B >= 1024) {
-                                        DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10));
+                                    blockSize = LZ4IO_setBlockSize(prefs, B);
+                                    BMK_setBlockSize(blockSize);
+                                    if (blockSize >= 1024) {
+                                        DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
                                     } else {
-                                        DISPLAYLEVEL(2, "bench: using blocks of size %u bytes \n", (U32)(B));
+                                        DISPLAYLEVEL(2, "using blocks of size %u bytes \n", (U32)(blockSize));
                                     }
                                 }
                                 break;
@@ -605,7 +626,7 @@ int main(int argc, const char** argv)
     }
 
     if (mode == om_test) {
-        LZ4IO_setTestMode(1);
+        LZ4IO_setTestMode(prefs, 1);
         output_filename = nulmark;
         mode = om_decompress;   /* defer to decompress */
     }
@@ -615,7 +636,7 @@ int main(int argc, const char** argv)
             DISPLAYLEVEL(1, "refusing to read from a console\n");
             exit(1);
         }
-        LZ4IO_setDictionaryFilename(dictionary_filename);
+        LZ4IO_setDictionaryFilename(prefs, dictionary_filename);
     }
 
     /* compress or decompress */
@@ -633,11 +654,7 @@ int main(int argc, const char** argv)
     while ((!output_filename) && (multiple_inputs==0)) {
         if (!IS_CONSOLE(stdout)) { output_filename=stdoutmark; break; }   /* Default to stdout whenever possible (i.e. not a console) */
         if (mode == om_auto) {  /* auto-determine compression or decompression, based on file extension */
-            size_t const inSize  = strlen(input_filename);
-            size_t const extSize = strlen(LZ4_EXTENSION);
-            size_t const extStart= (inSize > extSize) ? inSize-extSize : 0;
-            if (!strcmp(input_filename+extStart, LZ4_EXTENSION)) mode = om_decompress;
-            else mode = om_compress;
+            mode = determineOpMode(input_filename);
         }
         if (mode == om_compress) {   /* compression to file */
             size_t const l = strlen(input_filename);
@@ -675,23 +692,28 @@ int main(int argc, const char** argv)
     if (!strcmp(output_filename,stdoutmark) && (displayLevel==2)) displayLevel=1;
     if ((multiple_inputs) && (displayLevel==2)) displayLevel=1;
 
+    /* Auto-determine compression or decompression, based on file extension */
+    if (mode == om_auto) {
+        mode = determineOpMode(input_filename);
+    }
+
     /* IO Stream/File */
-    LZ4IO_setNotificationLevel(displayLevel);
+    LZ4IO_setNotificationLevel((int)displayLevel);
     if (ifnIdx == 0) multiple_inputs = 0;
     if (mode == om_decompress) {
         if (multiple_inputs)
-            operationResult = LZ4IO_decompressMultipleFilenames(inFileNames, ifnIdx, !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION);
+            operationResult = LZ4IO_decompressMultipleFilenames(prefs, inFileNames, ifnIdx, !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION);
         else
-            operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename);
+            operationResult = DEFAULT_DECOMPRESSOR(prefs, input_filename, output_filename);
     } else {   /* compression is default action */
         if (legacy_format) {
             DISPLAYLEVEL(3, "! Generating LZ4 Legacy format (deprecated) ! \n");
-            LZ4IO_compressFilename_Legacy(input_filename, output_filename, cLevel);
+            LZ4IO_compressFilename_Legacy(prefs, input_filename, output_filename, cLevel);
         } else {
             if (multiple_inputs)
-                operationResult = LZ4IO_compressMultipleFilenames(inFileNames, ifnIdx, LZ4_EXTENSION, cLevel);
+                operationResult = LZ4IO_compressMultipleFilenames(prefs, inFileNames, ifnIdx, !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION, cLevel);
             else
-                operationResult = DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel);
+                operationResult = DEFAULT_COMPRESSOR(prefs, input_filename, output_filename, cLevel);
         }
     }
 
@@ -704,6 +726,7 @@ _cleanup:
         inFileNames = NULL;
     }
 #endif
+    LZ4IO_freePreferences(prefs);
     free((void*)inFileNames);
     return operationResult;
 }
diff --git a/programs/lz4io.c b/programs/lz4io.c
index 28d6537..121bd44 100644
--- a/programs/lz4io.c
+++ b/programs/lz4io.c
@@ -107,18 +107,23 @@ static clock_t g_time = 0;
 /**************************************
 *  Local Parameters
 **************************************/
-static int g_overwrite = 1;
-static int g_testMode = 0;
-static int g_blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
-static int g_blockChecksum = 0;
-static int g_streamChecksum = 1;
-static int g_blockIndependence = 1;
-static int g_sparseFileSupport = 1;
-static int g_contentSizeFlag = 0;
-static int g_useDictionary = 0;
-static unsigned g_favorDecSpeed = 0;
-static const char* g_dictionaryFilename = NULL;
 
+struct LZ4IO_prefs_s {
+  int passThrough;
+  int overwrite;
+  int testMode;
+  int blockSizeId;
+  size_t blockSize;
+  int blockChecksum;
+  int streamChecksum;
+  int blockIndependence;
+  int sparseFileSupport;
+  int contentSizeFlag;
+  int useDictionary;
+  unsigned favorDecSpeed;
+  const char* dictionaryFilename;
+  int removeSrcFile;
+};
 
 /**************************************
 *  Exceptions
@@ -150,55 +155,109 @@ static const char* g_dictionaryFilename = NULL;
 /* ****************** Parameters ******************** */
 /* ************************************************** */
 
-int LZ4IO_setDictionaryFilename(const char* dictionaryFilename) {
-    g_dictionaryFilename = dictionaryFilename;
-    g_useDictionary = dictionaryFilename != NULL;
-    return g_useDictionary;
+LZ4IO_prefs_t* LZ4IO_defaultPreferences(void)
+{
+  LZ4IO_prefs_t* const ret = (LZ4IO_prefs_t*)malloc(sizeof(LZ4IO_prefs_t));
+  if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
+  ret->passThrough = 0;
+  ret->overwrite = 1;
+  ret->testMode = 0;
+  ret->blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
+  ret->blockSize = 0;
+  ret->blockChecksum = 0;
+  ret->streamChecksum = 1;
+  ret->blockIndependence = 1;
+  ret->sparseFileSupport = 1;
+  ret->contentSizeFlag = 0;
+  ret->useDictionary = 0;
+  ret->favorDecSpeed = 0;
+  ret->dictionaryFilename = NULL;
+  ret->removeSrcFile = 0;
+  return ret;
+}
+
+void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs)
+{
+  free(prefs);
+}
+
+
+int LZ4IO_setDictionaryFilename(LZ4IO_prefs_t* const prefs, const char* dictionaryFilename)
+{
+    prefs->dictionaryFilename = dictionaryFilename;
+    prefs->useDictionary = dictionaryFilename != NULL;
+    return prefs->useDictionary;
+}
+
+/* Default setting : passThrough = 0; return : passThrough mode (0/1) */
+int LZ4IO_setPassThrough(LZ4IO_prefs_t* const prefs, int yes)
+{
+   prefs->passThrough = (yes!=0);
+   return prefs->passThrough;
 }
 
+
 /* Default setting : overwrite = 1; return : overwrite mode (0/1) */
-int LZ4IO_setOverwrite(int yes)
+int LZ4IO_setOverwrite(LZ4IO_prefs_t* const prefs, int yes)
 {
-   g_overwrite = (yes!=0);
-   return g_overwrite;
+   prefs->overwrite = (yes!=0);
+   return prefs->overwrite;
 }
 
 /* Default setting : testMode = 0; return : testMode (0/1) */
-int LZ4IO_setTestMode(int yes)
+int LZ4IO_setTestMode(LZ4IO_prefs_t* const prefs, int yes)
 {
-   g_testMode = (yes!=0);
-   return g_testMode;
+   prefs->testMode = (yes!=0);
+   return prefs->testMode;
 }
 
 /* blockSizeID : valid values : 4-5-6-7 */
-size_t LZ4IO_setBlockSizeID(unsigned bsid)
+size_t LZ4IO_setBlockSizeID(LZ4IO_prefs_t* const prefs, unsigned bsid)
 {
     static const size_t blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB };
     static const unsigned minBlockSizeID = 4;
     static const unsigned maxBlockSizeID = 7;
     if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0;
-    g_blockSizeId = bsid;
-    return blockSizeTable[g_blockSizeId-minBlockSizeID];
+    prefs->blockSizeId = bsid;
+    prefs->blockSize = blockSizeTable[prefs->blockSizeId-minBlockSizeID];
+    return prefs->blockSize;
 }
 
-int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode)
+size_t LZ4IO_setBlockSize(LZ4IO_prefs_t* const prefs, size_t blockSize)
 {
-    g_blockIndependence = (blockMode == LZ4IO_blockIndependent);
-    return g_blockIndependence;
+    static const size_t minBlockSize = 32;
+    static const size_t maxBlockSize = 4 MB;
+    unsigned bsid = 0;
+    if (blockSize < minBlockSize) blockSize = minBlockSize;
+    if (blockSize > maxBlockSize) blockSize = maxBlockSize;
+    prefs->blockSize = blockSize;
+    blockSize--;
+    /* find which of { 64k, 256k, 1MB, 4MB } is closest to blockSize */
+    while (blockSize >>= 2)
+        bsid++;
+    if (bsid < 7) bsid = 7;
+    prefs->blockSizeId = bsid-3;
+    return prefs->blockSize;
+}
+
+int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode)
+{
+    prefs->blockIndependence = (blockMode == LZ4IO_blockIndependent);
+    return prefs->blockIndependence;
 }
 
 /* Default setting : no block checksum */
-int LZ4IO_setBlockChecksumMode(int enable)
+int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
 {
-    g_blockChecksum = (enable != 0);
-    return g_blockChecksum;
+    prefs->blockChecksum = (enable != 0);
+    return prefs->blockChecksum;
 }
 
 /* Default setting : checksum enabled */
-int LZ4IO_setStreamChecksumMode(int enable)
+int LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
 {
-    g_streamChecksum = (enable != 0);
-    return g_streamChecksum;
+    prefs->streamChecksum = (enable != 0);
+    return prefs->streamChecksum;
 }
 
 /* Default setting : 0 (no notification) */
@@ -209,27 +268,29 @@ int LZ4IO_setNotificationLevel(int level)
 }
 
 /* Default setting : 0 (disabled) */
-int LZ4IO_setSparseFile(int enable)
+int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable)
 {
-    g_sparseFileSupport = (enable!=0);
-    return g_sparseFileSupport;
+    prefs->sparseFileSupport = (enable!=0);
+    return prefs->sparseFileSupport;
 }
 
 /* Default setting : 0 (disabled) */
-int LZ4IO_setContentSize(int enable)
+int LZ4IO_setContentSize(LZ4IO_prefs_t* const prefs, int enable)
 {
-    g_contentSizeFlag = (enable!=0);
-    return g_contentSizeFlag;
+    prefs->contentSizeFlag = (enable!=0);
+    return prefs->contentSizeFlag;
 }
 
 /* Default setting : 0 (disabled) */
-void LZ4IO_favorDecSpeed(int favor)
+void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor)
 {
-    g_favorDecSpeed = (favor!=0);
+    prefs->favorDecSpeed = (favor!=0);
 }
 
-static U32 g_removeSrcFile = 0;
-void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }
+void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag)
+{
+  prefs->removeSrcFile = (flag>0);
+}
 
 
 
@@ -237,7 +298,6 @@ void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }
 ** ********************** LZ4 File / Pipe compression ********************* **
 ** ************************************************************************ */
 
-static int LZ4IO_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
 static int LZ4IO_isSkippableMagicNumber(unsigned int magic) {
     return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0;
 }
@@ -263,22 +323,23 @@ static FILE* LZ4IO_openSrcFile(const char* srcFileName)
 }
 
 /** FIO_openDstFile() :
- * condition : `dstFileName` must be non-NULL.
+ *  condition : `dstFileName` must be non-NULL.
  * @result : FILE* to `dstFileName`, or NULL if it fails */
-static FILE* LZ4IO_openDstFile(const char* dstFileName)
+static FILE* LZ4IO_openDstFile(LZ4IO_prefs_t* const prefs, const char* dstFileName)
 {
     FILE* f;
+    assert(dstFileName != NULL);
 
     if (!strcmp (dstFileName, stdoutmark)) {
         DISPLAYLEVEL(4,"Using stdout for output\n");
         f = stdout;
         SET_BINARY_MODE(stdout);
-        if (g_sparseFileSupport==1) {
-            g_sparseFileSupport = 0;
+        if (prefs->sparseFileSupport==1) {
+            prefs->sparseFileSupport = 0;
             DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
         }
     } else {
-        if (!g_overwrite && strcmp (dstFileName, nulmark)) {  /* Check if destination file already exists */
+        if (!prefs->overwrite && strcmp (dstFileName, nulmark)) {  /* Check if destination file already exists */
             f = fopen( dstFileName, "rb" );
             if (f != NULL) {  /* dest exists, prompt for overwrite authorization */
                 fclose(f);
@@ -299,7 +360,7 @@ static FILE* LZ4IO_openDstFile(const char* dstFileName)
     }
 
     /* sparse file */
-    if (f && g_sparseFileSupport) { SET_SPARSE_FILE_MODE(f); }
+    if (f && prefs->sparseFileSupport) { SET_SPARSE_FILE_MODE(f); }
 
     return f;
 }
@@ -329,44 +390,50 @@ static int LZ4IO_LZ4_compress(const char* src, char* dst, int srcSize, int dstSi
 /* LZ4IO_compressFilename_Legacy :
  * This function is intentionally "hidden" (not published in .h)
  * It generates compressed streams using the old 'legacy' format */
-int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel)
+int LZ4IO_compressFilename_Legacy(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename, int compressionlevel)
 {
-    int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
+    typedef int (*compress_f)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
+    compress_f const compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC;
     unsigned long long filesize = 0;
     unsigned long long compressedfilesize = MAGICNUMBER_SIZE;
     char* in_buff;
     char* out_buff;
     const int outBuffSize = LZ4_compressBound(LEGACY_BLOCKSIZE);
-    FILE* finput;
+    FILE* const finput = LZ4IO_openSrcFile(input_filename);
     FILE* foutput;
     clock_t clockEnd;
 
     /* Init */
     clock_t const clockStart = clock();
-    compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC;
+    if (finput == NULL)
+        EXM_THROW(20, "%s : open file error ", input_filename);
 
-    finput = LZ4IO_openSrcFile(input_filename);
-    if (finput == NULL) EXM_THROW(20, "%s : open file error ", input_filename);
-    foutput = LZ4IO_openDstFile(output_filename);
-    if (foutput == NULL) { fclose(finput); EXM_THROW(20, "%s : open file error ", input_filename); }
+    foutput = LZ4IO_openDstFile(prefs, output_filename);
+    if (foutput == NULL) {
+        fclose(finput);
+        EXM_THROW(20, "%s : open file error ", input_filename);
+    }
 
     /* Allocate Memory */
     in_buff = (char*)malloc(LEGACY_BLOCKSIZE);
-    out_buff = (char*)malloc(outBuffSize);
-    if (!in_buff || !out_buff) EXM_THROW(21, "Allocation error : not enough memory");
+    out_buff = (char*)malloc(outBuffSize + 4);
+    if (!in_buff || !out_buff)
+        EXM_THROW(21, "Allocation error : not enough memory");
 
     /* Write Archive Header */
     LZ4IO_writeLE32(out_buff, LEGACY_MAGICNUMBER);
-    { size_t const sizeCheck = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput);
-      if (sizeCheck != MAGICNUMBER_SIZE) EXM_THROW(22, "Write error : cannot write header"); }
+    {   size_t const writeSize = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput);
+        if (writeSize != MAGICNUMBER_SIZE)
+            EXM_THROW(22, "Write error : cannot write header");
+    }
 
     /* Main Loop */
     while (1) {
-        unsigned int outSize;
+        int outSize;
         /* Read Block */
-        size_t const inSize = (int) fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput);
+        size_t const inSize = fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput);
+        assert(inSize <= LEGACY_BLOCKSIZE);
         if (inSize == 0) break;
-        if (inSize > LEGACY_BLOCKSIZE) EXM_THROW(23, "Read error : wrong fread() size report ");   /* should be impossible */
         filesize += inSize;
 
         /* Compress Block */
@@ -376,9 +443,11 @@ int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output
                 (int)(filesize>>20), (double)compressedfilesize/filesize*100);
 
         /* Write Block */
-        LZ4IO_writeLE32(out_buff, outSize);
-        {   size_t const sizeCheck = fwrite(out_buff, 1, outSize+4, foutput);
-            if (sizeCheck!=(size_t)(outSize+4))
+        assert(outSize > 0);
+        assert(outSize < outBuffSize);
+        LZ4IO_writeLE32(out_buff, (unsigned)outSize);
+        {   size_t const writeSize = fwrite(out_buff, 1, outSize+4, foutput);
+            if (writeSize != (size_t)(outSize+4))
                 EXM_THROW(24, "Write error : cannot write compressed block");
     }   }
     if (ferror(finput)) EXM_THROW(25, "Error while reading %s ", input_filename);
@@ -418,7 +487,7 @@ typedef struct {
     LZ4F_CDict* cdict;
 } cRess_t;
 
-static void* LZ4IO_createDict(const char* dictFilename, size_t *dictSize) {
+static void* LZ4IO_createDict(LZ4IO_prefs_t* const prefs, size_t *dictSize) {
     size_t readSize;
     size_t dictEnd = 0;
     size_t dictLen = 0;
@@ -426,6 +495,7 @@ static void* LZ4IO_createDict(const char* dictFilename, size_t *dictSize) {
     size_t circularBufSize = LZ4_MAX_DICT_SIZE;
     char* circularBuf;
     char* dictBuf;
+    const char* dictFilename = prefs->dictionaryFilename;
     FILE* dictFile;
 
     if (!dictFilename) EXM_THROW(25, "Dictionary error : no filename provided");
@@ -475,23 +545,23 @@ static void* LZ4IO_createDict(const char* dictFilename, size_t *dictSize) {
     return dictBuf;
 }
 
-static LZ4F_CDict* LZ4IO_createCDict(void) {
+static LZ4F_CDict* LZ4IO_createCDict(LZ4IO_prefs_t* const prefs) {
     size_t dictionarySize;
     void* dictionaryBuffer;
     LZ4F_CDict* cdict;
-    if (!g_useDictionary) {
+    if (!prefs->useDictionary) {
         return NULL;
     }
-    dictionaryBuffer = LZ4IO_createDict(g_dictionaryFilename, &dictionarySize);
+    dictionaryBuffer = LZ4IO_createDict(prefs, &dictionarySize);
     if (!dictionaryBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
     cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize);
     free(dictionaryBuffer);
     return cdict;
 }
 
-static cRess_t LZ4IO_createCResources(void)
+static cRess_t LZ4IO_createCResources(LZ4IO_prefs_t* const prefs)
 {
-    const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
+    const size_t blockSize = prefs->blockSize;
     cRess_t ress;
 
     LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION);
@@ -504,7 +574,7 @@ static cRess_t LZ4IO_createCResources(void)
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");
 
-    ress.cdict = LZ4IO_createCDict();
+    ress.cdict = LZ4IO_createCDict(prefs);
 
     return ress;
 }
@@ -526,7 +596,10 @@ static void LZ4IO_freeCResources(cRess_t ress)
  * result : 0 : compression completed correctly
  *          1 : missing or pb opening srcFileName
  */
-static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, const char* dstFileName, int compressionLevel)
+static int
+LZ4IO_compressFilename_extRess(LZ4IO_prefs_t* const io_prefs, cRess_t ress,
+                               const char* srcFileName, const char* dstFileName,
+                               int compressionLevel)
 {
     unsigned long long filesize = 0;
     unsigned long long compressedfilesize = 0;
@@ -535,7 +608,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
     void* const srcBuffer = ress.srcBuffer;
     void* const dstBuffer = ress.dstBuffer;
     const size_t dstBufferSize = ress.dstBufferSize;
-    const size_t blockSize = (size_t)LZ4IO_GetBlockSize_FromBlockId (g_blockSizeId);
+    const size_t blockSize = io_prefs->blockSize;
     size_t readSize;
     LZ4F_compressionContext_t ctx = ress.ctx;   /* just a pointer */
     LZ4F_preferences_t prefs;
@@ -543,7 +616,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
     /* Init */
     srcFile = LZ4IO_openSrcFile(srcFileName);
     if (srcFile == NULL) return 1;
-    dstFile = LZ4IO_openDstFile(dstFileName);
+    dstFile = LZ4IO_openDstFile(io_prefs, dstFileName);
     if (dstFile == NULL) { fclose(srcFile); return 1; }
     memset(&prefs, 0, sizeof(prefs));
 
@@ -551,12 +624,12 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
     /* Set compression parameters */
     prefs.autoFlush = 1;
     prefs.compressionLevel = compressionLevel;
-    prefs.frameInfo.blockMode = (LZ4F_blockMode_t)g_blockIndependence;
-    prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)g_blockSizeId;
-    prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)g_blockChecksum;
-    prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)g_streamChecksum;
-    prefs.favorDecSpeed = g_favorDecSpeed;
-    if (g_contentSizeFlag) {
+    prefs.frameInfo.blockMode = (LZ4F_blockMode_t)io_prefs->blockIndependence;
+    prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)io_prefs->blockSizeId;
+    prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)io_prefs->blockChecksum;
+    prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)io_prefs->streamChecksum;
+    prefs.favorDecSpeed = io_prefs->favorDecSpeed;
+    if (io_prefs->contentSizeFlag) {
       U64 const fileSize = UTIL_getFileSize(srcFileName);
       prefs.frameInfo.contentSize = fileSize;   /* == 0 if input == stdin */
       if (fileSize==0)
@@ -622,9 +695,9 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
         compressedfilesize += headerSize;
     }
 
-    /* Release files */
+    /* Release file handlers */
     fclose (srcFile);
-    fclose (dstFile);
+    if (strcmp(dstFileName,stdoutmark)) fclose (dstFile);   /* do not close stdout */
 
     /* Copy owner, file permissions and modification time */
     {   stat_t statbuf;
@@ -635,7 +708,7 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
             UTIL_setFileStat(dstFileName, &statbuf);
     }   }
 
-    if (g_removeSrcFile) {  /* remove source file : --rm */
+    if (io_prefs->removeSrcFile) {  /* remove source file : --rm */
         if (remove(srcFileName))
             EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno));
     }
@@ -650,13 +723,13 @@ static int LZ4IO_compressFilename_extRess(cRess_t ress, const char* srcFileName,
 }
 
 
-int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel)
+int LZ4IO_compressFilename(LZ4IO_prefs_t* const prefs, const char* srcFileName, const char* dstFileName, int compressionLevel)
 {
     UTIL_time_t const timeStart = UTIL_getTime();
     clock_t const cpuStart = clock();
-    cRess_t const ress = LZ4IO_createCResources();
+    cRess_t const ress = LZ4IO_createCResources(prefs);
 
-    int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel);
+    int const result = LZ4IO_compressFilename_extRess(prefs, ress, srcFileName, dstFileName, compressionLevel);
 
     /* Free resources */
     LZ4IO_freeCResources(ress);
@@ -675,7 +748,10 @@ int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int
 
 
 #define FNSPACE 30
-int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionLevel)
+int LZ4IO_compressMultipleFilenames(LZ4IO_prefs_t* const prefs,
+                              const char** inFileNamesTable, int ifntSize,
+                              const char* suffix,
+                              int compressionLevel)
 {
     int i;
     int missed_files = 0;
@@ -685,16 +761,31 @@ int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize,
     cRess_t ress;
 
     if (dstFileName == NULL) return ifntSize;   /* not enough memory */
-    ress = LZ4IO_createCResources();
+    ress = LZ4IO_createCResources(prefs);
 
     /* loop on each file */
     for (i=0; i<ifntSize; i++) {
         size_t const ifnSize = strlen(inFileNamesTable[i]);
-        if (ofnSize <= ifnSize+suffixSize+1) { free(dstFileName); ofnSize = ifnSize + 20; dstFileName = (char*)malloc(ofnSize); if (dstFileName==NULL) { LZ4IO_freeCResources(ress); return ifntSize; } }
+        if (!strcmp(suffix, stdoutmark)) {
+            missed_files += LZ4IO_compressFilename_extRess(prefs, ress,
+                                    inFileNamesTable[i], stdoutmark,
+                                    compressionLevel);
+            continue;
+        }
+        if (ofnSize <= ifnSize+suffixSize+1) {
+            free(dstFileName);
+            ofnSize = ifnSize + 20;
+            dstFileName = (char*)malloc(ofnSize);
+            if (dstFileName==NULL) {
+                LZ4IO_freeCResources(ress);
+                return ifntSize;
+        }   }
         strcpy(dstFileName, inFileNamesTable[i]);
         strcat(dstFileName, suffix);
 
-        missed_files += LZ4IO_compressFilename_extRess(ress, inFileNamesTable[i], dstFileName, compressionLevel);
+        missed_files += LZ4IO_compressFilename_extRess(prefs, ress,
+                                inFileNamesTable[i], dstFileName,
+                                compressionLevel);
     }
 
     /* Close & Free */
@@ -709,18 +800,19 @@ int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize,
 /* ********************** LZ4 file-stream Decompression **************** */
 /* ********************************************************************* */
 
+/* Decodes the 4 bytes at s as a little-endian 32-bit value; s must point to a memory space of size >= 4 */
 static unsigned LZ4IO_readLE32 (const void* s)
 {
     const unsigned char* const srcPtr = (const unsigned char*)s;
     unsigned value32 = srcPtr[0];
-    value32 += (srcPtr[1]<<8);
-    value32 += (srcPtr[2]<<16);
-    value32 += ((unsigned)srcPtr[3])<<24;
+    value32 += (unsigned)srcPtr[1] <<  8;
+    value32 += (unsigned)srcPtr[2] << 16;
+    value32 += (unsigned)srcPtr[3] << 24;
     return value32;
 }
 
 
-static unsigned LZ4IO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
+static unsigned LZ4IO_fwriteSparse(LZ4IO_prefs_t* const prefs, FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
 {
     const size_t sizeT = sizeof(size_t);
     const size_t maskT = sizeT -1 ;
@@ -730,7 +822,7 @@ static unsigned LZ4IO_fwriteSparse(FILE* file, const void* buffer, size_t buffer
     const size_t* const bufferTEnd = bufferT + bufferSizeT;
     const size_t segmentSizeT = (32 KB) / sizeT;
 
-    if (!g_sparseFileSupport) {  /* normal write */
+    if (!prefs->sparseFileSupport) {  /* normal write */
         size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
         if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block");
         return 0;
@@ -799,7 +891,7 @@ static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
 
 
 static unsigned g_magicRead = 0;   /* out-parameter of LZ4IO_decodeLegacyStream() */
-static unsigned long long LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput)
+static unsigned long long LZ4IO_decodeLegacyStream(LZ4IO_prefs_t* const prefs, FILE* finput, FILE* foutput)
 {
     unsigned long long streamSize = 0;
     unsigned storedSkips = 0;
@@ -833,7 +925,7 @@ static unsigned long long LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput)
             if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! Corrupted input detected !");
             streamSize += decodeSize;
             /* Write Block */
-            storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, decodeSize, storedSkips); /* success or die */
+            storedSkips = LZ4IO_fwriteSparse(prefs, foutput, out_buff, decodeSize, storedSkips); /* success or die */
     }   }
     if (ferror(finput)) EXM_THROW(54, "Read error : ferror");
 
@@ -859,19 +951,19 @@ typedef struct {
     size_t dictBufferSize;
 } dRess_t;
 
-static void LZ4IO_loadDDict(dRess_t* ress) {
-    if (!g_useDictionary) {
+static void LZ4IO_loadDDict(LZ4IO_prefs_t* const prefs, dRess_t* ress) {   /* sets ress->dictBuffer and ress->dictBufferSize according to prefs */
+    if (!prefs->useDictionary) {   /* no dictionary in use : ress gets a NULL buffer of size 0 */
         ress->dictBuffer = NULL;
         ress->dictBufferSize = 0;
         return;
     }
 
-    ress->dictBuffer = LZ4IO_createDict(g_dictionaryFilename, &ress->dictBufferSize);
+    ress->dictBuffer = LZ4IO_createDict(prefs, &ress->dictBufferSize);   /* presumably also stores the dictionary size through its 2nd argument -- confirm in LZ4IO_createDict */
     if (!ress->dictBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary");
 }
 
 static const size_t LZ4IO_dBufferSize = 64 KB;
-static dRess_t LZ4IO_createDResources(void)
+static dRess_t LZ4IO_createDResources(LZ4IO_prefs_t* const prefs)
 {
     dRess_t ress;
 
@@ -886,7 +978,7 @@ static dRess_t LZ4IO_createDResources(void)
     ress.dstBuffer = malloc(ress.dstBufferSize);
     if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory");
 
-    LZ4IO_loadDDict(&ress);
+    LZ4IO_loadDDict(prefs, &ress);
 
     ress.dstFile = NULL;
     return ress;
@@ -902,7 +994,7 @@ static void LZ4IO_freeDResources(dRess_t ress)
 }
 
 
-static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE* dstFile)
+static unsigned long long LZ4IO_decompressLZ4F(LZ4IO_prefs_t* const prefs, dRess_t ress, FILE* srcFile, FILE* dstFile)
 {
     unsigned long long filesize = 0;
     LZ4F_errorCode_t nextToLoad;
@@ -937,8 +1029,8 @@ static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE
 
             /* Write Block */
             if (decodedBytes) {
-                if (!g_testMode)
-                    storedSkips = LZ4IO_fwriteSparse(dstFile, ress.dstBuffer, decodedBytes, storedSkips);
+                if (!prefs->testMode)
+                    storedSkips = LZ4IO_fwriteSparse(prefs, dstFile, ress.dstBuffer, decodedBytes, storedSkips);
                 filesize += decodedBytes;
                 DISPLAYUPDATE(2, "\rDecompressed : %u MB  ", (unsigned)(filesize>>20));
             }
@@ -949,7 +1041,7 @@ static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE
     /* can be out because readSize == 0, which could be an fread() error */
     if (ferror(srcFile)) EXM_THROW(67, "Read error");
 
-    if (!g_testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips);
+    if (!prefs->testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips);
     if (nextToLoad!=0) EXM_THROW(68, "Unfinished stream");
 
     return filesize;
@@ -958,7 +1050,7 @@ static unsigned long long LZ4IO_decompressLZ4F(dRess_t ress, FILE* srcFile, FILE
 
 #define PTSIZE  (64 KB)
 #define PTSIZET (PTSIZE / sizeof(size_t))
-static unsigned long long LZ4IO_passThrough(FILE* finput, FILE* foutput, unsigned char MNstore[MAGICNUMBER_SIZE])
+static unsigned long long LZ4IO_passThrough(LZ4IO_prefs_t* const prefs, FILE* finput, FILE* foutput, unsigned char MNstore[MAGICNUMBER_SIZE])
 {
 	size_t buffer[PTSIZET];
     size_t readBytes = 1;
@@ -971,7 +1063,7 @@ static unsigned long long LZ4IO_passThrough(FILE* finput, FILE* foutput, unsigne
     while (readBytes) {
         readBytes = fread(buffer, 1, PTSIZE, finput);
         total += readBytes;
-        storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, storedSkips);
+        storedSkips = LZ4IO_fwriteSparse(prefs, foutput, buffer, readBytes, storedSkips);
     }
     if (ferror(finput)) EXM_THROW(51, "Read Error");
 
@@ -998,7 +1090,7 @@ static int fseek_u32(FILE *fp, unsigned offset, int where)
 }
 
 #define ENDOFSTREAM ((unsigned long long)-1)
-static unsigned long long selectDecoder(dRess_t ress, FILE* finput, FILE* foutput)
+static unsigned long long selectDecoder(LZ4IO_prefs_t* const prefs, dRess_t ress, FILE* finput, FILE* foutput)
 {
     unsigned char MNstore[MAGICNUMBER_SIZE];
     unsigned magicNumber;
@@ -1024,10 +1116,10 @@ static unsigned long long selectDecoder(dRess_t ress, FILE* finput, FILE* foutpu
     switch(magicNumber)
     {
     case LZ4IO_MAGICNUMBER:
-        return LZ4IO_decompressLZ4F(ress, finput, foutput);
+        return LZ4IO_decompressLZ4F(prefs, ress, finput, foutput);
     case LEGACY_MAGICNUMBER:
         DISPLAYLEVEL(4, "Detected : Legacy format \n");
-        return LZ4IO_decodeLegacyStream(finput, foutput);
+        return LZ4IO_decodeLegacyStream(prefs, finput, foutput);
     case LZ4IO_SKIPPABLE0:
         DISPLAYLEVEL(4, "Skipping detected skippable area \n");
         {   size_t const nbReadBytes = fread(MNstore, 1, 4, finput);
@@ -1044,9 +1136,9 @@ static unsigned long long selectDecoder(dRess_t ress, FILE* finput, FILE* foutpu
     default:
         if (nbFrames == 1) {  /* just started */
             /* Wrong magic number at the beginning of 1st stream */
-            if (!g_testMode && g_overwrite) {
+            if (!prefs->testMode && prefs->overwrite && prefs->passThrough) {
                 nbFrames = 0;
-                return LZ4IO_passThrough(finput, foutput, MNstore);
+                return LZ4IO_passThrough(prefs, finput, foutput, MNstore);
             }
             EXM_THROW(44,"Unrecognized header : file cannot be decoded");
         }
@@ -1061,7 +1153,7 @@ static unsigned long long selectDecoder(dRess_t ress, FILE* finput, FILE* foutpu
 }
 
 
-static int LZ4IO_decompressSrcFile(dRess_t ress, const char* input_filename, const char* output_filename)
+static int LZ4IO_decompressSrcFile(LZ4IO_prefs_t* const prefs, dRess_t ress, const char* input_filename, const char* output_filename)
 {
     FILE* const foutput = ress.dstFile;
     unsigned long long filesize = 0;
@@ -1073,14 +1165,14 @@ static int LZ4IO_decompressSrcFile(dRess_t ress, const char* input_filename, con
     /* Loop over multiple streams */
     for ( ; ; ) {  /* endless loop, see break condition */
         unsigned long long const decodedSize =
-                        selectDecoder(ress, finput, foutput);
+                        selectDecoder(prefs, ress, finput, foutput);
         if (decodedSize == ENDOFSTREAM) break;
         filesize += decodedSize;
     }
 
     /* Close input */
     fclose(finput);
-    if (g_removeSrcFile) {  /* --rm */
+    if (prefs->removeSrcFile) {  /* --rm */
         if (remove(input_filename))
             EXM_THROW(45, "Remove error : %s: %s", input_filename, strerror(errno));
     }
@@ -1094,11 +1186,11 @@ static int LZ4IO_decompressSrcFile(dRess_t ress, const char* input_filename, con
 }
 
 
-static int LZ4IO_decompressDstFile(dRess_t ress, const char* input_filename, const char* output_filename)
+static int LZ4IO_decompressDstFile(LZ4IO_prefs_t* const prefs, dRess_t ress, const char* input_filename, const char* output_filename)
 {
     stat_t statbuf;
     int stat_result = 0;
-    FILE* const foutput = LZ4IO_openDstFile(output_filename);
+    FILE* const foutput = LZ4IO_openDstFile(prefs, output_filename);
     if (foutput==NULL) return 1;   /* failure */
 
     if ( strcmp(input_filename, stdinmark)
@@ -1106,7 +1198,7 @@ static int LZ4IO_decompressDstFile(dRess_t ress, const char* input_filename, con
         stat_result = 1;
 
     ress.dstFile = foutput;
-    LZ4IO_decompressSrcFile(ress, input_filename, output_filename);
+    LZ4IO_decompressSrcFile(prefs, ress, input_filename, output_filename);
 
     fclose(foutput);
 
@@ -1122,12 +1214,12 @@ static int LZ4IO_decompressDstFile(dRess_t ress, const char* input_filename, con
 }
 
 
-int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename)
+int LZ4IO_decompressFilename(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename)
 {
-    dRess_t const ress = LZ4IO_createDResources();
+    dRess_t const ress = LZ4IO_createDResources(prefs);
     clock_t const start = clock();
 
-    int const missingFiles = LZ4IO_decompressDstFile(ress, input_filename, output_filename);
+    int const missingFiles = LZ4IO_decompressDstFile(prefs, ress, input_filename, output_filename);
 
     clock_t const end = clock();
     double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
@@ -1138,7 +1230,7 @@ int LZ4IO_decompressFilename(const char* input_filename, const char* output_file
 }
 
 
-int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix)
+int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** inFileNamesTable, int ifntSize, const char* suffix)
 {
     int i;
     int skippedFiles = 0;
@@ -1146,16 +1238,16 @@ int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSiz
     char* outFileName = (char*)malloc(FNSPACE);
     size_t ofnSize = FNSPACE;
     size_t const suffixSize = strlen(suffix);
-    dRess_t ress = LZ4IO_createDResources();
+    dRess_t ress = LZ4IO_createDResources(prefs);
 
     if (outFileName==NULL) return ifntSize;   /* not enough memory */
-    ress.dstFile = LZ4IO_openDstFile(stdoutmark);
+    ress.dstFile = LZ4IO_openDstFile(prefs, stdoutmark);
 
     for (i=0; i<ifntSize; i++) {
         size_t const ifnSize = strlen(inFileNamesTable[i]);
         const char* const suffixPtr = inFileNamesTable[i] + ifnSize - suffixSize;
         if (!strcmp(suffix, stdoutmark)) {
-            missingFiles += LZ4IO_decompressSrcFile(ress, inFileNamesTable[i], stdoutmark);
+            missingFiles += LZ4IO_decompressSrcFile(prefs, ress, inFileNamesTable[i], stdoutmark);
             continue;
         }
         if (ofnSize <= ifnSize-suffixSize+1) { free(outFileName); ofnSize = ifnSize + 20; outFileName = (char*)malloc(ofnSize); if (outFileName==NULL) return ifntSize; }
@@ -1166,7 +1258,7 @@ int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSiz
         }
         memcpy(outFileName, inFileNamesTable[i], ifnSize - suffixSize);
         outFileName[ifnSize-suffixSize] = '\0';
-        missingFiles += LZ4IO_decompressDstFile(ress, inFileNamesTable[i], outFileName);
+        missingFiles += LZ4IO_decompressDstFile(prefs, ress, inFileNamesTable[i], outFileName);
     }
 
     LZ4IO_freeDResources(ress);
diff --git a/programs/lz4io.h b/programs/lz4io.h
index 22c5e3e..54d49be 100644
--- a/programs/lz4io.h
+++ b/programs/lz4io.h
@@ -48,61 +48,80 @@ static const char nulmark[] = "nul";
 static const char nulmark[] = "/dev/null";
 #endif
 
+/* ************************************************** */
+/* ****************** Type Definitions ************** */
+/* ************************************************** */
+
+typedef struct LZ4IO_prefs_s LZ4IO_prefs_t;
+
+LZ4IO_prefs_t* LZ4IO_defaultPreferences(void);
+void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs);
+
 
 /* ************************************************** */
 /* ****************** Functions ********************* */
 /* ************************************************** */
 
-int LZ4IO_compressFilename  (const char* input_filename, const char* output_filename, int compressionlevel);
-int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename);
+/* if output_filename == stdoutmark, writes to stdout */
+int LZ4IO_compressFilename(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename, int compressionlevel);
+int LZ4IO_decompressFilename(LZ4IO_prefs_t* const prefs, const char* input_filename, const char* output_filename);
 
-int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel);
-int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix);
+/* if suffix == stdoutmark, writes to stdout */
+int LZ4IO_compressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel);
+int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** inFileNamesTable, int ifntSize, const char* suffix);
 
 
 /* ************************************************** */
 /* ****************** Parameters ******************** */
 /* ************************************************** */
 
-int LZ4IO_setDictionaryFilename(const char* dictionaryFilename);
+int LZ4IO_setDictionaryFilename(LZ4IO_prefs_t* const prefs, const char* dictionaryFilename);
+
+/* Default setting : passThrough = 0;
+   return : passThrough mode (0/1) */
+int LZ4IO_setPassThrough(LZ4IO_prefs_t* const prefs, int yes);
 
 /* Default setting : overwrite = 1;
    return : overwrite mode (0/1) */
-int LZ4IO_setOverwrite(int yes);
+int LZ4IO_setOverwrite(LZ4IO_prefs_t* const prefs, int yes);
 
 /* Default setting : testMode = 0;
    return : testMode (0/1) */
-int LZ4IO_setTestMode(int yes);
+int LZ4IO_setTestMode(LZ4IO_prefs_t* const prefs, int yes);
 
 /* blockSizeID : valid values : 4-5-6-7
    return : 0 if error, blockSize if OK */
-size_t LZ4IO_setBlockSizeID(unsigned blockSizeID);
+size_t LZ4IO_setBlockSizeID(LZ4IO_prefs_t* const prefs, unsigned blockSizeID);
+
+/* blockSize : valid values : 32 -> 4MB
+   return : 0 if error, actual blocksize if OK */
+size_t LZ4IO_setBlockSize(LZ4IO_prefs_t* const prefs, size_t blockSize);
 
 /* Default setting : independent blocks */
 typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t;
-int LZ4IO_setBlockMode(LZ4IO_blockMode_t blockMode);
+int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode);
 
 /* Default setting : no block checksum */
-int LZ4IO_setBlockChecksumMode(int xxhash);
+int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int xxhash);
 
 /* Default setting : stream checksum enabled */
-int LZ4IO_setStreamChecksumMode(int xxhash);
+int LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int xxhash);
 
 /* Default setting : 0 (no notification) */
 int LZ4IO_setNotificationLevel(int level);
 
 /* Default setting : 0 (disabled) */
-int LZ4IO_setSparseFile(int enable);
+int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable);
 
 /* Default setting : 0 == no content size present in frame header */
-int LZ4IO_setContentSize(int enable);
+int LZ4IO_setContentSize(LZ4IO_prefs_t* const prefs, int enable);
 
 /* Default setting : 0 == src file preserved */
-void LZ4IO_setRemoveSrcFile(unsigned flag);
+void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag);
 
 /* Default setting : 0 == favor compression ratio
  * Note : 1 only works for high compression levels (10+) */
-void LZ4IO_favorDecSpeed(int favor);
+void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor);
 
 
 #endif  /* LZ4IO_H_237902873 */
diff --git a/programs/util.h b/programs/util.h
index d74db0d..6a35481 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -34,6 +34,7 @@ extern "C" {
 #include <stdlib.h>       /* malloc */
 #include <string.h>       /* strlen, strncpy */
 #include <stdio.h>        /* fprintf */
+#include <assert.h>
 #include <sys/types.h>    /* stat, utime */
 #include <sys/stat.h>     /* stat */
 #if defined(_MSC_VER)
@@ -44,6 +45,7 @@ extern "C" {
 #  include <utime.h>      /* utime */
 #endif
 #include <time.h>         /* time */
+#include <limits.h>       /* INT_MAX */
 #include <errno.h>
 
 
@@ -375,9 +377,9 @@ UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFi
  * A modified version of realloc().
  * If UTIL_realloc() fails the original block is freed.
 */
-UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
+UTIL_STATIC void* UTIL_realloc(void* ptr, size_t size)
 {
-    void *newptr = realloc(ptr, size);
+    void* newptr = realloc(ptr, size);
     if (newptr) return newptr;
     free(ptr);
     return NULL;
@@ -387,14 +389,14 @@ UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
 #ifdef _WIN32
 #  define UTIL_HAS_CREATEFILELIST
 
-UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd)
 {
     char* path;
-    int dirLength, fnameLength, pathLength, nbFiles = 0;
+    size_t dirLength, nbFiles = 0;
     WIN32_FIND_DATAA cFile;
     HANDLE hFile;
 
-    dirLength = (int)strlen(dirName);
+    dirLength = strlen(dirName);
     path = (char*) malloc(dirLength + 3);
     if (!path) return 0;
 
@@ -411,7 +413,8 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
     free(path);
 
     do {
-        fnameLength = (int)strlen(cFile.cFileName);
+        size_t pathLength;
+        int const fnameLength = (int)strlen(cFile.cFileName);
         path = (char*) malloc(dirLength + fnameLength + 2);
         if (!path) { FindClose(hFile); return 0; }
         memcpy(path, dirName, dirLength);
@@ -443,7 +446,8 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
     } while (FindNextFileA(hFile, &cFile));
 
     FindClose(hFile);
-    return nbFiles;
+    assert(nbFiles < INT_MAX);
+    return (int)nbFiles;
 }
 
 #elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L)  /* opendir, readdir require POSIX.1-2001 */
@@ -451,12 +455,11 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
 #  include <dirent.h>       /* opendir, readdir */
 #  include <string.h>       /* strerror, memcpy */
 
-UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd)
 {
-    DIR *dir;
-    struct dirent *entry;
-    char* path;
-    int dirLength, fnameLength, pathLength, nbFiles = 0;
+    DIR* dir;
+    struct dirent * entry;
+    int dirLength, nbFiles = 0;
 
     if (!(dir = opendir(dirName))) {
         fprintf(stderr, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
@@ -466,6 +469,8 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
     dirLength = (int)strlen(dirName);
     errno = 0;
     while ((entry = readdir(dir)) != NULL) {
+        char* path;
+        int fnameLength, pathLength;
         if (strcmp (entry->d_name, "..") == 0 ||
             strcmp (entry->d_name, ".") == 0) continue;
         fnameLength = (int)strlen(entry->d_name);
@@ -508,7 +513,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
 
 #else
 
-UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd)
+UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd)
 {
     (void)bufStart; (void)bufEnd; (void)pos;
     fprintf(stderr, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
@@ -523,12 +528,13 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
  * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
  * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
  */
-UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb)
+UTIL_STATIC const char**
+UTIL_createFileList(const char** inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb)
 {
     size_t pos;
     unsigned i, nbFiles;
     char* buf = (char*)malloc(LIST_SIZE_INCREASE);
-    char* bufend = buf + LIST_SIZE_INCREASE;
+    size_t bufSize = LIST_SIZE_INCREASE;
     const char** fileTable;
 
     if (!buf) return NULL;
@@ -536,25 +542,28 @@ UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned i
     for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
         if (!UTIL_isDirectory(inputNames[i])) {
             size_t const len = strlen(inputNames[i]);
-            if (buf + pos + len >= bufend) {
-                ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
+            if (pos + len >= bufSize) {
+                size_t newListSize = bufSize + LIST_SIZE_INCREASE;
                 buf = (char*)UTIL_realloc(buf, newListSize);
-                bufend = buf + newListSize;
+                bufSize = newListSize;
                 if (!buf) return NULL;
             }
-            if (buf + pos + len < bufend) {
-                strncpy(buf + pos, inputNames[i], bufend - (buf + pos));
+            if (pos + len < bufSize) {
+                strncpy(buf + pos, inputNames[i], bufSize - pos);
                 pos += len + 1;
                 nbFiles++;
             }
         } else {
-            nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend);
+            char* bufend = buf + bufSize;
+            nbFiles += (unsigned)UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend);
             if (buf == NULL) return NULL;
+            assert(bufend > buf);
+            bufSize = (size_t)(bufend - buf);
     }   }
 
     if (nbFiles == 0) { free(buf); return NULL; }
 
-    fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
+    fileTable = (const char**)malloc(((size_t)nbFiles+1) * sizeof(const char*));
     if (!fileTable) { free(buf); return NULL; }
 
     for (i=0, pos=0; i<nbFiles; i++) {
@@ -562,7 +571,11 @@ UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned i
         pos += strlen(fileTable[i]) + 1;
     }
 
-    if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
+    if (pos > bufSize) {
+        free(buf);
+        free((void*)fileTable);
+        return NULL;
+    }   /* can this happen ? */
 
     *allocatedBuffer = buf;
     *allocatedNamesNb = nbFiles;
@@ -571,7 +584,8 @@ UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned i
 }
 
 
-UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
+UTIL_STATIC void
+UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
 {
     if (allocatedBuffer) free(allocatedBuffer);
     if (filenameTable) free((void*)filenameTable);
diff --git a/tests/.gitignore b/tests/.gitignore
index 9aa42a0..c4f9092 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -10,6 +10,7 @@ fuzzer32
 fasttest
 roundTripTest
 checkTag
+checkFrame
 
 # test artefacts
 tmp*
diff --git a/tests/Makefile b/tests/Makefile
index 3de111b..70cae63 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -63,7 +63,7 @@ NB_LOOPS     ?= -i1
 
 default: all
 
-all: fullbench fuzzer frametest roundTripTest datagen
+all: fullbench fuzzer frametest roundTripTest datagen checkFrame
 
 all32: CFLAGS+=-m32
 all32: all
@@ -109,17 +109,21 @@ roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c
 datagen : $(PRGDIR)/datagen.c datagencli.c
 	$(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT)
 
+checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
 clean:
 	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
 	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
-	@$(RM) core *.o *.test tmp* \
+	@$(RM) -rf core *.o *.test tmp* \
         fullbench-dll$(EXT) fullbench-lib$(EXT) \
         fullbench$(EXT) fullbench32$(EXT) \
         fuzzer$(EXT) fuzzer32$(EXT) \
         frametest$(EXT) frametest32$(EXT) \
         fasttest$(EXT) roundTripTest$(EXT) \
-        datagen$(EXT) checkTag$(EXT)
-	@rm -fR $(TESTDIR)
+        datagen$(EXT) checkTag$(EXT) \
+        checkFrame$(EXT)
+	@$(RM) -rf $(TESTDIR)
 	@echo Cleaning completed
 
 .PHONY: versionsTest
@@ -147,12 +151,25 @@ endif
 
 DD:=dd
 
+.PHONY: list
+list:
+	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | grep -E -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
 
-test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install
+.PHONY: test
+test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install test-amalgamation
 
+.PHONY: test32
 test32: CFLAGS+=-m32
 test32: test
 
+.PHONY: test-amalgamation
+test-amalgamation: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c
+	cat $(LZ4DIR)/lz4.c > lz4_all.c
+	cat $(LZ4DIR)/lz4hc.c >> lz4_all.c
+	cat $(LZ4DIR)/lz4frame.c >> lz4_all.c
+	$(CC) -I$(LZ4DIR) -c lz4_all.c
+	$(RM) lz4_all.c lz4_all.o
+
 test-install: lz4 lib liblz4.pc
 	lz4_root=.. ./test_install.sh
 
@@ -205,7 +222,7 @@ test-lz4-frame-concatenation: lz4 datagen
 	@$(LZ4) -zq tmp-lfc-nonempty > tmp-lfc-nonempty.lz4
 	cat tmp-lfc-nonempty.lz4 tmp-lfc-empty.lz4 tmp-lfc-nonempty.lz4 > tmp-lfc-concat.lz4
 	$(LZ4) -d tmp-lfc-concat.lz4 > tmp-lfc-result
-	sdiff tmp-lfc-src tmp-lfc-result
+	cmp tmp-lfc-src tmp-lfc-result
 	@$(RM) tmp-lfc-*
 	@echo frame concatenation test completed
 
@@ -213,13 +230,36 @@ test-lz4-multiple: lz4 datagen
 	@echo "\n ---- test multiple files ----"
 	@./datagen -s1        > tmp-tlm1 2> $(VOID)
 	@./datagen -s2 -g100K > tmp-tlm2 2> $(VOID)
-	@./datagen -s3 -g1M   > tmp-tlm3 2> $(VOID)
+	@./datagen -s3 -g200K > tmp-tlm3 2> $(VOID)
+	# compress multiple files : one .lz4 per source file
 	$(LZ4) -f -m tmp-tlm*
-	ls -ls tmp-tlm*
-	@$(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3
-	$(LZ4) -df -m tmp-tlm*.lz4
-	ls -ls tmp-tlm*
-	$(LZ4) -f -m tmp-tlm1 notHere tmp-tlm2; echo $$?
+	test -f tmp-tlm1.lz4
+	test -f tmp-tlm2.lz4
+	test -f tmp-tlm3.lz4
+	# decompress multiple files : one output file per .lz4
+	mv tmp-tlm1 tmp-tlm1-orig
+	mv tmp-tlm2 tmp-tlm2-orig
+	mv tmp-tlm3 tmp-tlm3-orig
+	$(LZ4) -d -f -m tmp-tlm*.lz4
+	cmp tmp-tlm1 tmp-tlm1-orig   # must be identical
+	cmp tmp-tlm2 tmp-tlm2-orig
+	cmp tmp-tlm3 tmp-tlm3-orig
+	# compress multiple files into stdout
+	cat tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 > tmp-tlm-concat1
+	$(RM) *.lz4
+	$(LZ4) -m tmp-tlm1 tmp-tlm2 tmp-tlm3 -c > tmp-tlm-concat2
+	test ! -f tmp-tlm1.lz4  # must not create .lz4 artefact
+	cmp tmp-tlm-concat1 tmp-tlm-concat2  # must be equivalent
+	# decompress multiple files into stdout
+	$(RM) tmp-tlm-concat1 tmp-tlm-concat2
+	$(LZ4) -f -m tmp-tlm1 tmp-tlm2 tmp-tlm3   # generate .lz4 to decompress
+	cat tmp-tlm1 tmp-tlm2 tmp-tlm3 > tmp-tlm-concat1   # create concatenated reference
+	$(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3
+	$(LZ4) -d -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2
+	test ! -f tmp-tlm1  # must not create file artefact
+	cmp tmp-tlm-concat1 tmp-tlm-concat2  # must be equivalent
+	# compress multiple files, one of which is absent (must fail)
+	! $(LZ4) -f -m tmp-tlm-concat1 notHere tmp-tlm-concat2  # must fail : notHere not present
 	@$(RM) tmp-tlm*
 
 test-lz4-basic: lz4 datagen unlz4 lz4cat
@@ -231,7 +271,7 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat
 	$(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec
 	$(LZ4) --no-frame-crc < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec
 	$(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec
-	./datagen           | $(LZ4)        | $(LZ4) -t
+	./datagen           | $(LZ4) -BI    | $(LZ4) -t
 	./datagen -g6M -P99 | $(LZ4) -9BD   | $(LZ4) -t
 	./datagen -g17M     | $(LZ4) -9v    | $(LZ4) -qt
 	./datagen -g33M     | $(LZ4) --no-frame-crc | $(LZ4) -t
@@ -273,6 +313,11 @@ test-lz4-basic: lz4 datagen unlz4 lz4cat
 	test "$(shell ./datagen -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell ./datagen -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1
 	! $(LZ4) -c --fast=0 tmp-tlb-dg20K # lz4 should fail when fast=0
 	! $(LZ4) -c --fast=-1 tmp-tlb-dg20K # lz4 should fail when fast=-1
+	# Test for #596
+	@echo "TEST" > tmp-tlb-test
+	$(LZ4) tmp-tlb-test
+	$(LZ4) tmp-tlb-test.lz4 tmp-tlb-test2
+	$(DIFF) -q tmp-tlb-test tmp-tlb-test2
 	@$(RM) tmp-tlb*
 
 
@@ -305,7 +350,7 @@ test-lz4-dict: lz4 datagen
 test-lz4-hugefile: lz4 datagen
 	@echo "\n ---- test huge files compression/decompression ----"
 	./datagen -g6GB   | $(LZ4) -vB5D  | $(LZ4) -qt
-	./datagen -g6GB   | $(LZ4) -v5BD  | $(LZ4) -qt
+	./datagen -g5GB   | $(LZ4) -v4BD  | $(LZ4) -qt
 	# test large file size [2-4] GB
 	@./datagen -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1
 	@ls -ls tmphf1
@@ -321,8 +366,11 @@ test-lz4-testmode: lz4 datagen
 	! ./datagen | $(LZ4) -t
 	! ./datagen | $(LZ4) -tf
 	@echo "\n ---- pass-through mode ----"
-	! ./datagen | $(LZ4) -d  > $(VOID)
-	./datagen | $(LZ4) -df > $(VOID)
+	@echo "Why hello there " > tmp-tlt2.lz4
+	! $(LZ4) -f tmp-tlt2.lz4 > $(VOID)
+	! ./datagen | $(LZ4) -dc  > $(VOID)
+	! ./datagen | $(LZ4) -df > $(VOID)
+	./datagen | $(LZ4) -dcf > $(VOID)
 	@echo "Hello World !" > tmp-tlt1
 	$(LZ4) -dcf tmp-tlt1
 	@echo "from underground..." > tmp-tlt2
@@ -331,7 +379,7 @@ test-lz4-testmode: lz4 datagen
 	! $(LZ4)     file-does-not-exist
 	! $(LZ4) -f  file-does-not-exist
 	! $(LZ4) -fm file1-dne file2-dne
-	@$(RM) tmp-tlt
+	@$(RM) tmp-tlt tmp-tlt1 tmp-tlt2 tmp-tlt2.lz4
 
 test-lz4-opt-parser: lz4 datagen
 	@echo "\n ---- test opt-parser ----"
diff --git a/tests/checkFrame.c b/tests/checkFrame.c
new file mode 100644
index 0000000..50c0405
--- /dev/null
+++ b/tests/checkFrame.c
@@ -0,0 +1,311 @@
+  /*
+      checkFrame - verify frame headers
+      Copyright (C) Yann Collet 2014-present
+
+      GPL v2 License
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License along
+      with this program; if not, write to the Free Software Foundation, Inc.,
+      51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+      You can contact the author at :
+      - LZ4 homepage : http://www.lz4.org
+      - LZ4 source repository : https://github.com/lz4/lz4
+  */
+
+  /*-************************************
+  *  Compiler specific
+  **************************************/
+  #ifdef _MSC_VER    /* Visual Studio */
+  #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+  #  pragma warning(disable : 4146)        /* disable: C4146: minus unsigned expression */
+  #endif
+
+
+  /*-************************************
+  *  Includes
+  **************************************/
+  #include "util.h"       /* U32 */
+  #include <stdlib.h>     /* malloc, free */
+  #include <stdio.h>      /* fprintf */
+  #include <string.h>     /* strcmp */
+  #include <time.h>       /* clock_t, clock(), CLOCKS_PER_SEC */
+  #include <assert.h>
+  #include "lz4frame.h"   /* include multiple times to test correctness/safety */
+  #include "lz4frame.h"
+  #define LZ4F_STATIC_LINKING_ONLY
+  #include "lz4frame.h"
+  #include "lz4frame.h"
+  #include "lz4.h"        /* LZ4_VERSION_STRING */
+  #define XXH_STATIC_LINKING_ONLY
+  #include "xxhash.h"     /* XXH64 */
+
+
+  /*-************************************
+  *  Constants
+  **************************************/
+  #define KB *(1U<<10)
+  #define MB *(1U<<20)
+  #define GB *(1U<<30)
+
+
+  /*-************************************
+  *  Macros
+  **************************************/
+  #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
+  #define DISPLAYLEVEL(l, ...)  if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+
+  /**************************************
+  *  Exceptions
+  ***************************************/
+  #ifndef DEBUG
+  #  define DEBUG 0
+  #endif
+  #define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
+  #define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, " \n");                                               \
+    return(error);                                                          \
+}
+
+
+
+/*-***************************************
+*  Local Parameters
+*****************************************/
+static U32 no_prompt = 0;
+static U32 displayLevel = 2;
+static U32 use_pause = 0;
+
+
+/*-*******************************************************
+*  Frame checking functions
+*********************************************************/
+#define MIN(a,b)  ( (a) < (b) ? (a) : (b) )
+#define MAX(a,b)  ( (a) > (b) ? (a) : (b) )
+
+typedef struct {                        /* working set used by the frame checker */
+    void*  srcBuffer;                   /* receives the bytes fread() from the file being checked */
+    size_t srcBufferSize;               /* capacity of srcBuffer, in bytes */
+    void*  dstBuffer;                   /* output area handed to LZ4F_decompress(); its content is never read back in this file */
+    size_t dstBufferSize;               /* capacity of dstBuffer, in bytes */
+    LZ4F_decompressionContext_t ctx;    /* decompression context, created in createCResources() */
+} cRess_t;
+
+static int createCResources(cRess_t *ress)
+{   /* Fills *ress with two 4 MB buffers and a decompression context. Returns 0 on success, 20 or 21 on failure. */
+    ress->srcBufferSize = 4 MB;   /* MB is a postfix multiplier macro : expands to 4 *(1U<<20) */
+    ress->srcBuffer = malloc(ress->srcBufferSize);
+    ress->dstBufferSize = 4 MB;
+    ress->dstBuffer = malloc(ress->dstBufferSize);
+
+    if (!ress->srcBuffer || !ress->dstBuffer) {
+        free(ress->srcBuffer);   /* free(NULL) is a no-op, so both can be released whichever allocation failed */
+        free(ress->dstBuffer);
+        EXM_THROW(20, "Allocation error : not enough memory");   /* macro : displays the message, then returns 20 from this function */
+    }
+
+    if (LZ4F_isError( LZ4F_createDecompressionContext(&(ress->ctx), LZ4F_VERSION) )) {
+        free(ress->srcBuffer);   /* context creation failed : give back both buffers before returning */
+        free(ress->dstBuffer);
+        EXM_THROW(21, "Unable to create decompression context");   /* returns 21 */
+    }
+    return 0;
+}
+
+static void freeCResources(cRess_t ress)
+{   /* Releases the buffers and the context held in ress. ress is received by value, so the caller's copy is not cleared. */
+    free(ress.srcBuffer);
+    free(ress.dstBuffer);
+
+    (void) LZ4F_freeDecompressionContext(ress.ctx);   /* return value deliberately ignored */
+}
+
+int frameCheck(cRess_t ress, FILE* const srcFile, unsigned bsid, size_t blockSize)
+{   /* Decodes srcFile frame by frame, comparing each frame's block size ID with bsid and each block's decoded size with blockSize. Returns 0, or an error code in 22-27. */
+    LZ4F_errorCode_t nextToLoad = 0;   /* result of the latest LZ4F call; 0 means a new frame starts next */
+    size_t curblocksize = 0;   /* decoded bytes accumulated for the block in progress */
+    int partialBlock = 0;   /* 1 while the current block extends past the bytes read so far */
+
+    /* Main Loop */
+    for (;;) {
+        size_t readSize;
+        size_t pos = 0;   /* read offset inside ress.srcBuffer */
+        size_t decodedBytes = ress.dstBufferSize;
+        size_t remaining;   /* before an LZ4F call : bytes offered; after : added to pos as the amount consumed */
+        LZ4F_frameInfo_t frameInfo;
+
+        /* Read input */
+        readSize = fread(ress.srcBuffer, 1, ress.srcBufferSize, srcFile);
+        if (!readSize) break;   /* reached end of file or stream */
+
+        while (pos < readSize) {  /* still to read */
+            /* Decode Input (at least partially) */
+            if (!nextToLoad) {
+                /* LZ4F_decompress returned 0 : starting new frame */
+                curblocksize = 0;
+                remaining = readSize - pos;
+                nextToLoad = LZ4F_getFrameInfo(ress.ctx, &frameInfo, (char*)(ress.srcBuffer)+pos, &remaining);
+                if (LZ4F_isError(nextToLoad))
+                    EXM_THROW(22, "Error getting frame info: %s",
+                                LZ4F_getErrorName(nextToLoad));
+                if (frameInfo.blockSizeID != bsid)   /* frame header must declare the expected block size ID */
+                    EXM_THROW(23, "Block size ID %u != expected %u",
+                                frameInfo.blockSizeID, bsid);
+                pos += remaining;   /* skip the bytes LZ4F_getFrameInfo() consumed */
+                /* nextToLoad should be block header size */
+                remaining = nextToLoad;
+                decodedBytes = ress.dstBufferSize;
+                nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
+                if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
+                pos += remaining;
+            }
+            decodedBytes = ress.dstBufferSize;   /* offer the whole output buffer again for the next call */
+            /* nextToLoad should be just enough to cover the next block */
+            if (nextToLoad > (readSize - pos)) {
+                /* block is not fully contained in current buffer */
+                partialBlock = 1;
+                remaining = readSize - pos;   /* feed what is available; the rest comes with the next fread() */
+            } else {
+                if (partialBlock) {
+                    partialBlock = 0;   /* the tail of a split block is now available */
+                }
+                remaining = nextToLoad;
+            }
+            nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL);
+            if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));   /* same code 24 as the call above */
+            curblocksize += decodedBytes;
+            pos += remaining;
+            if (!partialBlock) {
+                /* detect small block due to end of frame; the final 4-byte frame checksum could be left in the buffer */
+                if ((curblocksize != 0) && (nextToLoad > 4)) {
+                    if (curblocksize != blockSize)
+                        EXM_THROW(25, "Block size %u != expected %u, pos %u\n",
+                                    (unsigned)curblocksize, (unsigned)blockSize, (unsigned)pos);
+                }
+                curblocksize = 0;   /* start counting the next block from zero */
+            }
+        }
+    }
+    /* can be out because readSize == 0, which could be an fread() error */
+    if (ferror(srcFile)) EXM_THROW(26, "Read error");
+
+    if (nextToLoad!=0) EXM_THROW(27, "Unfinished stream");   /* input ended while the latest LZ4F result was still non-zero */
+
+    return 0;
+}
+
+int FUZ_usage(const char* programName)
+{   /* Prints the command-line help through DISPLAY (stderr). Always returns 0. */
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [args] filename\n", programName);
+    DISPLAY( "\n");
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -b#    : expected blocksizeID [4-7] (required)\n");
+    DISPLAY( " -B#    : expected blocksize [32-4194304] (required)\n");
+    DISPLAY( " -v     : verbose\n");
+    DISPLAY( " -h     : display help and exit\n");
+    return 0;   /* callers in main() return this value directly as the exit status */
+}
+
+
+int main(int argc, const char** argv)
+{   /* Parses options, then checks the first non-option argument as a file with frameCheck() and returns its result. */
+    int argNb;
+    unsigned bsid=0;   /* expected block size ID, set by -b#; 0 means not provided */
+    size_t blockSize=0;   /* expected block size, set by -B#; 0 means not provided */
+    const char* const programName = argv[0];
+
+    /* Check command line */
+    for (argNb=1; argNb<argc; argNb++) {
+        const char* argument = argv[argNb];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* Decode command (note : aggregated short commands are allowed) */
+        if (argument[0]=='-') {
+            if (!strcmp(argument, "--no-prompt")) {
+                no_prompt=1;   /* stored, but not read anywhere in this file */
+                displayLevel=1;
+                continue;
+            }
+            argument++;   /* skip the leading '-' */
+
+            while (*argument!=0) {
+                switch(*argument)
+                {
+                case 'h':
+                    return FUZ_usage(programName);
+                case 'v':
+                    argument++;
+                    displayLevel++;
+                    break;
+                case 'q':
+                    argument++;
+                    displayLevel--;
+                    break;
+                case 'p': /* pause at the end (flag is stored, but not read anywhere in this file) */
+                    argument++;
+                    use_pause = 1;
+                    break;
+
+                case 'b':
+                    argument++;
+                    bsid=0;
+                    while ((*argument>='0') && (*argument<='9')) {   /* decimal digits following -b; no overflow check */
+                        bsid *= 10;
+                        bsid += (unsigned)(*argument - '0');
+                        argument++;
+                    }
+                    break;
+
+                case 'B':
+                    argument++;
+                    blockSize=0;
+                    while ((*argument>='0') && (*argument<='9')) {   /* decimal digits following -B; no overflow check */
+                        blockSize *= 10;
+                        blockSize += (size_t)(*argument - '0');
+                        argument++;
+                    }
+                    break;
+
+                default:
+                    ;   /* NOTE(review): FUZ_usage() returns 0, so an unknown option exits with status 0 - confirm this is intended */
+                    return FUZ_usage(programName);
+                }
+            }
+        } else {
+            int err;
+            FILE *srcFile;
+            cRess_t ress;
+            if (bsid == 0 || blockSize == 0)   /* both -b# and -B# must have been given before the filename */
+              return FUZ_usage(programName);   /* NOTE(review): exits with status 0 although required options are missing - confirm */
+            DISPLAY("Starting frame checker (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION_STRING);
+            err = createCResources(&ress);
+            if (err) return (err);   /* createCResources() already released its partial allocations */
+            srcFile = fopen(argument, "rb");
+            if ( srcFile==NULL ) {
+                freeCResources(ress);
+                EXM_THROW(1, "%s: %s \n", argument, strerror(errno));   /* NOTE(review): <errno.h> is not included directly here; presumably provided via util.h - confirm */
+            }
+            err = frameCheck(ress, srcFile, bsid, blockSize);
+            freeCResources(ress);
+            fclose(srcFile);
+            return (err);   /* only the first filename is checked; any later arguments are never reached */
+        }
+    }
+    return 0;   /* reached only when no filename argument was given : nothing was checked */
+}
diff --git a/tests/frametest.c b/tests/frametest.c
index 4efeb6f..a5197ff 100644
--- a/tests/frametest.c
+++ b/tests/frametest.c
@@ -27,8 +27,8 @@
 *  Compiler specific
 **************************************/
 #ifdef _MSC_VER    /* Visual Studio */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4146)        /* disable: C4146: minus unsigned expression */
+#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
 #endif
 
 
@@ -55,7 +55,7 @@
 static void FUZ_writeLE32 (void* dstVoidPtr, U32 value32)
 {
     BYTE* dstPtr = (BYTE*)dstVoidPtr;
-    dstPtr[0] = (BYTE)value32;
+    dstPtr[0] = (BYTE) value32;
     dstPtr[1] = (BYTE)(value32 >> 8);
     dstPtr[2] = (BYTE)(value32 >> 16);
     dstPtr[3] = (BYTE)(value32 >> 24);
@@ -77,7 +77,6 @@ static const U32 prime1 = 2654435761U;
 static const U32 prime2 = 2246822519U;
 
 
-
 /*-************************************
 *  Macros
 **************************************/
@@ -160,7 +159,7 @@ static unsigned FUZ_highbit(U32 v32)
 {
     unsigned nbBits = 0;
     if (v32==0) return 0;
-    while (v32) v32 >>= 1, nbBits ++;
+    while (v32) {v32 >>= 1; nbBits ++;}
     return nbBits;
 }
 
@@ -211,9 +210,13 @@ int basicTests(U32 seed, double compressibility)
     CHECK ( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) );
 
     DISPLAYLEVEL(3, "LZ4F_getFrameInfo on null-content frame (#157) \n");
-    {   size_t avail_in = cSize;
-        LZ4F_frameInfo_t frame_info;
+    assert(cSize >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH);
+    {   LZ4F_frameInfo_t frame_info;
+        size_t const fhs = LZ4F_headerSize(compressedBuffer, LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH);
+        size_t avail_in = fhs;
+        CHECK( fhs );
         CHECK( LZ4F_getFrameInfo(dCtx, &frame_info, compressedBuffer, &avail_in) );
+        if (avail_in != fhs) goto _output_error;  /* must consume all, since header size is supposed to be exact */
     }
 
     DISPLAYLEVEL(3, "LZ4F_freeDecompressionContext \n");
@@ -270,7 +273,7 @@ int basicTests(U32 seed, double compressibility)
             if (decResult != 0) goto _output_error;   /* should finish now */
             op += oSize;
             if (op>oend) { DISPLAY("decompression write overflow \n"); goto _output_error; }
-            {   U64 const crcDest = XXH64(decodedBuffer, op-ostart, 1);
+            {   U64 const crcDest = XXH64(decodedBuffer, (size_t)(op-ostart), 1);
                 if (crcDest != crcOrig) goto _output_error;
         }   }
 
@@ -306,10 +309,10 @@ int basicTests(U32 seed, double compressibility)
             }
 
             DISPLAYLEVEL(3, "LZ4F_getFrameInfo on enough input : ");
-            iSize = 15 - iSize;
+            iSize = LZ4F_headerSize(ip, LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH);
+            CHECK( iSize );
             CHECK( LZ4F_getFrameInfo(dCtx, &fi, ip, &iSize) );
             DISPLAYLEVEL(3, " correctly decoded \n");
-            ip += iSize;
         }
 
         DISPLAYLEVEL(3, "Decode a buggy input : ");
@@ -337,15 +340,16 @@ int basicTests(U32 seed, double compressibility)
             const BYTE* ip = (const BYTE*) compressedBuffer;
             const BYTE* const iend = ip + cSize;
             while (ip < iend) {
-                size_t oSize = oend-op;
+                size_t oSize = (size_t)(oend-op);
                 size_t iSize = 1;
                 CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
                 op += oSize;
                 ip += iSize;
             }
-            { U64 const crcDest = XXH64(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, 1);
-              if (crcDest != crcOrig) goto _output_error; }
-            DISPLAYLEVEL(3, "Regenerated %u/%u bytes \n", (unsigned)(op-ostart), COMPRESSIBLE_NOISE_LENGTH);
+            {   U64 const crcDest = XXH64(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, 1);
+                if (crcDest != crcOrig) goto _output_error;
+            }
+            DISPLAYLEVEL(3, "Regenerated %u/%u bytes \n", (unsigned)(op-ostart), (unsigned)COMPRESSIBLE_NOISE_LENGTH);
         }
     }
 
@@ -379,8 +383,8 @@ int basicTests(U32 seed, double compressibility)
         while (ip < iend) {
             unsigned const nbBits = FUZ_rand(&randState) % maxBits;
             size_t iSize = (FUZ_rand(&randState) & ((1<<nbBits)-1)) + 1;
-            size_t oSize = oend-op;
-            if (iSize > (size_t)(iend-ip)) iSize = iend-ip;
+            size_t oSize = (size_t)(oend-op);
+            if (iSize > (size_t)(iend-ip)) iSize = (size_t)(iend-ip);
             CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) );
             op += oSize;
             ip += iSize;
@@ -520,7 +524,7 @@ int basicTests(U32 seed, double compressibility)
         LZ4F_CDict* const cdict = LZ4F_createCDict(CNBuffer, dictSize);
         if (cdict == NULL) goto _output_error;
         CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
-        
+
         DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with NULL dict : ");
         CHECK_V(cSizeNoDict,
                 LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
@@ -657,6 +661,29 @@ int basicTests(U32 seed, double compressibility)
         CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL;
     }
 
+    DISPLAYLEVEL(3, "getBlockSize test: \n");
+    { size_t result;
+      unsigned blockSizeID;
+      for (blockSizeID = 4; blockSizeID < 8; ++blockSizeID) {
+        result = LZ4F_getBlockSize(blockSizeID);
+        CHECK(result);
+        DISPLAYLEVEL(3, "Returned block size of %zu bytes for blockID %u \n",
+                         result, blockSizeID);
+      }
+
+      /* Test an invalid input that's too large */
+      result = LZ4F_getBlockSize(8);
+      if(!LZ4F_isError(result) ||
+          LZ4F_getErrorCode(result) != LZ4F_ERROR_maxBlockSize_invalid)
+        goto _output_error;
+
+      /* Test an invalid input that's too small */
+      result = LZ4F_getBlockSize(3);
+      if(!LZ4F_isError(result) ||
+          LZ4F_getErrorCode(result) != LZ4F_ERROR_maxBlockSize_invalid)
+        goto _output_error;
+    }
+
 
     DISPLAYLEVEL(3, "Skippable frame test : \n");
     {   size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH;
@@ -755,17 +782,16 @@ static void locateBuffDiff(const void* buff1, const void* buff2, size_t size, un
 
 int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressibility, U32 duration_s)
 {
-    unsigned testResult = 0;
+    int testResult = 0;
     unsigned testNb = 0;
     size_t const srcDataLength = 9 MB;  /* needs to be > 2x4MB to test large blocks */
     void* srcBuffer = NULL;
-    size_t const compressedBufferSize = LZ4F_compressFrameBound(srcDataLength, NULL);
+    size_t const compressedBufferSize = LZ4F_compressFrameBound(srcDataLength, NULL) + 4 MB;  /* needs some margin */
     void* compressedBuffer = NULL;
     void* decodedBuffer = NULL;
     U32 coreRand = seed;
     LZ4F_decompressionContext_t dCtx = NULL;
     LZ4F_compressionContext_t cCtx = NULL;
-    size_t result;
     clock_t const startClock = clock();
     clock_t const clockDuration = duration_s * CLOCKS_PER_SEC;
 #   undef CHECK
@@ -773,10 +799,10 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi
                             DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
 
     /* Create buffers */
-    result = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
-    CHECK(LZ4F_isError(result), "Allocation failed (error %i)", (int)result);
-    result = LZ4F_createCompressionContext(&cCtx, LZ4F_VERSION);
-    CHECK(LZ4F_isError(result), "Allocation failed (error %i)", (int)result);
+    {   size_t const creationStatus = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
+        CHECK(LZ4F_isError(creationStatus), "Allocation failed (error %i)", (int)creationStatus); }
+    {   size_t const creationStatus = LZ4F_createCompressionContext(&cCtx, LZ4F_VERSION);
+        CHECK(LZ4F_isError(creationStatus), "Allocation failed (error %i)", (int)creationStatus); }
     srcBuffer = malloc(srcDataLength);
     CHECK(srcBuffer==NULL, "srcBuffer Allocation failed");
     compressedBuffer = malloc(compressedBufferSize);
@@ -829,37 +855,59 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi
             BYTE* op = (BYTE*)compressedBuffer;
             BYTE* const oend = op + (neverFlush ? LZ4F_compressFrameBound(srcSize, prefsPtr) : compressedBufferSize);  /* when flushes are possible, can't guarantee a max compressed size */
             unsigned const maxBits = FUZ_highbit((U32)srcSize);
+            size_t cSegmentSize;
             LZ4F_compressOptions_t cOptions;
             memset(&cOptions, 0, sizeof(cOptions));
-            result = LZ4F_compressBegin(cCtx, op, oend-op, prefsPtr);
-            CHECK(LZ4F_isError(result), "Compression header failed (error %i)", (int)result);
-            op += result;
+            cSegmentSize = LZ4F_compressBegin(cCtx, op, (size_t)(oend-op), prefsPtr);
+            CHECK(LZ4F_isError(cSegmentSize), "Compression header failed (error %i)",
+                                            (int)cSegmentSize);
+            op += cSegmentSize;
             while (ip < iend) {
                 unsigned const nbBitsSeg = FUZ_rand(&randState) % maxBits;
                 size_t const sampleMax = (FUZ_rand(&randState) & ((1<<nbBitsSeg)-1)) + 1;
                 size_t const iSize = MIN(sampleMax, (size_t)(iend-ip));
                 size_t const oSize = LZ4F_compressBound(iSize, prefsPtr);
+                size_t flushedSize;
                 cOptions.stableSrc = ((FUZ_rand(&randState) & 3) == 1);
-                DISPLAYLEVEL(6, "Sending %zi bytes to compress (stableSrc:%u) \n",
-                                iSize, cOptions.stableSrc);
+                DISPLAYLEVEL(6, "Sending %u bytes to compress (stableSrc:%u) \n",
+                                (unsigned)iSize, cOptions.stableSrc);
 
-                result = LZ4F_compressUpdate(cCtx, op, oSize, ip, iSize, &cOptions);
-                CHECK(LZ4F_isError(result), "Compression failed (error %i : %s)", (int)result, LZ4F_getErrorName(result));
-                op += result;
+                flushedSize = LZ4F_compressUpdate(cCtx, op, oSize, ip, iSize, &cOptions);
+                CHECK(LZ4F_isError(flushedSize), "Compression failed (error %i : %s)",
+                            (int)flushedSize, LZ4F_getErrorName(flushedSize));
+                op += flushedSize;
                 ip += iSize;
 
                 {   unsigned const forceFlush = neverFlush ? 0 : ((FUZ_rand(&randState) & 3) == 1);
                     if (forceFlush) {
-                        result = LZ4F_flush(cCtx, op, oend-op, &cOptions);
-                        CHECK(LZ4F_isError(result), "Compression failed (error %i)", (int)result);
-                        op += result;
+                        size_t const flushSize = LZ4F_flush(cCtx, op, (size_t)(oend-op), &cOptions);
+                        DISPLAYLEVEL(6,"flushing %u bytes \n", (unsigned)flushSize);
+                        CHECK(LZ4F_isError(flushSize), "Compression failed (error %i)", (int)flushSize);
+                        op += flushSize;
                 }   }
             }
             CHECK(op>=oend, "LZ4F_compressFrameBound overflow");
-            result = LZ4F_compressEnd(cCtx, op, oend-op, &cOptions);
-            CHECK(LZ4F_isError(result), "Compression completion failed (error %i : %s)", (int)result, LZ4F_getErrorName(result));
-            op += result;
-            cSize = op-(BYTE*)compressedBuffer;
+            {   size_t const dstEndSafeSize = LZ4F_compressBound(0, prefsPtr);
+                int const tooSmallDstEnd = ((FUZ_rand(&randState) & 31) == 3);
+                size_t const dstEndTooSmallSize = (FUZ_rand(&randState) % dstEndSafeSize) + 1;
+                size_t const dstEndSize = tooSmallDstEnd ? dstEndTooSmallSize : dstEndSafeSize;
+                BYTE const canaryByte = (BYTE)(FUZ_rand(&randState) & 255);
+                size_t flushedSize;
+                DISPLAYLEVEL(7,"canaryByte at pos %u / %u \n",
+                            (unsigned)((size_t)(op - (BYTE*)compressedBuffer) + dstEndSize),
+                            (unsigned)compressedBufferSize);
+                assert(op + dstEndSize < (BYTE*)compressedBuffer + compressedBufferSize);
+                op[dstEndSize] = canaryByte;
+                flushedSize = LZ4F_compressEnd(cCtx, op, dstEndSize, &cOptions);
+                CHECK(op[dstEndSize] != canaryByte, "LZ4F_compressEnd writes beyond dstCapacity !");
+                if (LZ4F_isError(flushedSize)) {
+                    if (tooSmallDstEnd) /* failure is allowed */ continue;
+                    CHECK(1, "Compression completion failed (error %i : %s)",
+                            (int)flushedSize, LZ4F_getErrorName(flushedSize));
+                }
+                op += flushedSize;
+            }
+            cSize = (size_t)(op - (BYTE*)compressedBuffer);
             DISPLAYLEVEL(5, "\nCompressed %u bytes into %u \n", (U32)srcSize, (U32)cSize);
         }
 
@@ -872,8 +920,10 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi
             unsigned const maxBits = MAX(3, suggestedBits);
             unsigned const nonContiguousDst = FUZ_rand(&randState) % 3;   /* 0 : contiguous; 1 : non-contiguous; 2 : dst overwritten */
             size_t totalOut = 0;
+            size_t decSize = 0;
             XXH64_state_t xxh64;
             XXH64_reset(&xxh64, 1);
+            assert(ip < iend);
             while (ip < iend) {
                 unsigned const nbBitsI = (FUZ_rand(&randState) % (maxBits-1)) + 1;
                 unsigned const nbBitsO = (FUZ_rand(&randState) % (maxBits)) + 1;
@@ -885,10 +935,11 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi
                 memset(&dOptions, 0, sizeof(dOptions));
                 dOptions.stableDst = FUZ_rand(&randState) & 1;
                 if (nonContiguousDst==2) dOptions.stableDst = 0;   /* overwrite mode */
-                result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, &dOptions);
-                if (LZ4F_getErrorCode(result) == LZ4F_ERROR_contentChecksum_invalid)
+                decSize = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, &dOptions);
+                if (LZ4F_getErrorCode(decSize) == LZ4F_ERROR_contentChecksum_invalid)
                     locateBuffDiff(srcStart, decodedBuffer, srcSize, nonContiguousDst);
-                CHECK(LZ4F_isError(result), "Decompression failed (error %i:%s)", (int)result, LZ4F_getErrorName(result));
+                CHECK(LZ4F_isError(decSize), "Decompression failed (error %i:%s)",
+                                    (int)decSize, LZ4F_getErrorName(decSize));
                 XXH64_update(&xxh64, op, (U32)oSize);
                 totalOut += oSize;
                 op += oSize;
@@ -896,7 +947,7 @@ int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, double compressi
                 op += nonContiguousDst;
                 if (nonContiguousDst==2) op = (BYTE*)decodedBuffer;   /* overwritten destination */
             }
-            CHECK(result != 0, "Frame decompression failed (error %i)", (int)result);
+            CHECK(decSize != 0, "Frame decompression failed (error %i)", (int)decSize);
             if (totalOut) {  /* otherwise, it's a skippable frame */
                 U64 const crcDecoded = XXH64_digest(&xxh64);
                 if (crcDecoded != crcOrig) locateBuffDiff(srcStart, decodedBuffer, srcSize, nonContiguousDst);
diff --git a/tests/fullbench.c b/tests/fullbench.c
index fd1202d..1a52aab 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -42,6 +42,7 @@
 #include <string.h>      /* strcmp */
 #include <time.h>        /* clock_t, clock(), CLOCKS_PER_SEC */
 
+#define LZ4_DISABLE_DEPRECATE_WARNINGS   /* LZ4_decompress_fast */
 #include "lz4.h"
 #include "lz4hc.h"
 #include "lz4frame.h"
@@ -160,12 +161,14 @@ static size_t BMK_findMaxMem(U64 requiredMem)
 static LZ4_stream_t LZ4_stream;
 static void local_LZ4_resetDictT(void)
 {
-    LZ4_resetStream(&LZ4_stream);
+    void* const r = LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream));
+    assert(r != NULL);
 }
 
 static void local_LZ4_createStream(void)
 {
-    LZ4_resetStream(&LZ4_stream);
+    void* const r = LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream));
+    assert(r != NULL);
 }
 
 static int local_LZ4_saveDict(const char* in, char* out, int inSize)
@@ -242,7 +245,7 @@ static int local_LZ4_compress_forceDict(const char* in, char* out, int inSize)
 LZ4_streamHC_t LZ4_streamHC;
 static void local_LZ4_resetStreamHC(void)
 {
-    LZ4_resetStreamHC(&LZ4_streamHC, 0);
+    LZ4_initStreamHC(&LZ4_streamHC, sizeof(LZ4_streamHC));
 }
 
 static int local_LZ4_saveDictHC(const char* in, char* out, int inSize)
@@ -326,16 +329,19 @@ static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSi
 /* frame functions */
 static int local_LZ4F_compressFrame(const char* in, char* out, int inSize)
 {
-    return (int)LZ4F_compressFrame(out, LZ4F_compressFrameBound(inSize, NULL), in, inSize, NULL);
+    assert(inSize >= 0);
+    return (int)LZ4F_compressFrame(out, LZ4F_compressFrameBound((size_t)inSize, NULL), in, (size_t)inSize, NULL);
 }
 
 static LZ4F_decompressionContext_t g_dCtx;
 
 static int local_LZ4F_decompress(const char* in, char* out, int inSize, int outSize)
 {
-    size_t srcSize = inSize;
-    size_t dstSize = outSize;
+    size_t srcSize = (size_t)inSize;
+    size_t dstSize = (size_t)outSize;
     size_t result;
+    assert(inSize >= 0);
+    assert(outSize >= 0);
     result = LZ4F_decompress(g_dCtx, out, &dstSize, in, &srcSize, NULL);
     if (result!=0) { DISPLAY("Error decompressing frame : unfinished frame\n"); exit(8); }
     if (srcSize != (size_t)inSize) { DISPLAY("Error decompressing frame : read size incorrect\n"); exit(9); }
@@ -367,7 +373,6 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
       size_t readSize;
       int compressedBuffSize;
       U32 crcOriginal;
-      size_t errorCode;
 
       /* Check file existence */
       if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
@@ -384,7 +389,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
       /* Allocation */
       chunkP = (struct chunkParameters*) malloc(((benchedSize / (size_t)g_chunkSize)+1) * sizeof(struct chunkParameters));
       orig_buff = (char*) malloc(benchedSize);
-      nbChunks = (int) ((benchedSize + (g_chunkSize-1)) / g_chunkSize);
+      nbChunks = (int) ((benchedSize + (size_t)g_chunkSize - 1) / (size_t)g_chunkSize);
       maxCompressedChunkSize = LZ4_compressBound(g_chunkSize);
       compressedBuffSize = nbChunks * maxCompressedChunkSize;
       compressed_buff = (char*)malloc((size_t)compressedBuffSize);
@@ -439,7 +444,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
                 char* out = compressed_buff;
                 nbChunks = (int) (((int)benchedSize + (g_chunkSize-1))/ g_chunkSize);
                 for (i=0; i<nbChunks; i++) {
-                    chunkP[i].id = i;
+                    chunkP[i].id = (U32)i;
                     chunkP[i].origBuffer = in; in += g_chunkSize;
                     if ((int)remaining > g_chunkSize) { chunkP[i].origSize = g_chunkSize; remaining -= g_chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; }
                     chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize;
@@ -561,7 +566,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
             case 8: decompressionFunction = local_LZ4_decompress_safe_forceExtDict; dName = "LZ4_decompress_safe_forceExtDict"; break;
 #endif
             case 9: decompressionFunction = local_LZ4F_decompress; dName = "LZ4F_decompress";
-                    errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL);
+                {   size_t const errorCode = LZ4F_compressFrame(compressed_buff, compressedBuffSize, orig_buff, benchedSize, NULL);
                     if (LZ4F_isError(errorCode)) {
                         DISPLAY("Error while preparing compressed frame\n");
                         free(orig_buff);
@@ -573,6 +578,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
                     chunkP[0].compressedSize = (int)errorCode;
                     nbChunks = 1;
                     break;
+                }
             default :
                 continue;   /* skip if unknown ID */
             }
@@ -610,7 +616,7 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles)
                 PROGRESS("%2i-%-34.34s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
 
                 /* CRC Checking */
-                crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
+                crcDecoded = XXH32(orig_buff, benchedSize, 0);
                 if (checkResult && (crcOriginal!=crcDecoded)) {
                     DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n",
                             inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded);
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index b29e82e..3128e6d 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -32,13 +32,13 @@
 #  pragma warning(disable : 4310)    /* disable: C4310: constant char value > 127 */
 #endif
 
-#define LZ4_DISABLE_DEPRECATE_WARNINGS
-
 
 /*-************************************
 *  Dependencies
 **************************************/
 #if defined(__unix__) && !defined(_AIX)   /* must be included before platform.h for MAP_ANONYMOUS */
+#  undef  _GNU_SOURCE     /* in case it's already defined */
+#  define _GNU_SOURCE     /* MAP_ANONYMOUS even in -std=c99 mode */
 #  include <sys/mman.h>   /* mmap */
 #endif
 #include "platform.h"   /* _CRT_SECURE_NO_WARNINGS */
@@ -48,11 +48,11 @@
 #include <string.h>     /* strcmp */
 #include <time.h>       /* clock_t, clock, CLOCKS_PER_SEC */
 #include <assert.h>
-#if defined(__unix__) && defined(_AIX)
-#  include <sys/mman.h>   /* mmap */
-#endif
+#include <limits.h>     /* INT_MAX */
 
+#define LZ4_DISABLE_DEPRECATE_WARNINGS   /* LZ4_decompress_fast */
 #define LZ4_STATIC_LINKING_ONLY
+#include "lz4.h"
 #define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
 #define XXH_STATIC_LINKING_ONLY
@@ -145,10 +145,10 @@ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, size_t bufferSize, dou
         /* Select : Literal (noise) or copy (within 64K) */
         if (FUZ_RAND15BITS < P32) {
             /* Copy (within 64K) */
-            size_t const length = FUZ_RANDLENGTH + 4;
+            size_t const length = (size_t)FUZ_RANDLENGTH + 4;
             size_t const d = MIN(pos+length, bufferSize);
             size_t match;
-            size_t offset = FUZ_RAND15BITS + 1;
+            size_t offset = (size_t)FUZ_RAND15BITS + 1;
             while (offset > pos) offset >>= 1;
             match = pos - offset;
             while (pos < d) BBuffer[pos++] = BBuffer[match++];
@@ -308,13 +308,13 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
     unsigned long long hcbytes = 0;
     unsigned long long ccbytes = 0;
     void* const CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
-    size_t const compressedBufferSize = LZ4_compressBound(FUZ_MAX_BLOCK_SIZE);
+    size_t const compressedBufferSize = (size_t)LZ4_compressBound(FUZ_MAX_BLOCK_SIZE);
     char* const compressedBuffer = (char*)malloc(compressedBufferSize);
     char* const decodedBuffer = (char*)malloc(FUZ_MAX_DICT_SIZE + FUZ_MAX_BLOCK_SIZE);
     size_t const labSize = 96 KB;
     void* const lowAddrBuffer = FUZ_createLowAddr(labSize);
-    void* const stateLZ4   = malloc(LZ4_sizeofState());
-    void* const stateLZ4HC = malloc(LZ4_sizeofStateHC());
+    void* const stateLZ4   = malloc((size_t)LZ4_sizeofState());
+    void* const stateLZ4HC = malloc((size_t)LZ4_sizeofStateHC());
     LZ4_stream_t LZ4dict;
     LZ4_streamHC_t LZ4dictHC;
     U32 coreRandState = seed;
@@ -345,7 +345,8 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         DISPLAY("Not enough memory to start fuzzer tests");
         goto _output_error;
     }
-    memset(&LZ4dict, 0, sizeof(LZ4dict));
+    if ( LZ4_initStream(&LZ4dict, sizeof(LZ4dict)) == NULL) abort();
+    if ( LZ4_initStreamHC(&LZ4dictHC, sizeof(LZ4dictHC)) == NULL) abort();
     {   U32 randState = coreRandState ^ PRIME3;
         FUZ_fillCompressibleNoiseBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, &randState);
     }
@@ -369,7 +370,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         const char* dict = block - dictSize;
         int compressedSize, HCcompressedSize;
         int blockContinueCompressedSize;
-        U32 const crcOrig = XXH32(block, blockSize, 0);
+        U32 const crcOrig = XXH32(block, (size_t)blockSize, 0);
         int ret;
 
         FUZ_displayUpdate(cycleNb);
@@ -394,7 +395,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             DISPLAYLEVEL(5, "destSize : %7i/%7i; content%7i/%7i ", ret, targetSize, srcSize, blockSize);
             if (targetSize>0) {
                 /* check correctness */
-                U32 const crcBase = XXH32(block, srcSize, 0);
+                U32 const crcBase = XXH32(block, (size_t)srcSize, 0);
                 char const canary = FUZ_rand(&randState) & 255;
                 FUZ_CHECKTEST((ret==0), "LZ4_compress_destSize() compression failed");
                 FUZ_DISPLAYTEST();
@@ -429,7 +430,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             FUZ_CHECKTEST(srcSize > blockSize, "LZ4_compress_HC_destSize() fed more than src buffer !");
             if (targetSize>0) {
                 /* check correctness */
-                U32 const crcBase = XXH32(block, srcSize, 0);
+                U32 const crcBase = XXH32(block, (size_t)srcSize, 0);
                 char const canary = FUZ_rand(&randState) & 255;
                 FUZ_CHECKTEST((ret==0), "LZ4_compress_HC_destSize() compression failed");
                 FUZ_DISPLAYTEST();
@@ -508,7 +509,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
 
         /* Test decoding with a one byte input */
         FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input");
-        {   char const tmp = 0xFF;
+        {   char const tmp = (char)0xFF;
             LZ4_decompress_safe(&tmp, decodedBuffer, 1, blockSize);
         }
 
@@ -518,7 +519,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             /* 14 bytes of literals, followed by a 14 byte match.
              * Should not read beyond the end of the buffer.
              * See https://github.com/lz4/lz4/issues/508. */
-            *tmp = 0xEE;
+            *tmp = (char)0xEE;
             memset(tmp + 1, 0, 14);
             tmp[15] = 14;
             tmp[16] = 0;
@@ -546,7 +547,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite amply sufficient space");
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size");
-        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+        {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
             FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
         }
 
@@ -581,7 +582,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         /* Test partial decoding => must work */
         FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial");
         {   size_t const missingBytes = FUZ_rand(&randState) % blockSize;
-            int const targetSize = (int)(blockSize - missingBytes);
+            int const targetSize = (int)((size_t)blockSize - missingBytes);
             char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A;
             int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, targetSize, blockSize);
             FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult);
@@ -641,7 +642,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         /* Compress using dictionary */
         FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary of size %i", dictSize);
         {   LZ4_stream_t LZ4_stream;
-            LZ4_resetStream(&LZ4_stream);
+            LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream));
             LZ4_compress_fast_continue (&LZ4_stream, dict, compressedBuffer, dictSize, (int)compressedBufferSize, 1);   /* Just to fill hash tables */
             blockContinueCompressedSize = LZ4_compress_fast_continue (&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
             FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue failed");
@@ -652,7 +653,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         memcpy(decodedBuffer, dict, dictSize);
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer+dictSize, blockSize, decodedBuffer, dictSize);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
-        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
+        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, (size_t)blockSize, 0);
             if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
             FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
         }
@@ -660,13 +661,13 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         FUZ_DISPLAYTEST("test LZ4_decompress_safe_usingDict()");
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer+dictSize, blockContinueCompressedSize, blockSize, decodedBuffer, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
-        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
+        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, (size_t)blockSize, 0);
             FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
         }
 
         /* Compress using External dictionary */
         FUZ_DISPLAYTEST("test LZ4_compress_fast_continue(), with non-contiguous dictionary");
-        dict -= (FUZ_rand(&randState) & 0xF) + 1;   /* create space, so now dictionary is an ExtDict */
+        dict -= (size_t)(FUZ_rand(&randState) & 0xF) + 1;   /* create space, so now dictionary is an ExtDict */
         if (dict < (char*)CNBuffer) dict = (char*)CNBuffer;
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
@@ -678,7 +679,8 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using ExtDict should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
 
         FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary loaded with LZ4_loadDict()");
-        DISPLAYLEVEL(5, " compress %i bytes from buffer(%p) into dst(%p) using dict(%p) of size %i \n", blockSize, block, decodedBuffer, dict, dictSize);
+        DISPLAYLEVEL(5, " compress %i bytes from buffer(%p) into dst(%p) using dict(%p) of size %i \n",
+                     blockSize, (const void *)block, (void *)decodedBuffer, (const void *)dict, dictSize);
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         ret = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
@@ -686,7 +688,8 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
 
         /* Decompress with dictionary as external */
         FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as extDict");
-        DISPLAYLEVEL(5, " decoding %i bytes from buffer(%p) using dict(%p) of size %i \n", blockSize, decodedBuffer, dict, dictSize);
+        DISPLAYLEVEL(5, " decoding %i bytes from buffer(%p) using dict(%p) of size %i \n",
+                     blockSize, (void *)decodedBuffer, (const void *)dict, dictSize);
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
@@ -740,10 +743,11 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
 
             FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary()");
             LZ4_loadDict(&LZ4dict, dict, dictSize);
-            LZ4_resetStream(&LZ4_stream);
+            LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream));
             LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
             blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
             FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed");
+            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirty, "context should be good");
 
             /* In the future, it might be desirable to let extDictCtx mode's
              * output diverge from the output generated by regular extDict mode.
@@ -754,19 +758,21 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             FUZ_CHECKTEST(XXH32(compressedBuffer, blockContinueCompressedSize, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
 
             FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary(), but output buffer is 1 byte too short");
-            LZ4_resetStream(&LZ4_stream);
+            LZ4_resetStream_fast(&LZ4_stream);
             LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
             ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1);
             FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using extDictCtx should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
+            /* note : context is no longer dirty after a failed compressed block */
 
             FUZ_DISPLAYTEST();
-            LZ4_resetStream(&LZ4_stream);
+            LZ4_resetStream_fast(&LZ4_stream);
             LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
             ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
             FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer");
             FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
             FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirty, "context should be good");
 
             FUZ_DISPLAYTEST();
             LZ4_resetStream_fast(&LZ4_stream);
@@ -776,6 +782,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer");
             FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
             FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+            FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirty, "context should be good");
         }
 
         /* Decompress with dictionary as external */
@@ -794,7 +801,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
-        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+        {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
             FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
         }
 
@@ -823,29 +830,31 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         FUZ_DISPLAYTEST("LZ4_compress_HC_continue with an external dictionary");
         dict -= (FUZ_rand(&randState) & 7);    /* even bigger separation */
         if (dict < (char*)CNBuffer) dict = (char*)CNBuffer;
-        LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
-        LZ4_setCompressionLevel(&LZ4dictHC, compressionLevel-1);
+        LZ4_setCompressionLevel (&LZ4dictHC, compressionLevel);
         blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, (int)compressedBufferSize);
         FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue failed");
+        FUZ_CHECKTEST(LZ4dictHC.internal_donotuse.dirty, "Context should be clean");
 
-        FUZ_DISPLAYTEST();
+        FUZ_DISPLAYTEST("LZ4_compress_HC_continue with same external dictionary, but output buffer 1 byte too short");
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
         ret = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1);
-        FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDict should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
+        FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDict should fail : one missing byte for output buffer (expected %i, but result=%i)", blockContinueCompressedSize, ret);
+        /* note : context is no longer dirty after a failed compressed block */
 
-        FUZ_DISPLAYTEST();
+        FUZ_DISPLAYTEST("LZ4_compress_HC_continue with same external dictionary, and output buffer exactly the right size");
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
         ret = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
-        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue size is different (%i != %i)", ret, blockContinueCompressedSize);
+        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue size is different : ret(%i) != expected(%i)", ret, blockContinueCompressedSize);
         FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue should work : enough size available within output buffer");
+        FUZ_CHECKTEST(LZ4dictHC.internal_donotuse.dirty, "Context should be clean");
 
         FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
-        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+        {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
             if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
             FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
         }
@@ -854,26 +863,29 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
         FUZ_DISPLAYTEST();
         {
             LZ4_streamHC_t LZ4_streamHC;
+            LZ4_initStreamHC(&LZ4_streamHC, sizeof(LZ4_streamHC));
 
-            LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
             LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
-            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
             LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            LZ4_setCompressionLevel (&LZ4_streamHC, compressionLevel);
             blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, (int)compressedBufferSize);
             FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue with ExtDictCtx failed");
+            FUZ_CHECKTEST(LZ4_streamHC.internal_donotuse.dirty, "Context should be clean");
 
             FUZ_DISPLAYTEST();
-            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_resetStreamHC_fast (&LZ4_streamHC, compressionLevel);
             LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
             ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1);
             FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDictCtx should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
+            /* note : context is no longer dirty after a failed compressed block */
 
             FUZ_DISPLAYTEST();
-            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_resetStreamHC_fast (&LZ4_streamHC, compressionLevel);
             LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
             ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
             FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx size is different (%i != %i)", ret, blockContinueCompressedSize);
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx should work : enough size available within output buffer");
+            FUZ_CHECKTEST(LZ4_streamHC.internal_donotuse.dirty, "Context should be clean");
 
             FUZ_DISPLAYTEST();
             LZ4_resetStreamHC_fast (&LZ4_streamHC, compressionLevel);
@@ -881,25 +893,26 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
             FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx and fast reset size is different (%i != %i)", ret, blockContinueCompressedSize);
             FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx and fast reset should work : enough size available within output buffer");
+            FUZ_CHECKTEST(LZ4_streamHC.internal_donotuse.dirty, "Context should be clean");
+        }
 
-            FUZ_DISPLAYTEST();
-            decodedBuffer[blockSize] = 0;
-            ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
-            FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
-            FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
-            {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
-                if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
-                FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
-            }
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize] = 0;
+        ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
+        FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+        {   U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
         }
 
         /* Compress HC continue destSize */
         FUZ_DISPLAYTEST();
-        {   int const availableSpace = (FUZ_rand(&randState) % blockSize) + 5;
+        {   int const availableSpace = (int)(FUZ_rand(&randState) % blockSize) + 5;
             int consumedSize = blockSize;
             FUZ_DISPLAYTEST();
-            LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
             LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
+            LZ4_setCompressionLevel(&LZ4dictHC, compressionLevel);
             blockContinueCompressedSize = LZ4_compress_HC_continue_destSize(&LZ4dictHC, block, compressedBuffer, &consumedSize, availableSpace);
             DISPLAYLEVEL(5, " LZ4_compress_HC_continue_destSize : compressed %6i/%6i into %6i/%6i at cLevel=%i\n", consumedSize, blockSize, blockContinueCompressedSize, availableSpace, compressionLevel);
             FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue_destSize failed");
@@ -911,8 +924,8 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c
             ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, consumedSize, dict, dictSize);
             FUZ_CHECKTEST(ret!=consumedSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
             FUZ_CHECKTEST(decodedBuffer[consumedSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size")
-            {   U32 const crcSrc = XXH32(block, consumedSize, 0);
-                U32 const crcDst = XXH32(decodedBuffer, consumedSize, 0);
+            {   U32 const crcSrc = XXH32(block, (size_t)consumedSize, 0);
+                U32 const crcDst = XXH32(decodedBuffer, (size_t)consumedSize, 0);
                 if (crcSrc!=crcDst) FUZ_findDiff(block, decodedBuffer);
                 FUZ_CHECKTEST(crcSrc!=crcDst, "LZ4_decompress_safe_usingDict corrupted decoded data");
             }
@@ -987,9 +1000,10 @@ static void FUZ_unitTests(int compressionLevel)
 
         /* simple compression test */
         crcOrig = XXH64(testInput, testCompressedSize, 0);
-        LZ4_resetStream(&streamingState);
+        LZ4_initStream(&streamingState, sizeof(streamingState));
         result = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1);
         FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed!");
+        FUZ_CHECKTEST(streamingState.internal_donotuse.dirty, "context should be clean")
 
         result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize);
         FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
@@ -1012,7 +1026,7 @@ static void FUZ_unitTests(int compressionLevel)
             XXH64_reset(&xxhOrig, 0);
             XXH64_reset(&xxhNewSafe, 0);
             XXH64_reset(&xxhNewFast, 0);
-            LZ4_resetStream(&streamingState);
+            LZ4_resetStream_fast(&streamingState);
             LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
             LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
@@ -1050,69 +1064,112 @@ static void FUZ_unitTests(int compressionLevel)
     }
 
     /* LZ4 HC streaming tests */
-    {   LZ4_streamHC_t* sp;
-        LZ4_streamHC_t  sHC;
+    {   LZ4_streamHC_t sHC;   /* statically allocated */
         U64 crcOrig;
         int result;
+        LZ4_initStreamHC(&sHC, sizeof(sHC));
 
         /* Allocation test */
-        sp = LZ4_createStreamHC();
-        FUZ_CHECKTEST(sp==NULL, "LZ4_createStreamHC() allocation failed");
-        LZ4_freeStreamHC(sp);
+        DISPLAYLEVEL(3, " Basic HC allocation : ");
+        {   LZ4_streamHC_t* const sp = LZ4_createStreamHC();
+            FUZ_CHECKTEST(sp==NULL, "LZ4_createStreamHC() allocation failed");
+            LZ4_freeStreamHC(sp);
+        }
+        DISPLAYLEVEL(3, " OK \n");
 
         /* simple HC compression test */
-        crcOrig = XXH64(testInput, testCompressedSize, 0);
-        LZ4_resetStreamHC(&sHC, compressionLevel);
-        result = LZ4_compress_HC_continue(&sHC, testInput, testCompressed, testCompressedSize, testCompressedSize-1);
-        FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
-
-        result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize);
-        FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
-        { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
-          FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+        DISPLAYLEVEL(3, " Simple HC round-trip : ");
+        {   U64 const crc64 = XXH64(testInput, testCompressedSize, 0);
+            LZ4_setCompressionLevel(&sHC, compressionLevel);
+            result = LZ4_compress_HC_continue(&sHC, testInput, testCompressed, testCompressedSize, testCompressedSize-1);
+            FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+            FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
+
+            result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize);
+            FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed");
+            {   U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
+                FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() decompression corruption");
+        }   }
+        DISPLAYLEVEL(3, " OK \n");
+
+        /* long sequence test */
+        DISPLAYLEVEL(3, " Long sequence HC test : ");
+        {   size_t const blockSize = 1 MB;
+            size_t const targetSize = 4116;  /* size carefully selected to trigger an overflow */
+            void*  const block = malloc(blockSize);
+            void*  const dstBlock = malloc(targetSize+1);
+            BYTE   const sentinel = 101;
+            int srcSize;
+
+            assert(block != NULL); assert(dstBlock != NULL);
+            memset(block, 0, blockSize);
+            ((char*)dstBlock)[targetSize] = sentinel;
+
+            LZ4_resetStreamHC_fast(&sHC, 3);
+            assert(blockSize < INT_MAX);
+            srcSize = (int)blockSize;
+            assert(targetSize < INT_MAX);
+            result = LZ4_compress_HC_destSize(&sHC, (const char*)block, (char*)dstBlock, &srcSize, (int)targetSize, 3);
+            DISPLAYLEVEL(4, "cSize=%i; readSize=%i; ", result, srcSize);
+            FUZ_CHECKTEST(result!=4116, "LZ4_compress_HC_destSize() : compression must fill dstBuffer completely, but no more !");
+            FUZ_CHECKTEST(((char*)dstBlock)[targetSize] != sentinel, "LZ4_compress_HC_destSize()")
+
+            LZ4_resetStreamHC_fast(&sHC, 3);   /* make sure the context is clean after the test */
+            free(block);
+            free(dstBlock);
+        }
+        DISPLAYLEVEL(3, " OK \n");
 
         /* simple dictionary HC compression test */
-        crcOrig = XXH64(testInput + 64 KB, testCompressedSize, 0);
-        LZ4_resetStreamHC(&sHC, compressionLevel);
-        LZ4_loadDictHC(&sHC, testInput, 64 KB);
-        result = LZ4_compress_HC_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1);
-        FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
-
-        result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 64 KB);
-        FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() simple dictionary decompression test failed");
-        { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
-          FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() simple dictionary decompression test : corruption"); }
+        DISPLAYLEVEL(3, " HC dictionary compression test : ");
+        {   U64 const crc64 = XXH64(testInput + 64 KB, testCompressedSize, 0);
+            LZ4_resetStreamHC_fast(&sHC, compressionLevel);
+            LZ4_loadDictHC(&sHC, testInput, 64 KB);
+            result = LZ4_compress_HC_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1);
+            FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
+            FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
+
+            result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 64 KB);
+            FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() simple dictionary decompression test failed");
+            {   U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
+                FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() simple dictionary decompression test : corruption");
+        }   }
+        DISPLAYLEVEL(3, " OK \n");
 
         /* multiple HC compression test with dictionary */
         {   int result1, result2;
             int segSize = testCompressedSize / 2;
-            crcOrig = XXH64(testInput + segSize, testCompressedSize, 0);
-            LZ4_resetStreamHC(&sHC, compressionLevel);
+            U64 const crc64 = XXH64(testInput + segSize, testCompressedSize, 0);
+            LZ4_resetStreamHC_fast(&sHC, compressionLevel);
             LZ4_loadDictHC(&sHC, testInput, segSize);
             result1 = LZ4_compress_HC_continue(&sHC, testInput + segSize, testCompressed, segSize, segSize -1);
             FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result1);
-            result2 = LZ4_compress_HC_continue(&sHC, testInput + 2*segSize, testCompressed+result1, segSize, segSize-1);
+            FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
+            result2 = LZ4_compress_HC_continue(&sHC, testInput + 2*(size_t)segSize, testCompressed+result1, segSize, segSize-1);
             FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result2);
+            FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
 
             result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result1, segSize, testInput, segSize);
             FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 1 failed");
             result = LZ4_decompress_safe_usingDict(testCompressed+result1, testVerify+segSize, result2, segSize, testInput, 2*segSize);
             FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 2 failed");
-            { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
-              FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption"); }
-        }
+            {   U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
+                FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption");
+        }   }
 
         /* remote dictionary HC compression test */
-        crcOrig = XXH64(testInput + 64 KB, testCompressedSize, 0);
-        LZ4_resetStreamHC(&sHC, compressionLevel);
-        LZ4_loadDictHC(&sHC, testInput, 32 KB);
-        result = LZ4_compress_HC_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1);
-        FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() remote dictionary failed : result = %i", result);
-
-        result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 32 KB);
-        FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe_usingDict() decompression failed following remote dictionary HC compression test");
-        { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
-          FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() decompression corruption"); }
+        {   U64 const crc64 = XXH64(testInput + 64 KB, testCompressedSize, 0);
+            LZ4_resetStreamHC_fast(&sHC, compressionLevel);
+            LZ4_loadDictHC(&sHC, testInput, 32 KB);
+            result = LZ4_compress_HC_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1);
+            FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() remote dictionary failed : result = %i", result);
+            FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
+
+            result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 32 KB);
+            FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe_usingDict() decompression failed following remote dictionary HC compression test");
+            {   U64 const crcNew = XXH64(testVerify, testCompressedSize, 0);
+                FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe_usingDict() decompression corruption");
+        }   }
 
         /* multiple HC compression with ext. dictionary */
         {   XXH64_state_t crcOrigState;
@@ -1121,11 +1178,11 @@ static void FUZ_unitTests(int compressionLevel)
             int dictSize = (FUZ_rand(&randState) & 8191);
             char* dst = testVerify;
 
-            size_t segStart = dictSize + 7;
+            size_t segStart = (size_t)dictSize + 7;
             int segSize = (FUZ_rand(&randState) & 8191);
             int segNb = 1;
 
-            LZ4_resetStreamHC(&sHC, compressionLevel);
+            LZ4_resetStreamHC_fast(&sHC, compressionLevel);
             LZ4_loadDictHC(&sHC, dict, dictSize);
 
             XXH64_reset(&crcOrigState, 0);
@@ -1136,6 +1193,7 @@ static void FUZ_unitTests(int compressionLevel)
                 crcOrig = XXH64_digest(&crcOrigState);
                 result = LZ4_compress_HC_continue(&sHC, testInput + segStart, testCompressed, segSize, LZ4_compressBound(segSize));
                 FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result);
+                FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
 
                 result = LZ4_decompress_safe_usingDict(testCompressed, dst, result, segSize, dict, dictSize);
                 FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe_usingDict() dictionary decompression part %i failed", segNb);
@@ -1145,13 +1203,14 @@ static void FUZ_unitTests(int compressionLevel)
                     FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() part %i corruption", segNb);
                 }
 
+                assert(segSize >= 0);
                 dict = dst;
                 dictSize = segSize;
 
-                dst += segSize + 1;
+                dst += (size_t)segSize + 1;
                 segNb ++;
 
-                segStart += segSize + (FUZ_rand(&randState) & 0xF) + 1;
+                segStart += (size_t)segSize + (FUZ_rand(&randState) & 0xF) + 1;
                 segSize = (FUZ_rand(&randState) & 8191);
             }
         }
@@ -1172,7 +1231,7 @@ static void FUZ_unitTests(int compressionLevel)
             XXH64_reset(&xxhOrig, 0);
             XXH64_reset(&xxhNewSafe, 0);
             XXH64_reset(&xxhNewFast, 0);
-            LZ4_resetStreamHC(&sHC, compressionLevel);
+            LZ4_resetStreamHC_fast(&sHC, compressionLevel);
             LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
             LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
@@ -1183,6 +1242,7 @@ static void FUZ_unitTests(int compressionLevel)
                 memcpy (ringBuffer + rNext, testInput + iNext, messageSize);
                 compressedSize = LZ4_compress_HC_continue(&sHC, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
                 FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
+                FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
 
                 result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize);
                 FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
@@ -1229,11 +1289,11 @@ static void FUZ_unitTests(int compressionLevel)
             int dNext = 0;
             int compressedSize;
 
-            assert((size_t)(dBufferSize + 1 + dBufferSize) < testVerifySize);   /* space used by ringBufferSafe and ringBufferFast */
+            assert((size_t)dBufferSize * 2 + 1 < testVerifySize);   /* space used by ringBufferSafe and ringBufferFast */
             XXH64_reset(&xxhOrig, 0);
             XXH64_reset(&xxhNewSafe, 0);
             XXH64_reset(&xxhNewFast, 0);
-            LZ4_resetStreamHC(&sHC, compressionLevel);
+            LZ4_resetStreamHC_fast(&sHC, compressionLevel);
             LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
             LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
@@ -1246,6 +1306,7 @@ static void FUZ_unitTests(int compressionLevel)
 
             compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
             FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
+            FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
 
             result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, ringBufferSafe + dNext, compressedSize, messageSize);
             FUZ_CHECKTEST(result!=messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed");
@@ -1263,7 +1324,8 @@ static void FUZ_unitTests(int compressionLevel)
 
             /* prepare second message */
             dNext += messageSize;
-            totalMessageSize += messageSize;
+            assert(messageSize >= 0);
+            totalMessageSize += (unsigned)messageSize;
             messageSize = maxMessageSize;
             iNext = BSIZE1+1;
             assert(BSIZE1 >= 65535);
@@ -1277,6 +1339,7 @@ static void FUZ_unitTests(int compressionLevel)
 
                 compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
                 FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
+                FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean");
                 DISPLAYLEVEL(5, "compressed %i bytes to %i bytes \n", messageSize, compressedSize);
 
                 /* test LZ4_decompress_safe_continue */
diff --git a/tests/test_custom_block_sizes.sh b/tests/test_custom_block_sizes.sh
new file mode 100755
index 0000000..aba6733
--- /dev/null
+++ b/tests/test_custom_block_sizes.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env sh
+# Round-trip test for lz4's custom block size option (-B<bytes>).
+#
+# Compresses two generated files with a range of -B values, concatenates the
+# two resulting .lz4 files, and validates the result with checkFrame. Sizes are
+# grouped by the -b value (4..7) handed to checkFrame. Tool paths are relative,
+# so the script must be run from the tests/ directory.
+#
+# Exit status: 0 when every check passed; 1 when a checkFrame validation failed
+# or -B31 was unexpectedly accepted. Because of `set -e`, a failing datagen or
+# lz4 compression run aborts the script immediately.
+set -e
+
+LZ4=../lz4
+CHECKFRAME=./checkFrame
+DATAGEN=./datagen
+
+failures=""
+
+TMPFILE=/tmp/test_custom_block_sizes.$$
+TMPFILE1=/tmp/test_custom_block_sizes1.$$
+TMPFILE2=/tmp/test_custom_block_sizes2.$$
+
+# Remove the temporary files on exit, including early exits caused by `set -e`.
+cleanup() {
+  rm -f "$TMPFILE.lz4" "$TMPFILE1" "$TMPFILE1.lz4" "$TMPFILE2" "$TMPFILE2.lz4"
+}
+trap cleanup EXIT
+
+$DATAGEN -g12345678 > "$TMPFILE1"
+$DATAGEN -g12345678 > "$TMPFILE2"
+
+# -B31 must be rejected; the `&&` list keeps `set -e` from aborting the script
+# when lz4 fails as intended.
+echo Testing -B31
+$LZ4 -f -B31 "$TMPFILE1" && failures="31 (should fail) "
+
+for blocksize in 32 65535 65536
+do
+  echo Testing -B$blocksize
+  $LZ4 -f -B$blocksize "$TMPFILE1"
+  $LZ4 -f -B$blocksize "$TMPFILE2"
+  cat "$TMPFILE1.lz4" "$TMPFILE2.lz4" > "$TMPFILE.lz4"
+  $CHECKFRAME -B$blocksize -b4 "$TMPFILE.lz4" || failures="$failures $blocksize "
+done
+
+for blocksize in 65537 262143 262144
+do
+  echo Testing -B$blocksize
+  $LZ4 -f -B$blocksize "$TMPFILE1"
+  $LZ4 -f -B$blocksize "$TMPFILE2"
+  cat "$TMPFILE1.lz4" "$TMPFILE2.lz4" > "$TMPFILE.lz4"
+  $CHECKFRAME -B$blocksize -b5 "$TMPFILE.lz4" || failures="$failures $blocksize "
+done
+
+for blocksize in 262145 1048575 1048576
+do
+  echo Testing -B$blocksize
+  $LZ4 -f -B$blocksize "$TMPFILE1"
+  $LZ4 -f -B$blocksize "$TMPFILE2"
+  cat "$TMPFILE1.lz4" "$TMPFILE2.lz4" > "$TMPFILE.lz4"
+  $CHECKFRAME -B$blocksize -b6 "$TMPFILE.lz4" || failures="$failures $blocksize "
+done
+
+for blocksize in 1048577 4194303 4194304
+do
+  echo Testing -B$blocksize
+  $LZ4 -f -B$blocksize "$TMPFILE1"
+  $LZ4 -f -B$blocksize "$TMPFILE2"
+  cat "$TMPFILE1.lz4" "$TMPFILE2.lz4" > "$TMPFILE.lz4"
+  $CHECKFRAME -B$blocksize -b7 "$TMPFILE.lz4" || failures="$failures $blocksize "
+done
+
+# Requested sizes above 4194304 are validated against -B4194304 -b7.
+# NOTE(review): presumably lz4 caps the block size at 4 MB -- confirm.
+for blocksize in 4194305 10485760
+do
+  echo Testing -B$blocksize
+  $LZ4 -f -B$blocksize "$TMPFILE1"
+  $LZ4 -f -B$blocksize "$TMPFILE2"
+  cat "$TMPFILE1.lz4" "$TMPFILE2.lz4" > "$TMPFILE.lz4"
+  $CHECKFRAME -B4194304 -b7 "$TMPFILE.lz4" || failures="$failures $blocksize "
+done
+
+if [ -z "$failures" ]
+then
+  echo ---- All tests passed
+  exit 0
+else
+  echo ---- The following tests had failures: $failures
+  exit 1
+fi
diff --git a/visual/VS2017/lz4.sln b/visual/VS2017/lz4.sln
index 78f223b..72e98fc 100644
--- a/visual/VS2017/lz4.sln
+++ b/visual/VS2017/lz4.sln
@@ -1,7 +1,7 @@
 Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Express 2012 for Windows Desktop
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lz4", "lz4\lz4.vcxproj", "{E30329AC-0057-4FE0-8FDA-7F650D398C4C}"
-EndProject
+# Visual Studio 15
+VisualStudioVersion = 15.0.28307.271
+MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblz4-dll", "liblz4-dll\liblz4-dll.vcxproj", "{9800039D-4AAA-43A4-BB78-FEF6F4836927}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblz4", "liblz4\liblz4.vcxproj", "{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}"
@@ -27,14 +27,6 @@ Global
 		Release|x64 = Release|x64
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|Win32.ActiveCfg = Debug|Win32
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|Win32.Build.0 = Debug|Win32
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|x64.ActiveCfg = Debug|x64
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|x64.Build.0 = Debug|x64
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|Win32.ActiveCfg = Release|Win32
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|Win32.Build.0 = Release|Win32
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|x64.ActiveCfg = Release|x64
-		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|x64.Build.0 = Release|x64
 		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|Win32.ActiveCfg = Debug|Win32
 		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|Win32.Build.0 = Debug|Win32
 		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|x64.ActiveCfg = Debug|x64
@@ -95,4 +87,7 @@ Global
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {BBC259B2-BABF-47CD-8A6A-7B8318A803AC}
+	EndGlobalSection
 EndGlobal
author	Yann Collet <Cyan4973@users.noreply.github.com>	2019-04-16 17:07:41 (GMT)
committer	GitHub <noreply@github.com>	2019-04-16 17:07:41 (GMT)
commit	f1226ac53dee1b41a801e8003bb3708a2e671d12 (patch)
tree	dd17db72ce5d38e4d1cc65e4a298b11d2d96c817
parent	01d2a721d393646384291af2f6f2f940493cd78f (diff)
parent	fc3176f6aa6b8034e0a27598c23bdda559b5cf9c (diff)
download	lz4-f1226ac53dee1b41a801e8003bb3708a2e671d12.zip lz4-f1226ac53dee1b41a801e8003bb3708a2e671d12.tar.gz lz4-f1226ac53dee1b41a801e8003bb3708a2e671d12.tar.bz2